# Natural Language Processing

## Importing the libraries

In [49]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Importing the dataset

In [50]:
dataset = pd.read_csv('fixed_testing.csv')


In [51]:
print(df)

    We are thrilled that you have chosen ABC Industries for your needs. We appreciate your trust and confidence in us and are committed to providing you with exceptional service. If you have any questions or concerns, please don't hesitate to reach out to us.  \
0    Congratulations on your recent purchase with X...                                                                                                                                                                                                                
1    I wanted to personally welcome you to the team...                                                                                                                                                                                                                
2    I wanted to take a moment to thank you for you...                                                                                                                                                             

## Cleaning the texts

In [52]:
import re
import nltk
nltk.download('stopwords')
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
corpus = []
for i in range(0, 749):
  review = re.sub('[^a-zA-Z]', ' ', dataset.iloc[:, 0][i])
  review = review.lower()
  review = review.split()
  ps = PorterStemmer()
  all_stopwords = stopwords.words('english')
  all_stopwords.remove('not')
  review = [ps.stem(word) for word in review if not word in set(all_stopwords)]
  review = ' '.join(review)
  corpus.append(review)

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [53]:
print(corpus)

['congratul recent purchas xyz co hope enjoy new product find everyth look question concern pleas feel free contact support team', 'want person welcom team def solut thrill onboard wait see amaz work produc question need help get settl pleas hesit reach', 'want take moment thank recent busi ghi corp appreci trust confid compani look forward continu work futur need anyth pleas hesit contact us', 'want take moment congratul recent promot jkl inc hard work dedic paid excit see great thing accomplish new role keep excel work', 'thank place order mno product appreci busi hope satisfi purchas anyth els pleas hesit ask', 'behalf team pqr servic want extend warm welcom commun excit join us look forward get know better question need help get start pleas hesit reach', 'want take moment express sincer gratitud recent donat stu foundat generos help us continu import work make posit impact live mani peopl thank support', 'congratul recent graduat xyz univers proud accomplish excit see futur take ke

## Creating the Bag of Words model

In [54]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

cv = CountVectorizer(max_features = 1500)
X = cv.fit_transform(corpus).toarray()

ct = ColumnTransformer(transformers=[('encoder', OneHotEncoder(), [0])], remainder='passthrough')
y = dataset.iloc[:, -1].values
y = y.reshape(-1, 1)
y = np.array(ct.fit_transform(y))


In [55]:
print(y)

[[0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 ...
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]]


## Splitting the dataset into the Training set and Test set

In [56]:
from sklearn.model_selection import train_test_split
y = np.argmax(y, axis=1)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20, random_state = 0)

## Training the Naive Bayes model on the Training set

In [57]:
from sklearn.naive_bayes import GaussianNB
classifier = GaussianNB()
classifier.fit(X_train, y_train)

## Predicting the Test set results

In [58]:
y_pred = classifier.predict(X_test)
print(np.concatenate((y_pred.reshape(len(y_pred),1), y_test.reshape(len(y_test),1)),1))

[[1 1]
 [2 2]
 [2 1]
 [2 2]
 [0 0]
 [0 0]
 [0 0]
 [2 2]
 [1 1]
 [2 2]
 [2 2]
 [2 2]
 [0 0]
 [2 2]
 [1 1]
 [1 1]
 [0 0]
 [0 2]
 [2 2]
 [2 2]
 [1 1]
 [1 1]
 [0 0]
 [1 1]
 [0 0]
 [1 1]
 [2 2]
 [2 2]
 [1 1]
 [2 2]
 [0 2]
 [2 1]
 [0 0]
 [1 2]
 [0 0]
 [2 2]
 [2 2]
 [1 1]
 [0 0]
 [0 0]
 [2 2]
 [0 0]
 [1 1]
 [0 0]
 [1 1]
 [2 2]
 [1 1]
 [0 0]
 [0 1]
 [0 0]
 [1 1]
 [0 0]
 [0 0]
 [1 1]
 [0 0]
 [0 0]
 [2 2]
 [0 0]
 [1 1]
 [1 1]
 [0 0]
 [0 2]
 [0 0]
 [0 0]
 [2 2]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [2 2]
 [0 0]
 [1 1]
 [1 1]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [1 1]
 [0 1]
 [1 1]
 [1 1]
 [0 0]
 [2 2]
 [2 2]
 [0 1]
 [0 1]
 [0 0]
 [1 2]
 [1 0]
 [0 0]
 [2 2]
 [0 0]
 [0 0]
 [2 2]
 [2 2]
 [2 2]
 [0 0]
 [1 1]
 [2 2]
 [0 0]
 [1 1]
 [2 2]
 [1 1]
 [1 1]
 [1 1]
 [0 0]
 [0 0]
 [0 0]
 [0 1]
 [0 0]
 [1 1]
 [0 0]
 [0 0]
 [2 2]
 [1 1]
 [0 0]
 [0 0]
 [1 1]
 [0 2]
 [2 2]
 [1 1]
 [2 2]
 [0 1]
 [1 1]
 [1 1]
 [1 1]
 [2 2]
 [1 1]
 [0 2]
 [1 1]
 [0 1]
 [1 1]
 [0 0]
 [2 2]
 [2 2]
 [0 0]
 [0 2]
 [0 1]
 [0 0]
 [1 1]
 [2 2]
 [2 0]
 [0 1]

## Making the Confusion Matrix

In [59]:
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(y_test, y_pred)
print(cm)
accuracy_score(y_test, y_pred)

[[52  1  1]
 [ 9 40  2]
 [ 6  3 36]]


0.8533333333333334