In [58]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score

In [59]:
message = pd.read_csv('sand.csv', names=['message', 'label'])
print("Total Instances of Dataset : {}".format(message.shape[0]))
message['labelNum'] = message.label.map({'pos' : 1, 'neg' : 0})

Total Instances of Dataset : 18


In [60]:
X = message.message
Y = message.labelNum
xTrain, xTest, yTrain, yTest = train_test_split(X, Y)

In [61]:
countV = CountVectorizer()
xTrainDM = countV.fit_transform(xTrain)
xTestDM = countV.transform(xTest)

In [62]:
df = pd.DataFrame(xTrainDM.toarray(), columns=countV.get_feature_names_out())
print(df[0:5])

   about  am  amazing  an  awesome  bad  beers  best  can  dance  ...  that  \
0      0   0        0   0        0    0      0     0    0      0  ...     0   
1      0   0        0   0        0    0      0     0    1      0  ...     0   
2      0   0        0   1        1    0      0     0    0      0  ...     0   
3      0   1        0   0        0    0      0     0    0      0  ...     0   
4      1   0        0   0        0    0      1     0    0      0  ...     0   

   these  this  tired  to  very  view  what  with  work  
0      0     1      0   0     0     0     0     0     0  
1      0     1      0   0     0     0     0     1     0  
2      0     0      0   0     0     1     1     0     0  
3      0     1      1   0     0     0     0     0     0  
4      1     0      0   0     1     0     0     0     0  

[5 rows x 41 columns]


In [63]:
classifier = MultinomialNB()
classifier.fit(xTrainDM, yTrain)
yPred = classifier.predict(xTestDM)

In [64]:
for doc, p in zip(xTrain, yPred):
    p = '+ve' if p == 1 else '-ve'
    print("{} -> {}".format(doc, p))

I do not like this restaurant -> +ve
I can't deal with this -> -ve
What an awesome view -> -ve
I am tired of this stuff -> -ve
I feel very good about these beers -> +ve


In [65]:
print("Accuracy Metrics : ")
print("Accuracy : {}".format(accuracy_score(yTest, yPred)))
print("Recall : {}".format(recall_score(yTest, yPred)))
print("Precision : {}".format(precision_score(yTest, yPred)))
print("Confusion : \n{}".format(confusion_matrix(yTest, yPred)))

Accuracy Metrics : 
Accuracy : 0.8
Recall : 1.0
Precision : 0.5
Confusion : 
[[3 1]
 [0 1]]
