# Bayesian Network - Documentation

## Importing Necessary Libraries

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn import metrics

## Importing Dataset

In [2]:
msg = pd.read_csv("naivetext.csv", names=["message", "label"])
print("The dimensions of the dataset", msg.shape)

The dimensions of the dataset (18, 2)


## Splitting the Dataset

In [3]:
msg["labelnum"] = msg.label.map({"pos": 1, "neg": 0})
X = msg['message']
y = msg['labelnum']

In [4]:
X_train, X_test, y_train, y_test = train_test_split(X, y)
print(X_test.shape)
print(X_train.shape)
print(y_test.shape)
print(y_train.shape)
print("train data")
print(X_train)

(5,)
(13,)
(5,)
(13,)
train data
8                   He is my sworn enemy
13     I am sick and tired of this place
17      I went to my enemy's house today
2     I feel very good about these beers
6               I am tired of this stuff
5          I do not like this restaurant
14                  What a great holiday
15        That is a bad locality to stay
12                       I love to dance
10              This is an awesome place
4                   What an awesome view
16        We will have good fun tomorrow
0                   I love this sandwich
Name: message, dtype: object


## Preprocessing Text Data (using basic NLP)

In [5]:
count_vect = CountVectorizer()
X_train_dtm = count_vect.fit_transform(X_train)
X_test_dtm = count_vect.transform(X_test)
print(count_vect.get_feature_names())
df = pd.DataFrame(X_train_dtm.toarray(), columns=count_vect.get_feature_names())
print(df)
print(X_train_dtm)

['about', 'am', 'an', 'and', 'awesome', 'bad', 'beers', 'dance', 'do', 'enemy', 'feel', 'fun', 'good', 'great', 'have', 'he', 'holiday', 'house', 'is', 'like', 'locality', 'love', 'my', 'not', 'of', 'place', 'restaurant', 'sandwich', 'sick', 'stay', 'stuff', 'sworn', 'that', 'these', 'this', 'tired', 'to', 'today', 'tomorrow', 'very', 'view', 'we', 'went', 'what', 'will']
    about  am  an  and  awesome  bad  beers  dance  do  enemy  ...  tired  to  \
0       0   0   0    0        0    0      0      0   0      1  ...      0   0   
1       0   1   0    1        0    0      0      0   0      0  ...      1   0   
2       0   0   0    0        0    0      0      0   0      1  ...      0   1   
3       1   0   0    0        0    0      1      0   0      0  ...      0   0   
4       0   1   0    0        0    0      0      0   0      0  ...      1   0   
5       0   0   0    0        0    0      0      0   1      0  ...      0   0   
6       0   0   0    0        0    0      0      0   0    

## Multinomial Naive Bayes Model

In [6]:
clf = MultinomialNB().fit(X_train_dtm, y_train)
predicted = clf.predict(X_test_dtm)

In [7]:
print("Accuracy metrics")
print("Accuracy of the classifer is", metrics.accuracy_score(y_test, predicted)*100)
print("Confusion matrix")
print(metrics.confusion_matrix(y_test, predicted))
print("Recall and Precison ")
print(metrics.recall_score(y_test, predicted))
print('Prediction: ',metrics.precision_score(y_test, predicted)*100)

Accuracy metrics
Accuracy of the classifer is 80.0
Confusion matrix
[[3 0]
 [1 1]]
Recall and Precison 
0.5
Prediction:  100.0


## Prediction

In [15]:
docs_new = ['I like this place']
X_new_counts = count_vect.transform(docs_new)
predictednew = clf.predict(X_new_counts)
for doc, category in zip(docs_new, predictednew):
    print('%s || %s' % (doc, msg.labelnum[category]))

I like this place || 1
