### Multinomial Event Model

In [1]:
# Tiny hand-labelled corpus of movie reviews used as the training set.
x = [
    "This was awesome an awesome movie",
    "Great movie! I liked it a lot",
    "Happy Ending! awesome acting by the hero",
    "loved it! truly great",
    "bad not upto the mark",
    "could have been better",
    "Surely a Disappointing movie",
]

# Labels aligned index-by-index with x: 1 - Positive, 0 - Negative Class.
# The first four reviews are positive, the last three negative.
y = [1] * 4 + [0] * 3

In [2]:
# Unlabelled reviews the fitted models are asked to classify.
x_test = [
    "I was happy & happy and I loved the acting in the movie",
    "The movie I saw was bad",
]

### 1. Cleaning

In [3]:
import clean_text as ct

In [4]:
# Run every raw review through the project's cleaning helper. Judging by the
# recorded output, it lower-cases, drops stop words/punctuation and stems.
x_clean = list(map(ct.getCleanReview, x))
xt_clean = list(map(ct.getCleanReview, x_test))

In [5]:
# Show the cleaned training reviews, then the cleaned test reviews, one list per line.
print(x_clean, xt_clean, sep="\n")

['awesom awesom movi', 'great movi like lot', 'happi end awesom act hero', 'love truli great', 'bad upto mark', 'could better', 'sure disappoint movi']
['happi happi love act movi', 'movi saw bad']


### 2. Vectorization

In [6]:
from sklearn.feature_extraction.text import CountVectorizer

In [7]:
# Bag-of-words counts over unigrams and bigrams; the vocabulary is learned
# from the cleaned training reviews.
cv = CountVectorizer(ngram_range=(1, 2))

# Dense count matrix: one row per training review, one column per n-gram.
x_vec = cv.fit_transform(x_clean).toarray()
print(x_vec, x_vec.shape, sep="\n")

[[0 0 2 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 1 1 1 0 0 0 1 1 0 0 0 0 0 0]
 [1 1 1 1 0 0 0 0 0 0 0 0 0 1 1 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 1 0 0 0 0 0 1 1 0 0]
 [0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 1]
 [0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 1 0 0 0 0]]
(7, 34)


In [8]:
# Show the learned vocabulary; position i here names column i of x_vec.
# `get_feature_names()` was removed in scikit-learn 1.2, so prefer
# `get_feature_names_out()` and fall back only on old installations.
if hasattr(cv, "get_feature_names_out"):
    # .tolist() turns the ndarray into plain Python strings so the printed
    # form matches the legacy list output.
    feature_names = cv.get_feature_names_out().tolist()
else:
    feature_names = cv.get_feature_names()
print(feature_names)

['act', 'act hero', 'awesom', 'awesom act', 'awesom awesom', 'awesom movi', 'bad', 'bad upto', 'better', 'could', 'could better', 'disappoint', 'disappoint movi', 'end', 'end awesom', 'great', 'great movi', 'happi', 'happi end', 'hero', 'like', 'like lot', 'lot', 'love', 'love truli', 'mark', 'movi', 'movi like', 'sure', 'sure disappoint', 'truli', 'truli great', 'upto', 'upto mark']


In [9]:
## Vectorization on the test set
# Only transform() here: the test reviews are projected onto the vocabulary
# learned from the training set, so n-grams unseen in training are dropped.
xt_vec = cv.transform(xt_clean).toarray()
print(xt_vec)

# Print the vocabulary again so the columns above can be read off directly.
# `get_feature_names()` was removed in scikit-learn 1.2, so prefer
# `get_feature_names_out()` and fall back only on old installations.
if hasattr(cv, "get_feature_names_out"):
    print(cv.get_feature_names_out().tolist())
else:
    print(cv.get_feature_names())
print(xt_vec.shape)

[[1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0]]
['act', 'act hero', 'awesom', 'awesom act', 'awesom awesom', 'awesom movi', 'bad', 'bad upto', 'better', 'could', 'could better', 'disappoint', 'disappoint movi', 'end', 'end awesom', 'great', 'great movi', 'happi', 'happi end', 'hero', 'like', 'like lot', 'lot', 'love', 'love truli', 'mark', 'movi', 'movi like', 'sure', 'sure disappoint', 'truli', 'truli great', 'upto', 'upto mark']
(2, 34)


### 3. Multinomial Naive Bayes

In [10]:
from sklearn.naive_bayes import MultinomialNB,BernoulliNB, GaussianNB

In [11]:
# Multinomial Naive Bayes works on per-feature counts, which is what the
# CountVectorizer matrices contain.
mnb = MultinomialNB()
print(mnb)

# Training
mnb.fit(x_vec, y)

# Predictions
print(mnb.predict(xt_vec))
print(mnb.predict_proba(xt_vec))  # posterior probabilities, one column per class
# Accuracy on the training data itself, not a held-out estimate.
print(mnb.score(x_vec, y))

MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True)
[1 0]
[[0.09580319 0.90419681]
 [0.61972801 0.38027199]]
1.0


## 4. Bernoulli Naive Bayes

In [37]:
# BernoulliNB expects binary features. `binarize` is the threshold it uses to
# convert non-binary input: values greater than the threshold become 1 and
# everything else becomes 0. The default threshold is 0.0, so any positive
# count is treated as "term present".
bnb = BernoulliNB(binarize=0.0)
print(bnb)

BernoulliNB(alpha=1.0, binarize=0.0, class_prior=None, fit_prior=True)


In [38]:
# Train the Bernoulli model on the training counts (binarized internally).
bnb.fit(x_vec, y)

# Posterior probabilities, predicted labels, then accuracy on the training set.
print(
    bnb.predict_proba(xt_vec),
    bnb.predict(xt_vec),
    bnb.score(x_vec, y),
    sep="\n",
)

[[0.10638608 0.89361392]
 [0.76046221 0.23953779]]
[1 0]
1.0


## 5. Gaussian Naive Bayes

In [39]:
gnb = GaussianNB()
bnb.fit(x_vec,y)
bnb.predict_proba(xt_vec)

array([[0.10638608, 0.89361392],
       [0.76046221, 0.23953779]])

In [40]:
print(bnb.predict(xt_vec))
print(bnb.score(x_vec,y))

[1 0]
1.0
