## Multinomial Event Model

In [1]:
# Movie Review Prediction
# Toy training corpus. Each review sits next to its sentiment label
# (1 - Positive, 0 - Negative Class) so text and label cannot drift out of step.
labelled_reviews = [
    ("This was awesome an awesome movie", 1),
    ("Great movie! I liked it a lot", 1),
    ("Happy Ending! awesome acting by the hero", 1),
    ("loved it! truly great", 1),
    ("bad not upto the mark", 0),
    ("could have been better", 0),
    ("Surely a Disappointing movie", 0),
]

# Split the pairs back into the two parallel lists used by the later cells.
x = [review for review, _ in labelled_reviews]
y = [label for _, label in labelled_reviews]

In [2]:
# Unlabelled reviews that are only used at prediction time.
x_test = [
    "I was happy & happy and I loved the acting in the movie",
    "The movie I saw was bad",
]

## 1. Cleaning

In [3]:
import clean_text as ct

In [4]:
# Pass every raw review through the clean_text helper, using the same function
# for the training and the test reviews so both get identical preprocessing.
x_clean = list(map(ct.getCleanReview, x))
x_test_clean = list(map(ct.getCleanReview, x_test))

In [5]:
# Show the training reviews after cleaning (one string per review).
print(x_clean)

['awesom awesom movi', 'great movi like lot', 'happi end awesom act hero', 'love truli great', 'bad upto mark', 'could better', 'sure disappoint movi']


In [6]:
# Show the test reviews after the same cleaning step.
print(x_test_clean)

['happi happi love act movi', 'movi saw bad']


## 2. Vectorization

In [7]:
from sklearn.feature_extraction.text import CountVectorizer

In [8]:
# Bag-of-words model. ngram_range=(1,2) puts both single words and adjacent
# word pairs into the vocabulary; the default CountVectorizer() is equivalent
# to ngram_range=(1,1), i.e. single words only.
cv = CountVectorizer(ngram_range=(1,2))

# Learn the vocabulary from the training reviews and build their count matrix.
# toarray() turns the result into a dense array so it can be printed in full.
x_vec = cv.fit_transform(x_clean).toarray()
print(x_vec)
print(x_vec.shape)

[[0 0 2 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 1 1 1 0 0 0 1 1 0 0 0 0 0 0]
 [1 1 1 1 0 0 0 0 0 0 0 0 0 1 1 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 1 0 0 0 0 0 1 1 0 0]
 [0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 1]
 [0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 1 0 0 0 0]]
(7, 34)


In [9]:
# CountVectorizer.get_feature_names() was deprecated in scikit-learn 1.0 and
# removed in 1.2 in favour of get_feature_names_out(). Use whichever method this
# installation provides so the cell runs on both old and new releases.
if hasattr(cv, "get_feature_names_out"):
    feature_names = cv.get_feature_names_out()
else:
    feature_names = cv.get_feature_names()

# Column j of x_vec holds the count of feature_names[j]. Convert each name to a
# plain str so the printed list looks the same whichever method supplied it.
print([str(name) for name in feature_names])

['act', 'act hero', 'awesom', 'awesom act', 'awesom awesom', 'awesom movi', 'bad', 'bad upto', 'better', 'could', 'could better', 'disappoint', 'disappoint movi', 'end', 'end awesom', 'great', 'great movi', 'happi', 'happi end', 'hero', 'like', 'like lot', 'lot', 'love', 'love truli', 'mark', 'movi', 'movi like', 'sure', 'sure disappoint', 'truli', 'truli great', 'upto', 'upto mark']


In [10]:
## Vectorization on the test set
# transform() only (no fit): the test reviews are encoded against the vocabulary
# learned from the training reviews, so the columns line up with x_vec. Terms
# that are not in that vocabulary (e.g. "saw") get no column and are ignored.
x_test_vec = cv.transform(x_test_clean).toarray()
print(x_test_vec)
print(x_test_vec.shape)

[[1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0]]
(2, 34)


## 3. Multinomial Naive Bayes

In [11]:
from sklearn.naive_bayes import MultinomialNB,BernoulliNB,GaussianNB

In [12]:
# Multinomial event model: features are treated as term counts.
# Built with default settings; printing the estimator shows the
# hyper-parameters in use.
mnb = MultinomialNB()
print(mnb)

MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True)


In [13]:
# Training: fit on the training count matrix and the 0/1 sentiment labels
mnb.fit(x_vec,y)

MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True)

In [14]:
# Prediction: most probable class (0 or 1) for each vectorized test review
y_test_pred = mnb.predict(x_test_vec)

In [15]:
# One predicted label per test review, in the same order as x_test
print(y_test_pred)

[1 0]


In [16]:
# To get Posterior Probabilities for each class
# One row per test review; columns follow mnb.classes_, so here column 0 is
# P(Negative) and column 1 is P(Positive). Each row sums to 1.
mnb.predict_proba(x_test_vec)

array([[0.09580319, 0.90419681],
       [0.61972801, 0.38027199]])

In [17]:
# Mean accuracy on the training data itself (the model was fit on x_vec, y),
# so this shows how well the model fits, not how it does on unseen reviews.
print(mnb.score(x_vec,y))

1.0


## 4. Multivariate Bernoulli Event Model Naive Bayes

In [18]:
# Bernoulli event model: each feature is treated as present/absent rather
# than as a count. Built with default settings.
bnb = BernoulliNB()

In [19]:
# Show the hyper-parameters in use
print(bnb)

BernoulliNB(alpha=1.0, binarize=0.0, class_prior=None, fit_prior=True)


In [20]:
# Training: uses the same count matrix as MultinomialNB. BernoulliNB binarizes
# its input at the `binarize` threshold (0.0 by default), so any count greater
# than 0 is treated as 1.
bnb.fit(x_vec,y)

BernoulliNB(alpha=1.0, binarize=0.0, class_prior=None, fit_prior=True)

In [21]:
# Prediction
# NOTE: this rebinds y_test_pred, replacing the MultinomialNB predictions that
# were stored under the same name earlier in the notebook.
y_test_pred = bnb.predict(x_test_vec)

In [22]:
# BernoulliNB's predicted label for each test review, in the order of x_test
print(y_test_pred)

[1 0]


In [23]:
# To get Posterior Probabilities for each class
# One row per test review; columns follow bnb.classes_, so here column 0 is
# P(Negative) and column 1 is P(Positive). Each row sums to 1.
bnb.predict_proba(x_test_vec)

array([[0.10638608, 0.89361392],
       [0.76046221, 0.23953779]])

In [24]:
# Mean accuracy of the Bernoulli model on its own training data (x_vec, y);
# this is not an estimate of accuracy on unseen reviews.
bnb.score(x_vec,y)

1.0