## Multinomial Event Model

In [1]:
# Toy training corpus, grouped by sentiment so the labels can be derived
# from the group sizes instead of being typed out by hand.
positive_reviews = [
    "This was an awesome movie",
    "Great movie! I liked it a lot",
    "Happy Ending! Awesome acting by the hero",
    "Loved it! truly great",
]
negative_reviews = [
    "Bad, not upto the mark",
    "Could have been better",
    "Surely a disapppointing movie",
]

# Training documents: all positive reviews first, then all negative ones.
x = positive_reviews + negative_reviews

# Labels aligned with x: 1 - Positive, 0 - Negative
y = [1] * len(positive_reviews) + [0] * len(negative_reviews)

In [2]:
# Unseen reviews used to try out the trained classifier.
# Note: phrasing these as "not good" / "not bad" gives the opposite result,
# because "not" is present in the stopwords.
x_test = [
    "I was happy & I loved the acting in the movie",
    "The movie I saw was bad",
]

### 1. Cleaning

In [3]:
import clean_text as ct

In [6]:
# Run every training and test review through the project's cleaning helper,
# keeping the original order so x_clean stays aligned with the labels in y.
x_clean = list(map(ct.getCleanReview, x))
xt_clean = list(map(ct.getCleanReview, x_test))

In [7]:
# Show the cleaned training reviews to check what the cleaning step produced.
print(x_clean)

['awesom movi', 'great movi like lot', 'happi end awesom act hero', 'love truli great', 'bad upto mark', 'could better', 'sure disapppoint movi']


### 2. Vectorization

In [8]:
from sklearn.feature_extraction.text import CountVectorizer

In [9]:
# Bag-of-words vectorizer created with default settings; it is fitted on the
# cleaned training reviews in a later cell.
cv = CountVectorizer()

In [10]:
# Learn the vocabulary from the cleaned training reviews and convert them to a
# dense count matrix (one row per review, one column per vocabulary term).
x_vec = cv.fit_transform(x_clean).toarray()

# Print the matrix followed by its shape, each on its own line.
print(x_vec, x_vec.shape, sep="\n")

[[0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0]
 [0 0 0 0 0 0 0 1 0 0 1 1 0 0 1 0 0 0]
 [1 1 0 0 0 0 1 0 1 1 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 1 0]
 [0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1]
 [0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 1 0 0]]
(7, 18)


In [12]:
# Vocabulary learned during fitting; entry i names column i of the count matrix.
print(cv.get_feature_names_out())

['act' 'awesom' 'bad' 'better' 'could' 'disapppoint' 'end' 'great' 'happi'
 'hero' 'like' 'lot' 'love' 'mark' 'movi' 'sure' 'truli' 'upto']


In [14]:
## Vectorization on the test set
# transform (not fit_transform) reuses the vocabulary already learned from the
# training reviews, so the test matrix has the same columns as x_vec.
xt_vec = cv.transform(xt_clean).toarray()

# Print the matrix followed by its shape, each on its own line.
print(xt_vec, xt_vec.shape, sep="\n")

[[1 0 0 0 0 0 0 0 1 0 0 0 1 0 1 0 0 0]
 [0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0]]
(2, 18)


### 3. Multinomial Naive Bayes

In [15]:
from sklearn.naive_bayes import MultinomialNB,BernoulliNB,GaussianNB

In [16]:
# Multinomial Naive Bayes classifier created with default hyperparameters;
# printing it shows the estimator's configuration.
mnb = MultinomialNB()
print(mnb)

MultinomialNB()


In [17]:
# Training: fit the classifier on the training count vectors (x_vec) and their
# sentiment labels (y).
mnb.fit(x_vec,y)

In [18]:
# Predict a label for each vectorized test review (1 - Positive, 0 - Negative).
mnb.predict(xt_vec)

array([1, 0])