In [46]:
import pandas as pd
import numpy as np

In [47]:
train_df = pd.read_csv("movie_review_train.csv")
train_df['class'] = train_df['class'].map({'Pos': 1, 'Neg': 0})
train_df.head()

Unnamed: 0,class,text
0,1,a common complaint amongst film critics is ...
1,1,whew this film oozes energy the kind of b...
2,1,steven spielberg s amistad which is bas...
3,1,he has spent his entire life in an awful litt...
4,1,being that it is a foreign language film with...


In [48]:
train_df.shape

(1600, 2)

In [49]:
test_df = pd.read_csv("movie_review_test.csv")
test_df['class'] = test_df['class'].map({'Pos': 1, 'Neg': 0})
test_df.head()

Unnamed: 0,class,text
0,1,films adapted from comic books have had plent...
1,1,every now and then a movie comes along from a...
2,1,you ve got mail works alot better than it des...
3,1,jaws is a rare film that grabs your atte...
4,1,moviemaking is a lot like being the general m...


In [50]:
test_df.shape

(400, 2)

In [51]:
X_train = train_df['text'].values
y_train = train_df['class'].values
X_test = test_df['text'].values
y_test = test_df['class'].values

In [52]:
from sklearn.feature_extraction.text import CountVectorizer

vector = CountVectorizer(stop_words='english', min_df=.03, max_df=.8)
vector.fit(X_train)

In [53]:
len(vector.vocabulary_)

1643

In [54]:
X_train_transformed = vector.transform(X_train)

In [55]:
X_test_transformed = vector.transform(X_test)

In [62]:
test_vector_df = pd.DataFrame(
    X_test_transformed.toarray(), 
             columns=vector.get_feature_names_out()
             )

sum_entity = test_vector_df.sum(axis=1)

sum_entity[sum_entity == 0]


Series([], dtype: int64)

In [68]:
X_test_transformed.count_nonzero()

51663

In [69]:
from sklearn.naive_bayes import BernoulliNB

bnb = BernoulliNB()

# Fit model with train dataset
bnb = bnb.fit(X_train_transformed, y_train)


bnb_predict = bnb.predict(X_test_transformed)

# Test dataset
bnb_prob = bnb.predict_proba(X_test_transformed)

print(bnb_prob)

[[9.48171309e-03 9.90518287e-01]
 [6.49769619e-03 9.93502304e-01]
 [3.94902967e-01 6.05097033e-01]
 [2.04992401e-12 1.00000000e+00]
 [9.98827241e-01 1.17275875e-03]
 [1.78941734e-12 1.00000000e+00]
 [5.94773559e-07 9.99999405e-01]
 [2.02838492e-03 9.97971615e-01]
 [9.17950086e-01 8.20499136e-02]
 [9.44944057e-02 9.05505594e-01]
 [9.97389573e-04 9.99002610e-01]
 [8.55906408e-04 9.99144094e-01]
 [3.30288186e-03 9.96697118e-01]
 [2.50950625e-02 9.74904938e-01]
 [7.32820406e-05 9.99926718e-01]
 [5.14487298e-01 4.85512702e-01]
 [9.77854644e-01 2.21453565e-02]
 [2.42691437e-09 9.99999998e-01]
 [9.44024181e-03 9.90559758e-01]
 [9.91992498e-01 8.00750156e-03]
 [9.66449102e-01 3.35508975e-02]
 [4.86743623e-03 9.95132564e-01]
 [9.99484407e-01 5.15592846e-04]
 [2.34114640e-08 9.99999977e-01]
 [1.30729625e-02 9.86927037e-01]
 [8.52461780e-03 9.91475382e-01]
 [4.42578499e-01 5.57421501e-01]
 [7.63045646e-17 1.00000000e+00]
 [8.84649210e-07 9.99999115e-01]
 [3.62045689e-12 1.00000000e+00]
 [3.444493

In [70]:
def evaluate_model(y_test: np.array, y_predict: np.array):

    from sklearn import metrics

    confusion_matrix = metrics.confusion_matrix(y_test, y_predict)


    # Get values
    TN = confusion_matrix[0, 0]
    FP = confusion_matrix[0, 1]
    FN = confusion_matrix[1, 0]
    TP = confusion_matrix[1, 1]


    sensitivity = TP / float(FN + TP)
    print("sensitivity",sensitivity)

    specificity = TN / float(TN + FP)
    print("specificity",specificity)

    precision = TP / float(TP + FP)
    print("precision",precision)

    print("PRECISION SCORE :",metrics.precision_score(y_test, y_predict))
    print("RECALL SCORE :", metrics.recall_score(y_test, y_predict))
    print("F1 SCORE :",metrics.f1_score(y_test, y_predict))


In [71]:
evaluate_model(y_test=y_test, y_predict=bnb_predict)

sensitivity 0.695
specificity 0.885
precision 0.8580246913580247
PRECISION SCORE : 0.8580246913580247
RECALL SCORE : 0.695
F1 SCORE : 0.7679558011049723


In [73]:
from sklearn import metrics

bnb_accuracy_score = metrics.accuracy_score(y_test, bnb_predict)
bnb_accuracy_score

0.79

In [75]:
bnb_confusion_matrix = metrics.confusion_matrix(y_test, bnb_predict)
bnb_confusion_matrix

array([[177,  23],
       [ 61, 139]])

In [76]:
FP = bnb_confusion_matrix[0, 1]
FP

23

In [78]:
s1="abc"  

s2="def"

# Write your code below
s1_set = set([c for c in s1 if c.replace(' ', '')!=''])
s2_set = set([c for c in s2 if c.replace(' ', '')!=''])

common_letters = sorted(s1_set.intersection(s2_set))

if len(common_letters) > 0:
    print("The common letters are :")
    for c in common_letters:
        print(c)
else:
    print("There are no common letters between them")

There are no common letters between them


In [103]:
input_list=[8, 10, 14]

mn = min(input_list)   # Get the first no of the list
mx= max(input_list)    # Get the last no of the list
n= mx-mn+1
total = n*(2*mn + n-1)/2
sum_of_input = sum(input_list) 
print(int(total - sum_of_input))

45


In [104]:
n = ((max(input_list) - min(input_list))/2) + 1

n

4.0

In [107]:
total = ((min(input_list) + max(input_list)) * n)/2

total

44.0

In [108]:
total - sum(input_list)

12.0