In [None]:
from sklearn.svm import LinearSVC
from sklearn.naive_bayes import BernoulliNB
from sklearn.metrics import f1_score

# Linear SVM

In [None]:
# Hyperparameter search for a linear SVM on the IMDB binary bag-of-words vectors.
# Candidate grid: C in {2, 4, 6, 8} and tol in {1e-2, 1e-4, 1e-6}.
list_C = list(range(2, 10, 2))
list_tol = [10 ** (-2 * exponent) for exponent in range(1, 4)]
tuned_parameters = [{'C': list_C, 'tol': list_tol}]

# 3-fold cross-validated grid search scored by micro-averaged F1. The base
# estimator gets max_iter=15000 so the solver has room to reach the tighter
# tolerances in the grid.
imdb_bbow_linearsvm_clf = GridSearchCV(
    LinearSVC(max_iter=15000),
    tuned_parameters,
    scoring='f1_micro',
    cv=3,
    verbose=2,
)

In [None]:
# Run the fit on the IMDB binary training vectors and labels (both defined outside
# this section). fit() returns the fitted object, which is rebound to the same name.
imdb_bbow_linearsvm_clf = imdb_bbow_linearsvm_clf.fit(imdb_vectors_train_binary, imdb_train_output)

In [None]:
# Keep the best-scoring parameter setting under its own name so it survives
# the later rebinding of imdb_bbow_linearsvm_clf.
imdb_binary_linearsvm_best_params = imdb_bbow_linearsvm_clf.best_params_

In [None]:
# Bare expression: rendered as the cell output so the chosen parameters can be inspected.
imdb_binary_linearsvm_best_params

In [None]:
# Rebuild the final linear SVM from the parameters selected by the grid search.
# max_iter must match the value used during the search (15000): without it the
# final model falls back to the library default iteration cap, so it would be
# trained under a different budget than the one the parameters were selected
# with and may stop before converging at the tighter tolerances.
# best_params_ holds exactly the searched keys ('C' and 'tol'), so it can be
# unpacked directly into the constructor.
imdb_bbow_linearsvm_clf = LinearSVC(max_iter=15000, **imdb_binary_linearsvm_best_params)

In [None]:
# Train the tuned linear SVM on the training split, then evaluate it on the
# train, validation and test splits with micro-averaged F1.
imdb_bbow_linearsvm_clf = imdb_bbow_linearsvm_clf.fit(
    imdb_vectors_train_binary, imdb_train_output
)

# Train split.
imdb_bbow_linearsvm_train_pred = imdb_bbow_linearsvm_clf.predict(imdb_vectors_train_binary)
imdb_bbow_linearsvm_train_f1 = f1_score(
    imdb_train_output, imdb_bbow_linearsvm_train_pred, average='micro'
)

# Validation split.
imdb_bbow_linearsvm_valid_pred = imdb_bbow_linearsvm_clf.predict(imdb_vectors_valid_binary)
imdb_bbow_linearsvm_valid_f1 = f1_score(
    imdb_valid_output, imdb_bbow_linearsvm_valid_pred, average='micro'
)

# Test split.
imdb_bbow_linearsvm_test_pred = imdb_bbow_linearsvm_clf.predict(imdb_vectors_test_binary)
imdb_bbow_linearsvm_test_f1 = f1_score(
    imdb_test_output, imdb_bbow_linearsvm_test_pred, average='micro'
)

In [None]:
# Report the linear SVM's F1 score on the train, validation and test splits,
# followed by the stored best-parameter dict.
print("IMDB Binary BoW Linear SVM Train F1 Score:", imdb_bbow_linearsvm_train_f1)
print("IMDB Binary BoW Linear SVM Valid F1 Score:", imdb_bbow_linearsvm_valid_f1)
print("IMDB Binary BoW Linear SVM Test F1 Score:", imdb_bbow_linearsvm_test_f1)
print("IMDB Best Binary BoW Linear SVM Parameters: ", imdb_binary_linearsvm_best_params)

# Bernoulli Naive Bayes

In [None]:
# Sweep the BernoulliNB smoothing parameter alpha over 0.02, 0.04, ..., 1.0 and
# record the validation micro-averaged F1 obtained for each value.
alpha_vals = [step / 50 for step in range(1, 51)]
imdb_bbow_bernoullinb_valid_f1_scores = []
for val_alpha in tqdm(alpha_vals):
    # fit() returns the estimator itself, so construction and training can be chained.
    imdb_bbow_bernoullinb_clf = BernoulliNB(alpha=val_alpha).fit(
        imdb_vectors_train_binary, imdb_train_output
    )
    imdb_bbow_bernoullinb_valid_pred = imdb_bbow_bernoullinb_clf.predict(
        imdb_vectors_valid_binary
    )
    f1 = f1_score(
        imdb_valid_output, imdb_bbow_bernoullinb_valid_pred, average='micro'
    )
    imdb_bbow_bernoullinb_valid_f1_scores.append(f1)

In [None]:
# Plot validation F1 against alpha (green line) to show how smoothing affects
# the BernoulliNB classifier.
plt.plot(
    alpha_vals,
    imdb_bbow_bernoullinb_valid_f1_scores,
    color='g',
    label='F1 Scores',
)
plt.ylabel('F1 Scores')
plt.xlabel('Alpha values')
plt.legend()
plt.show()

In [None]:
# Pick the alpha with the highest validation F1. list.index() returns the first
# occurrence, so ties resolve to the earliest entry in alpha_vals.
imdb_max_index = imdb_bbow_bernoullinb_valid_f1_scores.index(max(imdb_bbow_bernoullinb_valid_f1_scores))
# Index with imdb_max_index computed on the line above. The bare name `max_index`
# is not defined in this cell: it would raise NameError on a fresh kernel, or
# silently select the wrong alpha if a stale `max_index` were left in the namespace.
imdb_best_bbow_bernoullinb_alpha = alpha_vals[imdb_max_index]

In [None]:
# Bare expression: rendered as the cell output so the selected alpha can be inspected.
imdb_best_bbow_bernoullinb_alpha

In [None]:
# Score BernoulliNB with the selected alpha on the train, validation and test splits.
# get_bbow_bernoullinb_f1 is defined outside this section; presumably its arguments
# are (fit vectors, fit labels, alpha, eval vectors, eval labels) — TODO confirm.
imdb_bbow_bernoullinb_train_f1 = get_bbow_bernoullinb_f1(
    imdb_vectors_train_binary,
    imdb_train_output,
    imdb_best_bbow_bernoullinb_alpha,
    imdb_vectors_train_binary,
    imdb_train_output,
)
imdb_bbow_bernoullinb_valid_f1 = get_bbow_bernoullinb_f1(
    imdb_vectors_train_binary,
    imdb_train_output,
    imdb_best_bbow_bernoullinb_alpha,
    imdb_vectors_valid_binary,
    imdb_valid_output,
)
imdb_bbow_bernoullinb_test_f1 = get_bbow_bernoullinb_f1(
    imdb_vectors_train_binary,
    imdb_train_output,
    imdb_best_bbow_bernoullinb_alpha,
    imdb_vectors_test_binary,
    imdb_test_output,
)

In [None]:
# Report the BernoulliNB F1 scores returned by get_bbow_bernoullinb_f1 for the
# train, validation and test splits, followed by the alpha value that was used.
print("IMDB Binary BoW BernoulliNB Train F1 Score:", imdb_bbow_bernoullinb_train_f1)
print("IMDB Binary BoW BernoulliNB Valid F1 Score:", imdb_bbow_bernoullinb_valid_f1)
print("IMDB Binary BoW BernoulliNB Test F1 Score:", imdb_bbow_bernoullinb_test_f1)
print("IMDB Best Binary BoW BernoulliNB Alpha Value: ", imdb_best_bbow_bernoullinb_alpha)