In [None]:
# import libraries
import os
import random
import numpy as np
import pandas as pd
import scipy
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import roc_curve, auc
from matplotlib import pyplot as plt
from sklearn.naive_bayes import GaussianNB, MultinomialNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier


# **Load the data**

In [None]:
# Read the labelled train/test tables from Drive
test_data = pd.read_csv(filepath_or_buffer='/content/drive/MyDrive/Copy of test_data.csv')
train_data = pd.read_csv(filepath_or_buffer='/content/drive/MyDrive/Copy of train_data.csv')

In [None]:
# Load the sparse bag-of-words matrices (.npz)
bow_test_features = scipy.sparse.load_npz(file="/content/drive/MyDrive/Copy of bow_test_features.npz")
bow_training_features = scipy.sparse.load_npz(file="/content/drive/MyDrive/Copy of bow_training_features.npz")

In [None]:
# Load the sparse TF-IDF matrices built with stop words kept (.npz)
tfidf_with_stop_test_features = scipy.sparse.load_npz(file="/content/drive/MyDrive/Copy of tfidf_with_stop_test_features.npz")
tfidf_with_stop_training_features = scipy.sparse.load_npz(file="/content/drive/MyDrive/Copy of tfidf_with_stop_training_features.npz")

In [None]:
# Load the sparse TF-IDF matrices built without stop words (.npz)
tfidf_no_stop_test_features = scipy.sparse.load_npz(file="/content/drive/MyDrive/Copy of tfidf_no_stop_test_features.npz")
tfidf_no_stop_training_features = scipy.sparse.load_npz(file="/content/drive/MyDrive/Copy of tfidf_no_stop_training_features.npz")

In [None]:
# Load the Word2Vec feature arrays built with stop words kept (.npy)
word2vec_with_stop_test_features = np.load(file='/content/drive/MyDrive/Copy of word2vec_with_stop_test_features.npy')
word2vec_with_stop_training_features = np.load(file='/content/drive/MyDrive/Copy of word2vec_with_stop_training_features.npy')

In [None]:
# Load the Word2Vec feature arrays built without stop words (.npy)
# NOTE(review): the recorded SVM, Random Forest and GaussianNB runs in this notebook
# report identical accuracy for the "with stop" and "no stop" W2V features
# (83.68, 79.32 and 67.18) - possibly these files hold the same vectors as the
# "with stop" ones; TODO confirm against the feature-extraction step.
word2vec_no_stop_test_features = np.load(file='/content/drive/MyDrive/Copy of word2vec_no_stop_test_features.npy')
word2vec_no_stop_training_features = np.load(file='/content/drive/MyDrive/Copy of word2vec_no_stop_training_features.npy')

In [None]:
# Load the BERT word-level feature arrays (.npy)
bert_word_test_features = np.load(file='/content/drive/MyDrive/Copy of bert_word_test_features.npy')
bert_word_training_features = np.load(file='/content/drive/MyDrive/Copy of bert_word_training_features.npy')

In [None]:
# Load the BERT sentence-level feature arrays (.npy)
bert_sentence_test_features = np.load(file='/content/drive/MyDrive/Copy of bert_sentence_test_features.npy')
bert_sentence_training_features = np.load(file='/content/drive/MyDrive/Copy of bert_sentence_training_features.npy')

# **Classifiers**

In [None]:
# define a function to show the results
def printResult(y_pred, y_prob, y_true=None):
    """Print the accuracy (in percent) and the classification report.

    Parameters
    ----------
    y_pred : array-like
        Predicted class labels.
    y_prob : array-like
        Positive-class scores or probabilities for the same samples.
        Accepted so that existing call sites keep working; this function
        does not read it.
    y_true : array-like, optional
        Ground-truth labels. When omitted, the notebook-level
        ``test_data["sentiment"]`` column is used, as before.

    Returns
    -------
    None
        The results are written to stdout only.
    """
    # Fall back to the global test labels so existing two-argument calls behave unchanged.
    if y_true is None:
        y_true = test_data["sentiment"]
    # calculate accuracy
    acc = accuracy_score(y_true, y_pred)
    # show accuracy
    print("Accuracy: {:.2f}".format(acc*100),end='\n\n')
    # find and show classification report
    print(classification_report(y_true, y_pred))

## **SVM**

In [None]:
# Linear-kernel support vector classifier reused by the SVM experiments below
SVM_model = SVC(C=1, kernel='linear')

### **SVM with BOW**

In [None]:
# Fit the SVM on the bag-of-words training features
SVM_model.fit(X=bow_training_features, y=train_data["sentiment"])

In [None]:
# Score the test set: signed decision values, then hard labels
y_prob_bow_svm = SVM_model.decision_function(X=bow_test_features)
y_pred_bow_svm = SVM_model.predict(X=bow_test_features)

In [None]:
# Report accuracy and per-class metrics
printResult(y_pred=y_pred_bow_svm, y_prob=y_prob_bow_svm)

Accuracy: 81.94

              precision    recall  f1-score   support

           0       0.81      0.83      0.82     12500
           1       0.83      0.80      0.82     12500

    accuracy                           0.82     25000
   macro avg       0.82      0.82      0.82     25000
weighted avg       0.82      0.82      0.82     25000



### **SVM with TFIDF with stop words**

In [None]:
# Fit the SVM on the TF-IDF (stop words kept) training features
SVM_model.fit(X=tfidf_with_stop_training_features, y=train_data["sentiment"])

In [None]:
# Score the test set: signed decision values, then hard labels
y_prob_tfidf_with_stop_svm = SVM_model.decision_function(X=tfidf_with_stop_test_features)
y_pred_tfidf_with_stop_svm = SVM_model.predict(X=tfidf_with_stop_test_features)

In [None]:
# Report accuracy and per-class metrics
printResult(y_pred=y_pred_tfidf_with_stop_svm, y_prob=y_prob_tfidf_with_stop_svm)

Accuracy: 89.59

              precision    recall  f1-score   support

           0       0.90      0.90      0.90     12500
           1       0.90      0.90      0.90     12500

    accuracy                           0.90     25000
   macro avg       0.90      0.90      0.90     25000
weighted avg       0.90      0.90      0.90     25000



### **SVM with TFIDF without stop words**

In [None]:
# Fit the SVM on the TF-IDF (stop words removed) training features
SVM_model.fit(X=tfidf_no_stop_training_features, y=train_data["sentiment"])

In [None]:
# Score the test set: signed decision values, then hard labels
y_prob_tfidf_no_stop_svm = SVM_model.decision_function(X=tfidf_no_stop_test_features)
y_pred_tfidf_no_stop_svm = SVM_model.predict(X=tfidf_no_stop_test_features)

In [None]:
# Report accuracy and per-class metrics
printResult(y_pred=y_pred_tfidf_no_stop_svm, y_prob=y_prob_tfidf_no_stop_svm)

Accuracy: 87.93

              precision    recall  f1-score   support

           0       0.88      0.88      0.88     12500
           1       0.88      0.88      0.88     12500

    accuracy                           0.88     25000
   macro avg       0.88      0.88      0.88     25000
weighted avg       0.88      0.88      0.88     25000



### **SVM with W2V with stop words**

In [None]:
# Fit the SVM on the Word2Vec (stop words kept) training features
SVM_model.fit(X=word2vec_with_stop_training_features, y=train_data["sentiment"])

In [None]:
# Score the test set: signed decision values, then hard labels
y_prob_w2v_with_stop_svm = SVM_model.decision_function(X=word2vec_with_stop_test_features)
y_pred_w2v_with_stop_svm = SVM_model.predict(X=word2vec_with_stop_test_features)

In [None]:
# Report accuracy and per-class metrics
printResult(y_pred=y_pred_w2v_with_stop_svm, y_prob=y_prob_w2v_with_stop_svm)

Accuracy: 83.68

              precision    recall  f1-score   support

           0       0.84      0.84      0.84     12500
           1       0.84      0.84      0.84     12500

    accuracy                           0.84     25000
   macro avg       0.84      0.84      0.84     25000
weighted avg       0.84      0.84      0.84     25000



### **SVM with W2V without stop words**

In [None]:
# Fit the SVM on the Word2Vec (stop words removed) training features
SVM_model.fit(X=word2vec_no_stop_training_features, y=train_data["sentiment"])

In [None]:
# Score the test set: signed decision values, then hard labels
y_prob_w2v_no_stop_svm = SVM_model.decision_function(X=word2vec_no_stop_test_features)
y_pred_w2v_no_stop_svm = SVM_model.predict(X=word2vec_no_stop_test_features)

In [None]:
# Report accuracy and per-class metrics
printResult(y_pred=y_pred_w2v_no_stop_svm, y_prob=y_prob_w2v_no_stop_svm)

Accuracy: 83.68

              precision    recall  f1-score   support

           0       0.84      0.84      0.84     12500
           1       0.84      0.84      0.84     12500

    accuracy                           0.84     25000
   macro avg       0.84      0.84      0.84     25000
weighted avg       0.84      0.84      0.84     25000



### **SVM with BERT (words)**

In [None]:
# Fit the SVM on the BERT word-level training features
SVM_model.fit(X=bert_word_training_features, y=train_data["sentiment"])

In [None]:
# Score the test set: signed decision values, then hard labels
y_prob_bert_words_svm = SVM_model.decision_function(X=bert_word_test_features)
y_pred_bert_words_svm = SVM_model.predict(X=bert_word_test_features)

In [None]:
# Report accuracy and per-class metrics
printResult(y_pred=y_pred_bert_words_svm, y_prob=y_prob_bert_words_svm)

Accuracy: 87.99

              precision    recall  f1-score   support

           0       0.88      0.88      0.88     12500
           1       0.88      0.88      0.88     12500

    accuracy                           0.88     25000
   macro avg       0.88      0.88      0.88     25000
weighted avg       0.88      0.88      0.88     25000



### **SVM with BERT (sentences)**

In [None]:
# Fit the SVM on the BERT sentence-level training features
SVM_model.fit(X=bert_sentence_training_features, y=train_data["sentiment"])

In [None]:
# Score the test set: signed decision values, then hard labels
y_prob_bert_sentences_svm = SVM_model.decision_function(X=bert_sentence_test_features)
y_pred_bert_sentences_svm = SVM_model.predict(X=bert_sentence_test_features)

In [None]:
# Report accuracy and per-class metrics
printResult(y_pred=y_pred_bert_sentences_svm, y_prob=y_prob_bert_sentences_svm)

Accuracy: 90.38

              precision    recall  f1-score   support

           0       0.90      0.91      0.90     12500
           1       0.91      0.90      0.90     12500

    accuracy                           0.90     25000
   macro avg       0.90      0.90      0.90     25000
weighted avg       0.90      0.90      0.90     25000



## **Random Forest**

In [None]:
# Random forest with 100 trees and a fixed seed, reused by the experiments below
RF_model = RandomForestClassifier(random_state=0, n_estimators=100)

### **Random Forest with BOW**

In [None]:
# Fit the random forest on the bag-of-words training features
RF_model.fit(X=bow_training_features, y=train_data["sentiment"])

In [None]:
# Score the test set: class-1 probabilities, then hard labels
y_prob_bow_rf = RF_model.predict_proba(X=bow_test_features)[:, 1]
y_pred_bow_rf = RF_model.predict(X=bow_test_features)

In [None]:
# Report accuracy and per-class metrics
printResult(y_pred=y_pred_bow_rf, y_prob=y_prob_bow_rf)

Accuracy: 84.48

              precision    recall  f1-score   support

           0       0.84      0.85      0.85     12500
           1       0.85      0.84      0.84     12500

    accuracy                           0.84     25000
   macro avg       0.84      0.84      0.84     25000
weighted avg       0.84      0.84      0.84     25000



### **Random Forest with TFIDF with stop words**

In [None]:
# Fit the random forest on the TF-IDF (stop words kept) training features
RF_model.fit(X=tfidf_with_stop_training_features, y=train_data["sentiment"])

In [None]:
# Score the test set: class-1 probabilities, then hard labels
y_prob_tfidf_with_stop_rf = RF_model.predict_proba(X=tfidf_with_stop_test_features)[:, 1]
y_pred_tfidf_with_stop_rf = RF_model.predict(X=tfidf_with_stop_test_features)

In [None]:
# Report accuracy and per-class metrics
printResult(y_pred=y_pred_tfidf_with_stop_rf, y_prob=y_prob_tfidf_with_stop_rf)

Accuracy: 84.84

              precision    recall  f1-score   support

           0       0.84      0.86      0.85     12500
           1       0.86      0.83      0.85     12500

    accuracy                           0.85     25000
   macro avg       0.85      0.85      0.85     25000
weighted avg       0.85      0.85      0.85     25000



### **Random Forest with TFIDF without stop words**

In [None]:
# Fit the random forest on the TF-IDF (stop words removed) training features
RF_model.fit(X=tfidf_no_stop_training_features, y=train_data["sentiment"])

In [None]:
# Score the test set: class-1 probabilities, then hard labels
y_prob_tfidf_no_stop_rf = RF_model.predict_proba(X=tfidf_no_stop_test_features)[:, 1]
y_pred_tfidf_no_stop_rf = RF_model.predict(X=tfidf_no_stop_test_features)

In [None]:
# Report accuracy and per-class metrics
printResult(y_pred=y_pred_tfidf_no_stop_rf, y_prob=y_prob_tfidf_no_stop_rf)

Accuracy: 85.10

              precision    recall  f1-score   support

           0       0.84      0.86      0.85     12500
           1       0.86      0.84      0.85     12500

    accuracy                           0.85     25000
   macro avg       0.85      0.85      0.85     25000
weighted avg       0.85      0.85      0.85     25000



### **Random Forest with W2V with stop words**

In [None]:
# Fit the random forest on the Word2Vec (stop words kept) training features
RF_model.fit(X=word2vec_with_stop_training_features, y=train_data["sentiment"])

In [None]:
# Score the test set: class-1 probabilities, then hard labels
y_prob_w2v_with_stop_rf = RF_model.predict_proba(X=word2vec_with_stop_test_features)[:, 1]
y_pred_w2v_with_stop_rf = RF_model.predict(X=word2vec_with_stop_test_features)

In [None]:
# Report accuracy and per-class metrics
printResult(y_pred=y_pred_w2v_with_stop_rf, y_prob=y_prob_w2v_with_stop_rf)

Accuracy: 79.32

              precision    recall  f1-score   support

           0       0.79      0.79      0.79     12500
           1       0.79      0.79      0.79     12500

    accuracy                           0.79     25000
   macro avg       0.79      0.79      0.79     25000
weighted avg       0.79      0.79      0.79     25000



### **Random Forest with W2V without stop words**

In [None]:
# Fit the random forest on the Word2Vec (stop words removed) training features
RF_model.fit(X=word2vec_no_stop_training_features, y=train_data["sentiment"])

In [None]:
# Score the test set: class-1 probabilities, then hard labels
y_prob_w2v_no_stop_rf = RF_model.predict_proba(X=word2vec_no_stop_test_features)[:, 1]
y_pred_w2v_no_stop_rf = RF_model.predict(X=word2vec_no_stop_test_features)

In [None]:
# Report accuracy and per-class metrics
printResult(y_pred=y_pred_w2v_no_stop_rf, y_prob=y_prob_w2v_no_stop_rf)

Accuracy: 79.32

              precision    recall  f1-score   support

           0       0.79      0.79      0.79     12500
           1       0.79      0.79      0.79     12500

    accuracy                           0.79     25000
   macro avg       0.79      0.79      0.79     25000
weighted avg       0.79      0.79      0.79     25000



### **Random Forest with BERT (words)**

In [None]:
# Fit the random forest on the BERT word-level training features
RF_model.fit(X=bert_word_training_features, y=train_data["sentiment"])

In [None]:
# Score the test set: class-1 probabilities, then hard labels
y_prob_bert_words_rf = RF_model.predict_proba(X=bert_word_test_features)[:, 1]
y_pred_bert_words_rf = RF_model.predict(X=bert_word_test_features)

In [None]:
# Report accuracy and per-class metrics
printResult(y_pred=y_pred_bert_words_rf, y_prob=y_prob_bert_words_rf)

Accuracy: 81.17

              precision    recall  f1-score   support

           0       0.80      0.83      0.81     12500
           1       0.82      0.80      0.81     12500

    accuracy                           0.81     25000
   macro avg       0.81      0.81      0.81     25000
weighted avg       0.81      0.81      0.81     25000



### **Random Forest with BERT (sentences)**

In [None]:
# Fit the random forest on the BERT sentence-level training features
RF_model.fit(X=bert_sentence_training_features, y=train_data["sentiment"])

In [None]:
# Score the test set: class-1 probabilities, then hard labels
y_prob_bert_sentences_rf = RF_model.predict_proba(X=bert_sentence_test_features)[:, 1]
y_pred_bert_sentences_rf = RF_model.predict(X=bert_sentence_test_features)

In [None]:
# Report accuracy and per-class metrics
printResult(y_pred=y_pred_bert_sentences_rf, y_prob=y_prob_bert_sentences_rf)

Accuracy: 86.22

              precision    recall  f1-score   support

           0       0.86      0.86      0.86     12500
           1       0.86      0.86      0.86     12500

    accuracy                           0.86     25000
   macro avg       0.86      0.86      0.86     25000
weighted avg       0.86      0.86      0.86     25000



## **Decision Tree**

In [None]:
# Define the Decision Tree Classifier model.
# random_state is fixed (same seed as RF_model) so that the tree, and therefore
# the reported scores, are reproducible from one run to the next.
DT_model = DecisionTreeClassifier(random_state=0)

### **Decision Tree with BOW**

In [None]:
# Fit the decision tree on the bag-of-words training features
DT_model.fit(X=bow_training_features, y=train_data["sentiment"])

In [None]:
# Score the test set: class-1 probabilities, then hard labels
y_prob_bow_dt = DT_model.predict_proba(X=bow_test_features)[:, 1]
y_pred_bow_dt = DT_model.predict(X=bow_test_features)

In [None]:
# Report accuracy and per-class metrics
printResult(y_pred=y_pred_bow_dt, y_prob=y_prob_bow_dt)

Accuracy: 72.51

              precision    recall  f1-score   support

           0       0.72      0.73      0.73     12500
           1       0.73      0.72      0.73     12500

    accuracy                           0.73     25000
   macro avg       0.73      0.73      0.73     25000
weighted avg       0.73      0.73      0.73     25000



### **Decision Tree with TFIDF with stop words**

In [None]:
# Fit the decision tree on the TF-IDF (stop words kept) training features
DT_model.fit(X=tfidf_with_stop_training_features, y=train_data["sentiment"])

In [None]:
# Score the test set: class-1 probabilities, then hard labels
y_prob_tfidf_with_stop_dt = DT_model.predict_proba(X=tfidf_with_stop_test_features)[:, 1]
y_pred_tfidf_with_stop_dt = DT_model.predict(X=tfidf_with_stop_test_features)

In [None]:
# Report accuracy and per-class metrics
printResult(y_pred=y_pred_tfidf_with_stop_dt, y_prob=y_prob_tfidf_with_stop_dt)

Accuracy: 70.36

              precision    recall  f1-score   support

           0       0.71      0.70      0.70     12500
           1       0.70      0.71      0.71     12500

    accuracy                           0.70     25000
   macro avg       0.70      0.70      0.70     25000
weighted avg       0.70      0.70      0.70     25000



### **Decision Tree with TFIDF without stop words**

In [None]:
# Fit the decision tree on the TF-IDF (stop words removed) training features
DT_model.fit(X=tfidf_no_stop_training_features, y=train_data["sentiment"])

In [None]:
# Score the test set: class-1 probabilities, then hard labels
y_prob_tfidf_no_stop_dt = DT_model.predict_proba(X=tfidf_no_stop_test_features)[:, 1]
y_pred_tfidf_no_stop_dt = DT_model.predict(X=tfidf_no_stop_test_features)

In [None]:
# Report accuracy and per-class metrics
printResult(y_pred=y_pred_tfidf_no_stop_dt, y_prob=y_prob_tfidf_no_stop_dt)

Accuracy: 71.44

              precision    recall  f1-score   support

           0       0.71      0.72      0.72     12500
           1       0.72      0.71      0.71     12500

    accuracy                           0.71     25000
   macro avg       0.71      0.71      0.71     25000
weighted avg       0.71      0.71      0.71     25000



### **Decision Tree with W2V with stop words**

In [None]:
# Fit the decision tree on the Word2Vec (stop words kept) training features
DT_model.fit(X=word2vec_with_stop_training_features, y=train_data["sentiment"])

In [None]:
# Score the test set: class-1 probabilities, then hard labels
y_prob_w2v_with_stop_dt = DT_model.predict_proba(X=word2vec_with_stop_test_features)[:, 1]
y_pred_w2v_with_stop_dt = DT_model.predict(X=word2vec_with_stop_test_features)

In [None]:
# Report accuracy and per-class metrics
printResult(y_pred=y_pred_w2v_with_stop_dt, y_prob=y_prob_w2v_with_stop_dt)

Accuracy: 67.20

              precision    recall  f1-score   support

           0       0.67      0.68      0.68     12500
           1       0.68      0.66      0.67     12500

    accuracy                           0.67     25000
   macro avg       0.67      0.67      0.67     25000
weighted avg       0.67      0.67      0.67     25000



### **Decision Tree with W2V without stop words**

In [None]:
# Fit the decision tree on the Word2Vec (stop words removed) training features
DT_model.fit(X=word2vec_no_stop_training_features, y=train_data["sentiment"])

In [None]:
# Score the test set: class-1 probabilities, then hard labels
y_prob_w2v_no_stop_dt = DT_model.predict_proba(X=word2vec_no_stop_test_features)[:, 1]
y_pred_w2v_no_stop_dt = DT_model.predict(X=word2vec_no_stop_test_features)

In [None]:
# Report accuracy and per-class metrics
printResult(y_pred=y_pred_w2v_no_stop_dt, y_prob=y_prob_w2v_no_stop_dt)

Accuracy: 67.07

              precision    recall  f1-score   support

           0       0.67      0.68      0.67     12500
           1       0.67      0.66      0.67     12500

    accuracy                           0.67     25000
   macro avg       0.67      0.67      0.67     25000
weighted avg       0.67      0.67      0.67     25000



### **Decision Tree with BERT (words)**

In [None]:
# Fit the decision tree on the BERT word-level training features
DT_model.fit(X=bert_word_training_features, y=train_data["sentiment"])

In [None]:
# Score the test set: class-1 probabilities, then hard labels
y_prob_bert_words_dt = DT_model.predict_proba(X=bert_word_test_features)[:, 1]
y_pred_bert_words_dt = DT_model.predict(X=bert_word_test_features)

In [None]:
# Report accuracy and per-class metrics
printResult(y_pred=y_pred_bert_words_dt, y_prob=y_prob_bert_words_dt)

Accuracy: 67.20

              precision    recall  f1-score   support

           0       0.67      0.67      0.67     12500
           1       0.67      0.67      0.67     12500

    accuracy                           0.67     25000
   macro avg       0.67      0.67      0.67     25000
weighted avg       0.67      0.67      0.67     25000



### **Decision Tree with BERT (sentences)**

In [None]:
# Fit the decision tree on the BERT sentence-level training features
DT_model.fit(X=bert_sentence_training_features, y=train_data["sentiment"])

In [None]:
# Score the test set: class-1 probabilities, then hard labels
y_prob_bert_sentences_dt = DT_model.predict_proba(X=bert_sentence_test_features)[:, 1]
y_pred_bert_sentences_dt = DT_model.predict(X=bert_sentence_test_features)

In [None]:
# Report accuracy and per-class metrics
printResult(y_pred=y_pred_bert_sentences_dt, y_prob=y_prob_bert_sentences_dt)

Accuracy: 78.47

              precision    recall  f1-score   support

           0       0.78      0.78      0.78     12500
           1       0.78      0.78      0.78     12500

    accuracy                           0.78     25000
   macro avg       0.78      0.78      0.78     25000
weighted avg       0.78      0.78      0.78     25000



## **Naive Bayes**

In [None]:
# Define the Naive Bayes model with a multinomial distribution.
# This instance is used for the BOW and TF-IDF feature sets; NB_model is later
# rebound to GaussianNB for the W2V and BERT feature sets.
NB_model = MultinomialNB()

### **Naive Bayes with BOW**

In [None]:
# Train the model.
# The sparse BOW matrix is passed as-is (like the TF-IDF cells do): MultinomialNB
# accepts scipy sparse input, so converting it to a dense array only costs memory.
NB_model.fit(bow_training_features, train_data["sentiment"])

In [None]:
# Evaluate the model.
# The sparse test matrix is used directly; the previous version densified it
# twice (once per call), which is unnecessary for MultinomialNB.
y_pred_bow_nb = NB_model.predict(bow_test_features)
y_prob_bow_nb = NB_model.predict_proba(bow_test_features)[:,1]

In [None]:
# Report accuracy and per-class metrics
printResult(y_pred=y_pred_bow_nb, y_prob=y_prob_bow_nb)

Accuracy: 81.30

              precision    recall  f1-score   support

           0       0.78      0.87      0.82     12500
           1       0.85      0.75      0.80     12500

    accuracy                           0.81     25000
   macro avg       0.82      0.81      0.81     25000
weighted avg       0.82      0.81      0.81     25000



### **Naive Bayes with TFIDF with stop words**

In [None]:
# Fit Naive Bayes on the TF-IDF (stop words kept) training features
NB_model.fit(X=tfidf_with_stop_training_features, y=train_data["sentiment"])

In [None]:
# Score the test set: class-1 probabilities, then hard labels
y_prob_tfidf_with_stop_nb = NB_model.predict_proba(X=tfidf_with_stop_test_features)[:, 1]
y_pred_tfidf_with_stop_nb = NB_model.predict(X=tfidf_with_stop_test_features)

In [None]:
# Report accuracy and per-class metrics
printResult(y_pred=y_pred_tfidf_with_stop_nb, y_prob=y_prob_tfidf_with_stop_nb)

Accuracy: 86.31

              precision    recall  f1-score   support

           0       0.86      0.87      0.86     12500
           1       0.87      0.86      0.86     12500

    accuracy                           0.86     25000
   macro avg       0.86      0.86      0.86     25000
weighted avg       0.86      0.86      0.86     25000



### **Naive Bayes with TFIDF without stop words**

In [None]:
# Fit Naive Bayes on the TF-IDF (stop words removed) training features
NB_model.fit(X=tfidf_no_stop_training_features, y=train_data["sentiment"])

In [None]:
# Score the test set: class-1 probabilities, then hard labels
y_prob_tfidf_no_stop_nb = NB_model.predict_proba(X=tfidf_no_stop_test_features)[:, 1]
y_pred_tfidf_no_stop_nb = NB_model.predict(X=tfidf_no_stop_test_features)

In [None]:
# Report accuracy and per-class metrics
printResult(y_pred=y_pred_tfidf_no_stop_nb, y_prob=y_prob_tfidf_no_stop_nb)

Accuracy: 84.58

              precision    recall  f1-score   support

           0       0.83      0.86      0.85     12500
           1       0.86      0.83      0.84     12500

    accuracy                           0.85     25000
   macro avg       0.85      0.85      0.85     25000
weighted avg       0.85      0.85      0.85     25000



### **Naive Bayes with W2V with stop words**

In [None]:
# Define the Naive Bayes model with a Gaussian distribution.
# This rebinds NB_model (previously a MultinomialNB instance) for the W2V and
# BERT feature sets that follow.
NB_model = GaussianNB()

In [None]:
# Fit Naive Bayes on the Word2Vec (stop words kept) training features
NB_model.fit(X=word2vec_with_stop_training_features, y=train_data["sentiment"])

In [None]:
# Score the test set: class-1 probabilities, then hard labels
y_prob_w2v_with_stop_nb = NB_model.predict_proba(X=word2vec_with_stop_test_features)[:, 1]
y_pred_w2v_with_stop_nb = NB_model.predict(X=word2vec_with_stop_test_features)

In [None]:
# Report accuracy and per-class metrics
printResult(y_pred=y_pred_w2v_with_stop_nb, y_prob=y_prob_w2v_with_stop_nb)

Accuracy: 67.18

              precision    recall  f1-score   support

           0       0.66      0.72      0.69     12500
           1       0.69      0.62      0.65     12500

    accuracy                           0.67     25000
   macro avg       0.67      0.67      0.67     25000
weighted avg       0.67      0.67      0.67     25000



### **Naive Bayes with W2V without stop words**

In [None]:
# Fit Naive Bayes on the Word2Vec (stop words removed) training features
NB_model.fit(X=word2vec_no_stop_training_features, y=train_data["sentiment"])

In [None]:
# Score the test set: class-1 probabilities, then hard labels
y_prob_w2v_no_stop_nb = NB_model.predict_proba(X=word2vec_no_stop_test_features)[:, 1]
y_pred_w2v_no_stop_nb = NB_model.predict(X=word2vec_no_stop_test_features)

In [None]:
# Report accuracy and per-class metrics
printResult(y_pred=y_pred_w2v_no_stop_nb, y_prob=y_prob_w2v_no_stop_nb)

Accuracy: 67.18

              precision    recall  f1-score   support

           0       0.66      0.72      0.69     12500
           1       0.69      0.62      0.65     12500

    accuracy                           0.67     25000
   macro avg       0.67      0.67      0.67     25000
weighted avg       0.67      0.67      0.67     25000



### **Naive Bayes with BERT (words)**

In [None]:
# Fit Naive Bayes on the BERT word-level training features
NB_model.fit(X=bert_word_training_features, y=train_data["sentiment"])

In [None]:
# Score the test set: class-1 probabilities, then hard labels
y_prob_bert_words_nb = NB_model.predict_proba(X=bert_word_test_features)[:, 1]
y_pred_bert_words_nb = NB_model.predict(X=bert_word_test_features)

In [None]:
# Report accuracy and per-class metrics
printResult(y_pred=y_pred_bert_words_nb, y_prob=y_prob_bert_words_nb)

Accuracy: 55.21

              precision    recall  f1-score   support

           0       0.54      0.75      0.63     12500
           1       0.59      0.36      0.44     12500

    accuracy                           0.55     25000
   macro avg       0.56      0.55      0.53     25000
weighted avg       0.56      0.55      0.53     25000



### **Naive Bayes with BERT (sentences)**

In [None]:
# Fit Naive Bayes on the BERT sentence-level training features
NB_model.fit(X=bert_sentence_training_features, y=train_data["sentiment"])

In [None]:
# Score the test set: class-1 probabilities, then hard labels
y_prob_bert_sentences_nb = NB_model.predict_proba(X=bert_sentence_test_features)[:, 1]
y_pred_bert_sentences_nb = NB_model.predict(X=bert_sentence_test_features)

In [None]:
# Report accuracy and per-class metrics
printResult(y_pred=y_pred_bert_sentences_nb, y_prob=y_prob_bert_sentences_nb)

Accuracy: 84.22

              precision    recall  f1-score   support

           0       0.83      0.86      0.85     12500
           1       0.86      0.82      0.84     12500

    accuracy                           0.84     25000
   macro avg       0.84      0.84      0.84     25000
weighted avg       0.84      0.84      0.84     25000



## **MLP**

In [None]:
# Define the MLP model: two hidden layers (5 and 2 units), Adam solver,
# L2 penalty alpha=2e-3, at most 400 iterations.
# random_state is fixed (same seed as RF_model) so that reruns start from the
# same initial weights and report the same scores.
MLP_model = MLPClassifier(solver='adam', alpha=2e-3, hidden_layer_sizes=(5, 2), max_iter=400, random_state=0)

### **MLP with BOW**

In [None]:
# Fit the MLP on the bag-of-words training features
MLP_model.fit(X=bow_training_features, y=train_data["sentiment"])

In [None]:
# Score the test set: class-1 probabilities, then hard labels
y_prob_bow_mlp = MLP_model.predict_proba(X=bow_test_features)[:, 1]
y_pred_bow_mlp = MLP_model.predict(X=bow_test_features)

In [None]:
# Report accuracy and per-class metrics
printResult(y_pred=y_pred_bow_mlp, y_prob=y_prob_bow_mlp)

Accuracy: 84.04

              precision    recall  f1-score   support

           0       0.82      0.88      0.85     12500
           1       0.87      0.80      0.83     12500

    accuracy                           0.84     25000
   macro avg       0.84      0.84      0.84     25000
weighted avg       0.84      0.84      0.84     25000



### **MLP with TFIDF with stop words**

In [None]:
# Fit the MLP on the TF-IDF (stop words kept) training features
MLP_model.fit(X=tfidf_with_stop_training_features, y=train_data["sentiment"])

In [None]:
# Score the test set: class-1 probabilities, then hard labels
y_prob_tfidf_with_stop_mlp = MLP_model.predict_proba(X=tfidf_with_stop_test_features)[:, 1]
y_pred_tfidf_with_stop_mlp = MLP_model.predict(X=tfidf_with_stop_test_features)

In [None]:
# Report accuracy and per-class metrics
printResult(y_pred=y_pred_tfidf_with_stop_mlp, y_prob=y_prob_tfidf_with_stop_mlp)

Accuracy: 87.01

              precision    recall  f1-score   support

           0       0.86      0.88      0.87     12500
           1       0.88      0.86      0.87     12500

    accuracy                           0.87     25000
   macro avg       0.87      0.87      0.87     25000
weighted avg       0.87      0.87      0.87     25000



### **MLP with TFIDF without stop words**

In [None]:
# Fit the MLP on the TF-IDF (stop words removed) training features
MLP_model.fit(X=tfidf_no_stop_training_features, y=train_data["sentiment"])

In [None]:
# Score the test set: class-1 probabilities, then hard labels
y_prob_tfidf_no_stop_mlp = MLP_model.predict_proba(X=tfidf_no_stop_test_features)[:, 1]
y_pred_tfidf_no_stop_mlp = MLP_model.predict(X=tfidf_no_stop_test_features)

In [None]:
# Report accuracy and per-class metrics
printResult(y_pred=y_pred_tfidf_no_stop_mlp, y_prob=y_prob_tfidf_no_stop_mlp)

Accuracy: 85.00

              precision    recall  f1-score   support

           0       0.85      0.86      0.85     12500
           1       0.85      0.84      0.85     12500

    accuracy                           0.85     25000
   macro avg       0.85      0.85      0.85     25000
weighted avg       0.85      0.85      0.85     25000



### **MLP with W2V with stop words**

In [None]:
# Fit the MLP on the Word2Vec (stop words kept) training features
MLP_model.fit(X=word2vec_with_stop_training_features, y=train_data["sentiment"])

In [None]:
# Score the test set: class-1 probabilities, then hard labels
y_prob_word2vec_with_stop_mlp = MLP_model.predict_proba(X=word2vec_with_stop_test_features)[:, 1]
y_pred_word2vec_with_stop_mlp = MLP_model.predict(X=word2vec_with_stop_test_features)

In [None]:
# Report accuracy and per-class metrics
printResult(y_pred=y_pred_word2vec_with_stop_mlp, y_prob=y_prob_word2vec_with_stop_mlp)

Accuracy: 83.68

              precision    recall  f1-score   support

           0       0.83      0.84      0.84     12500
           1       0.84      0.83      0.84     12500

    accuracy                           0.84     25000
   macro avg       0.84      0.84      0.84     25000
weighted avg       0.84      0.84      0.84     25000



### **MLP with W2V without stop words**

In [None]:
# Fit the MLP on the Word2Vec (stop words removed) training features
MLP_model.fit(X=word2vec_no_stop_training_features, y=train_data["sentiment"])

In [None]:
# Score the test set: class-1 probabilities, then hard labels
y_prob_word2vec_no_stop_mlp = MLP_model.predict_proba(X=word2vec_no_stop_test_features)[:, 1]
y_pred_word2vec_no_stop_mlp = MLP_model.predict(X=word2vec_no_stop_test_features)

In [None]:
# Report accuracy and per-class metrics
printResult(y_pred=y_pred_word2vec_no_stop_mlp, y_prob=y_prob_word2vec_no_stop_mlp)

Accuracy: 83.77

              precision    recall  f1-score   support

           0       0.85      0.81      0.83     12500
           1       0.82      0.86      0.84     12500

    accuracy                           0.84     25000
   macro avg       0.84      0.84      0.84     25000
weighted avg       0.84      0.84      0.84     25000



### **MLP with BERT (words)**

In [None]:
# Fit the MLP on the BERT word-level training features
MLP_model.fit(X=bert_word_training_features, y=train_data["sentiment"])

In [None]:
# Score the test set: class-1 probabilities, then hard labels
y_prob_bert_word_mlp = MLP_model.predict_proba(X=bert_word_test_features)[:, 1]
y_pred_bert_word_mlp = MLP_model.predict(X=bert_word_test_features)

In [None]:
# Report accuracy and per-class metrics
printResult(y_pred=y_pred_bert_word_mlp, y_prob=y_prob_bert_word_mlp)

Accuracy: 87.58

              precision    recall  f1-score   support

           0       0.89      0.86      0.87     12500
           1       0.87      0.89      0.88     12500

    accuracy                           0.88     25000
   macro avg       0.88      0.88      0.88     25000
weighted avg       0.88      0.88      0.88     25000



### **MLP with BERT (sentences)**

In [None]:
# Fit the MLP on the BERT sentence-level training features
MLP_model.fit(X=bert_sentence_training_features, y=train_data["sentiment"])

In [None]:
# Score the test set: class-1 probabilities, then hard labels
y_prob_bert_sentences_mlp = MLP_model.predict_proba(X=bert_sentence_test_features)[:, 1]
y_pred_bert_sentences_mlp = MLP_model.predict(X=bert_sentence_test_features)

In [None]:
# Report accuracy and per-class metrics
printResult(y_pred=y_pred_bert_sentences_mlp, y_prob=y_prob_bert_sentences_mlp)

Accuracy: 90.28

              precision    recall  f1-score   support

           0       0.89      0.91      0.90     12500
           1       0.91      0.89      0.90     12500

    accuracy                           0.90     25000
   macro avg       0.90      0.90      0.90     25000
weighted avg       0.90      0.90      0.90     25000

