## 1. Imports

In [None]:
# Mount Google Drive into the Colab runtime at /content/drive.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [None]:
import os
import numpy as np
import pandas as pd
import time

In [None]:
# Work from the project folder on the mounted drive. The path is absolute
# (Drive is mounted at /content/drive) so the cell can be re-run safely; a
# relative path only resolves while the working directory is still /content.
os.chdir('/content/drive/MyDrive/IR Final Project/CoLab')

In [None]:
!pip install transformers

Collecting transformers
  Downloading transformers-4.18.0-py3-none-any.whl (4.0 MB)
[K     |████████████████████████████████| 4.0 MB 5.5 MB/s 
Collecting pyyaml>=5.1
  Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)
[K     |████████████████████████████████| 596 kB 35.8 MB/s 
[?25hCollecting tokenizers!=0.11.3,<0.13,>=0.11.1
  Downloading tokenizers-0.12.1-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (6.6 MB)
[K     |████████████████████████████████| 6.6 MB 28.2 MB/s 
[?25hCollecting sacremoses
  Downloading sacremoses-0.0.49-py3-none-any.whl (895 kB)
[K     |████████████████████████████████| 895 kB 31.2 MB/s 
[?25hCollecting huggingface-hub<1.0,>=0.1.0
  Downloading huggingface_hub-0.5.1-py3-none-any.whl (77 kB)
[K     |████████████████████████████████| 77 kB 5.6 MB/s 
Installing collected packages: pyyaml, tokenizers, sacremoses, huggingface-hub, transformers
  Attempting uninstall: p

In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
from transformers.file_utils import is_tf_available, is_torch_available, is_torch_tpu_available
from transformers import BertTokenizerFast, BertForSequenceClassification
from transformers import Trainer, TrainingArguments

In [None]:
import nltk
import matplotlib.pyplot as plt
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
import seaborn as sns
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.feature_extraction.text import CountVectorizer


## 2. Data

In [None]:
# Load the raw data; the CSV has no header row, so columns are numbered 0..n.
df = pd.read_csv(
    "model_dev_data.csv",
    header=None,
    encoding="ISO-8859-1",
)

In [None]:
# Keep only columns 0 and 5 (they are named "Rating" and "Text" in the next cell).
df = df.loc[:, [0, 5]]

In [None]:
# Name the two retained columns: the label and the raw text.
df.columns = ["Rating","Text"]

In [None]:
# Recode the label so positive is 1 instead of 4 (0 stays 0).
# Series.map with a dict yields NaN for any value that is not a key, i.e. for
# every rating other than 0 or 4.
df["Rating"] = df["Rating"].map({4 : 1, 0:0}) #mapping positive to 1 instead of 4

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Down-sample to 100,000 rows with a fixed seed so processing time is achievable.
df = df.sample(n=100000, random_state=0)

In [None]:
# Hold out 33% of the sampled rows for testing; the seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    df["Text"],
    df["Rating"],
    test_size=0.33,
    random_state=42,
)

In [None]:
# Give every split a fresh 0..n-1 index so later positional slicing and
# label-based lookups agree.
X_train, X_test, y_train, y_test = (
    split.reset_index(drop=True)
    for split in (X_train, X_test, y_train, y_test)
)

### 2.1 Results DF

In [None]:
# Accumulates one row per timed step. The evaluation cells below add rows of
# [name, test-batch number, precision, recall, accuracy, F-score, time in minutes].
results_df = pd.DataFrame() #save out name, results, time

## 3. Cleaning Functions

In [None]:
import nltk
# Fetch the NLTK resources needed for stop-word removal, tokenizing and lemmatizing.
for nltk_resource in ('stopwords', 'punkt', 'wordnet'):
    nltk.download(nltk_resource)

 
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Unzipping corpora/wordnet.zip.


In [None]:
# Single WordNet lemmatizer instance, used by clean_data.
lemmatizer = WordNetLemmatizer()

In [None]:
# English stop words held in a set; clean_data tests every token against it.
stop_words = set(stopwords.words("english"))

In [None]:
def word_tokens(row):
    """Tokenize ``row`` with NLTK's ``word_tokenize`` and return the result.

    Thin wrapper; it is not called by any cell visible in this notebook.
    """
    return word_tokenize(row)

In [None]:
def clean_data(df):
    """Normalise every text in a Series for bag-of-words style models.

    Each entry is tokenized with ``word_tokenize``, lower-cased, stripped of
    English stop words and of tokens that are not purely alphanumeric,
    lemmatized, and re-joined with single spaces.

    Args:
        df: despite the name, a pandas Series of raw text strings.

    Returns:
        A Series of cleaned strings with the same index as the input.
    """
    def _normalise(raw_text):
        kept = []
        for token in word_tokenize(raw_text):
            lowered = token.lower()
            if lowered in stop_words:
                continue
            if lowered.isalnum():
                kept.append(lemmatizer.lemmatize(lowered))
        return ' '.join(kept)

    return df.map(_normalise)

In [None]:
# Clean the training text; X_train is overwritten with the cleaned strings.
X_train = clean_data(X_train)

In [None]:
# Clean the test text and time it; tot_time is the elapsed wall-clock time in minutes.
start = time.time()
X_test = clean_data(X_test)
end = time.time()
tot_time = (end-start)/60

In [None]:
# Carve three consecutive 500-row evaluation batches out of the test split.
# Batches 2 and 3 get a fresh 0..499 index; batch 1 already starts at 0.
X_test_1 = X_test.iloc[:500]
y_test_1 = y_test.iloc[:500]

X_test_2 = X_test.iloc[500:1000].reset_index(drop=True)
y_test_2 = y_test.iloc[500:1000].reset_index(drop=True)

X_test_3 = X_test.iloc[1000:1500].reset_index(drop=True)
y_test_3 = y_test.iloc[1000:1500].reset_index(drop=True)

In [None]:
# Log the test-set cleaning time. The row carries the same seven fields as the
# evaluation rows (name, batch, precision, recall, accuracy, F-score, minutes)
# so the time lands in the last column; the metric fields are 0 placeholders.
# pd.concat is used because DataFrame.append was removed in pandas 2.0.
cleaning_row = pd.DataFrame([["Cleaning", 0, 0, 0, 0, 0, tot_time]])
results_df = pd.concat([results_df, cleaning_row], ignore_index=True)

## 4. BERT

In [None]:
# Checkpoint used for the BERT baseline; two alternative checkpoints are left
# commented out.
# NOTE(review): max_length is not referenced by any later cell visible here.
# # model_name = "distilbert-base-uncased-finetuned-sst-2-english"
# model_name = "bert-base-uncased"
model_name = "prajjwal1/bert-mini"
max_length = 512

In [None]:
# Build a sentiment-analysis pipeline straight from the checkpoint name.
# NOTE(review): the load log below says the checkpoint's pre-training head
# weights are unused and that some BertForSequenceClassification weights were
# not initialized from it, which suggests the classification head is untrained
# - confirm before relying on its labels.
classifier_simple = pipeline('sentiment-analysis', model = model_name)

Downloading:   0%|          | 0.00/286 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/43.0M [00:00<?, ?B/s]

Some weights of the model checkpoint at prajjwal1/bert-mini were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Downloading:   0%|          | 0.00/226k [00:00<?, ?B/s]

In [None]:
# Same checkpoint, loaded explicitly as model + tokenizer and wrapped in a pipeline.
# NOTE(review): classifier_complex is not used by any later cell visible here.
model = AutoModelForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
classifier_complex = pipeline('sentiment-analysis', model=model, tokenizer=tokenizer)

Some weights of the model checkpoint at prajjwal1/bert-mini were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

In [None]:
def bert_func(text_column):
  """Classify each text with ``classifier_simple`` and return 0/1 predictions.

  Args:
      text_column: sequence of strings (list, array or pandas Series). Items
          are visited in order, so a Series works whatever its index labels are.

  Returns:
      A float array of shape (len(text_column), 1); a row is 1 when the
      pipeline's top label is "LABEL_1" and 0 otherwise.
  """
  results = np.zeros((len(text_column), 1))
  # enumerate walks the values positionally; indexing with text_column[i] is a
  # label lookup on a Series and fails when the index is not 0..n-1.
  for position, text in enumerate(text_column):
    prediction = classifier_simple(text)
    if prediction[0]['label'] == "LABEL_1":
      results[position] = 1
  return results

In [None]:
# BERT baseline, test batch 1: time the predictions and record the metrics.
start = time.time()
res = bert_func(X_test_1)
end = time.time()

accuracy = accuracy_score(y_test_1, res)
# NOTE(review): sklearn's confusion_matrix has true labels on rows and predicted
# labels on columns, so cm[0][0] counts true-0/predicted-0 and the precision and
# recall below describe label 0 (negative), not label 1.
cm = confusion_matrix(y_test_1, res)
precision = cm[0][0] / (cm[0][0] + cm[1][0])
recall = cm[0][0] / (cm[0][0] + cm[0][1])
f = 2 * (precision * recall) / (precision + recall)
tot_time = (end - start) / 60  # minutes

# DataFrame.append was removed in pandas 2.0; pd.concat adds the same row.
row = pd.DataFrame([["BERT, BERT,", 1, precision, recall, accuracy, f, tot_time]])
results_df = pd.concat([results_df, row], ignore_index=True)

In [None]:
# BERT baseline, test batch 2: time the predictions and record the metrics.
start = time.time()
res = bert_func(X_test_2)
end = time.time()

accuracy = accuracy_score(y_test_2, res)
# NOTE(review): sklearn's confusion_matrix has true labels on rows and predicted
# labels on columns, so cm[0][0] counts true-0/predicted-0 and the precision and
# recall below describe label 0 (negative), not label 1.
cm = confusion_matrix(y_test_2, res)
precision = cm[0][0] / (cm[0][0] + cm[1][0])
recall = cm[0][0] / (cm[0][0] + cm[0][1])
f = 2 * (precision * recall) / (precision + recall)
tot_time = (end - start) / 60  # minutes

# DataFrame.append was removed in pandas 2.0; pd.concat adds the same row.
row = pd.DataFrame([["BERT, BERT,", 2, precision, recall, accuracy, f, tot_time]])
results_df = pd.concat([results_df, row], ignore_index=True)

In [None]:
# BERT baseline, test batch 3: time the predictions and record the metrics.
start = time.time()
res = bert_func(X_test_3)
end = time.time()

accuracy = accuracy_score(y_test_3, res)
# NOTE(review): sklearn's confusion_matrix has true labels on rows and predicted
# labels on columns, so cm[0][0] counts true-0/predicted-0 and the precision and
# recall below describe label 0 (negative), not label 1.
cm = confusion_matrix(y_test_3, res)
precision = cm[0][0] / (cm[0][0] + cm[1][0])
recall = cm[0][0] / (cm[0][0] + cm[0][1])
f = 2 * (precision * recall) / (precision + recall)
tot_time = (end - start) / 60  # minutes

# DataFrame.append was removed in pandas 2.0; pd.concat adds the same row.
row = pd.DataFrame([["BERT, BERT,", 3, precision, recall, accuracy, f, tot_time]])
results_df = pd.concat([results_df, row], ignore_index=True)

In [None]:
# Show the most recently computed accuracy (BERT on test batch 3 when the cells run in order).
accuracy

0.496

In [None]:
# Checkpoint the results collected so far to CSV.
results_df.to_csv("Results.csv")

## 5. Training Our Own Model: TF-IDF

In [None]:
# Fit the TF-IDF vectorizer on the cleaned training text and vectorize it.
vect2 = TfidfVectorizer()

x_train_tfidf = vect2.fit_transform(X_train)

In [None]:
# Vectorize the three test batches with the vectorizer fitted on the training text.
x_test_1_tfidf, x_test_2_tfidf, x_test_3_tfidf = (
    vect2.transform(batch) for batch in (X_test_1, X_test_2, X_test_3)
)

In [None]:
#### NB: TFIDF
# Multinomial Naive Bayes trained on the TF-IDF training matrix.

clf = MultinomialNB()
clf.fit(x_train_tfidf, y_train)

MultinomialNB()

In [None]:
import pickle

In [None]:
# Persist the fitted model; the with-block closes the file handle once the
# pickle has been written.
filename = 'NB_TFIDF.sav'
with open(filename, 'wb') as model_file:
    pickle.dump(clf, model_file)

In [None]:
# Test set 1: time the NB / TF-IDF predictions and record the metrics.
start = time.time()
y_pred = clf.predict(x_test_1_tfidf)  # prediction from model
end = time.time()
accuracy = accuracy_score(y_test_1, y_pred)
print('Test Accuracy: ', accuracy)

# NOTE(review): sklearn's confusion_matrix has true labels on rows and predicted
# labels on columns, so cm[0][0] counts true-0/predicted-0 and the precision and
# recall below describe label 0 (negative), not label 1.
cm = confusion_matrix(y_test_1, y_pred)

precision = cm[0][0] / (cm[0][0] + cm[1][0])
print(f"precision is {precision} ")

recall = cm[0][0] / (cm[0][0] + cm[0][1])
print(f"recall is {recall}")

f = 2 * (precision * recall) / (precision + recall)

tot_time = (end - start) / 60  # minutes

# DataFrame.append was removed in pandas 2.0; pd.concat adds the same row.
row = pd.DataFrame([["NB, TFIDF", 1, precision, recall, accuracy, f, tot_time]])
results_df = pd.concat([results_df, row], ignore_index=True)


Test Accuracy:  0.756
precision is 0.7544483985765125 
recall is 0.8


In [None]:
# Test set 2: time the NB / TF-IDF predictions and record the metrics.
start = time.time()
y_pred = clf.predict(x_test_2_tfidf)  # prediction from model
end = time.time()
accuracy = accuracy_score(y_test_2, y_pred)
print('Test Accuracy: ', accuracy)

# NOTE(review): sklearn's confusion_matrix has true labels on rows and predicted
# labels on columns, so cm[0][0] counts true-0/predicted-0 and the precision and
# recall below describe label 0 (negative), not label 1.
cm = confusion_matrix(y_test_2, y_pred)

precision = cm[0][0] / (cm[0][0] + cm[1][0])
print(f"precision is {precision} ")

recall = cm[0][0] / (cm[0][0] + cm[0][1])
print(f"recall is {recall}")

f = 2 * (precision * recall) / (precision + recall)

tot_time = (end - start) / 60  # minutes

# DataFrame.append was removed in pandas 2.0; pd.concat adds the same row.
row = pd.DataFrame([["NB, TFIDF", 2, precision, recall, accuracy, f, tot_time]])
results_df = pd.concat([results_df, row], ignore_index=True)

Test Accuracy:  0.738
precision is 0.7056603773584905 
recall is 0.7791666666666667


In [None]:
# Test set 3: time the NB / TF-IDF predictions and record the metrics.
start = time.time()
y_pred = clf.predict(x_test_3_tfidf)  # prediction from model
end = time.time()
accuracy = accuracy_score(y_test_3, y_pred)
print('Test Accuracy: ', accuracy)

# NOTE(review): sklearn's confusion_matrix has true labels on rows and predicted
# labels on columns, so cm[0][0] counts true-0/predicted-0 and the precision and
# recall below describe label 0 (negative), not label 1.
cm = confusion_matrix(y_test_3, y_pred)

precision = cm[0][0] / (cm[0][0] + cm[1][0])
print(f"precision is {precision} ")

recall = cm[0][0] / (cm[0][0] + cm[0][1])
print(f"recall is {recall}")

f = 2 * (precision * recall) / (precision + recall)

tot_time = (end - start) / 60  # minutes

# DataFrame.append was removed in pandas 2.0; pd.concat adds the same row.
row = pd.DataFrame([["NB, TFIDF", 3, precision, recall, accuracy, f, tot_time]])
results_df = pd.concat([results_df, row], ignore_index=True)

Test Accuracy:  0.762
precision is 0.7196969696969697 
recall is 0.8085106382978723


In [None]:
#### Logistic Regression: TFIDF
# Logistic regression on the TF-IDF training matrix, allowing up to 1000 solver iterations.

clf = LogisticRegression(max_iter = 1000)
clf.fit(x_train_tfidf, y_train)

# Persist the fitted model; the with-block closes the file handle once the
# pickle has been written.
filename = 'LOG_TFIDF.sav'
with open(filename, 'wb') as model_file:
    pickle.dump(clf, model_file)

In [None]:
# Test set 1: time the logistic-regression / TF-IDF predictions and record the metrics.
start = time.time()
y_pred = clf.predict(x_test_1_tfidf)  # prediction from model
end = time.time()
accuracy = accuracy_score(y_test_1, y_pred)
print('Test Accuracy: ', accuracy)

# NOTE(review): sklearn's confusion_matrix has true labels on rows and predicted
# labels on columns, so cm[0][0] counts true-0/predicted-0 and the precision and
# recall below describe label 0 (negative), not label 1.
cm = confusion_matrix(y_test_1, y_pred)

precision = cm[0][0] / (cm[0][0] + cm[1][0])
print(f"precision is {precision} ")

recall = cm[0][0] / (cm[0][0] + cm[0][1])
print(f"recall is {recall}")

f = 2 * (precision * recall) / (precision + recall)

tot_time = (end - start) / 60  # minutes

# DataFrame.append was removed in pandas 2.0; pd.concat adds the same row.
row = pd.DataFrame([["LOG, TFIDF", 1, precision, recall, accuracy, f, tot_time]])
results_df = pd.concat([results_df, row], ignore_index=True)


Test Accuracy:  0.756
precision is 0.7782101167315175 
recall is 0.7547169811320755


In [None]:
# Test set 2: time the logistic-regression / TF-IDF predictions and record the metrics.
start = time.time()
y_pred = clf.predict(x_test_2_tfidf)  # prediction from model
end = time.time()
accuracy = accuracy_score(y_test_2, y_pred)
print('Test Accuracy: ', accuracy)

# NOTE(review): sklearn's confusion_matrix has true labels on rows and predicted
# labels on columns, so cm[0][0] counts true-0/predicted-0 and the precision and
# recall below describe label 0 (negative), not label 1.
cm = confusion_matrix(y_test_2, y_pred)

precision = cm[0][0] / (cm[0][0] + cm[1][0])
print(f"precision is {precision} ")

recall = cm[0][0] / (cm[0][0] + cm[0][1])
print(f"recall is {recall}")

f = 2 * (precision * recall) / (precision + recall)

tot_time = (end - start) / 60  # minutes

# DataFrame.append was removed in pandas 2.0; pd.concat adds the same row.
row = pd.DataFrame([["LOG, TFIDF", 2, precision, recall, accuracy, f, tot_time]])
results_df = pd.concat([results_df, row], ignore_index=True)

Test Accuracy:  0.756
precision is 0.7565217391304347 
recall is 0.725


In [None]:
# Test set 3: time the logistic-regression / TF-IDF predictions and record the metrics.
start = time.time()
y_pred = clf.predict(x_test_3_tfidf)  # prediction from model
end = time.time()
accuracy = accuracy_score(y_test_3, y_pred)
print('Test Accuracy: ', accuracy)

# NOTE(review): sklearn's confusion_matrix has true labels on rows and predicted
# labels on columns, so cm[0][0] counts true-0/predicted-0 and the precision and
# recall below describe label 0 (negative), not label 1.
cm = confusion_matrix(y_test_3, y_pred)

precision = cm[0][0] / (cm[0][0] + cm[1][0])
print(f"precision is {precision} ")

recall = cm[0][0] / (cm[0][0] + cm[0][1])
print(f"recall is {recall}")

f = 2 * (precision * recall) / (precision + recall)

tot_time = (end - start) / 60  # minutes

# DataFrame.append was removed in pandas 2.0; pd.concat adds the same row.
row = pd.DataFrame([["LOG, TFIDF", 3, precision, recall, accuracy, f, tot_time]])
results_df = pd.concat([results_df, row], ignore_index=True)

Test Accuracy:  0.806
precision is 0.8080357142857143 
recall is 0.7702127659574468


In [None]:
# Checkpoint the results to the same file as the other save cells. The name
# was previously lower-case, which creates a second file on a case-sensitive
# filesystem.
results_df.to_csv("Results.csv")

## 7. Training Our Own Model: BOW

In [None]:
# Fit the bag-of-words (raw count) vectorizer on the cleaned training text and vectorize it.
vect = CountVectorizer()
x_train_bow = vect.fit_transform(X_train)

In [None]:
# Vectorize the three test batches with the vectorizer fitted on the training text.
x_test_bow_1, x_test_bow_2, x_test_bow_3 = (
    vect.transform(batch) for batch in (X_test_1, X_test_2, X_test_3)
)

In [None]:
#### NB: BOW
# Multinomial Naive Bayes trained on the bag-of-words training matrix.

clf = MultinomialNB()
clf.fit(x_train_bow, y_train)

# Persist the fitted model; the with-block closes the file handle once the
# pickle has been written.
filename = 'NB_BOW.sav'
with open(filename, 'wb') as model_file:
    pickle.dump(clf, model_file)

In [None]:
# Test set 1: time the NB / bag-of-words predictions and record the metrics.
start = time.time()
y_pred = clf.predict(x_test_bow_1)  # prediction from model
end = time.time()
accuracy = accuracy_score(y_test_1, y_pred)
print('Test Accuracy: ', accuracy)

# NOTE(review): sklearn's confusion_matrix has true labels on rows and predicted
# labels on columns, so cm[0][0] counts true-0/predicted-0 and the precision and
# recall below describe label 0 (negative), not label 1.
cm = confusion_matrix(y_test_1, y_pred)

precision = cm[0][0] / (cm[0][0] + cm[1][0])
print(f"precision is {precision} ")

recall = cm[0][0] / (cm[0][0] + cm[0][1])
print(f"recall is {recall}")

f = 2 * (precision * recall) / (precision + recall)

tot_time = (end - start) / 60  # minutes

# DataFrame.append was removed in pandas 2.0; pd.concat adds the same row.
row = pd.DataFrame([["NB, BOW", 1, precision, recall, accuracy, f, tot_time]])
results_df = pd.concat([results_df, row], ignore_index=True)

Test Accuracy:  0.766
precision is 0.7681159420289855 
recall is 0.8


In [None]:
# Test set 2: time the NB / bag-of-words predictions and record the metrics.
start = time.time()
y_pred = clf.predict(x_test_bow_2)  # prediction from model
end = time.time()
accuracy = accuracy_score(y_test_2, y_pred)
print('Test Accuracy: ', accuracy)

# NOTE(review): sklearn's confusion_matrix has true labels on rows and predicted
# labels on columns, so cm[0][0] counts true-0/predicted-0 and the precision and
# recall below describe label 0 (negative), not label 1.
cm = confusion_matrix(y_test_2, y_pred)

precision = cm[0][0] / (cm[0][0] + cm[1][0])
print(f"precision is {precision} ")

recall = cm[0][0] / (cm[0][0] + cm[0][1])
print(f"recall is {recall}")

f = 2 * (precision * recall) / (precision + recall)

tot_time = (end - start) / 60  # minutes

# DataFrame.append was removed in pandas 2.0; pd.concat adds the same row.
row = pd.DataFrame([["NB, BOW", 2, precision, recall, accuracy, f, tot_time]])
results_df = pd.concat([results_df, row], ignore_index=True)

Test Accuracy:  0.73
precision is 0.7042801556420234 
recall is 0.7541666666666667


In [None]:
# Test set 3: time the NB / bag-of-words predictions and record the metrics.
start = time.time()
y_pred = clf.predict(x_test_bow_3)  # prediction from model
end = time.time()
accuracy = accuracy_score(y_test_3, y_pred)
print('Test Accuracy: ', accuracy)

# NOTE(review): sklearn's confusion_matrix has true labels on rows and predicted
# labels on columns, so cm[0][0] counts true-0/predicted-0 and the precision and
# recall below describe label 0 (negative), not label 1.
cm = confusion_matrix(y_test_3, y_pred)

precision = cm[0][0] / (cm[0][0] + cm[1][0])
print(f"precision is {precision} ")

recall = cm[0][0] / (cm[0][0] + cm[0][1])
print(f"recall is {recall}")

f = 2 * (precision * recall) / (precision + recall)

tot_time = (end - start) / 60  # minutes

# DataFrame.append was removed in pandas 2.0; pd.concat adds the same row.
row = pd.DataFrame([["NB, BOW", 3, precision, recall, accuracy, f, tot_time]])
results_df = pd.concat([results_df, row], ignore_index=True)

Test Accuracy:  0.764
precision is 0.7312252964426877 
recall is 0.7872340425531915


In [None]:
#### Logistic Regression: BOW
# max_iter is raised to 1000 (the value used for the TF-IDF logistic
# regression) because the default iteration limit was hit before the solver
# converged on the bag-of-words features.

clf = LogisticRegression(max_iter = 1000)
clf.fit(x_train_bow, y_train)

# Persist the fitted model; the with-block closes the file handle once the
# pickle has been written.
filename = 'LOG_BOW.sav'
with open(filename, 'wb') as model_file:
    pickle.dump(clf, model_file)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


In [None]:
# Test set 1: time the logistic-regression / bag-of-words predictions and record the metrics.
start = time.time()
y_pred = clf.predict(x_test_bow_1)  # prediction from model
end = time.time()
accuracy = accuracy_score(y_test_1, y_pred)
print('Test Accuracy: ', accuracy)

# NOTE(review): sklearn's confusion_matrix has true labels on rows and predicted
# labels on columns, so cm[0][0] counts true-0/predicted-0 and the precision and
# recall below describe label 0 (negative), not label 1.
cm = confusion_matrix(y_test_1, y_pred)

precision = cm[0][0] / (cm[0][0] + cm[1][0])
print(f"precision is {precision} ")

recall = cm[0][0] / (cm[0][0] + cm[0][1])
print(f"recall is {recall}")

f = 2 * (precision * recall) / (precision + recall)

tot_time = (end - start) / 60  # minutes

# The row is labelled "LOG, BOW" (it was "LOG, WORDVEC") because this model is
# trained on the bag-of-words features, matching the "NB, BOW" rows.
# DataFrame.append was removed in pandas 2.0; pd.concat adds the same row.
row = pd.DataFrame([["LOG, BOW", 1, precision, recall, accuracy, f, tot_time]])
results_df = pd.concat([results_df, row], ignore_index=True)

Test Accuracy:  0.754
precision is 0.7817460317460317 
recall is 0.7433962264150943


In [None]:
# Test set 2: time the logistic-regression / bag-of-words predictions and record the metrics.
start = time.time()
y_pred = clf.predict(x_test_bow_2)  # prediction from model
end = time.time()
accuracy = accuracy_score(y_test_2, y_pred)
print('Test Accuracy: ', accuracy)

# NOTE(review): sklearn's confusion_matrix has true labels on rows and predicted
# labels on columns, so cm[0][0] counts true-0/predicted-0 and the precision and
# recall below describe label 0 (negative), not label 1.
cm = confusion_matrix(y_test_2, y_pred)

precision = cm[0][0] / (cm[0][0] + cm[1][0])
print(f"precision is {precision} ")

recall = cm[0][0] / (cm[0][0] + cm[0][1])
print(f"recall is {recall}")

f = 2 * (precision * recall) / (precision + recall)

tot_time = (end - start) / 60  # minutes

# The row is labelled "LOG, BOW" (it was "LOG, WORDVEC") because this model is
# trained on the bag-of-words features, matching the "NB, BOW" rows.
# DataFrame.append was removed in pandas 2.0; pd.concat adds the same row.
row = pd.DataFrame([["LOG, BOW", 2, precision, recall, accuracy, f, tot_time]])
results_df = pd.concat([results_df, row], ignore_index=True)

Test Accuracy:  0.768
precision is 0.7743362831858407 
recall is 0.7291666666666666


In [None]:
# Test set 3: time the logistic-regression / bag-of-words predictions and record the metrics.
start = time.time()
y_pred = clf.predict(x_test_bow_3)  # prediction from model
end = time.time()
accuracy = accuracy_score(y_test_3, y_pred)
print('Test Accuracy: ', accuracy)

# NOTE(review): sklearn's confusion_matrix has true labels on rows and predicted
# labels on columns, so cm[0][0] counts true-0/predicted-0 and the precision and
# recall below describe label 0 (negative), not label 1.
cm = confusion_matrix(y_test_3, y_pred)

precision = cm[0][0] / (cm[0][0] + cm[1][0])
print(f"precision is {precision} ")

recall = cm[0][0] / (cm[0][0] + cm[0][1])
print(f"recall is {recall}")

f = 2 * (precision * recall) / (precision + recall)

tot_time = (end - start) / 60  # minutes

# The row is labelled "LOG, BOW" (it was "LOG, WORDVEC") because this model is
# trained on the bag-of-words features, matching the "NB, BOW" rows.
# DataFrame.append was removed in pandas 2.0; pd.concat adds the same row.
row = pd.DataFrame([["LOG, BOW", 3, precision, recall, accuracy, f, tot_time]])
results_df = pd.concat([results_df, row], ignore_index=True)

Test Accuracy:  0.79
precision is 0.7901785714285714 
recall is 0.7531914893617021


In [None]:
# Checkpoint the results collected so far to CSV (overwrites the earlier save).
results_df.to_csv("Results.csv")

## 8. One last Hail Mary: lexicon-based scoring

In [None]:
# Read the word list into memory, one raw line (newline included) per entry.
with open('Words.txt') as word_file:
    lines = list(word_file)

In [None]:
# Empty frame that the next cells fill with the "Word" and "Score" columns.
words = pd.DataFrame()

In [None]:
# The word is the first tab-separated field of each line.
words["Word"] = [line.split('\t')[0] for line in lines]

In [None]:
# The score is the second tab-separated field, with the trailing newline cut
# off before it is converted to an integer.
words["Score"] = [int(line.split('\t')[1].split('\n')[0]) for line in lines]

In [None]:
# Lowest score in the lexicon.
words["Score"].min()

-5

In [None]:
# Index the lexicon by word so scores can be looked up with words.loc[word, "Score"].
# Not idempotent: re-running this cell raises KeyError because 'Word' is no
# longer a column after the first run.
words =words.set_index('Word')

In [None]:
# Peek at the first cleaned test document.
X_test.iloc[0]

'taking rest long day school'

In [None]:
# Average score across the lexicon.
words["Score"].mean()

-0.5894226887363746

In [None]:
def get_score(text):
  """Sum the lexicon scores of the space-separated tokens in ``text``.

  Tokens that are not in the ``words`` lexicon contribute nothing.

  Args:
      text: a string of tokens separated by single spaces.

  Returns:
      The integer total of the scores of all tokens found in ``words``.
  """
  score = 0
  for token in text.split(' '):
    try:
      score += int(words.loc[token, "Score"])
    except (KeyError, TypeError, ValueError):
      # KeyError: the token is not in the lexicon. TypeError/ValueError: the
      # lookup did not yield a single integer (e.g. a duplicated word).
      # Catching only these keeps the best-effort skip without swallowing
      # KeyboardInterrupt or SystemExit as a bare except would.
      continue
  return score

In [None]:
def get_pred(score):
  """Turn a summed lexicon score into a class label.

  Returns 1 when ``score`` is strictly greater than zero, otherwise 0.
  """
  return 1 if score > 0 else 0

In [None]:
def Leahs_Naive_Bayes(text):
  """Predict a 0/1 label for every document in ``text`` from lexicon scores.

  Despite the name, no Naive Bayes model is fitted: each document is scored
  with ``get_score`` and thresholded with ``get_pred``.

  Args:
      text: an iterable of cleaned text strings.

  Returns:
      A list of 0/1 predictions in the same order as the input.
  """
  return [get_pred(get_score(document)) for document in text]

In [None]:
# Test set 1: time the lexicon-based predictions and record the metrics.
start = time.time()
y_pred = Leahs_Naive_Bayes(X_test_1)
end = time.time()

tot_time = (end - start) / 60  # minutes

accuracy = accuracy_score(y_test_1, y_pred)
print('Test Accuracy: ', accuracy)

# NOTE(review): sklearn's confusion_matrix has true labels on rows and predicted
# labels on columns, so cm[0][0] counts true-0/predicted-0 and the precision and
# recall below describe label 0 (negative), not label 1.
cm = confusion_matrix(y_test_1, y_pred)

precision = cm[0][0] / (cm[0][0] + cm[1][0])
print(f"precision is {precision} ")

recall = cm[0][0] / (cm[0][0] + cm[0][1])
print(f"recall is {recall}")

f = 2 * (precision * recall) / (precision + recall)

# DataFrame.append was removed in pandas 2.0; pd.concat adds the same row.
row = pd.DataFrame([["Leahs Model", 1, precision, recall, accuracy, f, tot_time]])
results_df = pd.concat([results_df, row], ignore_index=True)

Test Accuracy:  0.65
precision is 0.6451612903225806 
recall is 0.7547169811320755


In [None]:
# Test set 2: time the lexicon-based predictions and record the metrics.
start = time.time()
y_pred = Leahs_Naive_Bayes(X_test_2)
end = time.time()

tot_time = (end - start) / 60  # minutes

accuracy = accuracy_score(y_test_2, y_pred)
print('Test Accuracy: ', accuracy)

# NOTE(review): sklearn's confusion_matrix has true labels on rows and predicted
# labels on columns, so cm[0][0] counts true-0/predicted-0 and the precision and
# recall below describe label 0 (negative), not label 1.
cm = confusion_matrix(y_test_2, y_pred)

precision = cm[0][0] / (cm[0][0] + cm[1][0])
print(f"precision is {precision} ")

recall = cm[0][0] / (cm[0][0] + cm[0][1])
print(f"recall is {recall}")

f = 2 * (precision * recall) / (precision + recall)

# DataFrame.append was removed in pandas 2.0; pd.concat adds the same row.
row = pd.DataFrame([["Leahs Model", 2, precision, recall, accuracy, f, tot_time]])
results_df = pd.concat([results_df, row], ignore_index=True)

Test Accuracy:  0.656
precision is 0.6197183098591549 
recall is 0.7333333333333333


In [None]:
# Test set 3: time the lexicon-based predictions and record the metrics.
start = time.time()
y_pred = Leahs_Naive_Bayes(X_test_3)
end = time.time()

tot_time = (end - start) / 60  # minutes

accuracy = accuracy_score(y_test_3, y_pred)
print('Test Accuracy: ', accuracy)

# NOTE(review): sklearn's confusion_matrix has true labels on rows and predicted
# labels on columns, so cm[0][0] counts true-0/predicted-0 and the precision and
# recall below describe label 0 (negative), not label 1.
cm = confusion_matrix(y_test_3, y_pred)

precision = cm[0][0] / (cm[0][0] + cm[1][0])
print(f"precision is {precision} ")

recall = cm[0][0] / (cm[0][0] + cm[0][1])
print(f"recall is {recall}")

f = 2 * (precision * recall) / (precision + recall)

# DataFrame.append was removed in pandas 2.0; pd.concat adds the same row.
row = pd.DataFrame([["Leahs Model", 3, precision, recall, accuracy, f, tot_time]])
results_df = pd.concat([results_df, row], ignore_index=True)

Test Accuracy:  0.632
precision is 0.5894736842105263 
recall is 0.7148936170212766


In [None]:
# Write the complete results table to CSV (overwrites the earlier checkpoints).
results_df.to_csv("Results.csv")

In [None]:
# Duplicate of the previous cell: rewrites the same file with the same contents.
results_df.to_csv("Results.csv")