In [113]:
# Run this cell to mount your drive to this notebook in order to read the datasets
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [114]:
import pandas as pd
import numpy as np

import warnings
warnings.filterwarnings("ignore")

## Read Dataset

In [115]:
# Put the folder path where the datasets are located
PATH = "/content/drive/MyDrive/cs445/hw1/"

In [116]:
# Read the train and test set with read_csv() method of pandas
train = pd.read_csv(PATH + "train.csv")
test = pd.read_csv(PATH + "test.csv")

### Preprocess Dataset

In [117]:
import re
import nltk
nltk.download('stopwords')
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [118]:
pd.set_option("display.max_colwidth", None)

In [119]:
train[:3]

Unnamed: 0.1,Unnamed: 0,text,label
0,0,I came here and left a review before but last time I didn't get food poisoning. Unless you want to stay up all night puking I suggest you don't come here.,1
1,1,"Had a very nice first visit here. The owner Ted, was very friendly from the start and the restaurant was busy on a Friday night the beginning of Chinese New Year. We ordered the Pu-pu platter and crab Rangoon and for dinner chef specials with seafood instead of Chinese American regulars and some fried rice. Everything was well prepared. The shrimp large vegetables crisp. The chili sauce sweet and tangy. \n\nWe will return. The owner made an effort to visit and learn our names and ask about our first visit.",4
2,2,"This is a gorgeous and very clean hotel. We had a room in the West Wing. At first it was a chore to get to the room but if you look for different features of the hotel (they also have posted signs) you will easily find your way around. Self parking was excellent and the walk to Check In was confusing at first but you'll figure it out quickly. Check In was quick. The walk to the room was enjoyable as the floors were shiny and clean. The room was in almost immaculate shape. Well decorated and very, very clean. Loved the carpet and the dark lacquered furniture and accents. Absolutely gorgeous. Bed was super comfortable and pillow just right. Towels were thick and soft. Water pressure in sink was less than satisfactory and shower pressure too. Bottom left hand drawer almost fell out when I opened it. This may be nit picking but it was mark down because of water pressure and broken drawer. Another sink would have been nice. Yes, I would stay here again.",4


In [120]:
# Define a function to perform preprocessing. This function can perform things like lowercasing, stemming, removing stopwords, etc.
from nltk import word_tokenize
nltk.download('punkt')
en_stopwords = stopwords.words('english')
from nltk.tokenize import RegexpTokenizer

def preprocess(text: str): 
    text = text.lower()
    text = " ".join(text.split()) #Remove Extra Whitespaces
    text = word_tokenize(text) 

    result = []
    for token in text: #remove stopwords
      if token not in en_stopwords:
          result.append(token)
    
    text = result
    
    #if n't, replace with not. because not is important for the dataset!!!

    tokenizer = RegexpTokenizer(r"\w+") #remove punctuations
    lst=tokenizer.tokenize(' '.join(text))

    porter = PorterStemmer() #apply stemming
    result=[]
    for word in lst:
        result.append(porter.stem(word))

    return result

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [121]:
# Apply your preprocessing function to your text fields.

train.text = train.text.apply(preprocess)
test.text = test.text.apply(preprocess)

train.shape, test.shape

((18000, 3), (2000, 3))

In [122]:
to_merge = " " 

for k in range(len(train)):
  train.text[k] = to_merge.join(train.text[k])

for m in range(len(test)):
  test.text[m] = to_merge.join(test.text[m])

In [123]:
train.head(3)

Unnamed: 0.1,Unnamed: 0,text,label
0,0,came left review last time n t get food poison unless want stay night puke suggest n t come,1
1,1,nice first visit owner ted friendli start restaur busi friday night begin chines new year order pu pu platter crab rangoon dinner chef special seafood instead chines american regular fri rice everyth well prepar shrimp larg veget crisp chili sauc sweet tangi return owner made effort visit learn name ask first visit,4
2,2,gorgeou clean hotel room west wing first chore get room look differ featur hotel also post sign easili find way around self park excel walk check confus first ll figur quickli check quick walk room enjoy floor shini clean room almost immacul shape well decor clean love carpet dark lacquer furnitur accent absolut gorgeou bed super comfort pillow right towel thick soft water pressur sink less satisfactori shower pressur bottom left hand drawer almost fell open may nit pick mark water pressur broken drawer anoth sink would nice ye would stay,4


In [124]:
train[15:20]

Unnamed: 0.1,Unnamed: 0,text,label
15,15,tucson weekend read fiamm worth visit review n t justic absolut amaz pizza itali summer better m impress liter make drive tucson pizza thank,5
16,16,staff manag awesom food so so ca n t beat view band play around 6pm friday could hear talk move outsid grouper fish taco flavor french fri good,3
17,17,roach crawl behind shoulder seat kill napkin hungri know thing crazi storm come receiv food hair waiter came back new plate walk without say word told us manag said middl hurrican bug seek go insid place dirti bewar hurrican roach,1
18,18,overpr low qualiti went hibachi first serv salad brown iceberg lettuc m kick take photo phone complet dead clear soup chicken broth soup base ew 7 yr old normal love clear soup would n t touch like ramen noodl chicken packet use freez dri mushroom chef friendli took 30 min come us otherwis dead restaur soy ginger sauc great good flavor veggi great well filet mignon weird kind soapi tast menu green tea ice cream fav mine tri order told rainbow sherbet disappoint 70 later two adult child knew d like back place like ever,2
19,19,lunch famili today aw servic first steak cook ask second ran side instead ask us brought food would like substitut wait come meal brought meal said d 15 minut rest ask manag manag trivial concern tri explain commun process n t fault told commun us first gave state flatli want n t know n t tell re go n t expect custom handl problem manag take initi fix problem free appet phone waitress took phone instead write serious peopl liabil end phone break privaci issu fail use charg anoth person appet attempt hide fact serious stay away one usual good time texa roadhous never go back one,1


In [125]:
backup = train.copy()

In [126]:
# Create your binary and multiclass datasets
binary_df = train.copy()
multiclass_df = train.copy()
test_binary_df = test.copy()
test_multiclass_df = test.copy()

# For binary dataset, get rid of the class 3 in the dataset and map class 1 and 2 to 0, and class 4 and 5 to 1
binary_df.loc[train['label'] == 1,'label'] = 0
binary_df.loc[train['label'] == 2,'label'] = 0
binary_df.loc[train['label'] == 4,'label'] = 1
binary_df.loc[train['label'] == 5,'label'] = 1
binary_df.drop(train[train['label'] == 3].index, inplace = True)

test_binary_df.loc[test['label'] == 1,'label'] = 0
test_binary_df.loc[test['label'] == 2,'label'] = 0
test_binary_df.loc[test['label'] == 4,'label'] = 1
test_binary_df.loc[test['label'] == 5,'label'] = 1
test_binary_df.drop(test[test['label'] == 3].index, inplace = True)


# For multiclass dataset, make sure your classes starts from 0 and goes until 4. (5->4, 4->3, 3->2, 2->1, 1->0)
multiclass_df.loc[train['label'] == 1,'label'] = 0
multiclass_df.loc[train['label'] == 2,'label'] = 1
multiclass_df.loc[train['label'] == 3,'label'] = 2
multiclass_df.loc[train['label'] == 4,'label'] = 3
multiclass_df.loc[train['label'] == 5,'label'] = 4

test_multiclass_df.loc[test['label'] == 1,'label'] = 0
test_multiclass_df.loc[test['label'] == 2,'label'] = 1
test_multiclass_df.loc[test['label'] == 3,'label'] = 2
test_multiclass_df.loc[test['label'] == 4,'label'] = 3
test_multiclass_df.loc[test['label'] == 5,'label'] = 4

In [127]:
binary_df[15:20]

Unnamed: 0.1,Unnamed: 0,text,label
18,18,overpr low qualiti went hibachi first serv salad brown iceberg lettuc m kick take photo phone complet dead clear soup chicken broth soup base ew 7 yr old normal love clear soup would n t touch like ramen noodl chicken packet use freez dri mushroom chef friendli took 30 min come us otherwis dead restaur soy ginger sauc great good flavor veggi great well filet mignon weird kind soapi tast menu green tea ice cream fav mine tri order told rainbow sherbet disappoint 70 later two adult child knew d like back place like ever,0
19,19,lunch famili today aw servic first steak cook ask second ran side instead ask us brought food would like substitut wait come meal brought meal said d 15 minut rest ask manag manag trivial concern tri explain commun process n t fault told commun us first gave state flatli want n t know n t tell re go n t expect custom handl problem manag take initi fix problem free appet phone waitress took phone instead write serious peopl liabil end phone break privaci issu fail use charg anoth person appet attempt hide fact serious stay away one usual good time texa roadhous never go back one,0
20,20,ye crust littl carbon mayb cook woodfir oven simpl tasti thin crust pizza tasti salad good beer wine select probabl one stl s best pizza spot servic fine satisfi experi,1
21,21,amber tri give us old fri n t cold burnt one get togeth,0
23,23,look impress french german dish new orlean pleas go luke restaur know restaur locat central busi district next hilton former mason templ build histor vibe restaur front room lot grand back room seat offset view open kitchen appar want seat peopl reserv sinc one refus seat 9 15pm 6 45pm littl back forth end wait 15 minut seat howev walk restaur back room way mani tabl empti made sens act way lot tabl still remain empti throughout meal decid start meal cocktail origin want mint julep riverbend vodka lemon juic basil syrup blueberri ginger ale recommend server rel sweet cocktail ginger ale power flavor could still tast alcohol friend choos regular starter order cup crawfish bisqu ate assum like complain sour aftertast boyfriend skip appet went straight entre alway seem find basic item menu order luke burger bacon caramel onion tomato swiss chees rave one thing tell good size friend jumbo louisiana shrimp en cocott lump crab meat roast jalapeño chees grit andouil green onion sausag love obsess mussel classic moul et frite although two simpli ingredi garlic thyme broth flavor stood tast lot bolder other lot ingredi hous made fri scrumptiou best kind even food still made room dessert first bread pud delici quit sweet also fan sauc food one n t enough also order bourbon vanilla bean creme brule superb sweet plu littl cooki top ad noth dessert anyth could left good food still give prop server amaz engag front desk sour mood littl bit first made could n t ask anyon better,1


In [128]:
multiclass_df[15:20]

Unnamed: 0.1,Unnamed: 0,text,label
15,15,tucson weekend read fiamm worth visit review n t justic absolut amaz pizza itali summer better m impress liter make drive tucson pizza thank,4
16,16,staff manag awesom food so so ca n t beat view band play around 6pm friday could hear talk move outsid grouper fish taco flavor french fri good,2
17,17,roach crawl behind shoulder seat kill napkin hungri know thing crazi storm come receiv food hair waiter came back new plate walk without say word told us manag said middl hurrican bug seek go insid place dirti bewar hurrican roach,0
18,18,overpr low qualiti went hibachi first serv salad brown iceberg lettuc m kick take photo phone complet dead clear soup chicken broth soup base ew 7 yr old normal love clear soup would n t touch like ramen noodl chicken packet use freez dri mushroom chef friendli took 30 min come us otherwis dead restaur soy ginger sauc great good flavor veggi great well filet mignon weird kind soapi tast menu green tea ice cream fav mine tri order told rainbow sherbet disappoint 70 later two adult child knew d like back place like ever,1
19,19,lunch famili today aw servic first steak cook ask second ran side instead ask us brought food would like substitut wait come meal brought meal said d 15 minut rest ask manag manag trivial concern tri explain commun process n t fault told commun us first gave state flatli want n t know n t tell re go n t expect custom handl problem manag take initi fix problem free appet phone waitress took phone instead write serious peopl liabil end phone break privaci issu fail use charg anoth person appet attempt hide fact serious stay away one usual good time texa roadhous never go back one,0


# Models

## Non-Neural Models

In [129]:
from sklearn.model_selection import GridSearchCV
from sklearn.base import TransformerMixin
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score,confusion_matrix,accuracy_score

### Naive Bayes

In [130]:
# https://scikit-learn.org/stable/modules/generated/sklearn.naive_bayes.GaussianNB.html

# Create a class for converting sparse matrix output of TfidfVectorizer to dense matrix for feeding into GaussianNB
class DenseTransformer(TransformerMixin):

    def fit(self, X, y=None, **fit_params):
        return self

    def transform(self, X, y=None, **fit_params):
        return X.todense()


# Initiate the pipeline with required components.You can use Pipeline class of sklearn -> https://scikit-learn.org/stable/modules/generated/sklearn.pipeline.Pipeline.html
# There will be three components; 1) TfidfVectorizer 2) DenseTransformer 3) Naive Bayes classifier.

pipeline = Pipeline([
     ('vectorizer', TfidfVectorizer()), 
     ('to_dense', DenseTransformer()), 
     ('nb_classifier', GaussianNB())
])


# Set the hyperparameter space that will be scanned with GridSearchCV.

parameters = {'vectorizer__min_df':[100,500,1000], 'vectorizer__ngram_range':[[1, 1],[1, 2],[1, 3]]}

### Binary

In [131]:
%%time
# Initialize and run the GridSearchCV to scan the hyperparameter and find the best hyperparameter set that will maximize the scoring option for binary classification.
pipe  = GridSearchCV(pipeline, cv=5,param_grid=parameters)
pipe.fit(binary_df.text, binary_df.label)


# Report the standart deviation of split scores for each hyperparameter group.

split_0 = pipe.cv_results_["split0_test_score"]
split_1 = pipe.cv_results_["split1_test_score"]
split_2 = pipe.cv_results_["split2_test_score"]
split_3 = pipe.cv_results_["split3_test_score"]
split_4 = pipe.cv_results_["split4_test_score"]

#we analyze the mean-std-min-max for each hyperparameter group
for i in range(9):
  std = np.std([split_0[i],split_1[i],split_2[i],split_3[i],split_4[i]])
  mean = np.mean([split_0[i],split_1[i],split_2[i],split_3[i],split_4[i]])
  min = np.min([split_0[i],split_1[i],split_2[i],split_3[i],split_4[i]])
  max = np.max([split_0[i],split_1[i],split_2[i],split_3[i],split_4[i]])
  print("hpyerparameter ", i , " :", "std-mean-min-max", round(std,5), round(mean,5), round(min,5), round(max,5))

"""
#we make sure there is no huge differences between splits
print("standard deviations:", round(np.std(split_0),4), round(np.std(split_1),4), round(np.std(split_2),4), round(np.std(split_3),4), round(np.std(split_4),4))
print("mean               :", round(np.mean(split_0),4), round(np.mean(split_1),4), round(np.mean(split_2),4), round(np.mean(split_3),4), round(np.mean(split_4),4))

"""
# Show the best parameter set for given dataset and hyperparameter space.
print(pipe.best_params_)


# Building the pipeline with the best parameter group and reporting Conf. Mat. and Results on the Test Set #
# Create your Pipeline object with the best parameter set.
best_parameters = {'vectorizer__min_df':[100], 'vectorizer__ngram_range':[[1, 2]]}
pipe  = GridSearchCV(pipeline, cv=5,param_grid=best_parameters)

# Fit your pipeline on training set.
pipe.fit(binary_df.text, binary_df.label)

# Take prediction and report the F1 and Accuracy scores for binary classification. Then show the confussion table.
preds = pipe.predict(test_binary_df.text)

print(confusion_matrix(test_binary_df.label, preds))
print("f1 score", f1_score(test_binary_df.label, preds))
print("accuracy score", accuracy_score(test_binary_df.label, preds))


hpyerparameter  0  : std-mean-min-max 0.00514 0.85452 0.84559 0.86047
hpyerparameter  1  : std-mean-min-max 0.00859 0.85993 0.84629 0.87192
hpyerparameter  2  : std-mean-min-max 0.00871 0.85993 0.84629 0.87227
hpyerparameter  3  : std-mean-min-max 0.00483 0.81315 0.80528 0.81888
hpyerparameter  4  : std-mean-min-max 0.0056 0.81273 0.80562 0.82096
hpyerparameter  5  : std-mean-min-max 0.0056 0.81273 0.80562 0.82096
hpyerparameter  6  : std-mean-min-max 0.00687 0.76886 0.75668 0.77793
hpyerparameter  7  : std-mean-min-max 0.00687 0.76886 0.75668 0.77793
hpyerparameter  8  : std-mean-min-max 0.00687 0.76886 0.75668 0.77793
{'vectorizer__min_df': 100, 'vectorizer__ngram_range': [1, 2]}
[[682  91]
 [113 707]]
f1 score 0.8739184177997528
accuracy score 0.871939736346516
CPU times: user 1min 52s, sys: 1.14 s, total: 1min 53s
Wall time: 1min 55s


### Multi

In [132]:
%%time
# Initialize and run the GridSearchCV to scan the hyperparameter and find the best hyperparameter set that will maximize the scoring option for multiclass classification.
pipe  = GridSearchCV(pipeline, cv=5,param_grid=parameters)
pipe.fit(multiclass_df.text, multiclass_df.label)

# Report the standart deviation of split scores for each hyperparameter group.

split_0 = pipe.cv_results_["split0_test_score"]
split_1 = pipe.cv_results_["split1_test_score"]
split_2 = pipe.cv_results_["split2_test_score"]
split_3 = pipe.cv_results_["split3_test_score"]
split_4 = pipe.cv_results_["split4_test_score"]

#we analyze the mean-std-min-max for each hyperparameter group
for i in range(9):
  std = np.std([split_0[i],split_1[i],split_2[i],split_3[i],split_4[i]])
  mean = np.mean([split_0[i],split_1[i],split_2[i],split_3[i],split_4[i]])
  min = np.min([split_0[i],split_1[i],split_2[i],split_3[i],split_4[i]])
  max = np.max([split_0[i],split_1[i],split_2[i],split_3[i],split_4[i]])
  print("hpyerparameter ", i , " :", "std-mean-min-max", round(std,5), round(mean,5), round(min,5), round(max,5))

"""
#we make sure there is no huge differences between splits
print("standard deviations:", round(np.std(split_0),4), round(np.std(split_1),4), round(np.std(split_2),4), round(np.std(split_3),4), round(np.std(split_4),4))
print("mean               :", round(np.mean(split_0),4), round(np.mean(split_1),4), round(np.mean(split_2),4), round(np.mean(split_3),4), round(np.mean(split_4),4))

"""
# Show the best parameter set for given dataset and hyperparameter space.
print(pipe.best_params_)

# Building the pipeline with the best parameter group and reporting Conf. Mat. and Results on the Test Set #
# Create your pipeline object with the best parameter set.
best_parameters = {'vectorizer__min_df':[100], 'vectorizer__ngram_range':[[1, 2]]}
pipe  = GridSearchCV(pipeline, cv=5,param_grid=best_parameters)


# Fit your pipeline on training set.
pipe.fit(multiclass_df.text, multiclass_df.label)


# Take prediction and report the F1 and Accuracy scores for binary classification. Then show the confussion table.
preds = pipe.predict(test_multiclass_df.text)

print(confusion_matrix(test_multiclass_df.label, preds))
print("f1 score", f1_score(test_multiclass_df.label, preds, average='macro'))
print("accuracy score", accuracy_score(test_multiclass_df.label, preds))


hpyerparameter  0  : std-mean-min-max 0.00717 0.45433 0.44083 0.46083
hpyerparameter  1  : std-mean-min-max 0.00835 0.46778 0.45528 0.47694
hpyerparameter  2  : std-mean-min-max 0.00921 0.46744 0.45389 0.4775
hpyerparameter  3  : std-mean-min-max 0.00852 0.46033 0.445 0.47028
hpyerparameter  4  : std-mean-min-max 0.00846 0.46056 0.44528 0.46889
hpyerparameter  5  : std-mean-min-max 0.00846 0.46056 0.44528 0.46889
hpyerparameter  6  : std-mean-min-max 0.0028 0.42956 0.42472 0.43194
hpyerparameter  7  : std-mean-min-max 0.0028 0.42956 0.42472 0.43194
hpyerparameter  8  : std-mean-min-max 0.0028 0.42956 0.42472 0.43194
{'vectorizer__min_df': 100, 'vectorizer__ngram_range': [1, 2]}
[[266  88  18   5  16]
 [102 156  72  23  27]
 [ 47  76 118  99  67]
 [ 22  31  60 136 130]
 [ 30  13  20  68 310]]
f1 score 0.4772514205660536
accuracy score 0.493
CPU times: user 2min 22s, sys: 1.47 s, total: 2min 23s
Wall time: 2min 24s


### Logistic Regression

In [136]:
# https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html

# Initiate the pipeline with required components.You can use Pipeline class of sklearn -> https://scikit-learn.org/stable/modules/generated/sklearn.pipeline.Pipeline.html
# There will be three components; 1) Word weightning 2) Logistic Regression classifier.

pipeline = Pipeline([
     ('vectorizer', TfidfVectorizer()),  
     ('lgc', LogisticRegression(random_state=22, penalty= "elasticnet", solver = "saga"))
])

#Set the hyperparameter space that will be scanned.

parameters = {'lgc__l1_ratio':[0.0,0.5,1.0], 'vectorizer__ngram_range':[[1, 1],[1, 2],[1, 3]], 'vectorizer__min_df':[100,500,1000]}

#### Binary

In [139]:
%%time
# Initialize and run the GridSearchCV to scan the hyperparameter and find the best hyperparameter set that will maximize the scoring option for binary classification.
pipe  = GridSearchCV(pipeline, cv=5,param_grid=parameters)
pipe.fit(binary_df.text, binary_df.label)

        
# Report the standart deviation of split scores for each hyperparameter group.

split_0 = pipe.cv_results_["split0_test_score"]
split_1 = pipe.cv_results_["split1_test_score"]
split_2 = pipe.cv_results_["split2_test_score"]
split_3 = pipe.cv_results_["split3_test_score"]
split_4 = pipe.cv_results_["split4_test_score"]

#we analyze the mean-std-min-max for each hyperparameter group
for i in range(27):
  std = np.std([split_0[i],split_1[i],split_2[i],split_3[i],split_4[i]])
  mean = np.mean([split_0[i],split_1[i],split_2[i],split_3[i],split_4[i]])
  min = np.min([split_0[i],split_1[i],split_2[i],split_3[i],split_4[i]])
  max = np.max([split_0[i],split_1[i],split_2[i],split_3[i],split_4[i]])
  print("hpyerparameter ", i , " :", "std-mean-min-max", round(std,5), round(mean,5), round(min,5), round(max,5))

"""
#we make sure there is no huge differences between splits
print("standard deviations:", round(np.std(split_0),4), round(np.std(split_1),4), round(np.std(split_2),4), round(np.std(split_3),4), round(np.std(split_4),4))
print("mean               :", round(np.mean(split_0),4), round(np.mean(split_1),4), round(np.mean(split_2),4), round(np.mean(split_3),4), round(np.mean(split_4),4))

"""

# Show the best parameter set for given dataset and hyperparameter space.
print(pipe.best_params_)


# Building the pipeline with the best parameter group and reporting Conf. Mat. and Results on the Test Set #
# Create your Pipeline object with the best parameter set.
best_parameters = {'vectorizer__min_df':[100], 'vectorizer__ngram_range':[[1, 2]], 'lgc__l1_ratio': [0.5]}
pipe  = GridSearchCV(pipeline, cv=5,param_grid=best_parameters)

# Fit your pipeline on training set.
pipe.fit(binary_df.text, binary_df.label)

# Take prediction and report the F1 and Accuracy scores for binary classification. Then show the confussion table.
preds = pipe.predict(test_binary_df.text)

print(confusion_matrix(test_binary_df.label, preds))
print("f1 score", f1_score(test_binary_df.label, preds))
print("accuracy score", accuracy_score(test_binary_df.label, preds))


hpyerparameter  0  : std-mean-min-max 0.00294 0.90539 0.90038 0.90871
hpyerparameter  1  : std-mean-min-max 0.00397 0.90588 0.90108 0.91218
hpyerparameter  2  : std-mean-min-max 0.00397 0.90588 0.90108 0.91218
hpyerparameter  3  : std-mean-min-max 0.00469 0.85347 0.84832 0.85913
hpyerparameter  4  : std-mean-min-max 0.00442 0.85368 0.84906 0.85913
hpyerparameter  5  : std-mean-min-max 0.00442 0.85368 0.84906 0.85913
hpyerparameter  6  : std-mean-min-max 0.00478 0.79864 0.79389 0.80666
hpyerparameter  7  : std-mean-min-max 0.00478 0.79864 0.79389 0.80666
hpyerparameter  8  : std-mean-min-max 0.00478 0.79864 0.79389 0.80666
hpyerparameter  9  : std-mean-min-max 0.00266 0.90657 0.90281 0.91114
hpyerparameter  10  : std-mean-min-max 0.00305 0.90831 0.9042 0.91357
hpyerparameter  11  : std-mean-min-max 0.00305 0.90831 0.9042 0.91357
hpyerparameter  12  : std-mean-min-max 0.00407 0.85479 0.85005 0.85982
hpyerparameter  13  : std-mean-min-max 0.0038 0.85417 0.84936 0.85878
hpyerparameter  14 

#### Multiclass

In [141]:
%%time
# Initialize and run the GridSearchCV to scan the hyperparameter and find the best hyperparameter set that will maximize the scoring option for multiclass classification.
pipe  = GridSearchCV(pipeline, cv=5,param_grid=parameters)
pipe.fit(multiclass_df.text, multiclass_df.label)

# Report the standart deviation of split scores for each hyperparameter group.

split_0 = pipe.cv_results_["split0_test_score"]
split_1 = pipe.cv_results_["split1_test_score"]
split_2 = pipe.cv_results_["split2_test_score"]
split_3 = pipe.cv_results_["split3_test_score"]
split_4 = pipe.cv_results_["split4_test_score"]

#we analyze the mean-std-min-max for each hyperparameter group
for i in range(27):
  std = np.std([split_0[i],split_1[i],split_2[i],split_3[i],split_4[i]])
  mean = np.mean([split_0[i],split_1[i],split_2[i],split_3[i],split_4[i]])
  min = np.min([split_0[i],split_1[i],split_2[i],split_3[i],split_4[i]])
  max = np.max([split_0[i],split_1[i],split_2[i],split_3[i],split_4[i]])
  print("hpyerparameter ", i , " :", "std-mean-min-max", round(std,5), round(mean,5), round(min,5), round(max,5))


"""
#we make sure there is no huge differences between splits
print("standard deviations:", round(np.std(split_0),4), round(np.std(split_1),4), round(np.std(split_2),4), round(np.std(split_3),4), round(np.std(split_4),4))
print("mean               :", round(np.mean(split_0),4), round(np.mean(split_1),4), round(np.mean(split_2),4), round(np.mean(split_3),4), round(np.mean(split_4),4))
"""

# Show the best parameter set for given dataset and hyperparameter space.
print(pipe.best_params_)


# Building the pipeline with the best parameter group and reporting Conf. Mat. and Results on the Test Set #
# Create your pipeline object with the best parameter set.
best_parameters = {'vectorizer__min_df':[100], 'vectorizer__ngram_range':[[1, 3]], 'lgc__l1_ratio': [1.0]}
pipe  = GridSearchCV(pipeline, cv=5,param_grid=best_parameters)


# Fit your pipeline on training set.
pipe.fit(multiclass_df.text, multiclass_df.label)


# Take prediction and report the F1 and Accuracy scores for binary classification. Then show the confussion table.
preds = pipe.predict(test_multiclass_df.text)

print(confusion_matrix(test_multiclass_df.label, preds))
print("f1 score", f1_score(test_multiclass_df.label, preds, average='macro'))
print("accuracy score", accuracy_score(test_multiclass_df.label, preds))


hpyerparameter  0  : std-mean-min-max 0.00665 0.53556 0.5225 0.54
hpyerparameter  1  : std-mean-min-max 0.00451 0.53761 0.53056 0.54306
hpyerparameter  2  : std-mean-min-max 0.00487 0.53828 0.53028 0.54389
hpyerparameter  3  : std-mean-min-max 0.01043 0.51011 0.49083 0.52028
hpyerparameter  4  : std-mean-min-max 0.00996 0.50961 0.49167 0.51917
hpyerparameter  5  : std-mean-min-max 0.00996 0.50961 0.49167 0.51917
hpyerparameter  6  : std-mean-min-max 0.00645 0.46789 0.45694 0.47639
hpyerparameter  7  : std-mean-min-max 0.00645 0.46789 0.45694 0.47639
hpyerparameter  8  : std-mean-min-max 0.00645 0.46789 0.45694 0.47639
hpyerparameter  9  : std-mean-min-max 0.0072 0.53572 0.52278 0.54222
hpyerparameter  10  : std-mean-min-max 0.00571 0.54139 0.53 0.545
hpyerparameter  11  : std-mean-min-max 0.0056 0.54117 0.53 0.54444
hpyerparameter  12  : std-mean-min-max 0.01076 0.51183 0.4925 0.52167
hpyerparameter  13  : std-mean-min-max 0.011 0.51183 0.49194 0.52167
hpyerparameter  14  : std-mean-mi

## Neural Models

### Convolutional Neural Network (CNN)

In [142]:
import pandas as pd
import numpy as np
import nltk,re
import tensorflow as tf
from sklearn.model_selection import train_test_split
from numpy import array,asarray,zeros

from nltk.stem import PorterStemmer
from nltk.tokenize import sent_tokenize
import nltk
nltk.download('punkt')

import keras
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences

from keras.models import Sequential
from keras.layers.convolutional import Conv1D,MaxPooling1D
from keras.layers import Dense,Flatten,Embedding,Input,Dropout
from keras.callbacks import ModelCheckpoint

from gensim.models import Word2Vec
import gensim.downloader as api

from keras import backend as K

def recall_m(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    recall = true_positives / (possible_positives + K.epsilon())
    return recall

def precision_m(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    precision = true_positives / (predicted_positives + K.epsilon())
    return precision

def f1_m(y_true, y_pred):
    precision = precision_m(y_true, y_pred)
    recall = recall_m(y_true, y_pred)
    return 2*((precision*recall)/(precision+recall+K.epsilon()))


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [None]:
binary_df['text'] = binary_df.apply(lambda row: nltk.word_tokenize(row['text']), axis=1)
multiclass_df['text'] = multiclass_df.apply(lambda row: nltk.word_tokenize(row['text']), axis=1)

In [None]:
test_binary_df['text'] = test_binary_df.apply(lambda row: nltk.word_tokenize(row['text']), axis=1)
test_multiclass_df['text'] = test_multiclass_df.apply(lambda row: nltk.word_tokenize(row['text']), axis=1)

In [None]:
# Create a validation set from train set
# Please use random_state of 22 and test_size of 0.1

b_X_train, b_X_val, b_y_train, b_y_val = train_test_split(binary_df.text, binary_df.label, test_size=0.1, random_state=22)

m_X_train, m_X_val, m_y_train, m_y_val = train_test_split(multiclass_df.text, multiclass_df.label, test_size=0.1, random_state=22)

print(b_X_train.shape, b_X_val.shape, b_y_train.shape, b_y_val.shape)
print(m_X_train.shape, m_X_val.shape, m_y_train.shape, m_y_val.shape) #since multiclass did not drop labels with 3, has more dataset.

(12966,) (1441,) (12966,) (1441,)
(16200,) (1800,) (16200,) (1800,)


In [None]:
# Create your own word embeddings from scratch and load a pretrained word embeddings

# You can check https://radimrehurek.com/gensim/models/word2vec.html for training a word embeddings from scratch

m_wv = Word2Vec(sentences= m_X_train, size=70, window=5, min_count=1, workers=4) #multiclass word2vec
b_wv = Word2Vec(sentences= b_X_train, size=70,window=5, min_count=1, workers=4, ) #binary word2vec

# You can check https://radimrehurek.com/gensim/auto_examples/howtos/run_downloader_api.html and 
#   https://github.com/RaRe-Technologies/gensim-data for loading pretrained word embeddings. 

print(list(api.info()['models'].keys()))
api_wv = api.load('glove-wiki-gigaword-50')

['fasttext-wiki-news-subwords-300', 'conceptnet-numberbatch-17-06-300', 'word2vec-ruscorpora-300', 'word2vec-google-news-300', 'glove-wiki-gigaword-50', 'glove-wiki-gigaword-100', 'glove-wiki-gigaword-200', 'glove-wiki-gigaword-300', 'glove-twitter-25', 'glove-twitter-50', 'glove-twitter-100', 'glove-twitter-200', '__testing_word2vec-matrix-synopsis']


In [None]:
#model.wv.vocab
print(m_wv.most_similar("impress"))
print(api_wv.most_similar("impress"))

[('surpris', 0.8139040470123291), ('disappoint', 0.7657565474510193), ('unfortun', 0.7494511008262634), ('stellar', 0.7401197552680969), ('outstand', 0.737817645072937), ('spectacular', 0.7372994422912598), ('satisfi', 0.7323342561721802), ('good', 0.7307410836219788), ('particularli', 0.7235710620880127), ('outweigh', 0.7128151059150696)]
[('eager', 0.807209312915802), ('motivate', 0.7772501111030579), ('convince', 0.7734254598617554), ('tempted', 0.7684953808784485), ('wanting', 0.7526339888572693), ('entertain', 0.7396959662437439), ('reluctant', 0.7380232214927673), ('remind', 0.7234859466552734), ('wishing', 0.7117031812667847), ('tempt', 0.7073939442634583)]


In [None]:
#find out average length in the data
counter = 0
sum = 0
for i in b_X_train:
  sum = sum + len(i)
  counter = counter + 1

sum/counter

59.14915933981182

#-------- BINARY -------

In [None]:
# Prepare your dataset for CNN classifier

tokenizer = Tokenizer(num_words=50000) 
tokenizer.fit_on_texts(b_X_train)
Xcnn_train = tokenizer.texts_to_sequences(b_X_train)
Xcnn_val = tokenizer.texts_to_sequences(b_X_val)
Xcnn_test = tokenizer.texts_to_sequences(test_binary_df.text)

vocab_size = len(tokenizer.word_index) + 1  
print(b_X_train[1])
print(Xcnn_train[1]) 

maxlen = 70
Xcnn_train = pad_sequences(Xcnn_train, padding='post', maxlen=maxlen)
Xcnn_val = pad_sequences(Xcnn_val, padding='post', maxlen=maxlen)
Xcnn_test = pad_sequences(Xcnn_test, padding='post', maxlen=maxlen)
print(Xcnn_train[1, :]) 

['nice', 'first', 'visit', 'owner', 'ted', 'friendli', 'start', 'restaur', 'busi', 'friday', 'night', 'begin', 'chines', 'new', 'year', 'order', 'pu', 'pu', 'platter', 'crab', 'rangoon', 'dinner', 'chef', 'special', 'seafood', 'instead', 'chines', 'american', 'regular', 'fri', 'rice', 'everyth', 'well', 'prepar', 'shrimp', 'larg', 'veget', 'crisp', 'chili', 'sauc', 'sweet', 'tangi', 'return', 'owner', 'made', 'effort', 'visit', 'learn', 'name', 'ask', 'first', 'visit']
[35, 3617, 165, 228, 197, 1974, 320, 417]
[  35 3617  165  228  197 1974  320  417    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0]


# RANDOMLY INITIALIZED

In [None]:
# Create Embedding Matrices and Layers

embedding_dim = 70
textcnnmodel = Sequential()
textcnnmodel.add(Embedding(vocab_size, embedding_dim, input_length=maxlen)) 

textcnnmodel.add(Conv1D(128, 3, activation='relu'))
textcnnmodel.add(MaxPooling1D())
#multi_cnn.add(Dropout(0.2))

textcnnmodel.add(Flatten())
textcnnmodel.add(Dense(16, activation='relu'))
textcnnmodel.add(Dense(1, activation='sigmoid'))
textcnnmodel.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy',f1_m,precision_m, recall_m])
textcnnmodel.summary() 

# Train models and Evaluate them for both binary and multi-class
textcnnmodel.fit(Xcnn_train, b_y_train,
                     epochs=5,
                     verbose=True,
                     validation_data=(Xcnn_val, b_y_val),
                     batch_size=16)

loss, accuracy, f1_score, precision, recall = textcnnmodel.evaluate(Xcnn_train, b_y_train, verbose=False)
print("Training Accuracy:  {:.4f}".format(accuracy),"Training F1:  {:.4f}".format(f1_score) ,"Training precision:  {:.4f}".format(precision) , "Training recall:  {:.4f}".format(recall))

loss, accuracy, f1_score, precision, recall = textcnnmodel.evaluate(Xcnn_test, test_binary_df.label, verbose=False)
print("Testing Accuracy :  {:.4f}".format(accuracy),"Testing F1:  {:.4f}".format(f1_score) ,"Testing precision:  {:.4f}".format(precision) , "Testing recall:  {:.4f}".format(recall))

Model: "sequential_29"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_29 (Embedding)    (None, 70, 70)            1473500   
                                                                 
 conv1d_35 (Conv1D)          (None, 68, 128)           27008     
                                                                 
 max_pooling1d_35 (MaxPoolin  (None, 34, 128)          0         
 g1D)                                                            
                                                                 
 flatten_29 (Flatten)        (None, 4352)              0         
                                                                 
 dense_58 (Dense)            (None, 16)                69648     
                                                                 
 dense_59 (Dense)            (None, 1)                 17        
                                                     

In [None]:
embedding_dim = 70
textcnnmodel = Sequential()
textcnnmodel.add(Embedding(vocab_size, embedding_dim, input_length=maxlen)) 

textcnnmodel.add(Conv1D(128, 5, activation='relu'))
textcnnmodel.add(MaxPooling1D())

textcnnmodel.add(Conv1D(64, 5, activation='relu'))
textcnnmodel.add(MaxPooling1D())

textcnnmodel.add(Flatten())
textcnnmodel.add(Dense(32, activation='relu'))
textcnnmodel.add(Dense(1, activation='sigmoid'))
textcnnmodel.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy',f1_m,precision_m, recall_m])
textcnnmodel.summary() 

# Train models and Evaluate them for both binary and multi-class
textcnnmodel.fit(Xcnn_train, b_y_train,
                     epochs=5,
                     verbose=True,
                     validation_data=(Xcnn_val, b_y_val),
                     batch_size=32)

loss, accuracy, f1_score, precision, recall = textcnnmodel.evaluate(Xcnn_train, b_y_train, verbose=False)
print("Training Accuracy:  {:.4f}".format(accuracy),"Training F1:  {:.4f}".format(f1_score) ,"Training precision:  {:.4f}".format(precision) , "Training recall:  {:.4f}".format(recall))

loss, accuracy, f1_score, precision, recall = textcnnmodel.evaluate(Xcnn_test, test_binary_df.label, verbose=False)
print("Testing Accuracy :  {:.4f}".format(accuracy),"Testing F1:  {:.4f}".format(f1_score) ,"Testing precision:  {:.4f}".format(precision) , "Testing recall:  {:.4f}".format(recall))

Model: "sequential_30"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_30 (Embedding)    (None, 70, 70)            1473500   
                                                                 
 conv1d_36 (Conv1D)          (None, 66, 128)           44928     
                                                                 
 max_pooling1d_36 (MaxPoolin  (None, 33, 128)          0         
 g1D)                                                            
                                                                 
 conv1d_37 (Conv1D)          (None, 29, 64)            41024     
                                                                 
 max_pooling1d_37 (MaxPoolin  (None, 14, 64)           0         
 g1D)                                                            
                                                                 
 flatten_30 (Flatten)        (None, 896)             

# Word embeddings trained from scratch with gensim

In [None]:
from gensim.models import KeyedVectors

vocab_len = len(b_wv.wv.vocab) + 1

b_wv.save("b_wv.wordvectors")
embedding_vector = KeyedVectors.load("b_wv.wordvectors", mmap='r')

embedding_matrix = np.zeros((vocab_len-1, 70))
for word, i in tokenizer.word_index.items():
    try:
      #print(embedding_vector[word].shape, word)
      embedding_matrix[i-1] = embedding_vector[word]
    except KeyError:
      print("key error", i, word)
  

In [None]:
wv_model = Sequential()
#wv_model.add(b_wv.wv.get_keras_embedding(True)) 
wv_model.add(Embedding(vocab_len-1, 70, input_length=70, weights=[embedding_matrix], trainable=True))

wv_model.add(Conv1D(128, 3, activation='relu'))
wv_model.add(MaxPooling1D())

wv_model.add(Flatten())
wv_model.add(Dense(16, activation='relu'))
wv_model.add(Dense(1, activation='sigmoid'))
wv_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy',f1_m,precision_m, recall_m])
wv_model.summary() 

# Train models and Evaluate them for both binary and multi-class
wv_model.fit(Xcnn_train, b_y_train,
                     epochs=5,
                     verbose=True,
                     validation_data=(Xcnn_val, b_y_val),
                     batch_size=16)


loss, accuracy, f1_score, precision, recall = wv_model.evaluate(Xcnn_train, b_y_train, verbose=False)
print("Training Accuracy:  {:.4f}".format(accuracy),"Training F1:  {:.4f}".format(f1_score) ,"Training precision:  {:.4f}".format(precision) , "Training recall:  {:.4f}".format(recall))

loss, accuracy, f1_score, precision, recall = wv_model.evaluate(Xcnn_test, test_binary_df.label, verbose=False)
print("Testing Accuracy :  {:.4f}".format(accuracy),"Testing F1:  {:.4f}".format(f1_score) ,"Testing precision:  {:.4f}".format(precision) , "Testing recall:  {:.4f}".format(recall))

Model: "sequential_31"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_31 (Embedding)    (None, 70, 70)            1347290   
                                                                 
 conv1d_38 (Conv1D)          (None, 68, 128)           27008     
                                                                 
 max_pooling1d_38 (MaxPoolin  (None, 34, 128)          0         
 g1D)                                                            
                                                                 
 flatten_31 (Flatten)        (None, 4352)              0         
                                                                 
 dense_62 (Dense)            (None, 16)                69648     
                                                                 
 dense_63 (Dense)            (None, 1)                 17        
                                                     

In [None]:
wv_model = Sequential()
wv_model.add(Embedding(vocab_len-1, 70, input_length=70, weights=[embedding_matrix], trainable=True))

wv_model.add(Conv1D(128, 5, activation='relu'))
wv_model.add(MaxPooling1D())

wv_model.add(Conv1D(64, 5, activation='relu'))
wv_model.add(MaxPooling1D())

wv_model.add(Flatten())
wv_model.add(Dense(10, activation='relu'))
wv_model.add(Dense(1, activation='sigmoid'))
wv_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy',f1_m,precision_m, recall_m])
wv_model.summary() 

# Train models and Evaluate them for both binary and multi-class
wv_model.fit(Xcnn_train, b_y_train,
                     epochs=5,
                     verbose=True,
                     validation_data=(Xcnn_val, b_y_val),
                     batch_size=16)

loss, accuracy, f1_score, precision, recall = wv_model.evaluate(Xcnn_train, b_y_train, verbose=False)
print("Training Accuracy:  {:.4f}".format(accuracy),"Training F1:  {:.4f}".format(f1_score) ,"Training precision:  {:.4f}".format(precision) , "Training recall:  {:.4f}".format(recall))

loss, accuracy, f1_score, precision, recall = wv_model.evaluate(Xcnn_test, test_binary_df.label, verbose=False)
print("Testing Accuracy :  {:.4f}".format(accuracy),"Testing F1:  {:.4f}".format(f1_score) ,"Testing precision:  {:.4f}".format(precision) , "Testing recall:  {:.4f}".format(recall))

Model: "sequential_32"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_32 (Embedding)    (None, 70, 70)            1347290   
                                                                 
 conv1d_39 (Conv1D)          (None, 66, 128)           44928     
                                                                 
 max_pooling1d_39 (MaxPoolin  (None, 33, 128)          0         
 g1D)                                                            
                                                                 
 conv1d_40 (Conv1D)          (None, 29, 64)            41024     
                                                                 
 max_pooling1d_40 (MaxPoolin  (None, 14, 64)           0         
 g1D)                                                            
                                                                 
 flatten_32 (Flatten)        (None, 896)             

# Pretrained word embeddings from gensim.api

In [None]:
api_wv.save("api_wv.wordvectors")
embedding_vector = KeyedVectors.load("api_wv.wordvectors", mmap='r')

embedding_matrix = np.zeros((vocab_len-1, 50))

unknown_counter = 0
for word, i in tokenizer.word_index.items():
    try:
      #print(embedding_vector[word], word)
      embedding_matrix[i-1] = embedding_vector[word]
    except KeyError:
      #print("key error", i, word)
      unknown_counter = unknown_counter + 1

#print(unknown_counter) 

In [None]:
api_model = Sequential()
api_model.add(Embedding(vocab_len-1, 50, input_length=70, weights=[embedding_matrix], trainable=True))

api_model.add(Conv1D(128, 3, activation='relu'))
api_model.add(MaxPooling1D())

api_model.add(Flatten())
api_model.add(Dense(10, activation='relu'))
api_model.add(Dense(1, activation='sigmoid'))
api_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy',f1_m,precision_m, recall_m])
api_model.summary() 

# Train models and Evaluate them for both binary and multi-class
api_model.fit(Xcnn_train, b_y_train,
                     epochs=5,
                     verbose=True,
                     validation_data=(Xcnn_val, b_y_val),
                     batch_size=32)

loss, accuracy, f1_score, precision, recall = api_model.evaluate(Xcnn_train, b_y_train, verbose=False)
print("Training Accuracy:  {:.4f}".format(accuracy),"Training F1:  {:.4f}".format(f1_score) ,"Training precision:  {:.4f}".format(precision) , "Training recall:  {:.4f}".format(recall))

loss, accuracy, f1_score, precision, recall = api_model.evaluate(Xcnn_test, test_binary_df.label, verbose=False)
print("Testing Accuracy :  {:.4f}".format(accuracy),"Testing F1:  {:.4f}".format(f1_score) ,"Testing precision:  {:.4f}".format(precision) , "Testing recall:  {:.4f}".format(recall))

Model: "sequential_33"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_33 (Embedding)    (None, 70, 50)            962350    
                                                                 
 conv1d_41 (Conv1D)          (None, 68, 128)           19328     
                                                                 
 max_pooling1d_41 (MaxPoolin  (None, 34, 128)          0         
 g1D)                                                            
                                                                 
 flatten_33 (Flatten)        (None, 4352)              0         
                                                                 
 dense_66 (Dense)            (None, 10)                43530     
                                                                 
 dense_67 (Dense)            (None, 1)                 11        
                                                     

In [None]:
api_model = Sequential()
api_model.add(Embedding(vocab_len-1, 50, input_length=70, weights=[embedding_matrix], trainable=True))

api_model.add(Conv1D(128, 5, activation='relu'))
api_model.add(MaxPooling1D())
api_model.add(Dropout(0.2))

api_model.add(Conv1D(64, 5, activation='relu'))
api_model.add(MaxPooling1D())
api_model.add(Dropout(0.2))

api_model.add(Flatten())
api_model.add(Dense(10, activation='relu'))
api_model.add(Dense(1, activation='sigmoid'))
api_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy',f1_m,precision_m, recall_m])
api_model.summary() 

# Train models and Evaluate them for both binary and multi-class

api_model.fit(Xcnn_train, b_y_train,
                     epochs=5,
                     verbose=True,
                     validation_data=(Xcnn_val, b_y_val),
                     batch_size=32)

loss, accuracy, f1_score, precision, recall = api_model.evaluate(Xcnn_train, b_y_train, verbose=False)
print("Training Accuracy:  {:.4f}".format(accuracy),"Training F1:  {:.4f}".format(f1_score) ,"Training precision:  {:.4f}".format(precision) , "Training recall:  {:.4f}".format(recall))

loss, accuracy, f1_score, precision, recall = api_model.evaluate(Xcnn_test, test_binary_df.label, verbose=False)
print("Testing Accuracy :  {:.4f}".format(accuracy),"Testing F1:  {:.4f}".format(f1_score) ,"Testing precision:  {:.4f}".format(precision) , "Testing recall:  {:.4f}".format(recall))

Model: "sequential_34"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_34 (Embedding)    (None, 70, 50)            962350    
                                                                 
 conv1d_42 (Conv1D)          (None, 66, 128)           32128     
                                                                 
 max_pooling1d_42 (MaxPoolin  (None, 33, 128)          0         
 g1D)                                                            
                                                                 
 dropout_10 (Dropout)        (None, 33, 128)           0         
                                                                 
 conv1d_43 (Conv1D)          (None, 29, 64)            41024     
                                                                 
 max_pooling1d_43 (MaxPoolin  (None, 14, 64)           0         
 g1D)                                                

# -------- MULTICLASS ---------------

In [None]:
def recall_m(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 1, 5)))
    possible_positives = K.sum(K.round(K.clip(y_true, 1, 5)))
    recall = true_positives / (possible_positives + K.epsilon())
    return recall

def precision_m(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 1, 5)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 1, 5)))
    precision = true_positives / (predicted_positives + K.epsilon())
    return precision

def f1_m(y_true, y_pred):
    precision = precision_m(y_true, y_pred)
    recall = recall_m(y_true, y_pred)
    return 2*((precision*recall)/(precision+recall+K.epsilon()))


In [None]:
# Prepare your dataset for CNN classifier

tokenizer = Tokenizer(num_words=50000)
tokenizer.fit_on_texts(m_X_train)
m_Xcnn_train = tokenizer.texts_to_sequences(m_X_train)
m_Xcnn_val = tokenizer.texts_to_sequences(m_X_val)
m_Xcnn_test = tokenizer.texts_to_sequences(test_multiclass_df.text)

vocab_size = len(tokenizer.word_index) + 1  
print(m_X_train[1])
print(m_Xcnn_train[1]) 

maxlen = 70 
m_Xcnn_train = pad_sequences(m_Xcnn_train, padding='post', maxlen=maxlen)
m_Xcnn_val = pad_sequences(m_Xcnn_val, padding='post', maxlen=maxlen)
m_Xcnn_test = pad_sequences(m_Xcnn_test, padding='post', maxlen=maxlen)
print(m_Xcnn_train[0, :]) 

['nice', 'first', 'visit', 'owner', 'ted', 'friendli', 'start', 'restaur', 'busi', 'friday', 'night', 'begin', 'chines', 'new', 'year', 'order', 'pu', 'pu', 'platter', 'crab', 'rangoon', 'dinner', 'chef', 'special', 'seafood', 'instead', 'chines', 'american', 'regular', 'fri', 'rice', 'everyth', 'well', 'prepar', 'shrimp', 'larg', 'veget', 'crisp', 'chili', 'sauc', 'sweet', 'tangi', 'return', 'owner', 'made', 'effort', 'visit', 'learn', 'name', 'ask', 'first', 'visit']
[1234, 218, 299, 14, 169, 156, 104, 166, 53, 327, 42, 434, 827, 233, 179, 83, 765, 94, 20, 41, 21, 16, 324, 260, 161, 51, 251, 755, 162, 269, 1, 2, 10, 16, 11, 6, 1234, 717, 210, 257, 179, 823, 96]
[   76    40   224   160    69   784    10   142   371   442   387   493
    10  1578    61    44  4175  2301  2816 12713     9   344   487     8
    11    44   353  1110  4176   137    11  6331   325   138     9     1
     2    10    10  2979    76     3  2371  4696     9   123   867   927
   784  4696   854     3   142     0

# RANDOMLY INITIALIZED

In [None]:
# Create Embedding Matrices and Layers - RANDOMLY INITIALIZED
from sklearn.metrics import classification_report

embedding_dim = 70
multi_cnn = Sequential()
multi_cnn.add(Embedding(vocab_size, embedding_dim, input_length=maxlen)) 

multi_cnn.add(Conv1D(256, 3, activation='relu'))
multi_cnn.add(MaxPooling1D())
multi_cnn.add(Dropout(0.2))

multi_cnn.add(Flatten())
multi_cnn.add(Dense(20, activation='relu'))
multi_cnn.add(Dense(5, activation='softmax')) #added 5 neurons for output layer using softmax
multi_cnn.compile(optimizer='adam', loss='SparseCategoricalCrossentropy', metrics=['accuracy',f1_m,precision_m, recall_m]) #changed loss
multi_cnn.summary() 


# Train models and Evaluate them for both binary and multi-class
multi_cnn.fit(m_Xcnn_train, m_y_train,
                     epochs=5,
                     verbose=True,
                     validation_data=(m_Xcnn_val, m_y_val),
                     batch_size=16)

print("")

loss, accuracy, f1_score, precision, recall = multi_cnn.evaluate(m_Xcnn_train, m_y_train, verbose=False)
print("Training Accuracy:  {:.4f}".format(accuracy),"Training F1:  {:.4f}".format(f1_score) ,"Training precision:  {:.4f}".format(precision) , "Training recall:  {:.4f}".format(recall))

print("Test Scores:")
preds = multi_cnn.predict(m_Xcnn_test)
indices_p = np.argmax(preds, axis = 1)
indices_p
print(classification_report(test_multiclass_df.label,indices_p))

Model: "sequential_39"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_39 (Embedding)    (None, 70, 70)            1473500   
                                                                 
 conv1d_49 (Conv1D)          (None, 68, 256)           54016     
                                                                 
 max_pooling1d_49 (MaxPoolin  (None, 34, 256)          0         
 g1D)                                                            
                                                                 
 dropout_17 (Dropout)        (None, 34, 256)           0         
                                                                 
 flatten_39 (Flatten)        (None, 8704)              0         
                                                                 
 dense_78 (Dense)            (None, 20)                174100    
                                                     

In [None]:
# Create Embedding Matrices and Layers - RANDOMLY INITIALIZED

embedding_dim = 70
multi_cnn = Sequential()
multi_cnn.add(Embedding(vocab_size, embedding_dim, input_length=maxlen)) 

multi_cnn.add(Conv1D(64, 5, activation='relu'))
multi_cnn.add(MaxPooling1D())
multi_cnn.add(Dropout(0.2))

multi_cnn.add(Conv1D(32, 5, activation='relu'))
multi_cnn.add(MaxPooling1D())
multi_cnn.add(Dropout(0.2))

multi_cnn.add(Flatten())
multi_cnn.add(Dense(20, activation='relu'))
multi_cnn.add(Dense(5, activation='softmax')) #added 5 neurons for output layer using softmax
multi_cnn.compile(optimizer='adam', loss='SparseCategoricalCrossentropy', metrics=['accuracy']) #changed loss
multi_cnn.summary() 


# Train models and Evaluate them for both binary and multi-class
multi_cnn.fit(m_Xcnn_train, m_y_train,
                     epochs=5,
                     verbose=True,
                     validation_data=(m_Xcnn_val, m_y_val),
                     batch_size=16)

loss, accuracy = multi_cnn.evaluate(m_Xcnn_train, m_y_train, verbose=False)
print("Training Accuracy: {:.4f}".format(accuracy))

print("Test Scores:")
preds = multi_cnn.predict(m_Xcnn_test)
indices_p = np.argmax(preds, axis = 1)
indices_p
print(classification_report(test_multiclass_df.label,indices_p))

Model: "sequential_38"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_38 (Embedding)    (None, 70, 70)            1473500   
                                                                 
 conv1d_47 (Conv1D)          (None, 66, 64)            22464     
                                                                 
 max_pooling1d_47 (MaxPoolin  (None, 33, 64)           0         
 g1D)                                                            
                                                                 
 dropout_15 (Dropout)        (None, 33, 64)            0         
                                                                 
 conv1d_48 (Conv1D)          (None, 29, 32)            10272     
                                                                 
 max_pooling1d_48 (MaxPoolin  (None, 14, 32)           0         
 g1D)                                                

# Word embeddings trained from scratch with gensim

In [None]:
from gensim.models import KeyedVectors

vocab_len = len(m_wv.wv.vocab) + 2

m_wv.save("m_wv.wordvectors")
embedding_vector = KeyedVectors.load("m_wv.wordvectors", mmap='r')

embedding_matrix = np.zeros((vocab_len-1, 70))
for word, i in tokenizer.word_index.items():
    try:
      #print(embedding_vector[word].shape, word)
      embedding_matrix[i-1] = embedding_vector[word]
    except KeyError:
      print("key error", i, word)
  

In [None]:
multi_cnn = Sequential()
multi_cnn.add(Embedding(vocab_len-1, 70, input_length=70, weights=[embedding_matrix], trainable=True))

multi_cnn.add(Conv1D(256, 3, activation='relu'))
multi_cnn.add(MaxPooling1D())
multi_cnn.add(Dropout(0.2))

multi_cnn.add(Flatten())
multi_cnn.add(Dense(20, activation='relu'))
multi_cnn.add(Dense(5, activation='softmax')) #added 5 neurons for output layer using softmax

multi_cnn.compile(optimizer='adam', loss='SparseCategoricalCrossentropy', metrics=['accuracy']) #changed loss
multi_cnn.summary() 

# Train models and Evaluate them for both binary and multi-class
multi_cnn.fit(m_Xcnn_train, m_y_train,
                     epochs=5,
                     verbose=True,
                     validation_data=(m_Xcnn_val, m_y_val),
                     batch_size=16)

loss, accuracy = multi_cnn.evaluate(m_Xcnn_train, m_y_train, verbose=False)
print("Training Accuracy: {:.4f}".format(accuracy))

print("Test Scores:")
preds = multi_cnn.predict(m_Xcnn_test)
indices_p = np.argmax(preds, axis = 1)
indices_p
print(classification_report(test_multiclass_df.label,indices_p))

Model: "sequential_40"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_40 (Embedding)    (None, 70, 70)            1473500   
                                                                 
 conv1d_50 (Conv1D)          (None, 68, 256)           54016     
                                                                 
 max_pooling1d_50 (MaxPoolin  (None, 34, 256)          0         
 g1D)                                                            
                                                                 
 dropout_18 (Dropout)        (None, 34, 256)           0         
                                                                 
 flatten_40 (Flatten)        (None, 8704)              0         
                                                                 
 dense_80 (Dense)            (None, 20)                174100    
                                                     

In [None]:
multi_cnn = Sequential()
multi_cnn.add(Embedding(vocab_len-1, 70, input_length=70, weights=[embedding_matrix], trainable=True))

multi_cnn.add(Conv1D(64, 5, activation='relu'))
multi_cnn.add(MaxPooling1D())
multi_cnn.add(Dropout(0.2))

multi_cnn.add(Conv1D(32, 5, activation='relu'))
multi_cnn.add(MaxPooling1D())
multi_cnn.add(Dropout(0.2))

multi_cnn.add(Flatten())
multi_cnn.add(Dense(20, activation='relu'))
multi_cnn.add(Dense(5, activation='softmax')) #added 5 neurons for output layer using softmax

multi_cnn.compile(optimizer='adam', loss='SparseCategoricalCrossentropy', metrics=['accuracy']) #changed loss
multi_cnn.summary() 

# Train models and Evaluate them for both binary and multi-class
multi_cnn.fit(m_Xcnn_train, m_y_train,
                     epochs=10,
                     verbose=True,
                     validation_data=(m_Xcnn_val, m_y_val),
                     batch_size=32)

loss, accuracy = multi_cnn.evaluate(m_Xcnn_train, m_y_train, verbose=False)
print("Training Accuracy: {:.4f}".format(accuracy))


print("Test Scores:")
preds = multi_cnn.predict(m_Xcnn_test)
indices_p = np.argmax(preds, axis = 1)
indices_p
print(classification_report(test_multiclass_df.label,indices_p))


Model: "sequential_41"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_41 (Embedding)    (None, 70, 70)            1473500   
                                                                 
 conv1d_51 (Conv1D)          (None, 66, 64)            22464     
                                                                 
 max_pooling1d_51 (MaxPoolin  (None, 33, 64)           0         
 g1D)                                                            
                                                                 
 dropout_19 (Dropout)        (None, 33, 64)            0         
                                                                 
 conv1d_52 (Conv1D)          (None, 29, 32)            10272     
                                                                 
 max_pooling1d_52 (MaxPoolin  (None, 14, 32)           0         
 g1D)                                                

#Pretrained word embeddings from gensim.api

In [None]:
api_wv.save("api_wv.wordvectors")
embedding_vector = KeyedVectors.load("api_wv.wordvectors", mmap='r')

embedding_matrix = np.zeros((vocab_len, 50))

unknown_counter = 0
for word, i in tokenizer.word_index.items():
    try:
      #print(embedding_vector[word], word)
      embedding_matrix[i-1] = embedding_vector[word]
    except KeyError:
      #print("key error", i, word)
      unknown_counter = unknown_counter + 1

#print(unknown_counter) 

In [None]:
multi_cnn = Sequential()
multi_cnn.add(Embedding(vocab_len, 50, input_length=70, weights=[embedding_matrix], trainable=True))

multi_cnn.add(Conv1D(256, 3, activation='relu'))
multi_cnn.add(MaxPooling1D())
multi_cnn.add(Dropout(0.2))

multi_cnn.add(Flatten())
multi_cnn.add(Dense(10, activation='relu'))
multi_cnn.add(Dense(5, activation='softmax')) #added 5 neurons for output layer using softmax
multi_cnn.compile(optimizer='adam', loss='SparseCategoricalCrossentropy', metrics=['accuracy']) #changed loss
multi_cnn.summary() 


# Train models and Evaluate them for both binary and multi-class
multi_cnn.fit(m_Xcnn_train, m_y_train,
                     epochs=5,
                     verbose=True,
                     validation_data=(m_Xcnn_val, m_y_val),
                     batch_size=32)

loss, accuracy = multi_cnn.evaluate(m_Xcnn_train, m_y_train, verbose=False)
print("Training Accuracy: {:.4f}".format(accuracy))


print("Test Scores:")
preds = multi_cnn.predict(m_Xcnn_test)
indices_p = np.argmax(preds, axis = 1)
indices_p
print(classification_report(test_multiclass_df.label,indices_p))


Model: "sequential_42"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_42 (Embedding)    (None, 70, 50)            1052550   
                                                                 
 conv1d_53 (Conv1D)          (None, 68, 256)           38656     
                                                                 
 max_pooling1d_53 (MaxPoolin  (None, 34, 256)          0         
 g1D)                                                            
                                                                 
 dropout_21 (Dropout)        (None, 34, 256)           0         
                                                                 
 flatten_42 (Flatten)        (None, 8704)              0         
                                                                 
 dense_84 (Dense)            (None, 10)                87050     
                                                     

In [None]:
multi_cnn = Sequential()
multi_cnn.add(Embedding(vocab_len, 50, input_length=70, weights=[embedding_matrix], trainable=True))

multi_cnn.add(Conv1D(128, 5, activation='relu'))
multi_cnn.add(MaxPooling1D())
multi_cnn.add(Dropout(0.2))

multi_cnn.add(Conv1D(64, 5, activation='relu'))
multi_cnn.add(MaxPooling1D())
multi_cnn.add(Dropout(0.2))

multi_cnn.add(Conv1D(32, 5, activation='relu'))
multi_cnn.add(MaxPooling1D())
multi_cnn.add(Dropout(0.2))

multi_cnn.add(Flatten())
multi_cnn.add(Dense(10, activation='relu'))
multi_cnn.add(Dense(5, activation='softmax')) #added 5 neurons for output layer using softmax
multi_cnn.compile(optimizer='adam', loss='SparseCategoricalCrossentropy', metrics=['accuracy']) #changed loss
multi_cnn.summary() 


# Train models and Evaluate them for both binary and multi-class
multi_cnn.fit(m_Xcnn_train, m_y_train,
                     epochs=5,
                     verbose=True,
                     validation_data=(m_Xcnn_val, m_y_val),
                     batch_size=32)

loss, accuracy = multi_cnn.evaluate(m_Xcnn_train, m_y_train, verbose=False)
print("Training Accuracy: {:.4f}".format(accuracy))

print("Test Scores:")
preds = multi_cnn.predict(m_Xcnn_test)
indices_p = np.argmax(preds, axis = 1)
indices_p
print(classification_report(test_multiclass_df.label,indices_p))

Model: "sequential_43"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_43 (Embedding)    (None, 70, 50)            1052550   
                                                                 
 conv1d_54 (Conv1D)          (None, 66, 128)           32128     
                                                                 
 max_pooling1d_54 (MaxPoolin  (None, 33, 128)          0         
 g1D)                                                            
                                                                 
 dropout_22 (Dropout)        (None, 33, 128)           0         
                                                                 
 conv1d_55 (Conv1D)          (None, 29, 64)            41024     
                                                                 
 max_pooling1d_55 (MaxPoolin  (None, 14, 64)           0         
 g1D)                                                

In [None]:
#REFERENCES:

#https://www.kaggle.com/code/jagannathrk/word2vec-cnn-text-classification/notebook

## My Report

#***Naive Bayes***: 

*  Binary


```
Best parameters selected as:  

{'vectorizer__min_df': 100, 'vectorizer__ngram_range': [1, 2]}

[[682  91]
 [113 707]]

f1 score 0.8739184177997528
accuracy score 0.871939736346516

```


*  Multiclass


```
Best parameters selected as:  

{'vectorizer__min_df': 100, 'vectorizer__ngram_range': [1, 2]}

Confusion matrix: 

[[266  88  18   5  16]
 [102 156  72  23  27]
 [ 47  76 118  99  67]
 [ 22  31  60 136 130]
 [ 30  13  20  68 310]]


f1 score 0.4772514205660536
accuracy score 0.493

```
Comments on the findings:

Confusion matrix gives a significant insight about our accuracy and f1 score: Multiclass classification result has been decreased significantly compared to the binary task due to the difficulty of discriminating the labels 3 and 4 as well as 0 and 1 from each other. Therefore, results do not imply that multiclass classification failed. After examining the results of hyperparameter combinations, there is approximately 0.1 difference for the binary task and around 0.05 difference for the multiclass task. All in all we obtained, 0.88 f1 score for the binary task given that the simple baseline would be 0.5 while we obtained 0.5 for the multiclass task given that simple baseline would be 0.20. 



#***Logistic regression:***

*   Binary

```
Best parameters selected as:  

{'lgc__l1_ratio': 0.5, 'vectorizer__min_df': 100, 'vectorizer__ngram_range': [1, 2]}

[[714  59]
 [ 85 735]]

f1 score 0.9107806691449815
accuracy score 0.9096045197740112

```


*   Multiclass

```
Best parameters selected as: 

{'lgc__l1_ratio': 1.0, 'vectorizer__min_df': 100, 'vectorizer__ngram_range': [1, 3]}

[[289  81  13   6   4]
 [ 89 179  78  22  12]
 [ 26  78 189  92  22]
 [ 14  27  73 169  96]
 [ 12  12  19 113 285]]

f1 score 0.5512069432954926
accuracy score 0.5555

```

Comments on the findings:

We obtained much better results compared to naive bayes multiclass model. In detail, our f1 score and accuracy score has been increased around 0.08 with 0.55. There is a similar confusion matrix just like we had in naive bayes case. Model is very successful if we could merge 2 positive and 2 negative classes into 1 positive and 1 negative. Again, our model outperformed simple baselines by 0.40 and 0.35 for the binary task and multiclass task respectively.


#***CNN:***

In the cnn part, we have tried 3 different embedding strategies: Randomly initialized word embeddings, Word embeddings trained from scratch with gensim, Pretrained word embeddings from gensim.api. In order to do so:


For the Randomly initialized word embeddings, we set embedding dimension, vocabulary size and input length parameters in the embedding layer of our network. So that, weights in the embedding matrix will be initialized randomly.


For the Word embeddings trained from scratch with gensim, we  import our trained word2vec model using keyedvectors feature of gensim.models and form the embedding vector. Using the embedding vector, we create the embedding matrix which will be given as a parameter in the embedding layer of our network.


For the Pretrained word embeddings from gensim.api, we  import our pre-trained api model using keyedvectors feature of gensim.models and form the embedding vector: Using the embedding vector, we create the embedding matrix which will be given as a parameter in the embedding layer of our network. 


*   Binary

We prepared our dataset for cnn model by fitting our tokenizer to the binary dataset. Then, we transform each text in texts to a sequence of integers using texts_to_sequences method. We set a maxlen parameter after taking the average length of each text and we apply padding.   

After preparing the dataset, we continue by building our model: 

For the output layer, we set our activation function as sigmoid since our task is binary classification and we want our output 0 or 1. Then, we set loss function binary_crossentropy for this task. 

```
binary_model.add(Dense(1, activation='sigmoid'))
binary_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
```

We have built 2 different models for each embedding strategy -> in total 6 models for the binary task. Using randomly initialized embedding layer, we obtained around 88 percent accuracy in the test dataset. Despite adding another convolutional layer and changing filter size, we do not observe a significant difference between trained models. Using word embeddings trained from scratch with gensim, we obtained around 89 percent accuracy in the test set which is slightly better than randomly initiliazed word embedding results. Last but not least, we obtained around 89 percent accuracy in the test dataset using pretrained word embeddings from gensim.api. However, we come across an interesting output for the first trained model: model was not able to decrease the loss, thus did not learn. Increasing the number of convolutional layer, kernel size and filter size definitely helped model. In a nutshell, we conlude that each models produced decent scores with the well-prepared models and parameters.  

**Results:**

```
Randomly initiliazed Test Scores:
Testing Accuracy :  0.8908 Testing F1:  0.8918 Testing precision:  0.8990 Testing recall:  0.8899


Word embeddings trained from scratch Test Scores:
Testing Accuracy :  0.8933 Testing F1:  0.8935 Testing precision:  0.9123 Testing recall:  0.8806


Pre-trained API Test Scores:
Testing Accuracy :  0.8719 Testing F1:  0.8690 Testing precision:  0.9125 Testing recall:  0.8332
```



*   Multiclass

We prepared our dataset for cnn model by fitting our tokenizer to the multiclass dataset. Then, we transform each text in texts to a sequence of integers using texts_to_sequences method. We set a maxlen parameter after taking the average length of each text and we apply padding.   

After preparing the dataset, we continue by building our model: 

Apart from a few differences, we built our model just like binary case. What differs in this case is:  the output layer. We use softmax activation function instead of sigmoid and we use 5 neurons in the output layer. Furthermore, we modified the loss function as sparse categorical crossentropy. 

```
multi_cnn.add(Dense(5, activation='softmax')) #added 5 neurons for output layer using softmax
multi_cnn.compile(optimizer='adam', loss='SparseCategoricalCrossentropy', metrics=['accuracy']) 
```
We have built 2 different models for each embedding strategy -> in total 6 models for the multiclass task. Using randomly initialized embedding layer, we obtained around 50 percent accuracy in the test dataset. Despite adding another convolutional layer and changing filter size, we do not observe a significant difference between trained models. Using word embeddings trained from scratch with gensim, we obtained around 52 percent accuracy in the test set which is slightly better than randomly initiliazed word embedding results. Interestingly, first trained model did not end up learning and resulted around 20 percent accuracy in the test set. Probably, model complexity was not sufficient to handle multiclass classification task. Because, increasing the number of convolutional layer, kernel size and filter size definitely helped model. Last but not least, we obtained around 45 percent accuracy in the test dataset using pretrained word embeddings from gensim.api. However, we come across an interesting output for the first trained model: model was not able to decrease the loss, thus did not learn and end up 19 percent accuracy in the test set. Similarly, increasing the number of convolutional layer, kernel size and filter size definitely helped model. In a nutshell, we conlude that each models produced decent scores with the well-prepared models and parameters. Taking into account the baseline is 20 percent for a 5 class classification task, models did a good job. In terms of embedding layer effect, we can conclude that, using a pre-trained embedding layer is not good as much as training a model from scratch as expected.  



**Results:**

```
Randomly initiliazed Test Scores:
             precision    recall  f1-score   support

           0       0.72      0.59      0.65       393
           1       0.45      0.51      0.48       380
           2       0.43      0.47      0.45       407
           3       0.39      0.46      0.42       379
           4       0.69      0.56      0.62       441

    accuracy                           0.52      2000
   macro avg       0.54      0.52      0.52      2000
weighted avg       0.54      0.52      0.53      2000

```



```
Word embeddings trained from scratch Test Scores:
              precision    recall  f1-score   support

           0       0.66      0.59      0.62       393
           1       0.37      0.41      0.39       380
           2       0.38      0.39      0.38       407
           3       0.38      0.47      0.42       379
           4       0.69      0.53      0.60       441

    accuracy                           0.48      2000
   macro avg       0.49      0.48      0.48      2000
weighted avg       0.50      0.48      0.49      2000

```



```
Pre-trained API Test Scores:
              precision    recall  f1-score   support

           0       0.66      0.64      0.65       393
           1       0.42      0.45      0.44       380
           2       0.35      0.53      0.42       407
           3       0.36      0.25      0.30       379
           4       0.67      0.51      0.58       441

    accuracy                           0.48      2000
   macro avg       0.49      0.48      0.48      2000
weighted avg       0.50      0.48      0.48      2000

```

