# Mini Project 1

In [1]:
pip install aspose-words

Note: you may need to restart the kernel to use updated packages.


In [None]:
# Make Google Drive visible to the Colab VM so the dataset stored there can be read.
from google.colab import drive

MOUNT_POINT = '/content/drive'
drive.mount(MOUNT_POINT)

In [None]:
#necessary imports
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import classification_report
import seaborn as sns
import aspose.words as aw
from sklearn.metrics import confusion_matrix
from sklearn.feature_extraction.text import TfidfVectorizer
from gensim.models import KeyedVectors
import nltk
from nltk.tokenize import word_tokenize

In [None]:
# create document object
# This Aspose.Words document collects the performance report; later cells append
# text and figures to it through `builder`.
doc = aw.Document()

# create a document builder object
# The builder is bound to `doc`; the evaluation cells call builder.write(...) and
# builder.insert_image(...) on it.
builder = aw.DocumentBuilder(doc)

## 1. Dataset Preparation & Analysis

First, it's necessary to import our data.

In [None]:
# Load the posts from the mounted Drive. The path is absolute so that it matches the
# '/content/drive' mount point regardless of the current working directory.
df = pd.read_json("/content/drive/MyDrive/Colab Notebooks/Artifical Intelligence/goemotions.json")
# The file yields positional columns 0/1/2; give them descriptive names used by every later cell.
df = df.rename(columns={0: "Text", 1: "Emotion", 2: "Sentiment"})
# Preview the first rows only instead of rendering the whole frame.
df.head()

#### 1.3 
"Extract the posts and the 2 sets of labels (emotion and sentiment), then plot the distribution
of the posts in each category and save the graphic (a histogram or pie chart) in pdf. Do this for both
the emotion and the sentiment categories"

In [None]:
# Two stacked bar charts: number of posts per emotion (top) and per sentiment (bottom).
fig, ax = plt.subplots(2, 1, figsize=(15, 15))
df['Emotion'].value_counts().plot(ax=ax[0], kind='bar')
df['Sentiment'].value_counts().plot(ax=ax[1], kind='bar')

# Title and label each panel so the saved PDF can be read on its own.
ax[0].set(title='Distribution of posts per emotion', xlabel='Emotion', ylabel='Number of posts')
ax[1].set(title='Distribution of posts per sentiment', xlabel='Sentiment', ylabel='Number of posts')

# Both panels go into a single PDF, as requested in 1.3.
fig.savefig("graphs.pdf", bbox_inches='tight')

## 2. Words as Features

#### 2.1
"Process the dataset using feature_extraction.text.CountVectorizer to extract tokens/words
and their frequencies. Display the number of tokens (the size of the vocabulary) in the dataset."

In [None]:
# Build the bag-of-words representation: one column per distinct word token,
# each cell holding how often that token occurs in the post.
vectorizer = CountVectorizer(analyzer='word')
X = vectorizer.fit_transform(df['Text'])

# Both label sets are kept side by side; each classifier later selects the column it needs.
y = df[['Sentiment', 'Emotion']]

# The fitted vocabulary has exactly one entry per extracted token.
vocabulary_size = len(vectorizer.vocabulary_)
print("The size of the vocabulary in this case is:", vocabulary_size)

#### 2.2

Split the dataset into 80% for training and 20% for testing

In [None]:
# Hold out 20% of the posts for testing. The fixed seed keeps the split, and therefore
# every score computed from it, reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)

#### 2.3

"Train and test the following classifiers, for both the emotion and the sentiment classification, using word frequency as features."

###### 2.3.1 Multinomial Naive Bayes Classifier

In [None]:
# Emotion
# Fit a default Multinomial Naive Bayes model on the training split and predict the test posts.
nb_emotion = MultinomialNB().fit(X_train, y_train['Emotion'])
y_pred = nb_emotion.predict(X_test)
y_true = y_test['Emotion']

# Classification report for 2.4
description = "Multinomial Naive Bayes classification of Emotion with default hyperparameters: alpha= 1.0, fit_prior= True,class_prior= None  \n"
builder.write(description)

# Per-class metrics, restricted to the labels that were actually predicted.
clf_report = classification_report(y_true, y_pred, labels=np.unique(y_pred), output_dict=True)
# One row per class after transposing; the last row of the report frame is dropped before plotting.
report_table = pd.DataFrame(clf_report).iloc[:-1, :].T
fig, ax = plt.subplots(figsize=(10, 10))
smap = sns.heatmap(report_table, annot=True, ax=ax)
fig.savefig("report.png")
builder.write("Classification Report: \n")
builder.insert_image("report.png")
builder.write("\n")

builder.write("Confusion Matrix \n")
cf_matrix = confusion_matrix(y_true, y_pred)
# Tick names: row index of the true-vs-predicted crosstab without its trailing 'All' margin.
axis = list(
    pd.crosstab(y_true, y_pred, rownames=['True'], colnames=['Predicted'], margins=True).index
)[:-1]
fig, ax = plt.subplots(figsize=(20, 20))
cmap = sns.heatmap(cf_matrix, annot=True, fmt='g', ax=ax)
ax.set(xlabel='Predicted labels', ylabel='True labels')
ax.tick_params(axis='both', which='major', labelsize=8)
ax.set_xticklabels(axis)
ax.set_yticklabels(axis)
fig.savefig("cmatrix.png")
builder.insert_image("cmatrix.png")


In [None]:
# Sentiment
# Fit a default Multinomial Naive Bayes model on the sentiment labels and predict the test posts.
nb_sentiment = MultinomialNB()
nb_sentiment = nb_sentiment.fit(X_train, y_train['Sentiment'])
y_pred = nb_sentiment.predict(X_test)

# Single source of truth for the ground truth used by the report, the matrix and the tick labels.
y_true = y_test['Sentiment']

# Classification report for 2.4
description = "Multinomial Naive Bayes classification of Sentiment with default hyperparameters: alpha= 1.0, fit_prior= True,class_prior= None  \n"
builder.write(description)
# Per-class metrics, restricted to the labels that were actually predicted.
clf_report = classification_report(y_true, y_pred, labels=np.unique(y_pred), output_dict=True)
# One row per class after transposing; the last row of the report frame is dropped before plotting.
smap = sns.heatmap(pd.DataFrame(clf_report).iloc[:-1, :].T, annot=True)
fig = smap.get_figure()
fig.savefig("report.png")
builder.write("Classification Report: \n")
builder.insert_image("report.png")
builder.write("\n")

# Confusion matrix, written to the report in the same way as for the other classifiers.
builder.write("Confusion Matrix \n")
cf_matrix = confusion_matrix(y_true, y_pred)
fig, ax = plt.subplots(figsize=(20, 20))
cmap = sns.heatmap(cf_matrix, annot=True, fmt='g', ax=ax)
# Tick names: row index of the true-vs-predicted crosstab without its trailing 'All' margin.
axis = pd.crosstab(y_true, y_pred, rownames=['True'], colnames=['Predicted'], margins=True)
axis = list(axis.index)[:-1]
ax.set_xlabel('Predicted labels')
ax.set_ylabel('True labels')
ax.tick_params(axis='both', which='major', labelsize=8)
ax.xaxis.set_ticklabels(axis)
ax.yaxis.set_ticklabels(axis)
fig = cmap.get_figure()
fig.savefig("cmatrix.png")
builder.insert_image("cmatrix.png")

###### 2.3.2 Decision Tree

In [None]:
# Emotion
# Fit a default decision tree on the training split and predict the test posts.
dt_emotion = DecisionTreeClassifier().fit(X_train, y_train['Emotion'])
y_pred = dt_emotion.predict(X_test)
y_true = y_test['Emotion']

# Classification report for 2.4
description = "Decision Tree classification of Emotion with default hyperparameters: criterion(quality of split)= 'gini'(Gini impurity), splitter= 'best' (choose the best split), max_depth=None, min_sample_leaf=1...  \n"
builder.write(description)

# Per-class metrics, restricted to the labels that were actually predicted.
clf_report = classification_report(y_true, y_pred, labels=np.unique(y_pred), output_dict=True)
# One row per class after transposing; the last row of the report frame is dropped before plotting.
report_table = pd.DataFrame(clf_report).iloc[:-1, :].T
smap = sns.heatmap(report_table, annot=True)
smap.get_figure().savefig("report.png")
builder.write("Classification Report: \n")
builder.insert_image("report.png")
builder.write("\n")

builder.write("Confusion Matrix \n")
cf_matrix = confusion_matrix(y_true, y_pred)
# Tick names: row index of the true-vs-predicted crosstab without its trailing 'All' margin.
axis = list(
    pd.crosstab(y_true, y_pred, rownames=['True'], colnames=['Predicted'], margins=True).index
)[:-1]
fig, ax = plt.subplots(figsize=(20, 20))
cmap = sns.heatmap(cf_matrix, annot=True, fmt='g', ax=ax)
ax.set(xlabel='Predicted labels', ylabel='True labels')
ax.tick_params(axis='both', which='major', labelsize=8)
ax.set_xticklabels(axis)
ax.set_yticklabels(axis)
fig.savefig("cmatrix.png")
builder.insert_image("cmatrix.png")

In [None]:
# Depth reached by the unconstrained emotion tree, printed for later reference.
emotion_tree_depth = dt_emotion.get_depth()
print("depth of tree", emotion_tree_depth)

In [None]:
# Sentiment
# Fit a default decision tree on the sentiment labels and predict the test posts.
dt_sentiment = DecisionTreeClassifier()
dt_sentiment = dt_sentiment.fit(X_train, y_train['Sentiment'])
y_pred = dt_sentiment.predict(X_test)
#takes a long time to run

# The predictions are sentiment labels, so everything below is scored against the
# Sentiment column. Keeping it in one variable prevents mixing it up with Emotion.
y_true = y_test['Sentiment']

# Classification report for 2.4
description = "Decision Tree classification of Sentiment with default hyperparameters: criterion(quality of split)= 'gini'(Gini impurity), splitter= 'best' (choose the best split), max_depth=None, min_sample_leaf=1...  \n"
builder.write(description)
# Per-class metrics, restricted to the labels that were actually predicted.
clf_report = classification_report(y_true, y_pred, labels=np.unique(y_pred), output_dict=True)
# One row per class after transposing; the last row of the report frame is dropped before plotting.
smap = sns.heatmap(pd.DataFrame(clf_report).iloc[:-1, :].T, annot=True)
fig = smap.get_figure()
fig.savefig("report.png")
builder.write("Classification Report: \n")
builder.insert_image("report.png")
builder.write("\n")

builder.write("Confusion Matrix \n")
cf_matrix = confusion_matrix(y_true, y_pred)
fig, ax = plt.subplots(figsize=(20, 20))
cmap = sns.heatmap(cf_matrix, annot=True, fmt='g', ax=ax)
# Tick names: row index of the true-vs-predicted crosstab without its trailing 'All' margin.
axis = pd.crosstab(y_true, y_pred, rownames=['True'], colnames=['Predicted'], margins=True)
axis = list(axis.index)[:-1]
ax.set_xlabel('Predicted labels')
ax.set_ylabel('True labels')
ax.tick_params(axis='both', which='major', labelsize=8)
ax.xaxis.set_ticklabels(axis)
ax.yaxis.set_ticklabels(axis)
fig = cmap.get_figure()
fig.savefig("cmatrix.png")
builder.insert_image("cmatrix.png")

In [None]:
# Depth reached by the unconstrained sentiment tree, printed for later reference.
sentiment_tree_depth = dt_sentiment.get_depth()
print("depth of tree", sentiment_tree_depth)

###### 2.3.3 Multi-Layered Perceptron

In [None]:
# Emotion
# Fit a default multi-layer perceptron on the training split and predict the test posts.
mlp_emotion = MLPClassifier().fit(X_train, y_train['Emotion'])
y_pred = mlp_emotion.predict(X_test)
y_true = y_test['Emotion']

# Classification report for 2.4
description = "Multi-Layered perceptron classificator for Emotion with default hyperparameters: one hidden layer with 100 neurons, activation= 'relu',solver='adam', alpha(regularization)=0.0001... \n"
builder.write(description)

# Per-class metrics, restricted to the labels that were actually predicted.
clf_report = classification_report(y_true, y_pred, labels=np.unique(y_pred), output_dict=True)
# One row per class after transposing; the last row of the report frame is dropped before plotting.
report_table = pd.DataFrame(clf_report).iloc[:-1, :].T
smap = sns.heatmap(report_table, annot=True)
smap.get_figure().savefig("report.png")
builder.write("Classification Report: \n")
builder.insert_image("report.png")
builder.write("\n")

builder.write("Confusion Matrix \n")
cf_matrix = confusion_matrix(y_true, y_pred)
# Tick names: row index of the true-vs-predicted crosstab without its trailing 'All' margin.
axis = list(
    pd.crosstab(y_true, y_pred, rownames=['True'], colnames=['Predicted'], margins=True).index
)[:-1]
fig, ax = plt.subplots(figsize=(20, 20))
cmap = sns.heatmap(cf_matrix, annot=True, fmt='g', ax=ax)
ax.set(xlabel='Predicted labels', ylabel='True labels')
ax.tick_params(axis='both', which='major', labelsize=8)
ax.set_xticklabels(axis)
ax.set_yticklabels(axis)
fig.savefig("cmatrix.png")
builder.insert_image("cmatrix.png")

In [None]:
# Sentiment
# Fit a default multi-layer perceptron on the sentiment labels and predict the test posts.
mlp_sentiment = MLPClassifier()
mlp_sentiment = mlp_sentiment.fit(X_train, y_train['Sentiment'])
y_pred = mlp_sentiment.predict(X_test)

# The predictions are sentiment labels, so the report, the confusion matrix and the
# tick labels are all computed against the Sentiment column.
y_true = y_test['Sentiment']

# Classification report for 2.4
description = "Multi-Layered perceptron classificator for Sentiment with default hyperparameters: one hidden layer with 100 neurons, activation= 'relu',solver='adam', alpha(regularization)=0.0001... \n"
builder.write(description)
# Per-class metrics, restricted to the labels that were actually predicted.
clf_report = classification_report(y_true, y_pred, labels=np.unique(y_pred), output_dict=True)
# One row per class after transposing; the last row of the report frame is dropped before plotting.
smap = sns.heatmap(pd.DataFrame(clf_report).iloc[:-1, :].T, annot=True)
fig = smap.get_figure()
fig.savefig("report.png")
builder.write("Classification Report: \n")
builder.insert_image("report.png")
builder.write("\n")

builder.write("Confusion Matrix \n")
cf_matrix = confusion_matrix(y_true, y_pred)
fig, ax = plt.subplots(figsize=(20, 20))
cmap = sns.heatmap(cf_matrix, annot=True, fmt='g', ax=ax)
# Tick names: row index of the true-vs-predicted crosstab without its trailing 'All' margin.
axis = pd.crosstab(y_true, y_pred, rownames=['True'], colnames=['Predicted'], margins=True)
axis = list(axis.index)[:-1]
ax.set_xlabel('Predicted labels')
ax.set_ylabel('True labels')
ax.tick_params(axis='both', which='major', labelsize=8)
ax.xaxis.set_ticklabels(axis)
ax.yaxis.set_ticklabels(axis)
fig = cmap.get_figure()
fig.savefig("cmatrix.png")
builder.insert_image("cmatrix.png")


###### 2.3.4 **Better** Multinomial Naive Bayes Classifier

In [None]:
# Emotion
# Candidate smoothing strengths for Multinomial Naive Bayes.
# NOTE(review): alpha=0 means no smoothing; scikit-learn may warn about or adjust it
# depending on the installed version — confirm it is wanted in the grid.
param_grid = {
    "alpha": [0.25,0.5,1.2, 0, 2]
}

# 3-fold cross-validated search over the grid, using all CPU cores.
grid_search = GridSearchCV(MultinomialNB(), param_grid, cv=3, verbose=2, n_jobs=-1)
grid_search.fit(X_train, y_train['Emotion'])

# best parameter after tuning
print(grid_search.best_params_)
# predict() on the fitted search delegates to the best estimator found.
y_pred = grid_search.predict(X_test)
y_true = y_test['Emotion']

# Classification report for 2.4
# best_params_ is a dict, so it is converted with str() before being appended to the text.
description = "Better Multinomial Naive Bayes for emotion classification with hyperparameters tunned by grid search:" + str(grid_search.best_params_) + " \n"
builder.write(description)
# Per-class metrics, restricted to the labels that were actually predicted.
clf_report = classification_report(y_true, y_pred, labels=np.unique(y_pred), output_dict=True)
# One row per class after transposing; the last row of the report frame is dropped before plotting.
smap = sns.heatmap(pd.DataFrame(clf_report).iloc[:-1, :].T, annot=True)
fig = smap.get_figure()
fig.savefig("report.png")
builder.write("Classification Report: \n")
builder.insert_image("report.png")
builder.write("\n")

builder.write("Confusion Matrix \n")
cf_matrix = confusion_matrix(y_true, y_pred)
fig, ax = plt.subplots(figsize=(20, 20))
cmap = sns.heatmap(cf_matrix, annot=True, fmt='g', ax=ax)
# Tick names: row index of the true-vs-predicted crosstab without its trailing 'All' margin.
axis = pd.crosstab(y_true, y_pred, rownames=['True'], colnames=['Predicted'], margins=True)
axis = list(axis.index)[:-1]
ax.set_xlabel('Predicted labels')
ax.set_ylabel('True labels')
ax.tick_params(axis='both', which='major', labelsize=8)
ax.xaxis.set_ticklabels(axis)
ax.yaxis.set_ticklabels(axis)
fig = cmap.get_figure()
fig.savefig("cmatrix.png")
builder.insert_image("cmatrix.png")
    
    
    



In [None]:
# Sentiment
# Candidate smoothing strengths for Multinomial Naive Bayes.
# NOTE(review): alpha=0 means no smoothing; scikit-learn may warn about or adjust it
# depending on the installed version — confirm it is wanted in the grid.
param_grid = {
    "alpha": [0.25,0.5,1.2, 0, 2]
}

# 3-fold cross-validated search over the grid, using all CPU cores.
grid_search = GridSearchCV(MultinomialNB(), param_grid, cv=3, verbose=2, n_jobs=-1)
grid_search.fit(X_train, y_train['Sentiment'])

# best parameter after tuning
print(grid_search.best_params_)
# predict() on the fitted search delegates to the best estimator found.
y_pred = grid_search.predict(X_test)
# Sentiment ground truth, shared by the report, the confusion matrix and the tick labels
# so that the number of tick names always matches the matrix size.
y_true = y_test['Sentiment']

# Classification report for 2.4
# best_params_ is a dict, so it is converted with str() before being appended to the text.
description = "Better Multinomial Naive Bayes for sentiment classification with hyperparameters tunned by grid search:" + str(grid_search.best_params_) + " \n"
builder.write(description)
# Per-class metrics, restricted to the labels that were actually predicted.
clf_report = classification_report(y_true, y_pred, labels=np.unique(y_pred), output_dict=True)
# One row per class after transposing; the last row of the report frame is dropped before plotting.
smap = sns.heatmap(pd.DataFrame(clf_report).iloc[:-1, :].T, annot=True)
fig = smap.get_figure()
fig.savefig("report.png")
builder.write("Classification Report: \n")
builder.insert_image("report.png")
builder.write("\n")

builder.write("Confusion Matrix \n")
cf_matrix = confusion_matrix(y_true, y_pred)
fig, ax = plt.subplots(figsize=(20, 20))
cmap = sns.heatmap(cf_matrix, annot=True, fmt='g', ax=ax)
# Tick names: row index of the true-vs-predicted crosstab without its trailing 'All' margin.
axis = pd.crosstab(y_true, y_pred, rownames=['True'], colnames=['Predicted'], margins=True)
axis = list(axis.index)[:-1]
ax.set_xlabel('Predicted labels')
ax.set_ylabel('True labels')
ax.tick_params(axis='both', which='major', labelsize=8)
ax.xaxis.set_ticklabels(axis)
ax.yaxis.set_ticklabels(axis)
fig = cmap.get_figure()
fig.savefig("cmatrix.png")
builder.insert_image("cmatrix.png")


###### 2.3.5 **Better** Decision Tree

In [None]:
# Emotion
# Search space for the decision tree. The float min_samples_split values are
# fractions of the training samples, per scikit-learn's parameter convention.
param_grid = {
    "criterion":["gini","entropy"],
    "max_depth":[700, 1000],
    "min_samples_split":[0.1,0.50,0.70]
}

# 3-fold cross-validated search over the grid, using all CPU cores.
grid_search = GridSearchCV(DecisionTreeClassifier(), param_grid, cv=3, verbose=2, n_jobs=-1)
print(grid_search.get_params().keys())
grid_search.fit(X_train, y_train['Emotion'])

# best parameter after tuning
print(grid_search.best_params_)
# predict() on the fitted search delegates to the best estimator found.
y_pred = grid_search.predict(X_test)
y_true = y_test['Emotion']

# Classification report for 2.4
# best_params_ is a dict, so it is converted with str() before being appended to the text.
description = "Better Desicion Tree for emotion classification with hyperparameters tunned by grid search:" + str(grid_search.best_params_) + " \n"
builder.write(description)
# Per-class metrics, restricted to the labels that were actually predicted.
clf_report = classification_report(y_true, y_pred, labels=np.unique(y_pred), output_dict=True)
# One row per class after transposing; the last row of the report frame is dropped before plotting.
smap = sns.heatmap(pd.DataFrame(clf_report).iloc[:-1, :].T, annot=True)
fig = smap.get_figure()
fig.savefig("report.png")
builder.write("Classification Report: \n")
builder.insert_image("report.png")
builder.write("\n")

builder.write("Confusion Matrix \n")
cf_matrix = confusion_matrix(y_true, y_pred)
fig, ax = plt.subplots(figsize=(20, 20))
cmap = sns.heatmap(cf_matrix, annot=True, fmt='g', ax=ax)
# Tick names: row index of the true-vs-predicted crosstab without its trailing 'All' margin.
axis = pd.crosstab(y_true, y_pred, rownames=['True'], colnames=['Predicted'], margins=True)
axis = list(axis.index)[:-1]
ax.set_xlabel('Predicted labels')
ax.set_ylabel('True labels')
ax.tick_params(axis='both', which='major', labelsize=8)
ax.xaxis.set_ticklabels(axis)
ax.yaxis.set_ticklabels(axis)
fig = cmap.get_figure()
fig.savefig("cmatrix.png")
builder.insert_image("cmatrix.png")


In [None]:
# Sentiment
# Search space for the decision tree. The float min_samples_split values are
# fractions of the training samples, per scikit-learn's parameter convention.
param_grid = {
    "criterion":["gini","entropy"],
    "max_depth":[700, 1000],
    "min_samples_split":[0.1,0.50,0.70]
}

# 3-fold cross-validated search over the grid, using all CPU cores.
grid_search = GridSearchCV(DecisionTreeClassifier(), param_grid, cv=3, verbose=2, n_jobs=-1)
print(grid_search.get_params().keys())
grid_search.fit(X_train, y_train['Sentiment'])

# best parameter after tuning
print(grid_search.best_params_)
# predict() on the fitted search delegates to the best estimator found.
y_pred = grid_search.predict(X_test)
y_true = y_test['Sentiment']

# Classification report for 2.4
# The text names the model and target evaluated in this cell. best_params_ is a
# dict, so it is converted with str() before being appended.
description = "Better Decision Tree for sentiment classification with hyperparameters tunned by grid search:" + str(grid_search.best_params_) + " \n"
builder.write(description)
# Per-class metrics, restricted to the labels that were actually predicted.
clf_report = classification_report(y_true, y_pred, labels=np.unique(y_pred), output_dict=True)
# One row per class after transposing; the last row of the report frame is dropped before plotting.
smap = sns.heatmap(pd.DataFrame(clf_report).iloc[:-1, :].T, annot=True)
fig = smap.get_figure()
fig.savefig("report.png")
builder.write("Classification Report: \n")
builder.insert_image("report.png")
builder.write("\n")

builder.write("Confusion Matrix \n")
cf_matrix = confusion_matrix(y_true, y_pred)
fig, ax = plt.subplots(figsize=(20, 20))
cmap = sns.heatmap(cf_matrix, annot=True, fmt='g', ax=ax)
# Tick names: row index of the true-vs-predicted crosstab without its trailing 'All' margin.
axis = pd.crosstab(y_true, y_pred, rownames=['True'], colnames=['Predicted'], margins=True)
axis = list(axis.index)[:-1]
ax.set_xlabel('Predicted labels')
ax.set_ylabel('True labels')
ax.tick_params(axis='both', which='major', labelsize=8)
ax.xaxis.set_ticklabels(axis)
ax.yaxis.set_ticklabels(axis)
fig = cmap.get_figure()
fig.savefig("cmatrix.png")
builder.insert_image("cmatrix.png")



###### 2.3.6 **Better** Multilayered Perceptron

In [None]:
# Emotion
# Search space for the perceptron. scikit-learn names the sigmoid activation
# 'logistic'; 'sigmoid' is not an accepted value and would make those fits fail.
param_grid = {
    "activation":['logistic', 'identity', 'tanh', 'relu'],
    # NOTE(review): each tuple lists the width of every hidden layer, so (1,100) is a
    # 1-neuron layer followed by a 100-neuron layer — confirm this is the intended architecture.
    "hidden_layer_sizes":[(1,100), (2,30)],
    "solver":['adam','sgd']
}

# 3-fold cross-validated search over the grid, using all CPU cores.
grid_search = GridSearchCV(MLPClassifier(), param_grid, cv=3, verbose=2, n_jobs=-1)
print(grid_search.get_params().keys())
grid_search.fit(X_train, y_train['Emotion'])

# best parameter after tuning
print(grid_search.best_params_)
# predict() on the fitted search delegates to the best estimator found.
y_pred = grid_search.predict(X_test)
y_true = y_test['Emotion']

# Classification report for 2.4
# best_params_ is a dict, so it is converted with str() before being appended to the text.
description = "Better Multilayered Perceptron for emotion classification with hyperparameters tunned by grid search:" + str(grid_search.best_params_) + " \n"
builder.write(description)
# Per-class metrics, restricted to the labels that were actually predicted.
clf_report = classification_report(y_true, y_pred, labels=np.unique(y_pred), output_dict=True)
# One row per class after transposing; the last row of the report frame is dropped before plotting.
smap = sns.heatmap(pd.DataFrame(clf_report).iloc[:-1, :].T, annot=True)
fig = smap.get_figure()
fig.savefig("report.png")
builder.write("Classification Report: \n")
builder.insert_image("report.png")
builder.write("\n")

builder.write("Confusion Matrix \n")
cf_matrix = confusion_matrix(y_true, y_pred)
fig, ax = plt.subplots(figsize=(20, 20))
cmap = sns.heatmap(cf_matrix, annot=True, fmt='g', ax=ax)
# Tick names: row index of the true-vs-predicted crosstab without its trailing 'All' margin.
axis = pd.crosstab(y_true, y_pred, rownames=['True'], colnames=['Predicted'], margins=True)
axis = list(axis.index)[:-1]
ax.set_xlabel('Predicted labels')
ax.set_ylabel('True labels')
ax.tick_params(axis='both', which='major', labelsize=8)
ax.xaxis.set_ticklabels(axis)
ax.yaxis.set_ticklabels(axis)
fig = cmap.get_figure()
fig.savefig("cmatrix.png")
builder.insert_image("cmatrix.png")



In [None]:
# Sentiment
# Search space for the perceptron. scikit-learn names the sigmoid activation
# 'logistic'; 'sigmoid' is not an accepted value and would make those fits fail.
param_grid = {
    "activation":['logistic', 'identity', 'tanh', 'relu'],
    # NOTE(review): each tuple lists the width of every hidden layer, so (1,100) is a
    # 1-neuron layer followed by a 100-neuron layer — confirm this is the intended architecture.
    "hidden_layer_sizes":[(1,100), (2,30)],
    "solver":['adam','sgd']
}

# 3-fold cross-validated search over the grid, using all CPU cores.
grid_search = GridSearchCV(MLPClassifier(), param_grid, cv=3, verbose=2, n_jobs=-1)
print(grid_search.get_params().keys())
grid_search.fit(X_train, y_train['Sentiment'])

# best parameter after tuning
print(grid_search.best_params_)
# predict() on the fitted search delegates to the best estimator found.
y_pred = grid_search.predict(X_test)
# The predictions are sentiment labels, so the report, the confusion matrix and the
# tick labels are all computed against the Sentiment column.
y_true = y_test['Sentiment']

# Classification report for 2.4
# best_params_ is a dict, so it is converted with str() before being appended to the text.
description = "Better Multilayered Perceptron for sentiment classification with hyperparameters tunned by grid search:" + str(grid_search.best_params_) + " \n"
builder.write(description)
# Per-class metrics, restricted to the labels that were actually predicted.
clf_report = classification_report(y_true, y_pred, labels=np.unique(y_pred), output_dict=True)
# One row per class after transposing; the last row of the report frame is dropped before plotting.
smap = sns.heatmap(pd.DataFrame(clf_report).iloc[:-1, :].T, annot=True)
fig = smap.get_figure()
fig.savefig("report.png")
builder.write("Classification Report: \n")
builder.insert_image("report.png")
builder.write("\n")

builder.write("Confusion Matrix \n")
cf_matrix = confusion_matrix(y_true, y_pred)
fig, ax = plt.subplots(figsize=(20, 20))
cmap = sns.heatmap(cf_matrix, annot=True, fmt='g', ax=ax)
# Tick names: row index of the true-vs-predicted crosstab without its trailing 'All' margin.
axis = pd.crosstab(y_true, y_pred, rownames=['True'], colnames=['Predicted'], margins=True)
axis = list(axis.index)[:-1]
ax.set_xlabel('Predicted labels')
ax.set_ylabel('True labels')
ax.tick_params(axis='both', which='major', labelsize=8)
ax.xaxis.set_ticklabels(axis)
ax.yaxis.set_ticklabels(axis)
fig = cmap.get_figure()
fig.savefig("cmatrix.png")
builder.insert_image("cmatrix.png")



###### 2.4 Produce and save the following information in a file called performance

In [None]:
# save document
# Persist the report assembled so far. `doc` and `builder` are re-created in the
# tf-idf section (2.5), so anything not saved at this point would be discarded.
doc.save("performance.docx")

###### 2.5  Use tf-idf instead of word frequencies and redo all substeps of 2.3 above

In [None]:
# Re-vectorise the posts with tf-idf weights instead of raw counts.
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(df['Text'])
# Redo the 80/20 split on the new features. The fixed seed keeps the split, and
# every score computed from it, reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)
# Start a fresh report document for the tf-idf results.
doc = aw.Document()
builder = aw.DocumentBuilder(doc)
builder.writeln("2.5 Repetition of steps of 2.3 with tf-idf")

###### 2.5.1 Multinomial Naive Bayes Classifier with tf-idf




In [None]:
# Emotion
# Fit a default Multinomial Naive Bayes model on the training split and predict the test posts.
nb_emotion = MultinomialNB().fit(X_train, y_train['Emotion'])
y_pred = nb_emotion.predict(X_test)
y_true = y_test['Emotion']

# Classification report for 2.4
description = "Multinomial Naive Bayes classification of Emotion with default hyperparameters: alpha= 1.0, fit_prior= True,class_prior= None  \n"
builder.write(description)

# Per-class metrics, restricted to the labels that were actually predicted.
clf_report = classification_report(y_true, y_pred, labels=np.unique(y_pred), output_dict=True)
# One row per class after transposing; the last row of the report frame is dropped before plotting.
report_table = pd.DataFrame(clf_report).iloc[:-1, :].T
fig, ax = plt.subplots(figsize=(10, 10))
smap = sns.heatmap(report_table, annot=True, ax=ax)
fig.savefig("report.png")
builder.write("Classification Report: \n")
builder.insert_image("report.png")
builder.write("\n")

builder.write("Confusion Matrix \n")
cf_matrix = confusion_matrix(y_true, y_pred)
# Tick names: row index of the true-vs-predicted crosstab without its trailing 'All' margin.
axis = list(
    pd.crosstab(y_true, y_pred, rownames=['True'], colnames=['Predicted'], margins=True).index
)[:-1]
fig, ax = plt.subplots(figsize=(20, 20))
cmap = sns.heatmap(cf_matrix, annot=True, fmt='g', ax=ax)
ax.set(xlabel='Predicted labels', ylabel='True labels')
ax.tick_params(axis='both', which='major', labelsize=8)
ax.set_xticklabels(axis)
ax.set_yticklabels(axis)
fig.savefig("cmatrix.png")
builder.insert_image("cmatrix.png")


In [None]:
# Sentiment
# Fit a default Multinomial Naive Bayes model on the sentiment labels and predict the test posts.
nb_sentiment = MultinomialNB()
nb_sentiment = nb_sentiment.fit(X_train, y_train['Sentiment'])
y_pred = nb_sentiment.predict(X_test)

# Single source of truth for the ground truth used by the report, the matrix and the tick labels.
y_true = y_test['Sentiment']

# Classification report for 2.4
description = "Multinomial Naive Bayes classification of Sentiment with default hyperparameters: alpha= 1.0, fit_prior= True,class_prior= None  \n"
builder.write(description)
# Per-class metrics, restricted to the labels that were actually predicted.
clf_report = classification_report(y_true, y_pred, labels=np.unique(y_pred), output_dict=True)
# One row per class after transposing; the last row of the report frame is dropped before plotting.
smap = sns.heatmap(pd.DataFrame(clf_report).iloc[:-1, :].T, annot=True)
fig = smap.get_figure()
fig.savefig("report.png")
builder.write("Classification Report: \n")
builder.insert_image("report.png")
builder.write("\n")

# Confusion matrix, written to the report in the same way as for the other classifiers.
builder.write("Confusion Matrix \n")
cf_matrix = confusion_matrix(y_true, y_pred)
fig, ax = plt.subplots(figsize=(20, 20))
cmap = sns.heatmap(cf_matrix, annot=True, fmt='g', ax=ax)
# Tick names: row index of the true-vs-predicted crosstab without its trailing 'All' margin.
axis = pd.crosstab(y_true, y_pred, rownames=['True'], colnames=['Predicted'], margins=True)
axis = list(axis.index)[:-1]
ax.set_xlabel('Predicted labels')
ax.set_ylabel('True labels')
ax.tick_params(axis='both', which='major', labelsize=8)
ax.xaxis.set_ticklabels(axis)
ax.yaxis.set_ticklabels(axis)
fig = cmap.get_figure()
fig.savefig("cmatrix.png")
builder.insert_image("cmatrix.png")

###### 2.5.2 Decision Tree with tf-idf

In [None]:
# Emotion
# Fit a default decision tree on the training split and predict the test posts.
dt_emotion = DecisionTreeClassifier().fit(X_train, y_train['Emotion'])
y_pred = dt_emotion.predict(X_test)
y_true = y_test['Emotion']

# Classification report for 2.4
description = "Decision Tree classification of Emotion with default hyperparameters: criterion(quality of split)= 'gini'(Gini impurity), splitter= 'best' (choose the best split), max_depth=None, min_sample_leaf=1...  \n"
builder.write(description)

# Per-class metrics, restricted to the labels that were actually predicted.
clf_report = classification_report(y_true, y_pred, labels=np.unique(y_pred), output_dict=True)
# One row per class after transposing; the last row of the report frame is dropped before plotting.
report_table = pd.DataFrame(clf_report).iloc[:-1, :].T
smap = sns.heatmap(report_table, annot=True)
smap.get_figure().savefig("report.png")
builder.write("Classification Report: \n")
builder.insert_image("report.png")
builder.write("\n")

builder.write("Confusion Matrix \n")
cf_matrix = confusion_matrix(y_true, y_pred)
# Tick names: row index of the true-vs-predicted crosstab without its trailing 'All' margin.
axis = list(
    pd.crosstab(y_true, y_pred, rownames=['True'], colnames=['Predicted'], margins=True).index
)[:-1]
fig, ax = plt.subplots(figsize=(20, 20))
cmap = sns.heatmap(cf_matrix, annot=True, fmt='g', ax=ax)
ax.set(xlabel='Predicted labels', ylabel='True labels')
ax.tick_params(axis='both', which='major', labelsize=8)
ax.set_xticklabels(axis)
ax.set_yticklabels(axis)
fig.savefig("cmatrix.png")
builder.insert_image("cmatrix.png")

In [None]:
# Depth reached by the unconstrained emotion tree, printed for later reference.
emotion_tree_depth = dt_emotion.get_depth()
print("depth of tree", emotion_tree_depth)

In [None]:
# Sentiment
# Fit a default decision tree on the sentiment labels and predict the test posts.
dt_sentiment = DecisionTreeClassifier()
dt_sentiment = dt_sentiment.fit(X_train, y_train['Sentiment'])
y_pred = dt_sentiment.predict(X_test)
#takes a long time to run

# The predictions are sentiment labels, so everything below is scored against the
# Sentiment column. Keeping it in one variable prevents mixing it up with Emotion.
y_true = y_test['Sentiment']

# Classification report for 2.4
description = "Decision Tree classification of Sentiment with default hyperparameters: criterion(quality of split)= 'gini'(Gini impurity), splitter= 'best' (choose the best split), max_depth=None, min_sample_leaf=1...  \n"
builder.write(description)
# Per-class metrics, restricted to the labels that were actually predicted.
clf_report = classification_report(y_true, y_pred, labels=np.unique(y_pred), output_dict=True)
# One row per class after transposing; the last row of the report frame is dropped before plotting.
smap = sns.heatmap(pd.DataFrame(clf_report).iloc[:-1, :].T, annot=True)
fig = smap.get_figure()
fig.savefig("report.png")
builder.write("Classification Report: \n")
builder.insert_image("report.png")
builder.write("\n")

builder.write("Confusion Matrix \n")
cf_matrix = confusion_matrix(y_true, y_pred)
fig, ax = plt.subplots(figsize=(20, 20))
cmap = sns.heatmap(cf_matrix, annot=True, fmt='g', ax=ax)
# Tick names: row index of the true-vs-predicted crosstab without its trailing 'All' margin.
axis = pd.crosstab(y_true, y_pred, rownames=['True'], colnames=['Predicted'], margins=True)
axis = list(axis.index)[:-1]
ax.set_xlabel('Predicted labels')
ax.set_ylabel('True labels')
ax.tick_params(axis='both', which='major', labelsize=8)
ax.xaxis.set_ticklabels(axis)
ax.yaxis.set_ticklabels(axis)
fig = cmap.get_figure()
fig.savefig("cmatrix.png")
builder.insert_image("cmatrix.png")

In [None]:
# Depth reached by the unconstrained sentiment tree, printed for later reference.
sentiment_tree_depth = dt_sentiment.get_depth()
print("depth of tree", sentiment_tree_depth)

###### 2.5.3 Multi-Layered Perceptron with tf-idf

In [None]:
# Emotion
# Fit a default multi-layer perceptron on the training split and predict the test posts.
mlp_emotion = MLPClassifier().fit(X_train, y_train['Emotion'])
y_pred = mlp_emotion.predict(X_test)
y_true = y_test['Emotion']

# Classification report for 2.4
description = "Multi-Layered perceptron classificator for Emotion with default hyperparameters: one hidden layer with 100 neurons, activation= 'relu',solver='adam', alpha(regularization)=0.0001... \n"
builder.write(description)

# Per-class metrics, restricted to the labels that were actually predicted.
clf_report = classification_report(y_true, y_pred, labels=np.unique(y_pred), output_dict=True)
# One row per class after transposing; the last row of the report frame is dropped before plotting.
report_table = pd.DataFrame(clf_report).iloc[:-1, :].T
smap = sns.heatmap(report_table, annot=True)
smap.get_figure().savefig("report.png")
builder.write("Classification Report: \n")
builder.insert_image("report.png")
builder.write("\n")

builder.write("Confusion Matrix \n")
cf_matrix = confusion_matrix(y_true, y_pred)
# Tick names: row index of the true-vs-predicted crosstab without its trailing 'All' margin.
axis = list(
    pd.crosstab(y_true, y_pred, rownames=['True'], colnames=['Predicted'], margins=True).index
)[:-1]
fig, ax = plt.subplots(figsize=(20, 20))
cmap = sns.heatmap(cf_matrix, annot=True, fmt='g', ax=ax)
ax.set(xlabel='Predicted labels', ylabel='True labels')
ax.tick_params(axis='both', which='major', labelsize=8)
ax.set_xticklabels(axis)
ax.set_yticklabels(axis)
fig.savefig("cmatrix.png")
builder.insert_image("cmatrix.png")

In [None]:
# Sentiment
# Fit a default multi-layer perceptron on the sentiment labels and predict the test posts.
mlp_sentiment = MLPClassifier()
mlp_sentiment = mlp_sentiment.fit(X_train, y_train['Sentiment'])
y_pred = mlp_sentiment.predict(X_test)

# The predictions are sentiment labels, so the report, the confusion matrix and the
# tick labels are all computed against the Sentiment column.
y_true = y_test['Sentiment']

# Classification report for 2.4
description = "Multi-Layered perceptron classificator for Sentiment with default hyperparameters: one hidden layer with 100 neurons, activation= 'relu',solver='adam', alpha(regularization)=0.0001... \n"
builder.write(description)
# Per-class metrics, restricted to the labels that were actually predicted.
clf_report = classification_report(y_true, y_pred, labels=np.unique(y_pred), output_dict=True)
# One row per class after transposing; the last row of the report frame is dropped before plotting.
smap = sns.heatmap(pd.DataFrame(clf_report).iloc[:-1, :].T, annot=True)
fig = smap.get_figure()
fig.savefig("report.png")
builder.write("Classification Report: \n")
builder.insert_image("report.png")
builder.write("\n")

builder.write("Confusion Matrix \n")
cf_matrix = confusion_matrix(y_true, y_pred)
fig, ax = plt.subplots(figsize=(20, 20))
cmap = sns.heatmap(cf_matrix, annot=True, fmt='g', ax=ax)
# Tick names: row index of the true-vs-predicted crosstab without its trailing 'All' margin.
axis = pd.crosstab(y_true, y_pred, rownames=['True'], colnames=['Predicted'], margins=True)
axis = list(axis.index)[:-1]
ax.set_xlabel('Predicted labels')
ax.set_ylabel('True labels')
ax.tick_params(axis='both', which='major', labelsize=8)
ax.xaxis.set_ticklabels(axis)
ax.yaxis.set_ticklabels(axis)
fig = cmap.get_figure()
fig.savefig("cmatrix.png")
builder.insert_image("cmatrix.png")


###### 2.5.4 **Better** Multinomial Naive Bayes Classifier with tf-idf

In [None]:
# Emotion
# Candidate smoothing strengths for Multinomial Naive Bayes.
# NOTE(review): alpha=0 means no smoothing; scikit-learn may warn about or adjust it
# depending on the installed version — confirm it is wanted in the grid.
param_grid = {
    "alpha": [0.25,0.5,1.2, 0, 2]
}

# 3-fold cross-validated search over the grid, using all CPU cores.
grid_search = GridSearchCV(MultinomialNB(), param_grid, cv=3, verbose=2, n_jobs=-1)
grid_search.fit(X_train, y_train['Emotion'])

# best parameter after tuning
print(grid_search.best_params_)
# predict() on the fitted search delegates to the best estimator found.
y_pred = grid_search.predict(X_test)
y_true = y_test['Emotion']

# Classification report for 2.4
# best_params_ is a dict, so it is converted with str() before being appended to the text.
description = "Better Multinomial Naive Bayes for emotion classification with hyperparameters tunned by grid search:" + str(grid_search.best_params_) + " \n"
builder.write(description)
# Per-class metrics, restricted to the labels that were actually predicted.
clf_report = classification_report(y_true, y_pred, labels=np.unique(y_pred), output_dict=True)
# One row per class after transposing; the last row of the report frame is dropped before plotting.
smap = sns.heatmap(pd.DataFrame(clf_report).iloc[:-1, :].T, annot=True)
fig = smap.get_figure()
fig.savefig("report.png")
builder.write("Classification Report: \n")
builder.insert_image("report.png")
builder.write("\n")

builder.write("Confusion Matrix \n")
cf_matrix = confusion_matrix(y_true, y_pred)
fig, ax = plt.subplots(figsize=(20, 20))
cmap = sns.heatmap(cf_matrix, annot=True, fmt='g', ax=ax)
# Tick names: row index of the true-vs-predicted crosstab without its trailing 'All' margin.
axis = pd.crosstab(y_true, y_pred, rownames=['True'], colnames=['Predicted'], margins=True)
axis = list(axis.index)[:-1]
ax.set_xlabel('Predicted labels')
ax.set_ylabel('True labels')
ax.tick_params(axis='both', which='major', labelsize=8)
ax.xaxis.set_ticklabels(axis)
ax.yaxis.set_ticklabels(axis)
fig = cmap.get_figure()
fig.savefig("cmatrix.png")
builder.insert_image("cmatrix.png")
    
    
    



In [None]:
# Sentiment
# Candidate smoothing strengths for Multinomial Naive Bayes.
# NOTE(review): alpha=0 means no smoothing; scikit-learn may warn about or adjust it
# depending on the installed version — confirm it is wanted in the grid.
param_grid = {
    "alpha": [0.25,0.5,1.2, 0, 2]
}

# 3-fold cross-validated search over the grid, using all CPU cores.
grid_search = GridSearchCV(MultinomialNB(), param_grid, cv=3, verbose=2, n_jobs=-1)
grid_search.fit(X_train, y_train['Sentiment'])

# best parameter after tuning
print(grid_search.best_params_)
# predict() on the fitted search delegates to the best estimator found.
y_pred = grid_search.predict(X_test)
# Sentiment ground truth, shared by the report, the confusion matrix and the tick labels
# so that the number of tick names always matches the matrix size.
y_true = y_test['Sentiment']

# Classification report for 2.4
# best_params_ is a dict, so it is converted with str() before being appended to the text.
description = "Better Multinomial Naive Bayes for sentiment classification with hyperparameters tunned by grid search:" + str(grid_search.best_params_) + " \n"
builder.write(description)
# Per-class metrics, restricted to the labels that were actually predicted.
clf_report = classification_report(y_true, y_pred, labels=np.unique(y_pred), output_dict=True)
# One row per class after transposing; the last row of the report frame is dropped before plotting.
smap = sns.heatmap(pd.DataFrame(clf_report).iloc[:-1, :].T, annot=True)
fig = smap.get_figure()
fig.savefig("report.png")
builder.write("Classification Report: \n")
builder.insert_image("report.png")
builder.write("\n")

builder.write("Confusion Matrix \n")
cf_matrix = confusion_matrix(y_true, y_pred)
fig, ax = plt.subplots(figsize=(20, 20))
cmap = sns.heatmap(cf_matrix, annot=True, fmt='g', ax=ax)
# Tick names: row index of the true-vs-predicted crosstab without its trailing 'All' margin.
axis = pd.crosstab(y_true, y_pred, rownames=['True'], colnames=['Predicted'], margins=True)
axis = list(axis.index)[:-1]
ax.set_xlabel('Predicted labels')
ax.set_ylabel('True labels')
ax.tick_params(axis='both', which='major', labelsize=8)
ax.xaxis.set_ticklabels(axis)
ax.yaxis.set_ticklabels(axis)
fig = cmap.get_figure()
fig.savefig("cmatrix.png")
builder.insert_image("cmatrix.png")


###### 2.5.5 **Better** Decision Tree with tf-idf

In [None]:
# Emotion
# Search space for the decision tree. The float min_samples_split values are
# fractions of the training samples, per scikit-learn's parameter convention.
param_grid = {
    "criterion":["gini","entropy"],
    "max_depth":[700, 1000],
    "min_samples_split":[0.1,0.50,0.70]
}

# 3-fold cross-validated search over the grid, using all CPU cores.
grid_search = GridSearchCV(DecisionTreeClassifier(), param_grid, cv=3, verbose=2, n_jobs=-1)
print(grid_search.get_params().keys())
grid_search.fit(X_train, y_train['Emotion'])

# best parameter after tuning
print(grid_search.best_params_)
# predict() on the fitted search delegates to the best estimator found.
y_pred = grid_search.predict(X_test)
y_true = y_test['Emotion']

# Classification report for 2.4
# best_params_ is a dict, so it is converted with str() before being appended to the text.
description = "Better Desicion Tree for emotion classification with hyperparameters tunned by grid search:" + str(grid_search.best_params_) + " \n"
builder.write(description)
# Per-class metrics, restricted to the labels that were actually predicted.
clf_report = classification_report(y_true, y_pred, labels=np.unique(y_pred), output_dict=True)
# One row per class after transposing; the last row of the report frame is dropped before plotting.
smap = sns.heatmap(pd.DataFrame(clf_report).iloc[:-1, :].T, annot=True)
fig = smap.get_figure()
fig.savefig("report.png")
builder.write("Classification Report: \n")
builder.insert_image("report.png")
builder.write("\n")

builder.write("Confusion Matrix \n")
cf_matrix = confusion_matrix(y_true, y_pred)
fig, ax = plt.subplots(figsize=(20, 20))
cmap = sns.heatmap(cf_matrix, annot=True, fmt='g', ax=ax)
# Tick names: row index of the true-vs-predicted crosstab without its trailing 'All' margin.
axis = pd.crosstab(y_true, y_pred, rownames=['True'], colnames=['Predicted'], margins=True)
axis = list(axis.index)[:-1]
ax.set_xlabel('Predicted labels')
ax.set_ylabel('True labels')
ax.tick_params(axis='both', which='major', labelsize=8)
ax.xaxis.set_ticklabels(axis)
ax.yaxis.set_ticklabels(axis)
fig = cmap.get_figure()
fig.savefig("cmatrix.png")
builder.insert_image("cmatrix.png")


In [None]:
# Sentiment: tune a Decision Tree on the features in X_train with a 3-fold grid
# search, then append its classification report and confusion matrix to the
# Word document through `builder`.
param_grid = {
    "criterion":["gini","entropy"],
    "max_depth":[700, 1000],
    "min_samples_split":[0.1,0.50,0.70]
}

grid_search = GridSearchCV(DecisionTreeClassifier(), param_grid, cv=3, verbose=2, n_jobs=-1)
print(grid_search.get_params().keys())
grid_search.fit(X_train, y_train['Sentiment'])

# Best parameter combination found by the search
print(grid_search.best_params_)
y_pred = grid_search.predict(X_test)

# Section header for the report. This cell trains a Decision Tree on the
# Sentiment labels, so the header says so. best_params_ is a dict and is
# converted with str() before concatenation (str + dict raises TypeError).
description="Better Desicion Tree for sentiment classification with hyperparameters tunned by grid search:"+str(grid_search.best_params_)+" \n"
builder.write(description)

# Classification report rendered as a heatmap; iloc[:-1] drops the last row
# (support) so only the score rows are plotted.
clf_report=classification_report(y_test['Sentiment'], y_pred,labels=np.unique(y_pred),output_dict=True)
fig, ax = plt.subplots()
smap=sns.heatmap(pd.DataFrame(clf_report).iloc[:-1, :].T, annot=True, ax=ax)
fig.savefig("report.png")
builder.write("Classification Report: \n")
builder.insert_image("report.png")
builder.write("\n")

# Confusion matrix. The same explicit label list drives both the matrix
# row/column order and the tick labels, so they cannot get out of sync.
builder.write("Confusion Matrix \n")
class_labels=sorted(set(y_test['Sentiment']) | set(y_pred))
cf_matrix=confusion_matrix(y_test['Sentiment'], y_pred, labels=class_labels)
fig, ax = plt.subplots(figsize=(20,20))
cmap=sns.heatmap(cf_matrix, annot=True,fmt='g', ax=ax, xticklabels=class_labels, yticklabels=class_labels)
ax.set_xlabel('Predicted labels')
ax.set_ylabel('True labels')
ax.tick_params(axis='both', which='major', labelsize=8)
fig.savefig("cmatrix.png")
builder.insert_image("cmatrix.png")



###### 2.5.6 **Better** Multilayered Perceptron with tf-idf

In [None]:
# Emotion: tune a Multi-Layered Perceptron on the features in X_train with a
# 3-fold grid search, then append its classification report and confusion
# matrix to the Word document through `builder`.
#
# 'logistic' is scikit-learn's name for the sigmoid activation; 'sigmoid' is
# not an accepted value and makes every fit using it fail.
#
# NOTE(review): each tuple entry in hidden_layer_sizes is the width of one
# hidden layer, so (1,100) is a 1-neuron layer followed by a 100-neuron layer
# and (2,30) is a 2-neuron layer followed by a 30-neuron layer. If "one layer
# of 100" / "two layers of 30" was intended, use (100,) and (30,30) -- confirm.
param_grid = {
    "activation":['logistic', 'identity', 'tanh', 'relu'],
    "hidden_layer_sizes":[(1,100), (2,30)],
    "solver":['adam','sgd']
}

grid_search = GridSearchCV(MLPClassifier(), param_grid, cv=3, verbose=2, n_jobs=-1)
print(grid_search.get_params().keys())
grid_search.fit(X_train, y_train['Emotion'])

# Best parameter combination found by the search
print(grid_search.best_params_)
y_pred = grid_search.predict(X_test)

# Section header for the report. best_params_ is a dict, so it is converted
# with str() before concatenation (str + dict raises TypeError), and it is read
# from the grid_search object fitted in this cell.
description="Better Multilayered Perceptron for emotion classification with hyperparameters tunned by grid search:"+str(grid_search.best_params_)+" \n"
builder.write(description)

# Classification report rendered as a heatmap; iloc[:-1] drops the last row
# (support) so only the score rows are plotted.
clf_report=classification_report(y_test['Emotion'], y_pred,labels=np.unique(y_pred),output_dict=True)
fig, ax = plt.subplots()
smap=sns.heatmap(pd.DataFrame(clf_report).iloc[:-1, :].T, annot=True, ax=ax)
fig.savefig("report.png")
builder.write("Classification Report: \n")
builder.insert_image("report.png")
builder.write("\n")

# Confusion matrix. The same explicit label list drives both the matrix
# row/column order and the tick labels, so they cannot get out of sync.
builder.write("Confusion Matrix \n")
class_labels=sorted(set(y_test['Emotion']) | set(y_pred))
cf_matrix=confusion_matrix(y_test['Emotion'], y_pred, labels=class_labels)
fig, ax = plt.subplots(figsize=(20,20))
cmap=sns.heatmap(cf_matrix, annot=True,fmt='g', ax=ax, xticklabels=class_labels, yticklabels=class_labels)
ax.set_xlabel('Predicted labels')
ax.set_ylabel('True labels')
ax.tick_params(axis='both', which='major', labelsize=8)
fig.savefig("cmatrix.png")
builder.insert_image("cmatrix.png")



In [None]:
# Sentiment: tune a Multi-Layered Perceptron on the features in X_train with a
# 3-fold grid search, then append its classification report and confusion
# matrix to the Word document through `builder`.
#
# 'logistic' is scikit-learn's name for the sigmoid activation; 'sigmoid' is
# not an accepted value and makes every fit using it fail.
#
# NOTE(review): each tuple entry in hidden_layer_sizes is the width of one
# hidden layer, so (1,100) is a 1-neuron layer followed by a 100-neuron layer
# and (2,30) is a 2-neuron layer followed by a 30-neuron layer. If "one layer
# of 100" / "two layers of 30" was intended, use (100,) and (30,30) -- confirm.
param_grid = {
    "activation":['logistic', 'identity', 'tanh', 'relu'],
    "hidden_layer_sizes":[(1,100), (2,30)],
    "solver":['adam','sgd']
}

grid_search = GridSearchCV(MLPClassifier(), param_grid, cv=3, verbose=2, n_jobs=-1)
print(grid_search.get_params().keys())
grid_search.fit(X_train, y_train['Sentiment'])

# Best parameter combination found by the search
print(grid_search.best_params_)
y_pred = grid_search.predict(X_test)

# Section header for the report. best_params_ is a dict, so it is converted
# with str() before concatenation (str + dict raises TypeError), and it is read
# from the grid_search object fitted in this cell.
description="Better Multilayered Perceptron for sentiment classification with hyperparameters tunned by grid search:"+str(grid_search.best_params_)+" \n"
builder.write(description)

# Classification report rendered as a heatmap; iloc[:-1] drops the last row
# (support) so only the score rows are plotted.
clf_report=classification_report(y_test['Sentiment'], y_pred,labels=np.unique(y_pred),output_dict=True)
fig, ax = plt.subplots()
smap=sns.heatmap(pd.DataFrame(clf_report).iloc[:-1, :].T, annot=True, ax=ax)
fig.savefig("report.png")
builder.write("Classification Report: \n")
builder.insert_image("report.png")
builder.write("\n")

# Confusion matrix. y_pred holds Sentiment predictions, so it is compared with
# the Sentiment ground truth (not Emotion). The same explicit label list drives
# both the matrix row/column order and the tick labels.
builder.write("Confusion Matrix \n")
class_labels=sorted(set(y_test['Sentiment']) | set(y_pred))
cf_matrix=confusion_matrix(y_test['Sentiment'], y_pred, labels=class_labels)
fig, ax = plt.subplots(figsize=(20,20))
cmap=sns.heatmap(cf_matrix, annot=True,fmt='g', ax=ax, xticklabels=class_labels, yticklabels=class_labels)
ax.set_xlabel('Predicted labels')
ax.set_ylabel('True labels')
ax.tick_params(axis='both', which='major', labelsize=8)
fig.savefig("cmatrix.png")
builder.insert_image("cmatrix.png")



In [None]:
# Write the Word document built so far to disk under the tf-idf report name.
tfidf_report_name = "performance_tf-idf.docx"
doc.save(tfidf_report_name)

## 3. Embeddings as features

#### 3.1 Load pretrained model

In [None]:
# Load the pretrained word2vec vectors (binary format) from the mounted Drive.
word2vec_path = 'drive/MyDrive/Colab Notebooks/Artifical Intelligence/GoogleNews-vectors-negative300.bin'
model = KeyedVectors.load_word2vec_format(word2vec_path, binary=True)

#### 3.2 Tokenize the post

In [None]:
# Download the NLTK 'punkt' resource (presumably required by word_tokenize).
nltk.download('punkt')

# Hold out 20% of the raw posts; both label columns travel with each post.
X_train, X_test, y_train, y_test = train_test_split(
    df['Text'],
    df[['Sentiment','Emotion']],
    test_size=0.20,
)

# Turn every post into a list of tokens, separately for each split.
X_train_tokenized, X_test_tokenized = (
    split.apply(word_tokenize) for split in (X_train, X_test)
)


In [None]:
# Collect the distinct tokens that occur anywhere in the training posts.
tokens = set()
for post_tokens in X_train_tokenized:
    tokens.update(post_tokens)
print('The total number of unique tokens is:', len(tokens))

#### 3.3 Compute the embedding of the post

In [None]:
def embed(df):
    """Average the word vectors of each tokenized post.

    Parameters
    ----------
    df : iterable of iterables of str
        One sequence of tokens per post.

    Returns
    -------
    (list, int)
        A list with one 300-element vector per post (the mean of the vectors
        of the tokens found in the global ``model``; all zeros when no token
        is found), and the number of posts for which no token was found.
    """
    post_vectors = []
    posts_without_hit = 0
    for tokens in df:
        # Tokens missing from the model are skipped.
        known_vectors = [model[token] for token in tokens if token in model]
        accumulated = np.zeros(300)
        for vector in known_vectors:
            accumulated += vector
        if known_vectors:
            accumulated = accumulated / len(known_vectors)
        else:
            posts_without_hit += 1
        post_vectors.append(accumulated)
    return post_vectors, posts_without_hit

In [None]:
# Embed both splits. embed() returns the vectors plus the number of posts in
# which no token was found in the model (stored in rate1 / rate2).
(X_train_embeded, rate1), (X_test_embeded, rate2) = (
    embed(X_train_tokenized),
    embed(X_test_tokenized),
)


#### 3.4 Overall hit rates

In [None]:
# rate1 / rate2 are the numbers of posts for which embed() found no token in
# the model, so (n - rate) / n is the fraction of posts whose embedding was
# averaged from at least one known token. Each split is normalised by its own
# size (the test-set figure must not be divided by the training-set length).
n_train_posts = len(X_train_tokenized)
n_test_posts = len(X_test_tokenized)
print("The percentage of positevely getting an non null embedding in the train set is", (n_train_posts-rate1)/n_train_posts)
print("The percentage of positevely getting an non null embedding in the test set is", (n_test_posts-rate2)/n_test_posts)

#### 3.5 Base Multi-Layered perceptron

In [None]:
# Emotion: train an MLP with default hyperparameters on the averaged word
# embeddings, then append its classification report and confusion matrix to
# the Word document through `builder`.
mlp_emotion = MLPClassifier()
mlp_emotion= mlp_emotion.fit(X_train_embeded, y_train['Emotion'])
y_pred=mlp_emotion.predict(X_test_embeded)

# Section header for the report
description="Multi-Layered perceptron classificator using the GoogleNews-vectors-negative300 pretrained model for Emotion classification with default hyperparameters: one hidden layer with 100 neurons, activation= 'relu',solver='adam', alpha(regularization)=0.0001... \n"
builder.write(description)

# Classification report rendered as a heatmap, computed and drawn exactly once
# on its own axes: drawing it twice on the same implicit axes stacks a second
# colorbar into the saved image. iloc[:-1] drops the last row (support).
clf_report=classification_report(y_test['Emotion'], y_pred,labels=np.unique(y_pred),output_dict=True)
fig, ax = plt.subplots()
smap=sns.heatmap(pd.DataFrame(clf_report).iloc[:-1, :].T, annot=True, ax=ax)
fig.savefig("report.png")
builder.write("Classification Report: \n")
builder.insert_image("report.png")
builder.write("\n")

# Confusion matrix. The same explicit label list drives both the matrix
# row/column order and the tick labels, so they cannot get out of sync.
builder.write("Confusion Matrix \n")
class_labels=sorted(set(y_test['Emotion']) | set(y_pred))
cf_matrix=confusion_matrix(y_test['Emotion'], y_pred, labels=class_labels)
fig, ax = plt.subplots(figsize=(20,20))
cmap=sns.heatmap(cf_matrix, annot=True,fmt='g', ax=ax, xticklabels=class_labels, yticklabels=class_labels)
ax.set_xlabel('Predicted labels')
ax.set_ylabel('True labels')
ax.tick_params(axis='both', which='major', labelsize=8)
fig.savefig("cmatrix.png")
builder.insert_image("cmatrix.png")

In [None]:
# Sentiment: train an MLP with default hyperparameters on the averaged word
# embeddings, then append its classification report and confusion matrix to
# the Word document through `builder`.
# The model gets its own name so it no longer overwrites the emotion model
# stored in `mlp_emotion`.
mlp_sentiment = MLPClassifier()
mlp_sentiment= mlp_sentiment.fit(X_train_embeded, y_train['Sentiment'])
y_pred=mlp_sentiment.predict(X_test_embeded)

# Section header for the report
description="Multi-Layered perceptron classificator using the GoogleNews-vectors-negative300 pretrained model for Sentiment classfication with default hyperparameters: one hidden layer with 100 neurons, activation= 'relu',solver='adam', alpha(regularization)=0.0001... \n"
builder.write(description)

# Classification report rendered as a heatmap, computed and drawn exactly once
# on its own axes: drawing it twice on the same implicit axes stacks a second
# colorbar into the saved image. iloc[:-1] drops the last row (support).
clf_report=classification_report(y_test['Sentiment'], y_pred,labels=np.unique(y_pred),output_dict=True)
fig, ax = plt.subplots()
smap=sns.heatmap(pd.DataFrame(clf_report).iloc[:-1, :].T, annot=True, ax=ax)
fig.savefig("report.png")
builder.write("Classification Report: \n")
builder.insert_image("report.png")
builder.write("\n")

# Confusion matrix. The same explicit label list drives both the matrix
# row/column order and the tick labels, so they cannot get out of sync.
builder.write("Confusion Matrix \n")
class_labels=sorted(set(y_test['Sentiment']) | set(y_pred))
cf_matrix=confusion_matrix(y_test['Sentiment'], y_pred, labels=class_labels)
fig, ax = plt.subplots(figsize=(20,20))
cmap=sns.heatmap(cf_matrix, annot=True,fmt='g', ax=ax, xticklabels=class_labels, yticklabels=class_labels)
ax.set_xlabel('Predicted labels')
ax.set_ylabel('True labels')
ax.tick_params(axis='both', which='major', labelsize=8)
fig.savefig("cmatrix.png")
builder.insert_image("cmatrix.png")

#### 3.6 Better Multi-Layered perceptron

In [None]:
# Emotion: tune a Multi-Layered Perceptron on the averaged word embeddings
# with a 3-fold grid search, then append its classification report and
# confusion matrix to the Word document through `builder`.
#
# 'logistic' is scikit-learn's name for the sigmoid activation; 'sigmoid' is
# not an accepted value and makes every fit using it fail.
#
# NOTE(review): each tuple entry in hidden_layer_sizes is the width of one
# hidden layer, so (1,100) is a 1-neuron layer followed by a 100-neuron layer
# and (2,30) is a 2-neuron layer followed by a 30-neuron layer. If "one layer
# of 100" / "two layers of 30" was intended, use (100,) and (30,30) -- confirm.
param_grid = {
    "activation":['logistic', 'identity', 'tanh', 'relu'],
    "hidden_layer_sizes":[(1,100), (2,30)],
    "solver":['adam','sgd']
}

grid_search = GridSearchCV(MLPClassifier(), param_grid, cv=3, verbose=2, n_jobs=-1)
print(grid_search.get_params().keys())
grid_search.fit(X_train_embeded, y_train['Emotion'])

# Best parameter combination found by the search
print(grid_search.best_params_)
y_pred = grid_search.predict(X_test_embeded)

# Section header for the report. best_params_ is a dict, so it is converted
# with str() before concatenation (str + dict raises TypeError), and it is read
# from the grid_search object fitted in this cell.
description="Better Multilayered Perceptron using the GoogleNews-vectors-negative300 pretrained model for emotion classification with hyperparameters tunned by grid search:"+str(grid_search.best_params_)+" \n"
builder.write(description)

# Classification report rendered as a heatmap; iloc[:-1] drops the last row
# (support) so only the score rows are plotted.
clf_report=classification_report(y_test['Emotion'], y_pred,labels=np.unique(y_pred),output_dict=True)
fig, ax = plt.subplots()
smap=sns.heatmap(pd.DataFrame(clf_report).iloc[:-1, :].T, annot=True, ax=ax)
fig.savefig("report.png")
builder.write("Classification Report: \n")
builder.insert_image("report.png")
builder.write("\n")

# Confusion matrix. The same explicit label list drives both the matrix
# row/column order and the tick labels, so they cannot get out of sync.
builder.write("Confusion Matrix \n")
class_labels=sorted(set(y_test['Emotion']) | set(y_pred))
cf_matrix=confusion_matrix(y_test['Emotion'], y_pred, labels=class_labels)
fig, ax = plt.subplots(figsize=(20,20))
cmap=sns.heatmap(cf_matrix, annot=True,fmt='g', ax=ax, xticklabels=class_labels, yticklabels=class_labels)
ax.set_xlabel('Predicted labels')
ax.set_ylabel('True labels')
ax.tick_params(axis='both', which='major', labelsize=8)
fig.savefig("cmatrix.png")
builder.insert_image("cmatrix.png")



In [None]:
# Sentiment: tune a Multi-Layered Perceptron on the averaged word embeddings
# with a 3-fold grid search, then append its classification report and
# confusion matrix to the Word document through `builder`.
#
# 'logistic' is scikit-learn's name for the sigmoid activation; 'sigmoid' is
# not an accepted value and makes every fit using it fail.
#
# NOTE(review): each tuple entry in hidden_layer_sizes is the width of one
# hidden layer, so (1,100) is a 1-neuron layer followed by a 100-neuron layer
# and (2,30) is a 2-neuron layer followed by a 30-neuron layer. If "one layer
# of 100" / "two layers of 30" was intended, use (100,) and (30,30) -- confirm.
param_grid = {
    "activation":['logistic', 'identity', 'tanh', 'relu'],
    "hidden_layer_sizes":[(1,100), (2,30)],
    "solver":['adam','sgd']
}

grid_search = GridSearchCV(MLPClassifier(), param_grid, cv=3, verbose=2, n_jobs=-1)
print(grid_search.get_params().keys())
grid_search.fit(X_train_embeded, y_train['Sentiment'])

# Best parameter combination found by the search
print(grid_search.best_params_)
y_pred = grid_search.predict(X_test_embeded)

# Section header for the report. The model is trained on the Sentiment labels,
# so the header says so. best_params_ is a dict and is converted with str()
# before concatenation (str + dict raises TypeError).
description="Better Multilayered Perceptron using the GoogleNews-vectors-negative300 pretrained model for sentiment classification with hyperparameters tunned by grid search:"+str(grid_search.best_params_)+" \n"
builder.write(description)

# Classification report rendered as a heatmap; iloc[:-1] drops the last row
# (support) so only the score rows are plotted.
clf_report=classification_report(y_test['Sentiment'], y_pred,labels=np.unique(y_pred),output_dict=True)
fig, ax = plt.subplots()
smap=sns.heatmap(pd.DataFrame(clf_report).iloc[:-1, :].T, annot=True, ax=ax)
fig.savefig("report.png")
builder.write("Classification Report: \n")
builder.insert_image("report.png")
builder.write("\n")

# Confusion matrix. y_pred holds Sentiment predictions, so it is compared with
# the Sentiment ground truth (not Emotion). The same explicit label list drives
# both the matrix row/column order and the tick labels.
builder.write("Confusion Matrix \n")
class_labels=sorted(set(y_test['Sentiment']) | set(y_pred))
cf_matrix=confusion_matrix(y_test['Sentiment'], y_pred, labels=class_labels)
fig, ax = plt.subplots(figsize=(20,20))
cmap=sns.heatmap(cf_matrix, annot=True,fmt='g', ax=ax, xticklabels=class_labels, yticklabels=class_labels)
ax.set_xlabel('Predicted labels')
ax.set_ylabel('True labels')
ax.tick_params(axis='both', which='major', labelsize=8)
fig.savefig("cmatrix.png")
builder.insert_image("cmatrix.png")



In [None]:
# Write the Word document built so far to disk under the final report name.
final_report_name = "performance.docx"
doc.save(final_report_name)