# Import data and Library

In [0]:
import pandas as pd
import numpy as np
import zipfile
from google.colab import files
from wordcloud import WordCloud

In [0]:
uploaded = files.upload()

Saving data_tweet.zip to data_tweet.zip


In [0]:
!unzip 'data_tweet.zip'

Archive:  data_tweet.zip
   creating: data_tweet/
  inflating: data_tweet/2018-E-c-En-test.txt  
  inflating: data_tweet/2018-E-c-En-train.txt  
  inflating: data_tweet/key_norm.csv  


In [0]:
df = pd.read_csv('data_tweet/2018-E-c-En-train.txt', sep='\t')
df.head()

Unnamed: 0,ID,Tweet,anger,anticipation,disgust,fear,joy,love,optimism,pessimism,sadness,surprise,trust
0,2017-En-21441,“Worry is a down payment on a problem you may ...,0,1,0,0,0,0,1,0,0,0,1
1,2017-En-31535,Whatever you decide to do make sure it makes y...,0,0,0,0,1,1,1,0,0,0,0
2,2017-En-21068,@Max_Kellerman it also helps that the majorit...,1,0,1,0,1,0,1,0,0,0,0
3,2017-En-31436,Accept the challenges so that you can literall...,0,0,0,0,1,0,1,0,0,0,0
4,2017-En-22195,My roommate: it's okay that we can't spell bec...,1,0,1,0,0,0,0,0,0,0,0


# Preprocessing

In [0]:
df_tweet = df_train['Tweet']

In [0]:
from nltk.tokenize import WordPunctTokenizer
from bs4 import BeautifulSoup
import re
tok = WordPunctTokenizer()
pat1 = r'@[A-Za-z0-9]+'
pat2 = r'https?://[A-Za-z0-9./]+'
combined_pat = r'|'.join((pat1, pat2))

def tweet_cleaner(text):
    soup = BeautifulSoup(text, 'lxml')
    souped = soup.get_text()
    stripped = re.sub(combined_pat, '', souped)
    try:
        clean = stripped.decode("utf-8-sig").replace(u"\ufffd", "?")
    except:
        clean = stripped
    letters_only = re.sub("[^a-zA-Z]", " ", clean)
    lower_case = letters_only.lower()
    words = tok.tokenize(lower_case)
    return (" ".join(words)).strip()

In [0]:
df_tweet = df_tweet.apply(lambda text: tweet_cleaner(text))
df_tweet.head()

0    worry is a down payment on a problem you may n...
1    whatever you decide to do make sure it makes y...
2    kellerman it also helps that the majority of n...
3    accept the challenges so that you can literall...
4    my roommate it s okay that we can t spell beca...
Name: Tweet, dtype: object

# Feature Extraction

In [0]:
from sklearn.feature_extraction.text import CountVectorizer

tweet_train = CountVectorizer().fit_transform(df_tweet)

In [0]:
from sklearn.feature_extraction.text import TfidfTransformer

tweet_tfidf = TfidfTransformer().fit_transform(tweet_train)

In [0]:
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

In [0]:
from sklearn.model_selection import train_test_split

In [0]:
X = tweet_tfidf

y_anger = df['anger']
y_anticipation = df['anticipation']
y_disgust = df['disgust']
y_fear = df['fear']
y_joy = df['joy']
y_love = df['love']
y_optimism = df['optimism']
y_pessimism = df['pessimism']
y_sadness = df['sadness']
y_surprise = df['surprise']
y_trust = df['trust']

In [0]:
X_train, X_test = train_test_split(X, test_size=0.25)

y_train_anger, y_test_anger = train_test_split(y_anger, test_size=0.25)
y_train_anticipation, y_test_anticipation = train_test_split(y_anticipation, test_size=0.25)
y_train_disgust, y_test_disgust = train_test_split(y_disgust, test_size=0.25)
y_train_fear, y_test_fear = train_test_split(y_fear, test_size=0.25)
y_train_joy, y_test_joy = train_test_split(y_joy, test_size=0.25)
y_train_love, y_test_love = train_test_split(y_love, test_size=0.25)
y_train_optimism, y_test_optimism = train_test_split(y_optimism, test_size=0.25)
y_train_pessimism, y_test_pessimism = train_test_split(y_pessimism, test_size=0.25)
y_train_sadness, y_test_sadness = train_test_split(y_sadness, test_size=0.25)
y_train_surprise, y_test_surprise = train_test_split(y_surprise, test_size=0.25)
y_train_trust, y_test_trust = train_test_split(y_trust, test_size=0.25)

# Classification With Naive Bayes

### Anger Emotion Clasification

In [0]:
clf_nb_anger = MultinomialNB()
clf_nb_anger.fit(X_train, y_train_anger)

pred_nb_anger = clf_nb_anger.predict(X_test)
print("Accuracy Anger Emotion\t: ",accuracy_score(y_test_anger, pred_nb_anger))
print("Precision Anger Emotion\t: ",precision_score(y_test_anger, pred_nb_anger))
print("Recall Anger Emotion\t: ",recall_score(y_test_anger, pred_nb_anger))
print("F1-score Anger Emotion\t: ",f1_score(y_test_anger, pred_nb_anger))

Accuracy Anger Emotion	:  0.6292397660818714
Precision Anger Emotion	:  0.41509433962264153
Recall Anger Emotion	:  0.0352
F1-score Anger Emotion	:  0.06489675516224189


### Anticipation Emotion Clasification

In [0]:
clf_nb_anticipation = MultinomialNB()
clf_nb_anticipation.fit(X_train, y_train_anticipation)

pred_nb_anticipation = clf_nb_anticipation.predict(X_test)
print("Accuracy anticipation Emotion\t: ",accuracy_score(y_test_anticipation, pred_nb_anticipation))
print("Precision anticipation Emotion\t: ",precision_score(y_test_anticipation, pred_nb_anticipation))
print("Recall anticipation Emotion\t: ",recall_score(y_test_anticipation, pred_nb_anticipation))
print("F1-score anticipation Emotion\t: ",f1_score(y_test_anticipation, pred_nb_anticipation))

Accuracy anticipation Emotion	:  0.8584795321637427
Precision anticipation Emotion	:  0.0
Recall anticipation Emotion	:  0.0
F1-score anticipation Emotion	:  0.0


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


### Disgust Emotion Clasification

In [0]:
clf_nb_disgust = MultinomialNB()
clf_nb_disgust.fit(X_train, y_train_disgust)

pred_nb_disgust = clf_nb_disgust.predict(X_test)
print("Accuracy disgust Emotion\t: ",accuracy_score(y_test_disgust, pred_nb_disgust))
print("Precision disgust Emotion\t: ",precision_score(y_test_disgust, pred_nb_disgust))
print("Recall disgust Emotion\t\t: ",recall_score(y_test_disgust, pred_nb_disgust))
print("F1-score disgust Emotion\t: ",f1_score(y_test_disgust, pred_nb_disgust))

Accuracy disgust Emotion	:  0.5982456140350877
Precision disgust Emotion	:  0.2711864406779661
Recall disgust Emotion		:  0.024242424242424242
F1-score disgust Emotion	:  0.044506258692628656


### Fear Emotion Clasification

In [0]:
clf_nb_fear = MultinomialNB()
clf_nb_fear.fit(X_train, y_train_fear)

pred_nb_fear = clf_nb_fear.predict(X_test)
print("Accuracy fear Emotion\t: ",accuracy_score(y_test_fear, pred_nb_fear))
print("Precision fear Emotion\t: ",precision_score(y_test_fear, pred_nb_fear))
print("Recall fear Emotion\t: ",recall_score(y_test_fear, pred_nb_fear))
print("F1-score fear Emotion\t: ",f1_score(y_test_fear, pred_nb_fear))

Accuracy fear Emotion	:  0.808187134502924
Precision fear Emotion	:  0.0
Recall fear Emotion	:  0.0
F1-score fear Emotion	:  0.0


### Joy Emotion Clasification

In [0]:
clf_nb_joy = MultinomialNB()
clf_nb_joy.fit(X_train, y_train_joy)

pred_nb_joy = clf_nb_joy.predict(X_test)
print("Accuracy joy Emotion\t: ",accuracy_score(y_test_joy, pred_nb_joy))
print("Precision joy Emotion\t: ",precision_score(y_test_joy, pred_nb_joy))
print("Recall joy Emotion\t: ",recall_score(y_test_joy, pred_nb_joy))
print("F1-score joy Emotion\t: ",f1_score(y_test_joy, pred_nb_joy))

Accuracy joy Emotion	:  0.6257309941520468
Precision joy Emotion	:  0.3333333333333333
Recall joy Emotion	:  0.02073365231259968
F1-score joy Emotion	:  0.03903903903903904


### Love Emotion Clasification

In [0]:
clf_nb_love= MultinomialNB()
clf_nb_love.fit(X_train, y_train_love)

pred_nb_love = clf_nb_love.predict(X_test)
print("Accuracy Anger Emotion\t: ",accuracy_score(y_test_love, pred_nb_love))
print("Precision Anger Emotion\t: ",precision_score(y_test_love, pred_nb_love))
print("Recall Anger Emotion\t: ",recall_score(y_test_love, pred_nb_love))
print("F1-score Anger Emotion\t: ",f1_score(y_test_love, pred_nb_love))

Accuracy Anger Emotion	:  0.8970760233918129
Precision Anger Emotion	:  0.0
Recall Anger Emotion	:  0.0
F1-score Anger Emotion	:  0.0


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


### Optimism Emotion Clasification

In [0]:
clf_nb_optimism = MultinomialNB()
clf_nb_optimism.fit(X_train, y_train_optimism)

pred_nb_optimism = clf_nb_optimism.predict(X_test)
print("Accuracy optimism Emotion\t: ",accuracy_score(y_test_optimism, pred_nb_optimism))
print("Precision optimism Emotion\t: ",precision_score(y_test_optimism, pred_nb_optimism))
print("Recall optimism Emotion\t\t: ",recall_score(y_test_optimism, pred_nb_optimism))
print("F1-score optimism Emotion\t: ",f1_score(y_test_optimism, pred_nb_optimism))

Accuracy optimism Emotion	:  0.7
Precision optimism Emotion	:  0.0
Recall optimism Emotion		:  0.0
F1-score optimism Emotion	:  0.0


### Pessimism Emotion Clasification

In [0]:
clf_nb_pessimism = MultinomialNB()
clf_nb_pessimism.fit(X_train, y_train_pessimism)

pred_nb_pessimism = clf_nb_pessimism.predict(X_test)
print("Accuracy pessimism Emotion\t: ",accuracy_score(y_test_pessimism, pred_nb_pessimism))
print("Precision pessimism Emotion\t: ",precision_score(y_test_pessimism, pred_nb_pessimism))
print("Recall pessimism Emotion\t: ",recall_score(y_test_pessimism, pred_nb_pessimism))
print("F1-score pessimism Emotion\t: ",f1_score(y_test_pessimism, pred_nb_pessimism))

Accuracy pessimism Emotion	:  0.8842105263157894
Precision pessimism Emotion	:  0.0
Recall pessimism Emotion	:  0.0
F1-score pessimism Emotion	:  0.0


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


### Sadness Emotion Clasification

In [0]:
clf_nb_sadness = MultinomialNB()
clf_nb_sadness.fit(X_train, y_train_sadness)

pred_nb_sadness = clf_nb_sadness.predict(X_test)
print("Accuracy Anger Emotion\t: ",accuracy_score(y_test_sadness, pred_nb_sadness))
print("Precision Anger Emotion\t: ",precision_score(y_test_sadness, pred_nb_sadness))
print("Recall Anger Emotion\t: ",recall_score(y_test_sadness, pred_nb_sadness))
print("F1-score Anger Emotion\t: ",f1_score(y_test_sadness, pred_nb_sadness))

Accuracy Anger Emotion	:  0.7134502923976608
Precision Anger Emotion	:  0.25
Recall Anger Emotion	:  0.00411522633744856
F1-score Anger Emotion	:  0.008097165991902836


### Surprise Emotion Clasification

In [0]:
clf_nb_surprise = MultinomialNB()
clf_nb_surprise.fit(X_train, y_train_surprise)

pred_nb_surprise = clf_nb_surprise.predict(X_test)
print("Accuracy Anger Emotion\t: ",accuracy_score(y_test_surprise, pred_nb_surprise))
print("Precision Anger Emotion\t: ",precision_score(y_test_surprise, pred_nb_surprise))
print("Recall Anger Emotion\t: ",recall_score(y_test_surprise, pred_nb_surprise))
print("F1-score Anger Emotion\t: ",f1_score(y_test_surprise, pred_nb_surprise))

Accuracy Anger Emotion	:  0.9450292397660819
Precision Anger Emotion	:  0.0
Recall Anger Emotion	:  0.0
F1-score Anger Emotion	:  0.0


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


### Trust Emotion Clasification

In [0]:
clf_nb_trust = MultinomialNB()
clf_nb_trust.fit(X_train, y_train_trust)

pred_nb_trust = clf_nb_trust.predict(X_test)
print("Accuracy Anger Emotion\t: ",accuracy_score(y_test_trust, pred_nb_trust))
print("Precision Anger Emotion\t: ",precision_score(y_test_trust, pred_nb_trust))
print("Recall Anger Emotion\t: ",recall_score(y_test_trust, pred_nb_trust))
print("F1-score Anger Emotion\t: ",f1_score(y_test_trust, pred_nb_trust))

Accuracy Anger Emotion	:  0.9543859649122807
Precision Anger Emotion	:  0.0
Recall Anger Emotion	:  0.0
F1-score Anger Emotion	:  0.0


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
