In [1]:
import spacy
import pandas as pd
import csv
import numpy as np


In [2]:
# Load the spaCy pipeline named 'en_core_web_sm'. `nlp` is not referenced by the
# cells visible here; presumably later analysis cells use it (TODO confirm).
nlp = spacy.load('en_core_web_sm')

In [3]:
# Training split: a tab-separated file whose 'Label' and 'Tweet text' columns are
# used by the cells below. QUOTE_NONE makes the parser treat quote characters
# inside tweets as ordinary text instead of field delimiters.
TRAIN_PATH = "SemEval2018-Task3/datasets/train/SemEval2018-T3-train-taskB.txt"
try:
    # pandas >= 1.3: `on_bad_lines='skip'` replaces `error_bad_lines=False`,
    # which was deprecated in 1.3 and removed in 2.0.
    dataset = pd.read_csv(TRAIN_PATH, delimiter='\t', quoting=csv.QUOTE_NONE, on_bad_lines='skip')
except TypeError:
    # Older pandas rejects the unknown `on_bad_lines` keyword before reading
    # anything; fall back to the legacy flag with the same meaning.
    dataset = pd.read_csv(TRAIN_PATH, delimiter='\t', quoting=csv.QUOTE_NONE, error_bad_lines=False)

# B) Classification Task Analysis 

## Class Distributions

In [4]:
label_column = dataset['Label']

# Absolute and relative frequency of every class label in the training data.
print('Instances:\n', label_column.value_counts())
print('Relative label frequency:\n', label_column.value_counts(normalize=True))

# Show the first tweet of each label, visiting labels in order of first appearance.
for label in label_column.unique():
    df_label = dataset[label_column == label]
    first_tweet = df_label['Tweet text'].iloc[0]
    print('Label: ', label, 'example sentence: ', first_tweet)
    print('\n')

Instances:
 0    1923
1    1390
2     316
3     205
Name: Label, dtype: int64
Relative label frequency:
 0    0.501565
1    0.362546
2    0.082420
3    0.053469
Name: Label, dtype: float64
Label:  1 example sentence:  Sweet United Nations video. Just in time for Christmas. #imagine #NoReligion  http://t.co/fej2v3OUBR


Label:  0 example sentence:  3 episodes left I'm dying over here


Label:  2 example sentence:  "I can't breathe!" was chosen as the most notable quote of the year in an annual list released by a Yale University librarian 


Label:  3 example sentence:  @yWTorres9 time to hit the books then 




## Baselines

In [5]:
import random
import sklearn.metrics
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
# Seed Python's global `random` generator; the random baseline below draws its
# predictions from it via random.choices.
random.seed(10)

In [6]:
# Gold test split, read with the same tab-separated / QUOTE_NONE settings as the
# training file. `y_true` holds its 'Label' column and is the reference for
# every baseline evaluated below.
TEST_PATH = "SemEval2018-Task3/datasets/goldtest_TaskB/SemEval2018-T3_gold_test_taskB_emoji.txt"
try:
    # pandas >= 1.3: `on_bad_lines='skip'` replaces `error_bad_lines=False`,
    # which was deprecated in 1.3 and removed in 2.0.
    test_set = pd.read_csv(TEST_PATH, delimiter='\t', quoting=csv.QUOTE_NONE, on_bad_lines='skip')
except TypeError:
    # Older pandas rejects the unknown `on_bad_lines` keyword before reading
    # anything; fall back to the legacy flag with the same meaning.
    test_set = pd.read_csv(TEST_PATH, delimiter='\t', quoting=csv.QUOTE_NONE, error_bad_lines=False)
y_true = test_set['Label']

In [7]:
# Distinct labels present in the gold test set, in order of first appearance.
print(test_set['Label'].unique())

[0 2 1 3]


### Random baseline

In [95]:
# Running sums for the random baseline.
# Per-class metrics: one slot for each of the four labels.
acc, prec, rec, f1_score = (np.zeros(4) for _ in range(4))
# Averaged metrics: slots are [accuracy, precision, recall, F1-score].
macro_avg, weighted_avg = np.zeros(4), np.zeros(4)

In [96]:
# Random baseline: give every test tweet a label drawn uniformly at random,
# score the draw, and repeat N_RUNS times. The six accumulators end up holding
# *sums* over all runs; the reporting cell divides them by the run count.
N_RUNS = 100  # keep in sync with the divisor used when the averages are printed
METRIC_KEYS = ('precision', 'recall', 'f1-score')

label_pool = test_set['Label'].unique()  # population for the random draws
class_labels = sorted(label_pool)        # row order of the confusion matrix

# Re-seed (same seed as the seeding cell above) and reset the accumulators here,
# so that running this cell again reproduces the same sums instead of continuing
# the random stream and stacking another N_RUNS runs on top of the old totals.
random.seed(10)
acc, prec, rec, f1_score = (np.zeros(len(class_labels)) for _ in range(4))
macro_avg, weighted_avg = np.zeros(4), np.zeros(4)  # [accuracy, precision, recall, F1]

for _ in range(N_RUNS):
    y_pred = random.choices(label_pool, k=len(test_set))

    # Diagonal of the row-normalised confusion matrix: the fraction of each true
    # class that was predicted correctly (numerically the same as per-class recall).
    acc_category = np.diagonal(confusion_matrix(y_true, y_pred, normalize='true'))
    acc += acc_category
    acc_pooled = accuracy_score(y_true, y_pred)  # overall accuracy

    class_report = classification_report(y_true, y_pred, output_dict=True)

    # The report is keyed by each label's string form; look entries up by label
    # instead of assuming the labels are exactly 0..3, and use a loop variable
    # that does not shadow the run counter.
    for slot, class_label in enumerate(class_labels):
        per_class = class_report[str(class_label)]
        prec[slot] += per_class['precision']
        rec[slot] += per_class['recall']
        f1_score[slot] += per_class['f1-score']

    # Select metrics by key rather than by position in the dict.
    macro_avg_values = [class_report['macro avg'][key] for key in METRIC_KEYS]
    macro_avg_values.insert(0, np.mean(acc_category))  # macro-averaged accuracy
    macro_avg += np.array(macro_avg_values)

    weighted_avg_values = [class_report['weighted avg'][key] for key in METRIC_KEYS]
    weighted_avg_values.insert(0, acc_pooled)  # pooled accuracy as the weighted figure
    weighted_avg += np.array(weighted_avg_values)



In [97]:
# The accumulators hold sums over the 100 random-baseline runs; dividing by the
# run count turns them into per-run averages.
n_runs = 100
summed_metrics = [
    ('Accuracy for all classes: ', acc),
    ('Precision for all classes: ', prec),
    ('Recall for all classes: ', rec),
    ('F1-score for all classes: ', f1_score),
    ('Macro average for accuracy, precision, recall, F1-score: ', macro_avg),
    ('Weighted average for accuracy, precision, recall, F1-score: ', weighted_avg),
]
for caption, total in summed_metrics:
    print(caption, total / n_runs)

Accuracy for all classes:  [0.25114165 0.24908537 0.25047059 0.23693548]
Precision for all classes:  [0.60453637 0.20964374 0.10720716 0.07587426]
Recall for all classes:  [0.25114165 0.24908537 0.25047059 0.23693548]
F1-score for all classes:  [0.3545478  0.22740452 0.14999612 0.11486054]
Macro average for accuracy, precision, recall, F1-score:  [0.24690827 0.24931538 0.24690827 0.21170224]
Weighted average for accuracy, precision, recall, F1-score:  [0.24951531 0.42620419 0.24951531 0.28681948]


### Majority baseline 

In [105]:
# Majority baseline: predict the most frequent label for every test tweet.
# value_counts() is indexed by label, so idxmax() returns the label itself.
# argmax() returns the *position* of the maximum instead, which is always 0
# because value_counts() sorts by descending frequency; it only looked right
# because the most frequent label here happens to be 0.
# NOTE(review): the majority label is derived from the test labels; taking it
# from the training labels would avoid peeking at the gold data (confirm intent).
majority_class = test_set['Label'].value_counts().idxmax()
y_pred = [majority_class]*len(test_set)


In [106]:
# Start the majority-baseline metrics from zero: six independent length-4 arrays
# (four per-class metrics, then the macro and weighted averages).
acc, prec, rec, f1_score, macro_avg, weighted_avg = (np.zeros(4) for _ in range(6))

In [107]:
# Score the majority baseline. The prediction is deterministic, so the metrics
# are assigned directly instead of being added to accumulators; re-running the
# cell therefore yields the same numbers rather than doubling them.
METRIC_KEYS = ('precision', 'recall', 'f1-score')
class_labels = sorted(test_set['Label'].unique())  # row order of the confusion matrix

# Diagonal of the row-normalised confusion matrix: the fraction of each true
# class that was predicted correctly (numerically the same as per-class recall).
acc_category = np.diagonal(confusion_matrix(y_true, y_pred, normalize='true'))
acc = np.array(acc_category)  # copy: np.diagonal returns a read-only view
acc_pooled = accuracy_score(y_true, y_pred)  # overall accuracy

# Only one label is ever predicted, so precision is undefined (0/0) for the
# others. zero_division=0 reports those as 0, the same value the default
# produces, without emitting an UndefinedMetricWarning for each average.
class_report = classification_report(y_true, y_pred, output_dict=True, zero_division=0)

# The report is keyed by each label's string form; look entries up by label
# instead of assuming the labels are exactly 0..3.
prec = np.array([class_report[str(class_label)]['precision'] for class_label in class_labels])
rec = np.array([class_report[str(class_label)]['recall'] for class_label in class_labels])
f1_score = np.array([class_report[str(class_label)]['f1-score'] for class_label in class_labels])

# Select metrics by key rather than by position in the dict.
macro_avg_values = [class_report['macro avg'][key] for key in METRIC_KEYS]
macro_avg_values.insert(0, np.mean(acc_category))  # macro-averaged accuracy
macro_avg = np.array(macro_avg_values)

weighted_avg_values = [class_report['weighted avg'][key] for key in METRIC_KEYS]
weighted_avg_values.insert(0, acc_pooled)  # pooled accuracy as the weighted figure
weighted_avg = np.array(weighted_avg_values)

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [108]:
# Report the majority-baseline metrics as computed (a single evaluation, so no
# division by a run count).
captions = (
    'Accuracy for all classes: ',
    'Precision for all classes: ',
    'Recall for all classes: ',
    'F1-score for all classes: ',
    'Macro average for accuracy, precision, recall, F1-score: ',
    'Weighted average for accuracy, precision, recall, F1-score: ',
)
results = (acc, prec, rec, f1_score, macro_avg, weighted_avg)
for caption, values in zip(captions, results):
    print(caption, values)

Accuracy for all classes:  [1. 0. 0. 0.]
Precision for all classes:  [0.60331633 0.         0.         0.        ]
Recall for all classes:  [1. 0. 0. 0.]
F1-score for all classes:  [0.75258552 0.         0.         0.        ]
Macro average for accuracy, precision, recall, F1-score:  [0.25       0.15082908 0.25       0.18814638]
Weighted average for accuracy, precision, recall, F1-score:  [0.60331633 0.36399059 0.60331633 0.45404713]
