In [1]:
import pickle as pkl
import numpy as np
import pandas as pd

from zipfile import ZipFile
from sklearn.metrics import accuracy_score, matthews_corrcoef, confusion_matrix
from sklearn.model_selection import train_test_split
from tensorflow.keras.backend import clear_session

from twitter_nlp_toolkit.tweet_sentiment_classifier import tweet_sentiment_classifier
from twitter_nlp_toolkit.file_fetcher import file_fetcher



In [2]:
clear_session()

In [3]:
chunk = 1 # Fraction of data to train on - you can reduce for debugging for speed
model_path = '.models'
redownload=True

Here we download pre-trained models and a validation dataset. The models have been trained on the Sentiment140 dataset, taken form here: https://www.kaggle.com/kazanova/sentiment140

The validation data is hand-labeled airline customer feedback taken from https://www.figure-eight.com/data-for-everyone/

In [4]:
if redownload: 
    
    # Validation data
    file_fetcher.download_file('https://www.dropbox.com/s/muqov03s9vtbe92/tweets_airline.zip?dl=1',"tweets_airline.zip")
    


tweets_airline.zip: 100%|#########################################################| 1.07M/1.07M [00:00<00:00, 4.40MB/s]


In [5]:
# Load the validation data

test_data = pd.read_csv('tweets_airline.zip', header=0, names=['Index', 'Sentiment', 'Sentiment_confidence',
                                                                'Negative_reason', 'Negative_reason_confidence',
                                                                'Airline', 'Airline_sentiment_gold', 'Handle',
                                                                'Negative_reason_gold', 'Retweet_count', 'Text',
                                                                'Tweet_coord', 'Time', 'Location', 'Timezone'])

In [6]:
# Remove the unlabeled test data

test_data['Labels'] = (test_data['Sentiment'] == 'positive') * 2
test_data['Labels'] = test_data['Labels'] + (test_data['Sentiment'] == 'neutral') * 1
test_data['Labels'] = test_data['Labels'] / 2

test_data.set_index('Labels')
test_data = test_data[test_data.Labels != 0.5]

In [7]:
# Download the small ensemble
# Executing this cell starts a 500MB download

Classifier = tweet_sentiment_classifier.SentimentAnalyzer()
Classifier.load_small_ensemble()

bow_1Mtweets
Default parameters file not found or not working...
Trying .models/small_ensemble/bow_1Mtweets
Loading BoW model bow_1Mtweets from legacy parameter file
BoW model bow_1Mtweets loaded successfully
glove_100D_10N_340000V_719685T_Twitter_0
Default parameters file not found or not working...
Trying .models/small_ensemble/glove_100D_10N_340000V_719685T_Twitter_0
Loading GloVE model glove_100D_10N_340000V_719685T_Twitter_0 from legacy parameter file
Pre-trained embedding model glove_100D_10N_340000V_719685T_Twitter_0 loaded successfully
glove_200D_10N_20000V_343332T_Twitter_7
Default parameters file not found or not working...
Trying .models/small_ensemble/glove_200D_10N_20000V_343332T_Twitter_7
Loading GloVE model glove_200D_10N_20000V_343332T_Twitter_7 from legacy parameter file
Pre-trained embedding model glove_200D_10N_20000V_343332T_Twitter_7 loaded successfully


We santiy check the models: 

In [8]:
Classifier.predict(['I am happy', 'I am sad', 'I am cheerful', 'I am mad'])


array([1., 0., 1., 0.])

We test the model on an airline customer feedback dataset.

In [9]:
# Executing this cell takes several minuites on a laptop

predictions = Classifier.predict(test_data['Text'])


In [10]:
print('Test Accuracy:  {:.3f}'.format(accuracy_score(test_data['Labels'], predictions)))
print('Test MCC:  {:.3f}'.format(matthews_corrcoef(test_data['Labels'], predictions)))
confusion_matrix(test_data['Labels'], predictions)

Test Accuracy:  0.812
Test MCC:  0.570


array([[7305, 1873],
       [ 297, 2066]], dtype=int64)

We have accuracy of just over 80%.

We split our evaluation dataset into validation and testing and check for poor-performing models:

In [11]:
valX, testX, valY, testY = train_test_split(test_data['Text'], test_data['Labels'], test_size=0.5, stratify=test_data['Labels'])

In [12]:
# Executing this cell takes several minuites on a laptop

Classifier.trim_models(valX, valY, threshold=0.7)

Model bow_1Mtweets score: 0.808
Model glove_100D_10N_340000V_719685T_Twitter_0 score: 0.796
Model glove_200D_10N_20000V_343332T_Twitter_7 score: 0.803


All three models perform well, so none have been pruned

In [13]:
predictions = Classifier.predict(testX)

print('Test Accuracy:  {:.3f}'.format(accuracy_score(testY, predictions)))
print('Test MCC:  {:.3f}'.format(matthews_corrcoef(testY, predictions)))
confusion_matrix(testY, predictions)

Test Accuracy:  0.805
Test MCC:  0.556


array([[3623,  966],
       [ 160, 1022]], dtype=int64)

To improve our accuracy, we can refine the models on our airline data. The early stopping procedure (enabled by default to use 20% of the training data for validation) should minimize overfitting.

In [14]:
Classifier.refine(valX, valY)

 Preprocessed 5770 tweets 

 Filtered data
 Preprocessed 5770 tweets 
Filtered data
  ...
    to  
  ['...']
  ...
    to  
  ['...']
Train on 4616 samples, validate on 1154 samples
Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500


Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78/500
Epoch 79/500
Epoch 80/500
Epoch 81/500
Epoch 82/500
Epoch 83/500
Epoch 84/500
Epoch 85/500
Epoch 86/500
Epoch 87/500
Epoch 88/500
Epoch 89/500
Epoch 90/500
Epoch 91/500
Epoch 92/500
Epoch 93/500
Epoch 94/500
Epoch 95/500
Epoch 96/500
Epoch 97/500
Epoch 98/500
Epoch 99/500
Epoch 100/500
Epoch 101/500
Epoch 102/500
Epoch 103/500
Epoch 104/500
Epoch 105/500
Epoch 106/500
Epoch 107/500
Epoch 108/500
Epoch 109/500
Epoch 110/500
Epoch 111/500
Epoch 112/500
Epoch 113/500
Epoch 114/500
Epoch 115/500
Epoch 116/500


Epoch 117/500
Epoch 118/500
Epoch 119/500
Epoch 120/500
Epoch 121/500
Epoch 122/500
Epoch 123/500
Epoch 124/500
Epoch 125/500
Epoch 126/500
Epoch 127/500
Epoch 128/500
Epoch 129/500
Epoch 130/500
Epoch 131/500
Epoch 132/500
Epoch 133/500
Epoch 134/500
Epoch 135/500
Epoch 136/500
Epoch 137/500
Epoch 138/500
Epoch 139/500
Epoch 140/500
Epoch 141/500
Epoch 142/500
Epoch 143/500
Epoch 144/500
Epoch 145/500
Epoch 146/500
Epoch 147/500
Epoch 148/500
Epoch 149/500
Epoch 150/500
Epoch 151/500
Epoch 152/500
Epoch 153/500
Epoch 154/500
Epoch 155/500
Epoch 156/500
Epoch 157/500
Epoch 158/500
Epoch 159/500
Epoch 160/500
Epoch 161/500
Epoch 162/500
Epoch 163/500
Epoch 164/500
Epoch 165/500
Epoch 166/500
Epoch 167/500
Epoch 00167: early stopping
 Preprocessed 5770 tweets 
Filtered data
  ...
    to  
  ['...']
  ...
    to  
  ['...']
Train on 4616 samples, validate on 1154 samples
Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500


Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500


Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78/500
Epoch 79/500
Epoch 80/500
Epoch 81/500
Epoch 82/500
Epoch 83/500
Epoch 84/500
Epoch 85/500
Epoch 86/500
Epoch 87/500
Epoch 88/500
Epoch 89/500
Epoch 90/500
Epoch 91/500
Epoch 92/500
Epoch 93/500
Epoch 94/500
Epoch 95/500
Epoch 96/500
Epoch 97/500
Epoch 98/500
Epoch 99/500
Epoch 100/500
Epoch 101/500
Epoch 102/500
Epoch 103/500
Epoch 104/500
Epoch 105/500
Epoch 106/500
Epoch 107/500
Epoch 108/500
Epoch 109/500
Epoch 110/500
Epoch 111/500
Epoch 112/500
Epoch 113/500
Epoch 114/500
Epoch 115/500
Epoch 116/500
Epoch 117/500
Epoch 118/500
Epoch 119/500
Epoch 120/500
Epoch 121/500
Epoch 122/500
Epoch 123/500
Epoch 124/500


Epoch 125/500
Epoch 126/500
Epoch 127/500
Epoch 128/500
Epoch 129/500
Epoch 130/500
Epoch 131/500
Epoch 132/500
Epoch 133/500
Epoch 134/500
Epoch 135/500
Epoch 136/500
Epoch 137/500
Epoch 138/500
Epoch 139/500
Epoch 140/500
Epoch 141/500
Epoch 142/500
Epoch 143/500
Epoch 144/500
Epoch 145/500
Epoch 146/500
Epoch 147/500
Epoch 148/500
Epoch 149/500
Epoch 150/500
Epoch 151/500
Epoch 152/500
Epoch 153/500
Epoch 154/500
Epoch 155/500
Epoch 156/500
Epoch 157/500
Epoch 158/500
Epoch 159/500
Epoch 160/500
Epoch 161/500
Epoch 162/500
Epoch 163/500
Epoch 164/500
Epoch 165/500
Epoch 166/500
Epoch 167/500
Epoch 168/500
Epoch 169/500
Epoch 170/500
Epoch 171/500
Epoch 172/500
Epoch 173/500
Epoch 174/500
Epoch 175/500
Epoch 176/500
Epoch 177/500
Epoch 178/500
Epoch 179/500
Epoch 180/500
Epoch 181/500
Epoch 182/500
Epoch 183/500


Epoch 184/500
Epoch 185/500
Epoch 186/500
Epoch 187/500
Epoch 188/500
Epoch 189/500
Epoch 190/500
Epoch 191/500
Epoch 192/500
Epoch 193/500
Epoch 194/500
Epoch 195/500
Epoch 196/500
Epoch 197/500
Epoch 198/500
Epoch 199/500
Epoch 200/500
Epoch 201/500
Epoch 202/500
Epoch 203/500
Epoch 204/500
Epoch 205/500
Epoch 206/500
Epoch 207/500
Epoch 208/500
Epoch 209/500
Epoch 210/500
Epoch 211/500
Epoch 212/500
Epoch 213/500
Epoch 214/500
Epoch 215/500
Epoch 216/500
Epoch 217/500
Epoch 218/500
Epoch 00218: early stopping


In [15]:
test_predictions = Classifier.predict(testX)

print('Test Accuracy:  {:.3f}'.format(accuracy_score(testY, test_predictions)))
print('Test MCC:  {:.3f}'.format(matthews_corrcoef(testY, test_predictions)))
confusion_matrix(testY, predictions)

Test Accuracy:  0.914
Test MCC:  0.721


array([[3623,  966],
       [ 160, 1022]], dtype=int64)

Now we have accuracies of over 90%! 

In [16]:
Classifier.evaluate(testX, testY)

Model bow_1Mtweets accuracy_score: 0.891
Model glove_100D_10N_340000V_719685T_Twitter_0 accuracy_score: 0.907
Model glove_200D_10N_20000V_343332T_Twitter_7 accuracy_score: 0.915


{'bow_1Mtweets': 0.8906601975394213,
 'glove_100D_10N_340000V_719685T_Twitter_0': 0.9071218159764339,
 'glove_200D_10N_20000V_343332T_Twitter_7': 0.9147461445156818,
 'ensembled': 0.9143995841275342}