In [3]:
%load_ext autoreload
%autoreload 2

import sys, os
sys.path.append('../')

from typing import List,Dict
from elmo_on_md.data_loaders.sentiment_loader import SentimentLoader
from elmo_on_md.evaluation.sentiment_analysis import SentimentAnalysis
from elmo_on_md.evaluation.model_loader import load_model

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report

import matplotlib.pyplot as plt
%matplotlib inline

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [4]:
# Load the sentiment corpus. The returned mapping is indexed below by split
# name ('train') and then by field ('sentences' / 'labels').
loader = SentimentLoader()
sentiment_data = loader.load_data()

sentences = sentiment_data['train']['sentences']
labels = sentiment_data['train']['labels']

# Carve a 20% validation split out of the training portion. The fixed
# random_state keeps the split identical across runs. Note that the `*_test`
# names hold this validation split (they feed `validate_set`).
tokens_train, tokens_test, labels_train, labels_test = train_test_split(
    sentences,
    labels,
    test_size=0.2,
    random_state=1,
)

train_set = {
    'sentences': tokens_train,
    'labels': labels_train,
}
validate_set = {
    'sentences': tokens_test,
    'labels': labels_test,
}

### Original ELMo

In [3]:
# Baseline run: the classifier is built on the 'original' ELMo model only.
elmo = load_model('original', batch_size=32)
sentiment = SentimentAnalysis([elmo], lr=1e-4)

# Kept as the cell's final expression so that whatever `train` returns is
# echoed as the cell output.
sentiment.train(
    train_set,
    validate_set,
    n_epochs=50,
    batch_size=64,
)

2019-09-01 17:43:32,575 INFO: char embedding size: 2289
2019-09-01 17:43:33,272 INFO: word embedding size: 189561
2019-09-01 17:43:38,018 INFO: Model(
  (token_embedder): ConvTokenEmbedder(
    (word_emb_layer): EmbeddingLayer(
      (embedding): Embedding(189561, 100, padding_idx=3)
    )
    (char_emb_layer): EmbeddingLayer(
      (embedding): Embedding(2289, 50, padding_idx=2286)
    )
    (convolutions): ModuleList(
      (0): Conv1d(50, 32, kernel_size=(1,), stride=(1,))
      (1): Conv1d(50, 32, kernel_size=(2,), stride=(1,))
      (2): Conv1d(50, 64, kernel_size=(3,), stride=(1,))
      (3): Conv1d(50, 128, kernel_size=(4,), stride=(1,))
      (4): Conv1d(50, 256, kernel_size=(5,), stride=(1,))
      (5): Conv1d(50, 512, kernel_size=(6,), stride=(1,))
      (6): Conv1d(50, 1024, kernel_size=(7,), stride=(1,))
    )
    (highways): Highway(
      (_layers): ModuleList(
        (0): Linear(in_features=2048, out_features=4096, bias=True)
        (1): Linear(in_features=2048, out_fe

Epoch: 0	 Train Loss: 1.1235072612762451	
Epoch: 1	 Train Loss: 1.222253441810608	
Epoch: 2	 Train Loss: 1.0119001865386963	
Epoch: 3	 Train Loss: 0.759834885597229	
Epoch: 4	 Train Loss: 0.5796594023704529	
Epoch: 5	 Train Loss: 0.6713117361068726	
Epoch: 6	 Train Loss: 0.5812109708786011	
Epoch: 7	 Train Loss: 0.5967259407043457	
Epoch: 8	 Train Loss: 0.5742737650871277	
Epoch: 9	 Train Loss: 0.5597250461578369	
Epoch: 10	 Train Loss: 0.5662811398506165	
Epoch: 11	 Train Loss: 0.5628848671913147	
Epoch: 12	 Train Loss: 0.5587441921234131	
Epoch: 13	 Train Loss: 0.5574495792388916	
Epoch: 14	 Train Loss: 0.5545092821121216	
Epoch: 15	 Train Loss: 0.5527603030204773	
Epoch: 16	 Train Loss: 0.5539493560791016	
Epoch: 17	 Train Loss: 0.5614719390869141	
Epoch: 18	 Train Loss: 0.5533530116081238	
Epoch: 19	 Train Loss: 0.5543757081031799	
Epoch: 20	 Train Loss: 0.5602818131446838	
Epoch: 21	 Train Loss: 0.5523852705955505	
Epoch: 22	 Train Loss: 0.5519161820411682	
Epoch: 23	 Train Loss: 

<elmo_on_md.evaluation.sentiment_analysis.SentimentAnalysis at 0x1a6556f0898>

In [4]:
# Evaluate the trained classifier on the 'test' split of the loaded data.
# (The same two calls can be pointed at sentiment_data['train'] to inspect
# the fit on the training data.)
y_pred = sentiment.predict(sentiment_data['test'])
y_true = sentiment_data['test']['labels']

# Raw confusion counts first, then the per-class precision/recall/F1 table.
print(confusion_matrix(y_true, y_pred))
print(
    classification_report(
        y_true,
        y_pred,
        target_names=['positive', 'negative', 'neutral'],
    )
)

2019-09-01 17:50:17,957 INFO: 80 batches, avg len: 22.7
2019-09-01 17:50:23,124 INFO: Finished 1000 sentences.
2019-09-01 17:50:28,576 INFO: Finished 2000 sentences.


[[1409  261   28]
 [  81  692   17]
 [  11   30   31]]
              precision    recall  f1-score   support

    positive       0.94      0.83      0.88      1698
    negative       0.70      0.88      0.78       790
     neutral       0.41      0.43      0.42        72

    accuracy                           0.83      2560
   macro avg       0.68      0.71      0.69      2560
weighted avg       0.85      0.83      0.84      2560



### Dual model

In [3]:
# Dual run: the classifier receives two models, the 'original' ELMo and the
# 'pos_weight8_lr-4_elmo_at_15_30_epochs' checkpoint.
elmo = load_model('original', batch_size=32)
new_elmo = load_model('pos_weight8_lr-4_elmo_at_15_30_epochs')
# For this checkpoint the batch size is assigned on the loaded object rather
# than passed to load_model.
new_elmo.batch_size = 32
sentiment = SentimentAnalysis([elmo, new_elmo], lr=1e-4)

# Kept as the cell's final expression so that whatever `train` returns is
# echoed as the cell output.
sentiment.train(
    train_set,
    validate_set,
    n_epochs=50,
    batch_size=64,
)

2019-09-01 17:29:44,441 INFO: char embedding size: 2289
2019-09-01 17:29:45,134 INFO: word embedding size: 189561
2019-09-01 17:29:50,514 INFO: Model(
  (token_embedder): ConvTokenEmbedder(
    (word_emb_layer): EmbeddingLayer(
      (embedding): Embedding(189561, 100, padding_idx=3)
    )
    (char_emb_layer): EmbeddingLayer(
      (embedding): Embedding(2289, 50, padding_idx=2286)
    )
    (convolutions): ModuleList(
      (0): Conv1d(50, 32, kernel_size=(1,), stride=(1,))
      (1): Conv1d(50, 32, kernel_size=(2,), stride=(1,))
      (2): Conv1d(50, 64, kernel_size=(3,), stride=(1,))
      (3): Conv1d(50, 128, kernel_size=(4,), stride=(1,))
      (4): Conv1d(50, 256, kernel_size=(5,), stride=(1,))
      (5): Conv1d(50, 512, kernel_size=(6,), stride=(1,))
      (6): Conv1d(50, 1024, kernel_size=(7,), stride=(1,))
    )
    (highways): Highway(
      (_layers): ModuleList(
        (0): Linear(in_features=2048, out_features=4096, bias=True)
        (1): Linear(in_features=2048, out_fe

Epoch: 0	 Train Loss: 1.1223949193954468	
Epoch: 1	 Train Loss: 1.4271427392959595	
Epoch: 2	 Train Loss: 1.1509649753570557	
Epoch: 3	 Train Loss: 0.7792381048202515	
Epoch: 4	 Train Loss: 0.6108531951904297	
Epoch: 5	 Train Loss: 0.5671049356460571	
Epoch: 6	 Train Loss: 0.593056857585907	
Epoch: 7	 Train Loss: 0.561571478843689	
Epoch: 8	 Train Loss: 0.5621851682662964	
Epoch: 9	 Train Loss: 0.55265212059021	
Epoch: 10	 Train Loss: 0.573776125907898	
Epoch: 11	 Train Loss: 0.5613865852355957	
Epoch: 12	 Train Loss: 0.5575268268585205	
Epoch: 13	 Train Loss: 0.554081380367279	
Epoch: 14	 Train Loss: 0.5538483262062073	
Epoch: 15	 Train Loss: 0.5527641177177429	
Epoch: 16	 Train Loss: 0.5520601272583008	
Epoch: 17	 Train Loss: 0.5523673295974731	
Epoch: 18	 Train Loss: 0.5518531799316406	
Epoch: 19	 Train Loss: 0.5524479746818542	
Epoch: 20	 Train Loss: 0.5516145825386047	
Epoch: 21	 Train Loss: 0.5518057942390442	
Epoch: 22	 Train Loss: 0.552434504032135	
Epoch: 23	 Train Loss: 0.551

<elmo_on_md.evaluation.sentiment_analysis.SentimentAnalysis at 0x2bd1bc47e10>

#### Results

In [4]:
# Evaluate the dual-model classifier on the 'test' split of the loaded data.
# (The same two calls can be pointed at sentiment_data['train'] to inspect
# the fit on the training data.)
y_pred = sentiment.predict(sentiment_data['test'])
y_true = sentiment_data['test']['labels']

# Raw confusion counts first, then the per-class precision/recall/F1 table.
print(confusion_matrix(y_true, y_pred))
print(
    classification_report(
        y_true,
        y_pred,
        target_names=['positive', 'negative', 'neutral'],
    )
)

2019-09-01 17:39:30,485 INFO: 80 batches, avg len: 22.7
2019-09-01 17:39:36,072 INFO: Finished 1000 sentences.
2019-09-01 17:39:39,836 INFO: Finished 2000 sentences.
2019-09-01 17:39:49,682 INFO: 80 batches, avg len: 22.7
2019-09-01 17:39:53,511 INFO: Finished 1000 sentences.
2019-09-01 17:39:59,464 INFO: Finished 2000 sentences.


[[1432  248   18]
 [  51  732    7]
 [  14   29   29]]
              precision    recall  f1-score   support

    positive       0.96      0.84      0.90      1698
    negative       0.73      0.93      0.81       790
     neutral       0.54      0.40      0.46        72

    accuracy                           0.86      2560
   macro avg       0.74      0.72      0.72      2560
weighted avg       0.87      0.86      0.86      2560



### Our Model

In [5]:
# Single-model run using only the 'pos_weight8_lr-4_elmo_at_15_30_epochs'
# checkpoint.
new_elmo = load_model('pos_weight8_lr-4_elmo_at_15_30_epochs')
# The batch size is assigned on the loaded object rather than passed to
# load_model.
new_elmo.batch_size = 32
sentiment = SentimentAnalysis([new_elmo], lr=1e-4)

# Kept as the cell's final expression so that whatever `train` returns is
# echoed as the cell output.
sentiment.train(
    train_set,
    validate_set,
    n_epochs=50,
    batch_size=64,
)

2019-09-01 17:54:29,412 INFO: 257 batches, avg len: 22.3
2019-09-01 17:54:34,424 INFO: Finished 1000 sentences.
2019-09-01 17:54:39,443 INFO: Finished 2000 sentences.
2019-09-01 17:54:45,001 INFO: Finished 3000 sentences.
2019-09-01 17:54:49,547 INFO: Finished 4000 sentences.
2019-09-01 17:54:53,882 INFO: Finished 5000 sentences.
2019-09-01 17:54:59,540 INFO: Finished 6000 sentences.
2019-09-01 17:55:03,787 INFO: Finished 7000 sentences.
2019-09-01 17:55:08,919 INFO: Finished 8000 sentences.
2019-09-01 17:55:15,440 INFO: 65 batches, avg len: 22.6
2019-09-01 17:55:21,484 INFO: Finished 1000 sentences.
2019-09-01 17:55:26,096 INFO: Finished 2000 sentences.
  output = self.softmax(output)


Epoch: 0	 Train Loss: 1.1858340501785278	
Epoch: 1	 Train Loss: 1.2816157341003418	
Epoch: 2	 Train Loss: 0.7523992657661438	
Epoch: 3	 Train Loss: 0.7508202791213989	
Epoch: 4	 Train Loss: 0.605176568031311	
Epoch: 5	 Train Loss: 0.5572603344917297	
Epoch: 6	 Train Loss: 0.5558092594146729	
Epoch: 7	 Train Loss: 0.560070812702179	
Epoch: 8	 Train Loss: 0.5581343173980713	
Epoch: 9	 Train Loss: 0.5531948208808899	
Epoch: 10	 Train Loss: 0.5604509711265564	
Epoch: 11	 Train Loss: 0.553894579410553	
Epoch: 12	 Train Loss: 0.5517213344573975	
Epoch: 13	 Train Loss: 0.5532650947570801	
Epoch: 14	 Train Loss: 0.5517301559448242	
Epoch: 15	 Train Loss: 0.552293598651886	
Epoch: 16	 Train Loss: 0.553195595741272	
Epoch: 17	 Train Loss: 0.5544922351837158	
Epoch: 18	 Train Loss: 0.5540428757667542	
Epoch: 19	 Train Loss: 0.5532054901123047	
Epoch: 20	 Train Loss: 0.5517415404319763	
Epoch: 21	 Train Loss: 0.5517334938049316	
Epoch: 22	 Train Loss: 0.5518262982368469	
Epoch: 23	 Train Loss: 0.5

<elmo_on_md.evaluation.sentiment_analysis.SentimentAnalysis at 0x1d723109da0>

In [6]:
# Evaluate the single-model classifier on the 'test' split of the loaded data.
y_pred = sentiment.predict(sentiment_data['test'])
y_true = sentiment_data['test']['labels']

# Raw confusion counts first, then the per-class precision/recall/F1 table.
print(confusion_matrix(y_true, y_pred))
# target_names are applied positionally to the sorted class ids, so they must
# be listed in the same order in every evaluation cell for the reports to be
# comparable. This cell previously used ['bad', 'neutral', 'good'], which
# labelled the second class "neutral" and the third "good", contradicting the
# other evaluation cells that score the same test split.
# NOTE(review): assumes class ids 0/1/2 mean positive/negative/neutral, as in
# the other evaluation cells -- confirm against SentimentLoader.
print(
    classification_report(
        y_true,
        y_pred,
        target_names=['positive', 'negative', 'neutral'],
    )
)

2019-09-01 18:00:49,213 INFO: 80 batches, avg len: 22.7
2019-09-01 18:00:54,631 INFO: Finished 1000 sentences.
2019-09-01 18:00:59,536 INFO: Finished 2000 sentences.


[[1490  182   26]
 [ 114  652   24]
 [  14   22   36]]
              precision    recall  f1-score   support

         bad       0.92      0.88      0.90      1698
     neutral       0.76      0.83      0.79       790
        good       0.42      0.50      0.46        72

    accuracy                           0.85      2560
   macro avg       0.70      0.73      0.72      2560
weighted avg       0.86      0.85      0.85      2560

