In [1]:
%load_ext autoreload
%autoreload 2

import sys, os
sys.path.append('../')

from typing import List,Dict
from elmo_on_md.data_loaders.sentiment_loader import SentimentLoader
from elmo_on_md.evaluation.sentiment_analysis import SentimentAnalysis
from elmo_on_md.evaluation.model_loader import load_model
from ELMoForManyLangs.elmoformanylangs import Embedder

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import Adam
 
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix

import matplotlib.pyplot as plt
%matplotlib inline


In [2]:
# Load the sentiment dataset through the project's SentimentLoader.
# Later cells read sentiment_data['train'] (which holds 'sentences' and 'labels')
# and sentiment_data['test'] (which holds at least 'labels').
loader = SentimentLoader()
sentiment_data = loader.load_data()

In [5]:

# Train the sentiment classifier on top of the 'original' ELMo model.
#
# Tunable settings are named here instead of being inlined as magic numbers.
SEED = 1                   # used for the train/validation split and for RNG seeding
LEARNING_RATE = 1e-4
VALIDATION_FRACTION = 0.2  # share of the labelled 'train' data held out for validation
N_EPOCHS = 40

# Seed the global NumPy and PyTorch generators before any model object is
# created, so repeated runs start from the same random state.
# NOTE(review): presumably SentimentAnalysis draws its initial weights and batch
# order from these generators -- confirm; full determinism may additionally
# depend on backend settings.
np.random.seed(SEED)
torch.manual_seed(SEED)

elmo = load_model('original')
sentiment = SentimentAnalysis(elmo, lr=LEARNING_RATE)
sentences = sentiment_data['train']['sentences']
labels = sentiment_data['train']['labels']

# Hold out part of the labelled data for validation. Despite the "*_test"
# names, these held-out items are used as the validation set below; the real
# test split lives in sentiment_data['test'].
tokens_train, tokens_test, labels_train, labels_test = train_test_split(
    sentences, labels, test_size=VALIDATION_FRACTION, random_state=SEED)

train_set = {'sentences': tokens_train, 'labels': labels_train}
validate_set = {'sentences': tokens_test, 'labels': labels_test}

sentiment.train(train_set, validate_set, n_epochs=N_EPOCHS)



I0816 23:20:21.262648 16540 elmo.py:133] char embedding size: 2289
I0816 23:20:22.684382 16540 elmo.py:151] word embedding size: 189561
I0816 23:20:28.095322 16540 elmo.py:162] Model(
  (token_embedder): ConvTokenEmbedder(
    (word_emb_layer): EmbeddingLayer(
      (embedding): Embedding(189561, 100, padding_idx=3)
    )
    (char_emb_layer): EmbeddingLayer(
      (embedding): Embedding(2289, 50, padding_idx=2286)
    )
    (convolutions): ModuleList(
      (0): Conv1d(50, 32, kernel_size=(1,), stride=(1,))
      (1): Conv1d(50, 32, kernel_size=(2,), stride=(1,))
      (2): Conv1d(50, 64, kernel_size=(3,), stride=(1,))
      (3): Conv1d(50, 128, kernel_size=(4,), stride=(1,))
      (4): Conv1d(50, 256, kernel_size=(5,), stride=(1,))
      (5): Conv1d(50, 512, kernel_size=(6,), stride=(1,))
      (6): Conv1d(50, 1024, kernel_size=(7,), stride=(1,))
    )
    (highways): Highway(
      (_layers): ModuleList(
        (0): Linear(in_features=2048, out_features=4096, bias=True)
        (1)

0.0001


I0816 23:20:55.224915 16540 elmo.py:97] 129 batches, avg len: 18.6
I0816 23:22:51.265885 16540 elmo.py:211] Finished 1000 sentences.
I0816 23:23:54.091761 16540 elmo.py:211] Finished 2000 sentences.
I0816 23:25:16.172173 16540 elmo.py:211] Finished 3000 sentences.
I0816 23:26:56.127280 16540 elmo.py:211] Finished 4000 sentences.
I0816 23:28:20.354943 16540 elmo.py:211] Finished 5000 sentences.
I0816 23:29:35.205625 16540 elmo.py:211] Finished 6000 sentences.
I0816 23:30:30.889241 16540 elmo.py:211] Finished 7000 sentences.
I0816 23:31:31.910381 16540 elmo.py:211] Finished 8000 sentences.
I0816 23:32:07.477554 16540 elmo.py:97] 33 batches, avg len: 18.9
I0816 23:33:50.016207 16540 elmo.py:211] Finished 1000 sentences.
I0816 23:34:54.039341 16540 elmo.py:211] Finished 2000 sentences.
  output = self.softmax(output)


Epoch: 0	 Train Loss: 418.9075495004654	 Validation Loss: 1.0650625228881836


  'precision', 'predicted', average, warn_for)


Epoch: 1	 Train Loss: 406.34952968358994	 Validation Loss: 1.0177041292190552
Epoch: 2	 Train Loss: 385.0301075577736	 Validation Loss: 1.0039623975753784
Epoch: 3	 Train Loss: 421.2224864959717	 Validation Loss: 1.1020267009735107
Epoch: 4	 Train Loss: 421.242740213871	 Validation Loss: 1.075490951538086
Epoch: 5	 Train Loss: 406.44196915626526	 Validation Loss: 1.059213399887085
Epoch: 6	 Train Loss: 399.3147940635681	 Validation Loss: 1.0424540042877197
Epoch: 7	 Train Loss: 396.15958321094513	 Validation Loss: 1.0498336553573608
Epoch: 8	 Train Loss: 392.71207934617996	 Validation Loss: 1.0236924886703491
Epoch: 9	 Train Loss: 378.6180145740509	 Validation Loss: 0.985181987285614
Epoch: 10	 Train Loss: 367.55779337882996	 Validation Loss: 1.0101741552352905
Epoch: 11	 Train Loss: 391.40442430973053	 Validation Loss: 1.0114729404449463
Epoch: 12	 Train Loss: 379.39055836200714	 Validation Loss: 0.9967827796936035
Epoch: 13	 Train Loss: 371.41229099035263	 Validation Loss: 0.98455059

<elmo_on_md.evaluation.sentiment_analysis.SentimentAnalysis at 0x1508949f978>

In [4]:
# Print one confusion matrix per split (rows: true labels, columns: predictions).
# The first matrix covers the full labelled 'train' split, which also contains
# the 20% that the training cell held out for validation; the second covers 'test'.
# NOTE(review): the execution counts and log timestamps suggest this cell last
# ran before the training cell above it -- re-run top to bottom before trusting
# the displayed matrices.
train_split = sentiment_data['train']
train_preds = sentiment.predict(train_split)
print(confusion_matrix(train_split['labels'], train_preds))

test_split = sentiment_data['test']
test_preds = sentiment.predict(test_split)
print(confusion_matrix(test_split['labels'], test_preds))

I0816 23:03:50.467286 16540 elmo.py:97] 161 batches, avg len: 18.7
I0816 23:05:10.108537 16540 elmo.py:211] Finished 1000 sentences.
I0816 23:06:47.392971 16540 elmo.py:211] Finished 2000 sentences.
I0816 23:07:44.254007 16540 elmo.py:211] Finished 3000 sentences.
I0816 23:08:39.939246 16540 elmo.py:211] Finished 4000 sentences.
I0816 23:09:41.714380 16540 elmo.py:211] Finished 5000 sentences.
I0816 23:10:46.432189 16540 elmo.py:211] Finished 6000 sentences.
I0816 23:12:04.525777 16540 elmo.py:211] Finished 7000 sentences.
I0816 23:13:17.359844 16540 elmo.py:211] Finished 8000 sentences.
I0816 23:14:42.605561 16540 elmo.py:211] Finished 9000 sentences.
I0816 23:15:58.845119 16540 elmo.py:211] Finished 10000 sentences.


[[5078 1576  160]
 [ 298 2751   83]
 [  40   70  188]]


I0816 23:16:26.229613 16540 elmo.py:97] 40 batches, avg len: 18.9
I0816 23:18:09.806802 16540 elmo.py:211] Finished 1000 sentences.
I0816 23:19:29.191212 16540 elmo.py:211] Finished 2000 sentences.


[[1233  419   46]
 [  82  656   52]
 [  11   32   29]]
