In [1]:
%load_ext autoreload
%autoreload 2

import sys, os
# Add the parent directory to the module search path before the project imports
# below (`elmo_on_md`, `ELMoForManyLangs`) are executed.
# NOTE(review): presumably those packages live one level above this notebook and
# the kernel's working directory is the notebook's own folder -- confirm.
sys.path.append('../')

from typing import List,Dict
from elmo_on_md.data_loaders.sentiment_loader import SentimentLoader
from elmo_on_md.evaluation.sentiment_analysis import SentimentAnalysis
from elmo_on_md.evaluation.model_loader import load_model
from ELMoForManyLangs.elmoformanylangs import Embedder

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import Adam
 
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix

import matplotlib.pyplot as plt
%matplotlib inline


In [2]:
# Load the sentiment corpus through the project's loader.
# Later cells index the result as sentiment_data['train'] and sentiment_data['test'];
# they read the 'sentences' and 'labels' entries of the train split and the
# 'labels' entry of the test split.
loader = SentimentLoader()
sentiment_data = loader.load_data()

In [3]:

# --- Train the sentiment classifier on top of the 'original' ELMo model ---

# Tunable values collected in one place instead of being scattered as literals.
SEED = 1                    # shared by the RNG seeding and the train/validation split
LEARNING_RATE = 1e-4
VALIDATION_FRACTION = 0.2   # share of the train split held out for validation
N_EPOCHS = 200

# Seed the global NumPy and PyTorch generators so that re-running this cell on a
# fresh kernel starts from the same random state. Previously only the split was
# seeded (random_state=1).
# NOTE(review): this assumes SentimentAnalysis draws its randomness from these
# global generators; it does not by itself guarantee bit-identical results on
# every backend -- confirm against the model code.
np.random.seed(SEED)
torch.manual_seed(SEED)

elmo = load_model('original')
sentiment = SentimentAnalysis(elmo, lr=LEARNING_RATE)
sentences = sentiment_data['train']['sentences']
labels = sentiment_data['train']['labels']

# Hold out part of the train split for validation. The `*_test` names are kept
# for compatibility with the rest of the notebook, but they hold the validation
# part of the train split, not sentiment_data['test'].
tokens_train, tokens_test, labels_train, labels_test = train_test_split(
    sentences, labels, test_size=VALIDATION_FRACTION, random_state=SEED)

train_set = {'sentences': tokens_train, 'labels': labels_train}
validate_set = {'sentences': tokens_test, 'labels': labels_test}

# Left as the last expression so the cell still displays the returned object.
sentiment.train(train_set, validate_set, n_epochs=N_EPOCHS)



2019-08-25 21:01:42,287 INFO: char embedding size: 2289
2019-08-25 21:01:43,291 INFO: word embedding size: 189561
2019-08-25 21:01:46,713 INFO: Model(
  (token_embedder): ConvTokenEmbedder(
    (word_emb_layer): EmbeddingLayer(
      (embedding): Embedding(189561, 100, padding_idx=3)
    )
    (char_emb_layer): EmbeddingLayer(
      (embedding): Embedding(2289, 50, padding_idx=2286)
    )
    (convolutions): ModuleList(
      (0): Conv1d(50, 32, kernel_size=(1,), stride=(1,))
      (1): Conv1d(50, 32, kernel_size=(2,), stride=(1,))
      (2): Conv1d(50, 64, kernel_size=(3,), stride=(1,))
      (3): Conv1d(50, 128, kernel_size=(4,), stride=(1,))
      (4): Conv1d(50, 256, kernel_size=(5,), stride=(1,))
      (5): Conv1d(50, 512, kernel_size=(6,), stride=(1,))
      (6): Conv1d(50, 1024, kernel_size=(7,), stride=(1,))
    )
    (highways): Highway(
      (_layers): ModuleList(
        (0): Linear(in_features=2048, out_features=4096, bias=True)
        (1): Linear(in_features=2048, out_fe

0.0001


2019-08-25 21:02:05,444 INFO: 129 batches, avg len: 16.9
2019-08-25 21:03:00,536 INFO: Finished 1000 sentences.
2019-08-25 21:03:51,745 INFO: Finished 2000 sentences.
2019-08-25 21:05:01,768 INFO: Finished 3000 sentences.
2019-08-25 21:05:47,352 INFO: Finished 4000 sentences.
2019-08-25 21:06:39,218 INFO: Finished 5000 sentences.
2019-08-25 21:07:32,403 INFO: Finished 6000 sentences.
2019-08-25 21:08:26,743 INFO: Finished 7000 sentences.
2019-08-25 21:09:12,859 INFO: Finished 8000 sentences.
2019-08-25 21:09:26,864 INFO: 33 batches, avg len: 17.2
2019-08-25 21:10:16,553 INFO: Finished 1000 sentences.
2019-08-25 21:11:17,102 INFO: Finished 2000 sentences.
  output = self.softmax(output)


Epoch: 0	 Train Loss: 421.77441823482513	 Validation Loss: 1.0691043138504028
Epoch: 1	 Train Loss: 383.07359755039215	 Validation Loss: 0.9425480961799622
Epoch: 2	 Train Loss: 348.28995341062546	 Validation Loss: 0.8777016997337341
Epoch: 3	 Train Loss: 325.8949926495552	 Validation Loss: 0.8419930934906006
Epoch: 4	 Train Loss: 305.33130687475204	 Validation Loss: 0.8222777247428894
Epoch: 5	 Train Loss: 292.5341736674309	 Validation Loss: 0.8088980317115784
Epoch: 6	 Train Loss: 285.0077056288719	 Validation Loss: 0.797065019607544
Epoch: 7	 Train Loss: 277.813392162323	 Validation Loss: 0.8088728189468384
Epoch: 8	 Train Loss: 275.1517615914345	 Validation Loss: 0.7904177904129028
Epoch: 9	 Train Loss: 270.4338181614876	 Validation Loss: 0.8106430768966675
Epoch: 10	 Train Loss: 265.6814925670624	 Validation Loss: 0.8255361914634705
Epoch: 11	 Train Loss: 264.36902010440826	 Validation Loss: 0.8029248714447021
Epoch: 12	 Train Loss: 259.7284687757492	 Validation Loss: 0.8251733779

Epoch: 104	 Train Loss: 223.86141765117645	 Validation Loss: 0.8065392971038818
Epoch: 105	 Train Loss: 223.95570141077042	 Validation Loss: 0.806482195854187
Epoch: 106	 Train Loss: 223.78172707557678	 Validation Loss: 0.8136574625968933
Epoch: 107	 Train Loss: 226.48052644729614	 Validation Loss: 0.7774620652198792
Epoch: 108	 Train Loss: 227.72796016931534	 Validation Loss: 0.7922357320785522
Epoch: 109	 Train Loss: 226.66149580478668	 Validation Loss: 0.7961627840995789
Epoch: 110	 Train Loss: 223.86974161863327	 Validation Loss: 0.8063536882400513
Epoch: 111	 Train Loss: 223.7767617702484	 Validation Loss: 0.8127657771110535
Epoch: 112	 Train Loss: 223.15650880336761	 Validation Loss: 0.8161711692810059
Epoch: 113	 Train Loss: 224.04628014564514	 Validation Loss: 0.8045825362205505
Epoch: 114	 Train Loss: 223.1669016480446	 Validation Loss: 0.7988268733024597
Epoch: 115	 Train Loss: 223.3756331205368	 Validation Loss: 0.8016646504402161
Epoch: 116	 Train Loss: 223.33297777175903	 

<elmo_on_md.evaluation.sentiment_analysis.SentimentAnalysis at 0x1aca02967b8>

In [4]:
# Evaluate the trained classifier: confusion matrices first, then per-class
# precision / recall / F1 reports, each for the train split and then the test split.
# The import was previously repeated on two consecutive lines; once is enough.
from sklearn.metrics import precision_recall_fscore_support,classification_report

# NOTE(review): sentiment_data['train'] is the full train split, i.e. it also
# contains the sentences that the training cell held out for validation.
# `train_preds` / `test_preds` stay module-level because later cells reuse them.
train_preds = sentiment.predict(sentiment_data['train'])
print(confusion_matrix(sentiment_data['train']['labels'], train_preds))
test_preds = sentiment.predict(sentiment_data['test'])
print(confusion_matrix(sentiment_data['test']['labels'], test_preds))

# Display names for the classes, passed to classification_report as target_names.
class_names = ['bad', 'neutral', 'good']
for split_name, split_preds in (('train', train_preds), ('test', test_preds)):
    print(classification_report(sentiment_data[split_name]['labels'], split_preds,
                                target_names=class_names))


I0826 00:16:09.515891 16988 elmo.py:97] 161 batches, avg len: 17.0
I0826 00:17:18.708282 16988 elmo.py:211] Finished 1000 sentences.
I0826 00:18:09.752365 16988 elmo.py:211] Finished 2000 sentences.
I0826 00:19:11.690146 16988 elmo.py:211] Finished 3000 sentences.
I0826 00:20:04.656741 16988 elmo.py:211] Finished 4000 sentences.
I0826 00:21:08.080207 16988 elmo.py:211] Finished 5000 sentences.
I0826 00:22:00.888988 16988 elmo.py:211] Finished 6000 sentences.
I0826 00:22:59.192112 16988 elmo.py:211] Finished 7000 sentences.
I0826 00:23:49.285412 16988 elmo.py:211] Finished 8000 sentences.
I0826 00:24:28.830652 16988 elmo.py:211] Finished 9000 sentences.
I0826 00:25:37.947032 16988 elmo.py:211] Finished 10000 sentences.


[[6336  357  121]
 [ 223 2841   68]
 [  18   14  266]]


I0826 00:26:17.425742 16988 elmo.py:97] 40 batches, avg len: 17.1
I0826 00:27:16.036770 16988 elmo.py:211] Finished 1000 sentences.
I0826 00:28:21.057963 16988 elmo.py:211] Finished 2000 sentences.


[[1521  144   33]
 [  98  658   34]
 [  16   20   36]]
              precision    recall  f1-score   support

         bad       0.96      0.93      0.95      6814
     neutral       0.88      0.91      0.90      3132
        good       0.58      0.89      0.71       298

    accuracy                           0.92     10244
   macro avg       0.81      0.91      0.85     10244
weighted avg       0.93      0.92      0.92     10244

              precision    recall  f1-score   support

         bad       0.93      0.90      0.91      1698
     neutral       0.80      0.83      0.82       790
        good       0.35      0.50      0.41        72

    accuracy                           0.87      2560
   macro avg       0.69      0.74      0.71      2560
weighted avg       0.87      0.87      0.87      2560



In [4]:
# Evaluate the trained classifier: confusion matrices first, then per-class
# precision / recall / F1 reports, each for the train split and then the test split.
# The import was previously repeated on two consecutive lines; once is enough.
from sklearn.metrics import precision_recall_fscore_support,classification_report

# NOTE(review): sentiment_data['train'] is the full train split, i.e. it also
# contains the sentences that the training cell held out for validation.
# `train_preds` / `test_preds` stay module-level because later cells reuse them.
train_preds = sentiment.predict(sentiment_data['train'])
print(confusion_matrix(sentiment_data['train']['labels'], train_preds))
test_preds = sentiment.predict(sentiment_data['test'])
print(confusion_matrix(sentiment_data['test']['labels'], test_preds))

# Display names for the classes, passed to classification_report as target_names.
class_names = ['bad', 'neutral', 'good']
for split_name, split_preds in (('train', train_preds), ('test', test_preds)):
    print(classification_report(sentiment_data[split_name]['labels'], split_preds,
                                target_names=class_names))


I0824 12:49:25.633167 21040 elmo.py:97] 161 batches, avg len: 17.0
I0824 12:50:33.290285 21040 elmo.py:211] Finished 1000 sentences.
I0824 12:51:23.120063 21040 elmo.py:211] Finished 2000 sentences.
I0824 12:52:18.743351 21040 elmo.py:211] Finished 3000 sentences.
I0824 12:53:30.671048 21040 elmo.py:211] Finished 4000 sentences.
I0824 12:54:28.752764 21040 elmo.py:211] Finished 5000 sentences.
I0824 12:55:16.594856 21040 elmo.py:211] Finished 6000 sentences.
I0824 12:56:22.606369 21040 elmo.py:211] Finished 7000 sentences.
I0824 12:57:20.747925 21040 elmo.py:211] Finished 8000 sentences.
I0824 12:58:08.270870 21040 elmo.py:211] Finished 9000 sentences.
I0824 12:58:56.832039 21040 elmo.py:211] Finished 10000 sentences.
  return output


[[5886  872   56]
 [ 182 2918   32]
 [  15   45  238]]


I0824 12:59:34.101390 21040 elmo.py:97] 40 batches, avg len: 17.1
I0824 13:00:28.782982 21040 elmo.py:211] Finished 1000 sentences.
I0824 13:01:30.980684 21040 elmo.py:211] Finished 2000 sentences.


[[1389  287   22]
 [  75  703   12]
 [  11   32   29]]
              precision    recall  f1-score   support

         bad       0.97      0.86      0.91      6814
     neutral       0.76      0.93      0.84      3132
        good       0.73      0.80      0.76       298

    accuracy                           0.88     10244
   macro avg       0.82      0.86      0.84     10244
weighted avg       0.90      0.88      0.89     10244

              precision    recall  f1-score   support

         bad       0.94      0.82      0.88      1698
     neutral       0.69      0.89      0.78       790
        good       0.46      0.40      0.43        72

    accuracy                           0.83      2560
   macro avg       0.70      0.70      0.69      2560
weighted avg       0.85      0.83      0.83      2560



In [1]:
# Per-class precision / recall / F1 for the train split.
# Depends on `sentiment_data` and `train_preds` created by earlier cells; running
# this cell on a fresh kernel raises NameError.
from sklearn.metrics import precision_recall_fscore_support,classification_report
train_labels = sentiment_data['train']['labels']
train_report = classification_report(train_labels, train_preds, target_names=['bad','neutral','good'])
print(train_report)

NameError: name 'sentiment_data' is not defined

In [None]:
# Per-class precision / recall / F1 for the test split.
# Depends on `sentiment_data` and `test_preds` created by earlier cells.
from sklearn.metrics import precision_recall_fscore_support,classification_report
test_labels = sentiment_data['test']['labels']
test_report = classification_report(test_labels, test_preds, target_names=['bad','neutral','good'])
print(test_report)

In [None]:
# Call train() again on the existing `sentiment` object with the same
# train/validation dictionaries, this time for 100 epochs.
# NOTE(review): whether this continues from the current weights or starts over
# depends on SentimentAnalysis.train, which is not visible here -- confirm.
sentiment.train(train_set,validate_set,n_epochs=100)

In [None]:
# Recompute predictions and print one confusion matrix per split (train, then test).
# `train_preds` / `test_preds` are rebound here, replacing any earlier values.
train_preds = sentiment.predict(sentiment_data['train'])
train_confusion = confusion_matrix(sentiment_data['train']['labels'], train_preds)
print(train_confusion)

test_preds = sentiment.predict(sentiment_data['test'])
test_confusion = confusion_matrix(sentiment_data['test']['labels'], test_preds)
print(test_confusion)

In [6]:
# Plot metrics from a TensorFlow events file.
# NOTE(review): numpy is already imported as `np` in the first cell; this
# re-import is redundant.
import numpy as np
# NOTE(review): in the recorded run this import fails with
# "ModuleNotFoundError: No module named 'tensorflow.python.summary.event_accumulator'".
# TensorBoard ships an EventAccumulator in
# `tensorboard.backend.event_processing.event_accumulator` -- TODO confirm that
# this is the intended class and that the package is installed before switching.
from tensorflow.python.summary.event_accumulator import EventAccumulator


# Absolute, machine-specific path to the events file; the cell cannot run on
# another machine without editing it.
log_file = "D://Projects/events.out.tfevents.1566034114.DESKTOP-OE11P6R.5500.0"
# NOTE(review): `plot_tensorflow_log` is neither defined nor imported in any cell
# visible here, so this call is expected to raise NameError even once the import
# above is fixed -- TODO restore its definition or import.
plot_tensorflow_log(log_file)

ModuleNotFoundError: No module named 'tensorflow.python.summary.event_accumulator'