In [1]:
import datetime
import itertools

import numpy as np
import tensorflow as tf
import sklearn as sk
import sklearn.metrics  # make sure the `sk.metrics` submodule is actually loaded
from gensim.models import Word2Vec

import Functions as fn
import Iterator as it
from DS import DS
from Set import pool
from FFModel import FF_Model
from RNNModel import RNN_Model
from ELS2SModel import ELS2S_Model



# Data Loading

In [2]:
# Create an empty pool and populate it from the raw inputs.
# Each loader prints a completion message (see the cell output).
# NOTE(review): 'raw_texts' and 'raw_labels' are presumably paths relative to the
# notebook's working directory — confirm.
Dataset = pool()
Dataset.load_texts('raw_texts')
Dataset.load_labels('raw_labels')

Raw Text Load Complete
Raw Labels Load Complete


In [3]:
# Load the stored labels from 'final_meta/labels'.
# NOTE(review): `target_dict` is not referenced by any other cell visible in this
# notebook — confirm it is still needed.
target_dict = fn.load_labels('final_meta/labels')

Label Load Complete


# Embedding Generation

In [4]:
# Number of labelled cases held back from training and used for validation.
VALIDATION_SIZE = 10

# NOTE(review): training/validation use the labelled 'test'-stage cases while the
# held-out test split uses the labelled 'train'-stage cases. The sizes printed
# below (238 + 10 vs 10) suggest this is deliberate because the 'test' stage has
# far more labelled cases — confirm.
labelled_cases = Dataset.get_DS(stage='test', labelled='yes').data
train_set = pool(data=labelled_cases[:-VALIDATION_SIZE])
validation_set = pool(data=labelled_cases[-VALIDATION_SIZE:])
test_set = Dataset.get_DS(stage='train', labelled='yes')

# Embedding corpus: all unlabelled cases from both stages plus the training split.
# `append` mutates `set_1` in place, and `emb_set` is just another name for it.
set_1 = Dataset.get_DS(stage='train', labelled='no')
set_2 = Dataset.get_DS(stage='test', labelled='no')
set_1.append(set_2.data)
set_1.append(train_set.data)
emb_set = set_1

print(emb_set.size, train_set.size, validation_set.size, test_set.size)

4585 238 10 10


In [5]:
# One-time corpus preparation, kept commented out: the sentences were written to
# 'final_meta/sentences' and are simply reloaded from there on each run.
#emb_set.process_for_embedding()
#sentences = emb_set.get_sentences()
#fn.write_sentences(sentences, 'final_meta/sentences')
sentences = fn.load_sentences('final_meta/sentences')

# One-time Word2Vec training (min_count=1, 100-dimensional vectors), likewise
# commented out in favour of loading the saved model from 'final_meta/W2V'.
#model = Word2Vec(sentences, min_count=1, size=100)
#model.save('final_meta/W2V')
model = Word2Vec.load('final_meta/W2V')

# Keys of the loaded model's vocabulary.
# NOTE(review): `wv.vocab` (and the `size=` keyword above) belong to the pre-4.0
# gensim API — confirm the gensim version this notebook is pinned to.
vocab = model.wv.vocab.keys()

Sentence Load Complete


# Layer and Index Loading

In [6]:
# One-time derivation of the word-index mapping and embedding layer from the
# Word2Vec model, kept commented out; the copies written under 's2s/' are
# loaded instead.
#word_indices, emb_layer = fn.get_index_and_emb_layer(model)
#fn.write_word_indices(word_indices, 's2s/word_indices')
#fn.write_emb_layer(emb_layer, 's2s/emb_layer')

# `word_indices` later sizes the encoder/decoder vocabularies via len(), and
# `emb_layer` is handed to the model as `enc_emb_layer`.
word_indices = fn.load_word_indices('s2s/word_indices')
emb_layer = fn.load_emb_layer('s2s/emb_layer')

Word Indices Load Complete
Embedding Layer Load Complete


# ELS2S Testing

In [7]:
# Apply the ELS2S preprocessing step to every split, in train/validation/test order.
for split in (train_set, validation_set, test_set):
    split.process_for_els2s_testing()

In [9]:
# Longest entry of `inp_words` and of `inp_toks` over every case in all three
# splits; both fall back to 0 when there is nothing to measure. The two values
# are passed to `get_els2s_sets` when the model inputs are built.
all_cases = [
    case
    for split in (train_set, validation_set, test_set)
    for case in split.data
]
max_inp = max((len(words) for case in all_cases for words in case.inp_words), default=0)
max_tok = max((len(toks) for case in all_cases for toks in case.inp_toks), default=0)
print(max_tok, max_inp)

14 94


In [11]:
# Build the model inputs for each split, keyed by split name. Later cells index
# each entry by position (fields 0-5).
sets = {
    name: split.get_els2s_sets(word_indices, max_tok, max_inp)
    for name, split in (
        ('train', train_set),
        ('validation', validation_set),
        ('test', test_set),
    )
}

In [17]:
# Hyperparameters for the ELS2S model, gathered in one place.
els2s_params = dict(
    decay=0.000025,
    batch=50,                           # the prediction cell also feeds batches of 50
    enc_vocab_size=len(word_indices),   # encoder and decoder share one vocabulary
    dec_vocab_size=len(word_indices),
    enc_emb_size=100,                   # same size as the Word2Vec vectors (size=100)
    dec_emb_size=100,
    state_size=101,
    dropout=1.0,                        # NOTE(review): presumably a keep-probability (no dropout) — confirm
    learn_rate=0.001,
    max_gradient_norm=5,
    enc_emb_layer=emb_layer,
)

ELS2S = ELS2S_Model(**els2s_params)
ELS2S.build_graph()
ELS2S.train(sets=sets, epochs=100, report_percentage=1, show_progress=True, show_plot=True)

Epoch: 0, Learn Rate: 0.0010000, Perplexity: 1.94
Epoch: 1, Learn Rate: 0.0010000, Perplexity: 0.93
Epoch: 2, Learn Rate: 0.0010000, Perplexity: 0.81
Epoch: 3, Learn Rate: 0.0010000, Perplexity: 0.67
Epoch: 4, Learn Rate: 0.0010000, Perplexity: 0.57
Epoch: 5, Learn Rate: 0.0010000, Perplexity: 0.54
Epoch: 6, Learn Rate: 0.0010000, Perplexity: 0.43
Epoch: 7, Learn Rate: 0.0010000, Perplexity: 0.36
Epoch: 8, Learn Rate: 0.0010000, Perplexity: 0.29
Epoch: 9, Learn Rate: 0.0010000, Perplexity: 0.27
Epoch: 10, Learn Rate: 0.0010000, Perplexity: 0.22
Epoch: 11, Learn Rate: 0.0010000, Perplexity: 0.18
Epoch: 12, Learn Rate: 0.0010000, Perplexity: 0.17
Epoch: 13, Learn Rate: 0.0010000, Perplexity: 0.15
Epoch: 14, Learn Rate: 0.0010000, Perplexity: 0.12
Epoch: 15, Learn Rate: 0.0010000, Perplexity: 0.12
Epoch: 16, Learn Rate: 0.0010000, Perplexity: 0.10
Epoch: 17, Learn Rate: 0.0010000, Perplexity: 0.08
Epoch: 18, Learn Rate: 0.0010000, Perplexity: 0.07
Epoch: 19, Learn Rate: 0.0010000, Perplex

In [27]:
# Flatten field 4 of the test split into one flat list of labels; this is the
# ground truth passed to the F1 computations.
tru_seqs = list(itertools.chain.from_iterable(sets['test'][4]))

In [26]:
# Predict the test split one sample at a time. Each call receives, for each of
# the 6 input fields, the i-th sample repeated 50 times (the model was built
# with batch=50), and only the first prediction of the batch is kept.
res = []
for i in range(len(sets['test'][0])):
    dummy = [
        [sets['test'][k][i] for _ in range(50)]
        for k in range(6)
    ]
    temp = ELS2S.predict(dummy)
    res.append(temp[0])

In [32]:
# Flatten the per-sample predictions into one flat list, parallel to `tru_seqs`.
res_seqs = list(itertools.chain.from_iterable(res))

In [49]:
# Micro-averaged F1 restricted to labels 1-6: any other label value is left out
# of the average.
# NOTE(review): label 0 is presumably padding / "no entity" — confirm.
sk.metrics.f1_score(tru_seqs, res_seqs, labels=[1, 2, 3, 4, 5, 6], average='micro')

0.82042494859492809

In [44]:
def one_vs_rest(labels, target):
    """Return a 0/1 list marking which entries of `labels` equal `target`."""
    return [1 if label == target else 0 for label in labels]


# One binary sequence per label id 1-6, in order. The name suffixes line up with
# the Med/Dos/Mod/Fre/Dur/Rea headings of the per-class F1 report.
tru_seq_m, tru_seq_do, tru_seq_mo, tru_seq_f, tru_seq_du, tru_seq_r = [
    one_vs_rest(tru_seqs, target) for target in range(1, 7)
]

res_seq_m, res_seq_do, res_seq_mo, res_seq_f, res_seq_du, res_seq_r = [
    one_vs_rest(res_seqs, target) for target in range(1, 7)
]

In [45]:
# Per-class binary F1, one line per class, in a fixed order.
f1_report = (
    ('Med F1-Score: {:.3f}', tru_seq_m, res_seq_m),
    ('Dos F1-Score: {:.3f}', tru_seq_do, res_seq_do),
    ('Mod F1-Score: {:.3f}', tru_seq_mo, res_seq_mo),
    ('Fre F1-Score: {:.3f}', tru_seq_f, res_seq_f),
    ('Dur F1-Score: {:.3f}', tru_seq_du, res_seq_du),
    ('Rea F1-Score: {:.3f}', tru_seq_r, res_seq_r),
)
for template, truth, predicted in f1_report:
    print(template.format(sk.metrics.f1_score(truth, predicted, average='binary')))

Med F1-Score: 0.892
Dos F1-Score: 0.783
Mod F1-Score: 0.840
Fre F1-Score: 0.789
Dur F1-Score: 0.764
Rea F1-Score: 0.720


In [42]:
# Checkpoint the variables of the trained model's session. `ELS2S` is the only
# model instance created in this notebook.
# NOTE(review): assumes ELS2S_Model exposes its tf.Session as `.sess`, as the
# checkpoint-loading cell also does — confirm.
saver = tf.train.Saver()
saver.save(ELS2S.sess, "s2s/model/model.ckpt")

's2s/model/model.ckpt'

In [59]:
# Rebuild the model graph and restore its weights from the checkpoint.
# `ELS2S_Model` is the model class this notebook imports and trains.
# `state_size` matches the model trained in this notebook (101): restoring a
# checkpoint requires the variable shapes in the graph to match the saved ones.
# NOTE(review): if the checkpoint on disk was written by a different model or
# configuration, adjust the class and `state_size` accordingly.
loaded = ELS2S_Model(decay=0.00001,
                     batch=50,
                     enc_vocab_size=len(word_indices),
                     dec_vocab_size=len(word_indices),
                     enc_emb_size=100,
                     dec_emb_size=100,
                     state_size=101,
                     dropout=1.0,
                     learn_rate=0.001,
                     max_gradient_norm=5,
                     enc_emb_layer=emb_layer)
loaded.build_graph()
loaded.sess = tf.Session()
# NOTE(review): if build_graph() adds to a default graph that already holds the
# trained model, variable names may be uniquified and no longer match the
# checkpoint — confirm build_graph() starts from a fresh graph.
loader = tf.train.Saver()
loader.restore(loaded.sess, "s2s/model/model.ckpt")

  if d.decorator_argspec is not None), _inspect.getargspec(target))


INFO:tensorflow:Restoring parameters from s2s/model/model.ckpt
