In [1]:
import os

In [2]:
from typing import List, Tuple

from elit.component import Component
from elit.embedding import FastText

import csv
# from src.util import tsv_reader
# tsv_reader = csv.reader(open(tsv_file, 'rb'), delimiter='\t')

In [3]:
from util import tsv_reader

In [4]:
from keras.layers import Conv2D, MaxPooling2D
from keras.preprocessing.image import ImageDataGenerator
from keras.layers import Input, Dense, concatenate, Dropout, Activation, Flatten
from keras.models import Model,model_from_json

Using TensorFlow backend.


In [5]:
import numpy as np
from keras.utils import np_utils

In [6]:
from keras.optimizers import Adam

In [7]:
# Directory, relative to the notebook, that is handed to tsv_reader for the
# SST splits and joined with embedding_file to locate the FastText model.
resource_dir = '../res/'

In [8]:
# File name of the FastText binary; it is joined with resource_dir when the
# embedding model (vsm) is constructed.
embedding_file = 'fasttext-50-180614.bin'

In [9]:
def _load_split(file_name):
    """Read one TSV split from resource_dir using the project's tsv_reader."""
    return tsv_reader(resource_dir, file_name)


# Training, development and test splits, loaded in that order.
trn_data = _load_split('sst.trn.tsv')
dev_data = _load_split('sst.dev.tsv')
tst_data = _load_split('sst.tst.tsv')

In [10]:
# trn_data = csv.reader(open(resource_dir+'sst.trn.tsv', 'r'), delimiter='\t')
# dev_data = csv.reader(open(resource_dir+'sst.dev.tsv', 'r'), delimiter='\t')
# tst_data = csv.reader(open(resource_dir+'sst.tst.tsv', 'r'), delimiter='\t')

In [11]:
# Full path of the FastText binary inside the resource directory.
embedding_path = os.path.join(resource_dir, embedding_file)
# Vector space model used to turn each row's text field into embeddings.
vsm = FastText(embedding_path)

In [12]:
def _embed_rows(rows):
    """Split two-field rows into (first fields, embedded second fields).

    Each row is unpacked as (label, text); the text field is passed through
    vsm.emb_list. The result is the transposed pair produced by zip(*...),
    i.e. one tuple of labels and one tuple of embedding lists.
    """
    pairs = [(label, vsm.emb_list(text)) for label, text in rows]
    return zip(*pairs)


trn_ys, trn_xs = _embed_rows(trn_data)
dev_ys, dev_xs = _embed_rows(dev_data)

In [13]:
# Width of the one-hot label vectors built below.
number_of_classes = 5

# One-hot encode the training labels, then the development labels.
Y_train = np_utils.to_categorical(trn_ys, num_classes=number_of_classes)
Y_dev = np_utils.to_categorical(dev_ys, num_classes=number_of_classes)

In [14]:
max_sentence_length = 80

# Embedding used to fill the unused tail of sentences shorter than
# max_sentence_length (first vector returned for a single blank).
blank_embedding = vsm.emb_list(' ')[0]


def pad_sentences(sentences, pad_vector, max_len):
    """Pad or truncate every sentence to ``max_len`` vectors and add a channel axis.

    Args:
        sentences: iterable of sequences of embedding vectors, one sequence
            per sentence.
        pad_vector: vector appended to sentences shorter than ``max_len``.
        max_len: number of vectors every sentence has in the result.

    Returns:
        NumPy array with a trailing axis of size 1 appended; for equally sized
        embedding vectors this is (n_sentences, max_len, vector_size, 1).
        The input sequences are copied and never modified.
    """
    fixed = []
    for sentence in sentences:
        # Slice-and-copy: leaves the caller's list untouched and drops any
        # vectors past max_len, which would otherwise make the array ragged.
        vectors = list(sentence[:max_len])
        vectors.extend([pad_vector] * (max_len - len(vectors)))
        fixed.append(vectors)
    batch = np.array(fixed)
    # Trailing size-1 axis matches the (..., 1) Input shape of the model.
    return batch.reshape(batch.shape + (1,))


train_xs = pad_sentences(trn_xs, blank_embedding, max_sentence_length)
devlop_xs = pad_sentences(dev_xs, blank_embedding, max_sentence_length)

In [15]:
# Heights (rows of the input covered) of the three convolution kernels.
first_ksize, second_ksize, third_ksize = 3, 4, 5

In [16]:
# Size of axis 2 of train_xs; used as the width of every Conv2D kernel and as
# the second dimension of the model's Input shape.
embedding_dim = train_xs.shape[2]

In [17]:
# import regularizer
from keras.regularizers import l1,l2

In [51]:
# Instantiate the L2 regularizer that the output Dense layer receives as its
# activity_regularizer.
# Earlier setting, kept for reference:
# reg = l2(0.033)
reg = l2(0.15)

In [52]:
def conv_pool_branch(inputs, kernel_height):
    """Build one Conv2D -> MaxPooling2D -> Flatten branch on ``inputs``.

    Args:
        inputs: tensor created by Input(shape=(max_sentence_length,
            embedding_dim, 1)).
        kernel_height: number of consecutive rows each of the 64 filters
            covers; the kernel always spans the full embedding_dim width.

    Returns:
        Flattened tensor holding one pooled value per filter.
    """
    # 'valid' convolution with a full-width kernel leaves
    # (max_sentence_length - kernel_height + 1) rows and a single column.
    branch = Conv2D(64, (kernel_height, embedding_dim), strides=(1, 1),
                    padding='valid', activation='relu')(inputs)
    # The pool window covers every remaining row, so each filter is reduced
    # to its maximum over the whole sentence.
    branch = MaxPooling2D(pool_size=(max_sentence_length - kernel_height + 1, 1),
                          strides=(1, 1), padding='valid')(branch)
    # An extra per-branch Dense(512, activation='relu',
    # activity_regularizer=reg) layer was tried here and is left disabled.
    return Flatten()(branch)


image_input = Input(shape=(max_sentence_length, embedding_dim, 1))

# Same layer creation order as before: all layers of the first branch, then
# the second, then the third.
first_kernel = conv_pool_branch(image_input, first_ksize)
second_kernel = conv_pool_branch(image_input, second_ksize)
third_kernel = conv_pool_branch(image_input, third_ksize)

merged = concatenate([first_kernel, second_kernel, third_kernel])
merged = Dropout(0.5)(merged)
# Single-branch variant kept for reference:
# merged = Dropout(0.5)(first_kernel)

# Output width follows number_of_classes so it always matches the one-hot
# targets built with the same constant.
# NOTE(review): reg is applied to the softmax activations
# (activity_regularizer), not to the layer weights - confirm this is intended.
output = Dense(number_of_classes, activation='softmax', activity_regularizer=reg)(merged)

In [53]:
# Assemble the functional-API model from the single input tensor and the
# softmax output tensor.
model = Model(inputs=[image_input], outputs=output)

In [54]:
# Configure training: categorical cross-entropy loss, an Adam optimizer
# constructed without arguments, and accuracy reported as a metric.
model.compile(loss='categorical_crossentropy', optimizer=Adam(), metrics=['accuracy'])

In [55]:
# Two generators built without any arguments: one feeds the training data,
# the other the development data.
gen, test_gen = ImageDataGenerator(), ImageDataGenerator()

In [56]:
# Training batches of 50; flow() shuffles by default, which is what we want
# for training.
train_generator = gen.flow(train_xs, Y_train, batch_size=50)
# Validation batches of 50 in a fixed order: with shuffling on, a different
# subset of the development set would be scored each epoch whenever the number
# of validation steps does not cover every batch.
test_generator = test_gen.flow(devlop_xs, Y_dev, batch_size=50, shuffle=False)

In [57]:
# Must match the batch_size passed to gen.flow / test_gen.flow.
batch_size = 50

model.fit_generator(
    train_generator,
    # Full batches only; the training iterator keeps cycling, so leftover
    # samples are picked up in later steps.
    steps_per_epoch=train_xs.shape[0] // batch_size,
    epochs=15,
    validation_data=test_generator,
    # Ceiling division so the final partial batch is evaluated as well and
    # every development example contributes to the validation metrics.
    validation_steps=-(-devlop_xs.shape[0] // batch_size),
)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.callbacks.History at 0x1369850f0>

In [59]:
# Run the model on the first 100 training examples (training data, not a
# held-out split).
pred = model.predict(train_xs[:100])

In [60]:
# Index of the highest-scoring class along the last axis of each prediction.
y_classes = np.argmax(pred, axis=-1)

In [61]:
# Display the predicted class indices for the 100 examples scored above.
y_classes

array([3, 4, 3, 3, 3, 4, 4, 4, 3, 3, 4, 2, 3, 2, 3, 3, 3, 2, 4, 3, 3, 2,
       2, 4, 2, 2, 4, 3, 4, 1, 4, 3, 2, 3, 4, 3, 3, 3, 2, 3, 3, 4, 3, 3,
       2, 0, 3, 4, 1, 4, 3, 4, 3, 3, 1, 4, 4, 3, 3, 3, 1, 4, 3, 3, 1, 1,
       4, 3, 1, 4, 3, 1, 1, 3, 3, 3, 1, 4, 4, 4, 4, 3, 4, 3, 3, 4, 4, 3,
       3, 4, 3, 3, 4, 4, 3, 3, 3, 3, 3, 4])

In [None]:
# Show the shape of the padded, reshaped training tensor.
train_xs.shape

In [None]:
# The bare name `shape` is not defined by any cell of this notebook and raises
# NameError on a fresh kernel. Show the development tensor's shape instead, as
# the companion to the training shape displayed just before.
# NOTE(review): confirm this is the value the cell was meant to inspect.
devlop_xs.shape

In [None]:
# Display the first entry of trn_xs, i.e. the vsm.emb_list result for the
# first training row. The output is every vector of that entry, so it is long.
trn_xs[0]

In [None]:
# Rebind both names to NumPy arrays (the training one is converted first).
trn_xs, dev_xs = np.array(trn_xs), np.array(dev_xs)

In [None]:
# Longest entry in the development set.
max_document_length = max(map(len, dev_xs))

In [None]:
# Shortest entry in the training set.
min_document_length = min(map(len, trn_xs))

In [None]:
# Display the longest entry length computed over dev_xs.
max_document_length

In [None]:
# Display the shortest entry length computed over trn_xs.
min_document_length

In [None]:
# Display the raw object returned by tsv_reader for the training split.
# NOTE(review): this shows the whole object, not a sample - output may be large.
trn_data

In [None]:
# Rebind dev_xs to a NumPy array; np.asarray leaves an existing ndarray as is.
dev_xs = np.asarray(dev_xs)

In [None]:
# Length of the first development entry.
len(dev_xs[0])

In [None]:
# The leading dimension is the number of sentences, len(dev_xs).
# ndarray.size is the total element count across all axes, so using it as the
# first dimension can never match the data.
# NOTE(review): the reshape still requires every sentence to have the same
# length as dev_xs[0]; the result is only displayed, not stored.
dev_xs.reshape(len(dev_xs), len(dev_xs[0]), len(dev_xs[0][0]))

In [None]:
# load json and create model
json_file = open('model.json', 'r')
loaded_model_json = json_file.read()
json_file.close()
loaded_model = model_from_json(loaded_model_json)
# load weights into new model
loaded_model.load_weights("model.h5")
print("Loaded model from disk")

In [None]:
# tsv_reader = csv.reader(open(tsv_file, 'rb'), delimiter='\t')