In [1]:
import os, re
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

from sklearn.model_selection import GridSearchCV
from keras.layers import Dense, Input, LSTM, Embedding, Dropout, Bidirectional
from keras.layers import GlobalMaxPool1D, Conv1D, MaxPooling1D, Flatten, GRU
from keras.models import Model, Sequential
from keras import initializers, regularizers, constraints, optimizers, layers
from keras.callbacks.callbacks import EarlyStopping, ModelCheckpoint
from keras.wrappers.scikit_learn import KerasClassifier
from keras.metrics import AUC
from keras.models import load_model

Using TensorFlow backend.


# Open and save data and weights

In [2]:
x_tr_1 = np.genfromtxt('data_preproc/x_tr_1_it.csv')

In [3]:
x_tr_1.shape

(7410, 200)

In [4]:
x_ts_1 = np.genfromtxt('data_preproc/x_ts_1_it.csv')

In [5]:
x_ts_1.shape

(1998, 200)

In [6]:
x_tr_2 = np.genfromtxt('data_preproc/x_tr_2_it.csv')

In [7]:
x_tr_2.shape

(7410, 200)

In [8]:
x_ts_2 = np.genfromtxt('data_preproc/x_ts_2_it.csv')

In [9]:
x_ts_2.shape

(1998, 200)

In [10]:
x_tr_3 = np.genfromtxt('data_preproc/x_tr_3_it.csv')

In [11]:
x_tr_3.shape

(7410, 200)

In [12]:
x_ts_3 = np.genfromtxt('data_preproc/x_ts_3_it.csv')

In [13]:
x_ts_3.shape

(1998, 200)

In [14]:
x_tr_4 = np.genfromtxt('data_preproc/x_tr_4_it.csv')

In [15]:
x_tr_4.shape

(7410, 200)

In [16]:
x_ts_4 = np.genfromtxt('data_preproc/x_ts_4_it.csv')

In [17]:
x_ts_4.shape

(1998, 200)

In [18]:
emb_matrix_ft = np.genfromtxt('data_preproc/emb_matr_ft_it.csv')

In [19]:
emb_matrix_ft.shape

(30000, 200)

In [20]:
train = pd.read_csv('data_it/training_set_sentipolc16.csv/training_set_sentipolc16.csv')
col = ['opos', 'oneg']
y_tr = train[col].values

# Build the model

In [21]:
max_len = 200
max_feature = 30000

In [22]:
def build_model(emb_matrix):
    """
    Questa funzione crea il modello e lo compila
    return: a model
    
    emb_matrix: matrice che inizializza i pesi dell'Embedding layer
    """
        
    inp = Input(shape=(None,))
    emb = Embedding(max_feature, 200, input_length=max_len, weights=[emb_matrix])(inp)
    lstm = LSTM(units=60, dropout=0.3, recurrent_dropout=0.3, return_sequences=True)(emb)
    fl = Flatten()(lstm)
    d1 = Dense(100, activation='relu')(fl)
    out = Dense(2, activation='sigmoid')(d1)
    model = Model(inputs=inp, outputs=out)
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    return model

In [23]:
cp = ModelCheckpoint('models_it/best_lstm_IT_pos.h5', monitor='val_accuracy', verbose=1, save_best_only=True, mode='max')
es = EarlyStopping(patience=10, verbose=1)
callbacks = [es,cp]

In [25]:
# training with x_1: without punctuation and digits

model1 = build_model(emb_matrix_ft)
history1_1 = model1.fit(x_tr_1, y_tr, batch_size=128, epochs=50, validation_split=0.1, callbacks=callbacks)

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Train on 6669 samples, validate on 741 samples
Epoch 1/50

Epoch 00001: val_accuracy improved from -inf to 0.70918, saving model to models_it/best_lstm_IT_pos.h5
Epoch 2/50

Epoch 00002: val_accuracy did not improve from 0.70918
Epoch 3/50

Epoch 00003: val_accuracy did not improve from 0.70918
Epoch 4/50

Epoch 00004: val_accuracy improved from 0.70918 to 0.71592, saving model to models_it/best_lstm_IT_pos.h5
Epoch 5/50

Epoch 00005: val_accuracy improved from 0.71592 to 0.73077, saving model to models_it/best_lstm_IT_pos.h5
Epoch 6/50

Epoch 00006: val_accuracy did not improve from 0.73077
Epoch 7/50

Epoch 00007: val_accuracy did not improve from 0.73077
Epoch 8/50

Epoch 00008: val_accuracy did not improve from 0.73077
Epoch 9/50

Epoch 00009: val_accuracy did not improve from 0.73077
Epoch 10/50

Epoch 00010: val_accuracy did not improve from 0.73077
Epoch 11/50

Epoch 00011: val_accuracy did not improve from 0.73077
Epoch 12/50

Epoch 00012: val_accuracy did not improve from 0.73

In [26]:
# training with x_2: without URLs, abbreviations and tags

model2 = build_model(emb_matrix_ft)

history1_2 = model2.fit(x_tr_2, y_tr, batch_size=128, epochs=50, validation_split=0.1, callbacks=callbacks)

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Train on 6669 samples, validate on 741 samples
Epoch 1/50

Epoch 00001: val_accuracy improved from 0.73077 to 0.75978, saving model to models_it/best_lstm_IT_pos.h5
Epoch 2/50

Epoch 00002: val_accuracy did not improve from 0.75978
Epoch 3/50

Epoch 00003: val_accuracy did not improve from 0.75978
Epoch 4/50

Epoch 00004: val_accuracy did not improve from 0.75978
Epoch 5/50

Epoch 00005: val_accuracy did not improve from 0.75978
Epoch 6/50

Epoch 00006: val_accuracy did not improve from 0.75978
Epoch 7/50

Epoch 00007: val_accuracy did not improve from 0.75978
Epoch 8/50

Epoch 00008: val_accuracy did not improve from 0.75978
Epoch 9/50

Epoch 00009: val_accuracy did not improve from 0.75978
Epoch 10/50

Epoch 00010: val_accuracy did not improve from 0.75978
Epoch 11/50

Epoch 00011: val_accuracy did not improve from 0.75978
Epoch 12/50

Epoch 00012: val_accuracy did not improve from 0.75978
Epoch 00012: early stopping


In [27]:
# training with x_3: with lemmatization

model3 = build_model(emb_matrix_ft)

history1_3 = model3.fit(x_tr_3, y_tr, batch_size=128, epochs=50, validation_split=0.1, callbacks=callbacks)

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Train on 6669 samples, validate on 741 samples
Epoch 1/50

Epoch 00001: val_accuracy improved from 0.75978 to 0.82321, saving model to models_it/best_lstm_IT_pos.h5
Epoch 2/50

Epoch 00002: val_accuracy did not improve from 0.82321
Epoch 3/50

Epoch 00003: val_accuracy did not improve from 0.82321
Epoch 4/50

Epoch 00004: val_accuracy did not improve from 0.82321
Epoch 5/50

Epoch 00005: val_accuracy did not improve from 0.82321
Epoch 6/50

Epoch 00006: val_accuracy did not improve from 0.82321
Epoch 7/50

Epoch 00007: val_accuracy did not improve from 0.82321
Epoch 8/50

Epoch 00008: val_accuracy did not improve from 0.82321
Epoch 9/50

Epoch 00009: val_accuracy did not improve from 0.82321
Epoch 10/50

Epoch 00010: val_accuracy did not improve from 0.82321
Epoch 11/50

Epoch 00011: val_accuracy did not improve from 0.82321
Epoch 12/50

Epoch 00012: val_accuracy did not improve from 0.82321
Epoch 00012: early stopping


In [28]:
# training with x_4: with stemming

model4 = build_model(emb_matrix_ft)

history1_4 = model4.fit(x_tr_4, y_tr, batch_size=128, epochs=50, validation_split=0.1, callbacks=callbacks)

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Train on 6669 samples, validate on 741 samples
Epoch 1/50

Epoch 00001: val_accuracy did not improve from 0.82321
Epoch 2/50

Epoch 00002: val_accuracy did not improve from 0.82321
Epoch 3/50

Epoch 00003: val_accuracy did not improve from 0.82321
Epoch 4/50

Epoch 00004: val_accuracy did not improve from 0.82321
Epoch 5/50

Epoch 00005: val_accuracy did not improve from 0.82321
Epoch 6/50

Epoch 00006: val_accuracy did not improve from 0.82321
Epoch 7/50

Epoch 00007: val_accuracy did not improve from 0.82321
Epoch 8/50

Epoch 00008: val_accuracy did not improve from 0.82321
Epoch 9/50

Epoch 00009: val_accuracy did not improve from 0.82321
Epoch 10/50

Epoch 00010: val_accuracy did not improve from 0.82321
Epoch 11/50

Epoch 00011: val_accuracy did not improve from 0.82321
Epoch 12/50

Epoch 00012: val_accuracy did not improve from 0.82321
Epoch 00012: early stopping
