# Grid search 

In [1]:
import sys
sys.path.append("../")

import itertools
import random
import pandas as pd
import numpy as np
from config import Config
import keras


VALIDATION_SPLIT = 0.1

In [2]:
from preprocessing.tokenizationWordVect import tokenize_frame
from keras.layers import Conv1D, Dropout, Flatten, Dense

In [3]:
level_1_Dropout = [0.5,0.2]
filters = [64, 32]
kernel_size = [5,3]
activation_conv = ['relu']
hidden_layers = [[(32,'relu',0.0),(16,'relu', 0.0)],[(32,'relu',0.1),(16,'relu', 0.1)],  [(24,'relu', 0.0)], [(24,'relu', 0.1)] ]
loss = ['mse']
epochs = [10, 20]


combinations = list(itertools.product(*[filters, kernel_size, activation_conv, level_1_Dropout, hidden_layers, loss, epochs]))
combinations



validation_performance = []

training_data = pd.read_csv(Config.TRAINING_DATASET_PATH)

from preprocessing.pipeline import ItalianTweetsPreprocessingPipeline
pp = ItalianTweetsPreprocessingPipeline()

preprocessed_training_data = pp.apply(training_data)

x = preprocessed_training_data['text']
y_1 = list(preprocessed_training_data['irony'])
y_2 = list(preprocessed_training_data['sarcasm'])
y = list(zip(y_1, y_2))

x= tokenize_frame(x, "sostituisciTag")
x = np.asarray(x)

tmp = list(zip(x,y))
random.shuffle(tmp)
x, y = zip(*tmp)


validate_size = int(len(x)*VALIDATION_SPLIT)
x_train, x_validate = x[:-validate_size], x[-validate_size:]
x_train = np.asarray(x_train)
x_validate = np.asarray(x_validate)
y_train, y_validate = y[:-validate_size], y[-validate_size:]
y_train = np.asarray(y_train)
y_validate = np.asarray(y_validate)


In [4]:
results = []
index = 0
print("numero di combinazioni da testare " + str(len(combinations)))
for combination in combinations:
    print("index: " + str(index) + " Configurazione: " + str(combination))
    
    model = keras.models.Sequential()
    model.add(Conv1D(filters=combination[0], kernel_size=combination[1], activation=combination[2], input_shape=(65,128)))
    if(combination[3] != 0.0):
        model.add(Dropout(combination[3]))
    model.add(Flatten())
    for e in combination[4]:
        model.add(Dense(e[0], activation=e[1]))
        if(e[2] != 0.0):
            model.add(Dropout(e[2]))
    model.add(Dense(2, activation = 'sigmoid'))
    model.compile(loss=combination[5], optimizer="adam", metrics=['acc'])
    
    history = model.fit(x_train, y_train, epochs=combination[6], verbose=0)
    result = model.evaluate(x_validate, y_validate)
    results.append([combination, dict(zip(model.metrics_names, result))])
    index += 1
    
    
    



numero di combinazioni da testare 64
index: 0 Configurazione: (64, 5, 'relu', 0.5, [(32, 'relu', 0.0), (16, 'relu', 0.0)], 'mse', 10)
index: 1 Configurazione: (64, 5, 'relu', 0.5, [(32, 'relu', 0.0), (16, 'relu', 0.0)], 'mse', 20)
index: 2 Configurazione: (64, 5, 'relu', 0.5, [(32, 'relu', 0.1), (16, 'relu', 0.1)], 'mse', 10)
index: 3 Configurazione: (64, 5, 'relu', 0.5, [(32, 'relu', 0.1), (16, 'relu', 0.1)], 'mse', 20)
index: 4 Configurazione: (64, 5, 'relu', 0.5, [(24, 'relu', 0.0)], 'mse', 10)
index: 5 Configurazione: (64, 5, 'relu', 0.5, [(24, 'relu', 0.0)], 'mse', 20)
index: 6 Configurazione: (64, 5, 'relu', 0.5, [(24, 'relu', 0.1)], 'mse', 10)
index: 7 Configurazione: (64, 5, 'relu', 0.5, [(24, 'relu', 0.1)], 'mse', 20)
index: 8 Configurazione: (64, 5, 'relu', 0.2, [(32, 'relu', 0.0), (16, 'relu', 0.0)], 'mse', 10)
index: 9 Configurazione: (64, 5, 'relu', 0.2, [(32, 'relu', 0.0), (16, 'relu', 0.0)], 'mse', 20)
index: 10 Configurazione: (64, 5, 'relu', 0.2, [(32, 'relu', 0.1), (1

In [5]:
metric = 'acc'
values = []


for i in range(len(results)):
    values.append((results[i][0],results[i][1][metric]))
                  
values.sort(key=lambda tup: tup[1], reverse = True)

print('Best Hyperparameter Configurations')
for v in values:
    print(v[0], v[1])

Best Hyperparameter Configurations
(64, 5, 'relu', 0.5, [(32, 'relu', 0.1), (16, 'relu', 0.1)], 'mse', 10) 1.0
(64, 5, 'relu', 0.5, [(32, 'relu', 0.1), (16, 'relu', 0.1)], 'mse', 20) 1.0
(64, 5, 'relu', 0.5, [(24, 'relu', 0.0)], 'mse', 10) 1.0
(64, 5, 'relu', 0.2, [(32, 'relu', 0.0), (16, 'relu', 0.0)], 'mse', 10) 1.0
(64, 5, 'relu', 0.2, [(32, 'relu', 0.1), (16, 'relu', 0.1)], 'mse', 10) 1.0
(64, 5, 'relu', 0.2, [(24, 'relu', 0.1)], 'mse', 10) 1.0
(64, 3, 'relu', 0.5, [(32, 'relu', 0.0), (16, 'relu', 0.0)], 'mse', 10) 1.0
(64, 3, 'relu', 0.5, [(32, 'relu', 0.1), (16, 'relu', 0.1)], 'mse', 10) 1.0
(64, 3, 'relu', 0.5, [(32, 'relu', 0.1), (16, 'relu', 0.1)], 'mse', 20) 1.0
(64, 3, 'relu', 0.5, [(24, 'relu', 0.1)], 'mse', 10) 1.0
(64, 3, 'relu', 0.5, [(24, 'relu', 0.1)], 'mse', 20) 1.0
(64, 3, 'relu', 0.2, [(32, 'relu', 0.1), (16, 'relu', 0.1)], 'mse', 10) 1.0
(32, 5, 'relu', 0.5, [(32, 'relu', 0.0), (16, 'relu', 0.0)], 'mse', 10) 1.0
(32, 5, 'relu', 0.5, [(32, 'relu', 0.0), (16, 'relu',

# Best Model

In [None]:
from pathlib import Path
import sys
sys.path.append("../")


from config import Config
import pandas as pd

In [None]:
training_data = pd.read_csv(Config.TRAINING_DATASET_PATH)
test_data = pd.read_csv(Config.TEST_DATASET_PATH)
del test_data['id']
del test_data['topic']

In [None]:
from preprocessing.pipeline import ItalianTweetsPreprocessingPipeline
pp = ItalianTweetsPreprocessingPipeline()

In [None]:
preprocessed_training_data = pp.apply(training_data)
preprocessed_test_data = pp.apply(test_data)


In [None]:
x_train = preprocessed_training_data['text']
x_test = preprocessed_test_data['text']
y_train = preprocessed_training_data[['irony','sarcasm']]
y_test = preprocessed_test_data[['irony','sarcasm']]

In [None]:
import keras
from keras.layers import Conv1D, Dense, Dropout, Flatten

import sklearn
import numpy as np
from custom_metrics import computeAvgF1, model_test_2out

from preprocessing.tokenizationWordVect import tokenize_frame
import numpy as np

VALIDATION_SIZE = 0.1

In [None]:
# Seed value
seed_value= 450

# 1. Set the `PYTHONHASHSEED` environment variable at a fixed value
import os
os.environ['PYTHONHASHSEED']=str(seed_value)

# 2. Set the `python` built-in pseudo-random generator at a fixed value
import random
random.seed(seed_value)

# 3. Set the `numpy` pseudo-random generator at a fixed value
import numpy as np
np.random.seed(seed_value)

# 4. Set the `tensorflow` pseudo-random generator at a fixed value
import tensorflow as tf
tf.random.set_seed(seed_value)
# for later versions: 
# tf.compat.v1.set_random_seed(seed_value)

# 5. Configure a new global `tensorflow` session
from keras import backend as K
session_conf = tf.compat.v1.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
sess = tf.compat.v1.Session(graph=tf.compat.v1.get_default_graph(), config=session_conf)
K.set_session(sess)

In [None]:
def model_conv(x, y, combination):
    
    model = keras.models.Sequential()
    model.add(Conv1D(filters=combination[0], kernel_size=combination[1], activation=combination[2], input_shape=(65,128)))
    if(combination[3] != 0.0):
        model.add(Dropout(combination[3]))
    model.add(Flatten())
    for e in combination[4]:
        model.add(Dense(e[0], activation=e[1]))
        if(e[2] != 0.0):
            model.add(Dropout(e[2]))
    model.add(Dense(2, activation = 'sigmoid'))
    model.compile(loss=combination[5], optimizer="adam", metrics=['acc'])
    
    history = model.fit(x, y, epochs=combination[6], validation_split = VALIDATION_SIZE)
    
    
    return model

In [None]:
x_train_conv = tokenize_frame(x_train, "sostituisciTag")
x_train_conv = np.asarray(x_train_conv)

x_test_conv = tokenize_frame(x_test, "sostituisciTag")
x_test_conv = np.asarray(x_test_conv)

hyperparameters = [64, 5, 'relu', 0.5, [(32, 'relu', 0.1), (16, 'relu', 0.1)], 'mse', 10]
model = model_conv(x_train_conv, y_train, hyperparameters)


In [None]:
from custom_metrics import computePerformanceTaskB_2output

print("F1 Average Task A-B")
computePerformanceTaskB_2output(model, x_test_conv, y_test, y_test['irony'])