# Tuning classifiers (LSTM)

### Setup

In [1]:
%run -i 'random_state.py'
from packages import *
from clean_functions import *
from tokenizer import *
from tuners import *

General grid

In [2]:
hyper_lstm = {'neurons': [10, 25, 50, 75, 100, 150, 200], #hidden LSTM
             'lamb1': [.0, 10**-6, 5*10**-6, 10**-5, 5*10**-5, 10**-4, 5*10**-4, 10**-3, 5*10**-3], #regularization
             'lamb2': [.0, 10**-6, 5*10**-6, 10**-5, 5*10**-5, 10**-4, 5*10**-4, 10**-3, 5*10**-3],
             'score': [0], 
             'lower_ci': [0], 
             'upper_ci': [0]}

# W2V/CNN/LSTM

### Getting data ready

Loading data

In [3]:
X, y=np.load('data/X_w2v.npy'),np.load('data/y_w2v.npy')

Turning y into numeric:

In [4]:
encode={'H:Arquivado': 1,'H:Ativo': 2,'H:Suspenso': 3}
decode={1:'H:Arquivado',2:'H:Ativo',3:'H:Suspenso'}

for i in range(len(y)):
    y[i]=encode[y[i]]

Splitting the dataset in train, test and validation set:

In [5]:
y=np.array(y)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=random_seed)
X_val, X_test, y_val, y_test = train_test_split(X_test, y_test, test_size=2/3, random_state=random_seed)

y_train2=np.array(pd.get_dummies(y_train))
y_val2=np.array(pd.get_dummies(y_val))
y_test2=np.array(pd.get_dummies(y_test))

np.shape(X_train),np.shape(y_train)

((4514, 5, 70, 100), (4514,))

### Tuning classification model

Grid

In [6]:
hyper = {'ks': [3,6,9], #kernels
         'neurons': [10, 25, 50, 75, 100, 150, 200],
         'lamb1': [.0, 10**-6, 5*10**-6, 10**-5, 5*10**-5, 10**-4, 5*10**-4, 10**-3, 5*10**-3], #regularization
         'lamb2': [.0, 10**-6, 5*10**-6, 10**-5, 5*10**-5, 10**-4, 5*10**-4, 10**-3, 5*10**-3],       
         'score': [0], 
         'lower_ci': [0], 
         'upper_ci': [0]}

hyper=expand_grid(hyper.copy(), random_seed=random_seed)
hyper=hyper[['ks','neurons','lamb1','lamb2','score','lower_ci','upper_ci']]

np.shape(hyper)

(50, 7)

Search

In [7]:
num_classes = 3
Adam=optimizers.Adam(learning_rate=0.005, beta_1=0.9, beta_2=0.999, amsgrad=True)
    
for i in tqdm(range(hyper.shape[0])):
    
    #Cleaning session
    tensorflow.keras.backend.clear_session()
    
    #Hyper
    k=hyper.loc[i,'ks'] 
    neuron=hyper.loc[i,'neurons'] 
    lamb1=hyper.loc[i,'lamb1'] 
    lamb2=hyper.loc[i,'lamb2'] 

    #Model for features extraction
    inputs = Input(shape=np.shape(X_train)[1:])
    conv = TimeDistributed(Conv1D(k, 1, activation='linear', kernel_constraint=unit_norm(axis=1), use_bias=False))(inputs)
    pool = TimeDistributed(GlobalMaxPooling1D())(conv)
    model_feat = Model(inputs, pool)

    #Model for classification
    pooled_inputs = Input(shape=(5, k))
    lstm = LSTM(neuron, kernel_regularizer=regularizers.l1_l2(lamb1, lamb2))(pooled_inputs)
    soft = Dense(num_classes, activation='softmax', kernel_regularizer=regularizers.l1_l2(lamb1, lamb2))(lstm)
    model_classific = Model(pooled_inputs, soft)

    #Final model
    outputs = model_classific(model_feat(inputs))
    model = Model(inputs, outputs)

    #Compiling
    model.compile(loss='categorical_crossentropy', optimizer=Adam, metrics=['accuracy'])

    #Running
    modelo=model.fit(X_train, y_train2, epochs=50,
                                              batch_size=500,
                                              shuffle=True,
                                              verbose=False,
                                              validation_data=(X_val, y_val2))
    
    p=modelo.history['val_accuracy'][-1]
    hyper.loc[i,'score']=p
    hyper.loc[i,'lower_ci']=p-1.96*np.sqrt((p*(1-p)/np.shape(y_val)[0]))
    hyper.loc[i,'upper_ci']=p+1.96*np.sqrt((p*(1-p)/np.shape(y_val)[0]))

100%|██████████| 50/50 [27:38<00:00, 33.17s/it]


In [8]:
hyper.to_csv('hyper/hyper_lstm_w2v')

In [9]:
hyper.iloc[np.argsort(hyper.loc[:,'score']),:].tail(20)

Unnamed: 0,ks,neurons,lamb1,lamb2,score,lower_ci,upper_ci
32,9,100,1e-05,5e-05,0.925581,0.905327,0.945836
3,6,25,0.0,0.0001,0.927132,0.907072,0.947191
20,6,10,5e-05,5e-06,0.927132,0.907072,0.947191
8,9,10,0.005,1e-06,0.928682,0.908821,0.948544
44,3,100,0.0,0.0001,0.928682,0.908821,0.948544
49,3,200,0.0001,0.001,0.928682,0.908821,0.948544
48,9,75,0.0005,0.0005,0.928682,0.908821,0.948544
37,9,75,0.005,1e-05,0.931783,0.912326,0.95124
33,3,100,5e-06,0.0,0.933333,0.914083,0.952584
13,3,10,0.0005,1e-05,0.933333,0.914083,0.952584


# BERT/LSTM

### Getting data ready

Loading data:

In [10]:
X, y = np.load('data/X_bert.npy'),np.load('data/y_bert.npy')

Turning y into numeric:

In [11]:
encode={'H:Arquivado': 1,'H:Ativo': 2,'H:Suspenso': 3}
decode={1:'H:Arquivado',2:'H:Ativo',3:'H:Suspenso'}

for i in range(len(y)):
    y[i]=encode[y[i]]

Splitting the dataset in train, test and validation set:

In [12]:
y=np.array(y)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=random_seed)
X_val, X_test, y_val, y_test = train_test_split(X_test, y_test, test_size=2/3, random_state=random_seed)

y_train2=np.array(pd.get_dummies(y_train))
y_val2=np.array(pd.get_dummies(y_val))
y_test2=np.array(pd.get_dummies(y_test))

np.shape(X_train),np.shape(y_train)

((4514, 5, 768), (4514,))

### Tuning classification model

Grid

In [13]:
hyper=expand_grid(hyper_lstm.copy(), random_seed=random_seed)
hyper=hyper[['neurons','lamb1','lamb2','score','lower_ci','upper_ci']]

np.shape(hyper)

(50, 6)

Grid Search

In [14]:
hyper=tune_lstm(hyper, X_train, y_train2, X_val, y_val2)
hyper.to_csv('hyper/hyper_lstm_bert')

100%|██████████| 50/50 [10:52<00:00, 13.05s/it]


In [15]:
hyper.iloc[np.argsort(hyper.loc[:,'score']),:].tail(20)

Unnamed: 0,neurons,lamb1,lamb2,score,lower_ci,upper_ci
36,200,0.0,5e-05,0.92093,0.900105,0.941756
13,200,0.0001,1e-06,0.92093,0.900105,0.941756
31,50,5e-05,0.0001,0.922481,0.901843,0.943118
10,75,0.0,1e-06,0.922481,0.901843,0.943118
8,75,5e-06,0.0001,0.924031,0.903584,0.944478
43,150,0.0,0.0,0.925581,0.905327,0.945836
49,150,1e-06,1e-06,0.925581,0.905327,0.945836
12,75,1e-06,1e-05,0.928682,0.908821,0.948544
29,50,1e-06,5e-06,0.928682,0.908821,0.948544
19,75,5e-05,1e-05,0.928682,0.908821,0.948544


# Doc2Vec/LSTM

### Getting data ready

Loading data

In [16]:
X, y=np.load('data/X_d2v.npy'),np.load('data/y_d2v.npy')

Turning y into numeric:

In [17]:
encode={'H:Arquivado': 1,'H:Ativo': 2,'H:Suspenso': 3}
decode={1:'H:Arquivado',2:'H:Ativo',3:'H:Suspenso'}

for i in range(len(y)):
    y[i]=encode[y[i]]

Splitting the dataset in train, test and validation set:

In [18]:
y=np.array(y)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=random_seed)
X_val, X_test, y_val, y_test = train_test_split(X_test, y_test, test_size=2/3, random_state=random_seed)

y_train2=np.array(pd.get_dummies(y_train))
y_val2=np.array(pd.get_dummies(y_val))
y_test2=np.array(pd.get_dummies(y_test))

np.shape(X_train),np.shape(y_train)

((4514, 5, 100), (4514,))

### Tuning classification model

Grid

In [19]:
hyper=expand_grid(hyper_lstm.copy(), random_seed=random_seed)
hyper=hyper[['neurons','lamb1','lamb2','score','lower_ci','upper_ci']]

np.shape(hyper)

(50, 6)

Search

In [20]:
hyper=tune_lstm(hyper, X_train, y_train2, X_val, y_val2)
hyper.to_csv('hyper/hyper_lstm_d2v')

100%|██████████| 50/50 [06:41<00:00,  8.04s/it]


In [21]:
hyper.iloc[np.argsort(hyper.loc[:,'score']),:].tail(20)

Unnamed: 0,neurons,lamb1,lamb2,score,lower_ci,upper_ci
21,25,5e-05,0.0001,0.812403,0.782275,0.842531
48,50,0.0005,0.0005,0.813953,0.783921,0.843986
38,25,5e-06,1e-05,0.815504,0.785569,0.845439
9,10,0.0005,0.0001,0.815504,0.785569,0.845439
0,25,5e-05,0.0005,0.817054,0.787217,0.846892
26,50,0.0005,1e-05,0.818605,0.788866,0.848344
16,10,5e-06,5e-05,0.818605,0.788866,0.848344
18,25,0.001,5e-05,0.820155,0.790515,0.849795
31,50,5e-05,0.0001,0.820155,0.790515,0.849795
47,100,0.001,0.001,0.821705,0.792166,0.851245


# TFIDF/LSTM

### Getting data ready

Loading data:

In [22]:
X, y=np.load('data/X_tfidf.npy'),np.load('data/y_tfidf.npy')

Turning y into numeric:

In [23]:
encode={'H:Arquivado': 1,'H:Ativo': 2,'H:Suspenso': 3}
decode={1:'H:Arquivado',2:'H:Ativo',3:'H:Suspenso'}

for i in range(len(y)):
    y[i]=encode[y[i]]

Splitting the dataset in train, test and validation set:

In [24]:
y=np.array(y)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=random_seed)
X_val, X_test, y_val, y_test = train_test_split(X_test, y_test, test_size=2/3, random_state=random_seed)

y_train2=np.array(pd.get_dummies(y_train))
y_val2=np.array(pd.get_dummies(y_val))
y_test2=np.array(pd.get_dummies(y_test))

np.shape(X_train),np.shape(y_train)

((4514, 5, 4000), (4514,))

### Tuning classification model

Grid

In [25]:
hyper=expand_grid(hyper_lstm.copy(), random_seed=random_seed)
hyper=hyper[['neurons','lamb1','lamb2','score','lower_ci','upper_ci']]

np.shape(hyper)

(50, 6)

Search

In [26]:
hyper=tune_lstm(hyper, X_train, y_train2, X_val, y_val2)
hyper.to_csv('hyper/hyper_lstm_tfidf')

100%|██████████| 50/50 [37:56<00:00, 45.52s/it]


In [27]:
hyper.iloc[np.argsort(hyper.loc[:,'score']),:].tail(20)

Unnamed: 0,neurons,lamb1,lamb2,score,lower_ci,upper_ci
21,25,5e-05,0.0001,0.902326,0.879414,0.925237
20,10,0.0,5e-06,0.902326,0.879414,0.925237
16,10,5e-06,5e-05,0.902326,0.879414,0.925237
45,100,0.0005,0.001,0.903876,0.881128,0.926624
17,50,5e-05,5e-05,0.903876,0.881128,0.926624
9,10,0.0005,0.0001,0.903876,0.881128,0.926624
1,10,1e-05,1e-05,0.905426,0.882843,0.92801
42,100,1e-05,5e-06,0.905426,0.882843,0.92801
23,100,1e-05,1e-06,0.906977,0.88456,0.929393
26,50,0.0005,1e-05,0.906977,0.88456,0.929393
