In [None]:
import sklearn
from keras.layers import LSTM, GRU, Dense, Dropout, Bidirectional
from keras.models import Sequential
from sklearn.model_selection import cross_val_score
from sklearn import metrics
from sklearn.metrics import cohen_kappa_score
import numpy as np
import pickle

In [None]:
# SECURITY NOTE: pickle.load can execute arbitrary code while deserializing.
# Only run this cell on a pickle file that comes from a trusted source.
#
# Eight objects are read back-to-back from the same handle, so the order of
# the loads below must match the order in which they were dumped.
# (Presumably *_vec / aug_v are feature arrays and y / *_y their labels --
# TODO confirm against the notebook that wrote this file.)
with open('org_gen_aug_nn.pkl', 'rb') as file:
  glv_vec = pickle.load(file)
  y = pickle.load(file)
  new_vec_nn = pickle.load(file)
  new_y_nn = pickle.load(file)
  aug_vec_nn = pickle.load(file)
  aug_y_nn = pickle.load(file)
  aug_v = pickle.load(file)
  aug_y = pickle.load(file)
# No explicit close(): the `with` block has already closed the file.

In [None]:
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 25% of glv_vec / y as a test set. The split is seeded and
# stratified on y so that it is repeatable and keeps the label proportions.
X_train_nn, X_test_nn, y_train_nn, y_test_nn = train_test_split(
  glv_vec,
  y,
  test_size=0.25,
  random_state=41,
  stratify=y,
)


In [None]:
def rnn(RNN, input_shape):
  """Build, compile and summarize a stacked bidirectional recurrent regressor.

  The network is two Bidirectional recurrent layers followed by Dropout and a
  single-unit Dense output, compiled with MSE loss, the RMSprop optimizer and
  an MAE metric.

  Args:
    RNN: 'LSTM' or 'GRU' to select the recurrent layer type. A recurrent
      layer class may also be passed directly and is used as-is.
    input_shape: Shape of the training array. The last entry is used as the
      feature count (and as the unit count of the first recurrent layer);
      the entries after the first axis define the per-sample input shape.
      Assumed to look like (samples, 1, features) -- TODO confirm.

  Returns:
    The compiled keras Sequential model.

  Raises:
    ValueError: If RNN is a string other than 'LSTM' or 'GRU'.
  """
  if isinstance(RNN, str):
    # Fail early with a clear message instead of the later, obscure
    # "'str' object is not callable" when the string is used as a layer.
    if RNN not in ('LSTM', 'GRU'):
      raise ValueError("Unknown RNN type {!r}; expected 'LSTM' or 'GRU'".format(RNN))
    RNN = LSTM if RNN == 'LSTM' else GRU

  n_features = input_shape[-1]

  model = Sequential()
  model.add(Bidirectional(RNN(n_features, dropout=0.4, recurrent_dropout=0.4, input_shape=[1, n_features], return_sequences=True)))
  model.add(Bidirectional(RNN(64, recurrent_dropout=0.4)))
  model.add(Dropout(0.5))
  # ReLU on the output keeps every prediction non-negative.
  model.add(Dense(1, activation='relu'))

  model.compile(loss='mean_squared_error', optimizer='rmsprop', metrics=['mae'])
  # Build with an unspecified batch dimension: the first axis of input_shape
  # is the number of training samples, not a batch size, and should not be
  # baked into the model.
  model.build((None,) + tuple(input_shape[1:]))
  model.summary()

  return model

In [None]:
# Bidirectional LSTM fitted on the training part of the glv_vec split:
# 100 epochs, mini-batches of 64.
rnn_lstm = rnn(RNN='LSTM', input_shape=X_train_nn.shape)
rnn_lstm.fit(x=X_train_nn, y=y_train_nn, epochs=100, batch_size=64)

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 bidirectional (Bidirectiona  (8037, 1, 100)           40400     
 l)                                                              
                                                                 
 bidirectional_1 (Bidirectio  (8037, 128)              84480     
 nal)                                                            
                                                                 
 dropout (Dropout)           (8037, 128)               0         
                                                                 
 dense (Dense)               (8037, 1)                 129       
                                                                 
Total params: 125,009
Trainable params: 125,009
Non-trainable params: 0
_________________________________________________________________
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Ep

<keras.callbacks.History at 0x7fadb1d84c40>

In [None]:
# GRU counterpart of the model above: same training data, same 100 epochs
# and batch size of 64.
rnn_gru = rnn(RNN='GRU', input_shape=X_train_nn.shape)
rnn_gru.fit(x=X_train_nn, y=y_train_nn, epochs=100, batch_size=64)

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 bidirectional_2 (Bidirectio  (8037, 1, 100)           30600     
 nal)                                                            
                                                                 
 bidirectional_3 (Bidirectio  (8037, 128)              63744     
 nal)                                                            
                                                                 
 dropout_1 (Dropout)         (8037, 128)               0         
                                                                 
 dense_1 (Dense)             (8037, 1)                 129       
                                                                 
Total params: 94,473
Trainable params: 94,473
Non-trainable params: 0
_________________________________________________________________
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Ep

<keras.callbacks.History at 0x7fadb1ac33a0>

In [None]:
def test_rnn_model(model, x_test, y_test):
  """Print the quadratic-weighted Cohen's kappa of `model` on a test set.

  The output of `model.predict(x_test)` is rounded with `np.around` before
  it is compared against `y_test`. The score is only printed; the function
  returns None.
  """
  rounded_pred = np.around(model.predict(x_test))
  kappa = cohen_kappa_score(y_test, rounded_pred, weights='quadratic')
  print("Kappa Score: {}".format(kappa))

In [None]:
# Score the LSTM on the held-out 25% of the split it was trained on.
test_rnn_model(model=rnn_lstm, x_test=X_test_nn, y_test=y_test_nn)

Kappa Score: 0.7219012181625678


In [None]:
# Score the GRU on the held-out 25% of the split it was trained on.
test_rnn_model(model=rnn_gru, x_test=X_test_nn, y_test=y_test_nn)

Kappa Score: 0.7193810300047577


In [None]:
# Keep only the 25% test portion of new_vec_nn / new_y_nn; the training
# portion is discarded. The split is seeded (same seed as the other splits
# in this notebook) so the kappa scores computed on it are reproducible
# across kernel restarts.
_, new_x_test_nn, _, new_y_test_nn = train_test_split(new_vec_nn, new_y_nn, test_size=0.25, random_state=41)


In [None]:
# Score the LSTM (fitted on X_train_nn) on the test slice of new_vec_nn.
test_rnn_model(model=rnn_lstm, x_test=new_x_test_nn, y_test=new_y_test_nn)


Kappa Score: 0.23756051592297123


In [None]:
# Score the GRU (fitted on X_train_nn) on the test slice of new_vec_nn.
test_rnn_model(model=rnn_gru, x_test=new_x_test_nn, y_test=new_y_test_nn)


Kappa Score: 0.25650329008056094


In [None]:
# Seeded 80/20 train/test split of aug_vec_nn / aug_y_nn (not stratified).
aug_x_tr, aug_x_ts, aug_y_tr, aug_y_ts = train_test_split(
  aug_vec_nn,
  aug_y_nn,
  test_size=0.2,
  random_state=41,
)


In [None]:
# LSTM fitted on the current aug_x_tr / aug_y_tr split: 100 epochs,
# mini-batches of 64.
rnn_1 = rnn(RNN='LSTM', input_shape=aug_x_tr.shape)
rnn_1.fit(x=aug_x_tr, y=aug_y_tr, epochs=100, batch_size=64)

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 bidirectional_8 (Bidirectio  (7718, 1, 100)           40400     
 nal)                                                            
                                                                 
 bidirectional_9 (Bidirectio  (7718, 128)              84480     
 nal)                                                            
                                                                 
 dropout_4 (Dropout)         (7718, 128)               0         
                                                                 
 dense_4 (Dense)             (7718, 1)                 129       
                                                                 
Total params: 125,009
Trainable params: 125,009
Non-trainable params: 0
_________________________________________________________________
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100


<keras.callbacks.History at 0x7fada9610bb0>

In [None]:
# GRU fitted on the current aug_x_tr / aug_y_tr split: 100 epochs,
# mini-batches of 64.
rnn_2 = rnn(RNN='GRU', input_shape=aug_x_tr.shape)
rnn_2.fit(x=aug_x_tr, y=aug_y_tr, epochs=100, batch_size=64)

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 bidirectional_6 (Bidirectio  (7718, 1, 100)           30600     
 nal)                                                            
                                                                 
 bidirectional_7 (Bidirectio  (7718, 128)              63744     
 nal)                                                            
                                                                 
 dropout_3 (Dropout)         (7718, 128)               0         
                                                                 
 dense_3 (Dense)             (7718, 1)                 129       
                                                                 
Total params: 94,473
Trainable params: 94,473
Non-trainable params: 0
_________________________________________________________________
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Ep

<keras.callbacks.History at 0x7fadaaba5a60>

In [None]:
# Score rnn_1 (LSTM) on the test part of the current augmented split.
test_rnn_model(model=rnn_1, x_test=aug_x_ts, y_test=aug_y_ts)

Kappa Score: 0.6504415233623789


In [None]:
# Score rnn_2 (GRU) on the test part of the current augmented split.
test_rnn_model(model=rnn_2, x_test=aug_x_ts, y_test=aug_y_ts)

Kappa Score: 0.590661618960719


In [None]:
# Seeded 80/20 train/test split of aug_v / aug_y (not stratified).
# NOTE: this rebinds aug_x_tr / aug_x_ts / aug_y_tr / aug_y_ts, which the
# earlier aug_vec_nn split also assigned, so the cells below depend on
# execution order.
aug_x_tr, aug_x_ts, aug_y_tr, aug_y_ts = train_test_split(
  aug_v,
  aug_y,
  test_size=0.2,
  random_state=41,
)


In [None]:
# LSTM fitted on the aug_v training split: 100 epochs, mini-batches of 64.
# NOTE: rebinds rnn_1, replacing the model an earlier cell stored under
# the same name.
rnn_1 = rnn(RNN='LSTM', input_shape=aug_x_tr.shape)
rnn_1.fit(x=aug_x_tr, y=aug_y_tr, epochs=100, batch_size=64)

Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 bidirectional_10 (Bidirecti  (14360, 1, 100)          40400     
 onal)                                                           
                                                                 
 bidirectional_11 (Bidirecti  (14360, 128)             84480     
 onal)                                                           
                                                                 
 dropout_5 (Dropout)         (14360, 128)              0         
                                                                 
 dense_5 (Dense)             (14360, 1)                129       
                                                                 
Total params: 125,009
Trainable params: 125,009
Non-trainable params: 0
_________________________________________________________________
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100


<keras.callbacks.History at 0x7fada7797910>

In [None]:
# GRU fitted on the aug_v training split: 100 epochs, mini-batches of 64.
# NOTE: rebinds rnn_2, replacing the model an earlier cell stored under
# the same name.
rnn_2 = rnn(RNN='GRU', input_shape=aug_x_tr.shape)
rnn_2.fit(x=aug_x_tr, y=aug_y_tr, epochs=100, batch_size=64)

Model: "sequential_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 bidirectional_12 (Bidirecti  (14360, 1, 100)          30600     
 onal)                                                           
                                                                 
 bidirectional_13 (Bidirecti  (14360, 128)             63744     
 onal)                                                           
                                                                 
 dropout_6 (Dropout)         (14360, 128)              0         
                                                                 
 dense_6 (Dense)             (14360, 1)                129       
                                                                 
Total params: 94,473
Trainable params: 94,473
Non-trainable params: 0
_________________________________________________________________
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Ep

<keras.callbacks.History at 0x7fada46113a0>

In [None]:
# Score the most recently trained rnn_1 (LSTM) on the current aug_x_ts split.
test_rnn_model(model=rnn_1, x_test=aug_x_ts, y_test=aug_y_ts)


Kappa Score: 0.6709760885482878


In [None]:
# Score the most recently trained rnn_2 (GRU) on the current aug_x_ts split.
test_rnn_model(model=rnn_2, x_test=aug_x_ts, y_test=aug_y_ts)

Kappa Score: 0.6420459667161256


In [None]:
# Score rnn_1 (LSTM, last fitted on an augmented split) on the test part of
# the original glv_vec split.
# NOTE(review): if the augmented arrays contain rows of glv_vec, this test
# set may overlap rnn_1's training data -- confirm before comparing scores.
test_rnn_model(model=rnn_1, x_test=X_test_nn, y_test=y_test_nn)

Kappa Score: 0.704142531577241


In [None]:
# Score rnn_2 (GRU, last fitted on an augmented split) on the test part of
# the original glv_vec split.
# NOTE(review): if the augmented arrays contain rows of glv_vec, this test
# set may overlap rnn_2's training data -- confirm before comparing scores.
test_rnn_model(model=rnn_2, x_test=X_test_nn, y_test=y_test_nn)

Kappa Score: 0.6829526200645666
