In [None]:
from google.colab import  drive

In [None]:
# Mount the user's Google Drive under /drive; the data and result paths used
# later in this notebook all start with '/drive/My Drive/'.
drive.mount('/drive')

Mounted at /drive


In [None]:
import pickle
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from keras import  regularizers
from keras.regularizers import l2

In [None]:
def file_reader(file_location):
    """Load a delimited text file into a pandas DataFrame.

    The delimiter is chosen from the end of the file name, matched
    case-insensitively: names ending in ``csv`` are parsed as comma-separated,
    names ending in ``tsv`` as tab-separated. Both are read with pandas'
    ``engine='python'`` parser.

    Parameters
    ----------
    file_location : str
        Path of the file to read.

    Returns
    -------
    pandas.DataFrame
        The parsed table.

    Raises
    ------
    ValueError
        If the name ends in neither ``csv`` nor ``tsv``. Previously this case
        fell through and returned ``None``, which only surfaced later as an
        unrelated attribute error on the caller's side.
    """
    lowered = file_location.lower()
    if lowered.endswith('csv'):
        return pd.read_csv(file_location, engine='python')
    if lowered.endswith('tsv'):
        return pd.read_csv(file_location, engine='python', sep='\t')
    raise ValueError(
        "Unsupported file type (expected a name ending in 'csv' or 'tsv'): "
        + repr(file_location)
    )

In [None]:
import tensorflow
import gc

# Reset Keras Session
def reset_keras():
    """Release Keras/TensorFlow state between training runs.

    Clears the global Keras backend state, removes the module-level names
    ``model1`` and ``model0`` if they exist, then runs a garbage-collection
    pass and prints the number of unreachable objects it found.
    """
    # A throwaway v1 session is opened and closed around clear_session(), as
    # in the original recipe. The original also opened a second session
    # afterwards that was never used or closed; it is not recreated here.
    sess = tf.compat.v1.Session()
    tf.keras.backend.clear_session()
    sess.close()

    # `del model1` inside a function makes `model1` a *local* name, so the
    # statement always raised UnboundLocalError (hidden by a bare `except`)
    # and never touched the global. Remove the globals explicitly instead;
    # pop() with a default is a no-op when the name is absent.
    for global_name in ('model1', 'model0'):  # change these names as you need
        globals().pop(global_name, None)

    print(gc.collect()) # if it does something you should see a number as output


In [None]:
def generate_model(input_len, embeddings):
    """Build and compile the 1-D convolutional binary classifier.

    Architecture: a frozen embedding layer initialised from ``embeddings``,
    three ReLU ``Conv1D`` layers (128, 8 and 256 filters, kernel size 4),
    global average pooling, dropout (0.4), a 4-unit sigmoid dense layer and a
    single sigmoid output unit. The model is compiled with binary
    cross-entropy loss, the Adam optimizer and accuracy as the metric.

    Parameters
    ----------
    input_len : int
        Length of each input sequence (passed as ``input_length``).
    embeddings : array-like with a 2-D ``shape``
        Embedding matrix used as the fixed weights of the embedding layer.
        ``shape[0]`` is used as the vocabulary size and ``shape[1]`` as the
        embedding width.

    Returns
    -------
    tf.keras.Sequential
        The compiled, untrained model.
    """
    # The embedding width is taken from the matrix itself rather than from a
    # module-level `embedding_dim`: the layer's weights must have this width
    # anyway, and this removes a hidden dependency on a later notebook cell.
    vocab_size = embeddings.shape[0]
    embedding_width = embeddings.shape[1]

    model = tf.keras.Sequential([
        tf.keras.layers.Embedding(
            input_dim=vocab_size,
            output_dim=embedding_width,
            weights=[embeddings],
            input_length=input_len,
            trainable=False,  # keep the supplied vectors fixed during training
        ),
        tf.keras.layers.Conv1D(filters=128, kernel_size=4, activation='relu'),
        tf.keras.layers.Conv1D(filters=8, kernel_size=4, activation='relu'),
        tf.keras.layers.Conv1D(
            filters=256,
            kernel_size=4,
            activation='relu',
            # Regularizers that were tried and left disabled:
            #kernel_regularizer = tf.keras.regularizers.l1_l2(l1=1e-5, l2=1e-4),
            #bias_regularizer = tf.keras.regularizers.l2(1e-4),
            #activity_regularizer = tf.keras.regularizers.l2(1e-5)
        ),
        tf.keras.layers.GlobalAvgPool1D(),
        tf.keras.layers.Dropout(0.4),
        tf.keras.layers.Dense(4, activation='sigmoid'),
        tf.keras.layers.Dense(1, activation='sigmoid'),
    ])
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['acc'])
    return model

In [None]:
def train_model(X, Y, embeddings, num_epoch, batch_size=200):
    """Train a pair of binary models, one per class of a 0/1 label.

    ``model1`` is fitted to the indicator ``Y == 1`` and ``model0`` to the
    indicator ``Y == 0``. Both come from ``generate_model`` with the same
    input length (``X.shape[1]``) and embedding matrix, and both are fitted
    silently (``verbose=0``) with shuffling enabled.

    Parameters
    ----------
    X : array with a 2-D ``shape``
        Training inputs; ``X.shape[1]`` is used as the model input length.
    Y : array-like supporting ``==`` and ``.astype``
        Labels compared against 1 and 0 to build the two targets.
    embeddings : array-like
        Embedding matrix forwarded to ``generate_model``.
    num_epoch : int
        Number of training epochs for each model.
    batch_size : int, optional
        Mini-batch size for both fits. Defaults to 200, the value that was
        previously hard-coded.

    Returns
    -------
    tuple
        ``(model1, model0)``.
    """
    fit_options = dict(epochs=num_epoch, shuffle=True, batch_size=batch_size, verbose=0)

    model1 = generate_model(X.shape[1], embeddings)
    model0 = generate_model(X.shape[1], embeddings)
    model1.fit(X, (Y == 1).astype(int), **fit_options)
    model0.fit(X, (Y == 0).astype(int), **fit_options)
    return (model1, model0)

In [None]:
def predict_model(model1, model0, X):
    """Combine the two per-class models into hard 0/1 predictions.

    Each model's ``predict`` is called on ``X``; the first row of each
    transposed output is taken as that model's score vector. A sample is
    labelled 1 when ``model1`` scores strictly higher than ``model0`` and
    0 otherwise (ties go to 0).

    Returns an integer array with one entry per score.
    """
    positive_scores = np.transpose(model1.predict(X))[0]
    negative_scores = np.transpose(model0.predict(X))[0]
    is_positive = np.greater(positive_scores, negative_scores)
    return is_positive.astype(int)

In [None]:
def train_and_test(X_train, X_test, Y_train, Y_test, num_epoch, embedding_matrix=None):
    """Train the model pair on one split and report train/test accuracy.

    Parameters
    ----------
    X_train, X_test : arrays
        Inputs for fitting and for held-out evaluation.
    Y_train, Y_test : array-like or scalar
        Labels compared element-wise against the 0/1 predictions.
    num_epoch : int
        Number of epochs forwarded to ``train_model``.
    embedding_matrix : array-like, optional
        Embedding matrix forwarded to ``train_model``. When omitted, the
        module-level ``embeddings`` variable is used, which is what this
        function always did before the parameter existed.

    Returns
    -------
    tuple of float
        ``(train_score, test_score)``: the fraction of predictions equal to
        the corresponding labels on each set.
    """
    if embedding_matrix is None:
        # Backward-compatible fallback to the notebook-level global.
        embedding_matrix = embeddings

    model1, model0 = train_model(X_train, Y_train, embedding_matrix, num_epoch)
    ptrain = predict_model(model1, model0, X_train)
    ptest = predict_model(model1, model0, X_test)
    train_score = np.mean((ptrain == Y_train).astype(int))
    test_score = np.mean((ptest == Y_test).astype(int))
    return train_score, test_score

In [None]:
def gen_LOOCV_sets(X, Y, i):
    """Split ``X`` and ``Y`` into a leave-one-out train/test pair.

    Parameters
    ----------
    X : array-like
        Inputs; row ``i`` (along axis 0) is held out.
    Y : pandas.DataFrame
        Label table; the row at position ``i`` is held out.
    i : int
        Position of the held-out sample.

    Returns
    -------
    tuple
        ``(X_train, X_test, Y_train, Y_test)`` where ``X_train`` is ``X``
        without row ``i``, ``X_test`` is a one-row array holding ``X[i]``,
        ``Y_train`` is ``Y`` without the row labelled ``Y.index[i]`` and
        ``Y_test`` is the row ``Y.iloc[i]``.
    """
    X_train = np.delete(X, i, axis=0)
    X_test = np.array([X[i]])
    # Use the Y argument: the labels were previously read from the global
    # `df_essays`, so whatever the caller passed as Y was silently ignored.
    Y_train = Y.drop(Y.index[i])
    Y_test = Y.iloc[i]
    return (X_train, X_test, Y_train, Y_test)

In [None]:
def cross_val_CNN(X, Y, embeddings, num_epoch, left, right):
    """Run leave-one-out evaluation for held-out positions ``left..right-1``.

    For every position ``i`` in ``range(left, right)`` the data is split with
    ``gen_LOOCV_sets`` and, for each name in the module-level ``trait_names``,
    ``train_and_test`` is run on that label column. Progress is printed as
    the loop advances. After each position the scores collected so far are
    written to two CSV files (so a crash loses at most one iteration) and
    ``reset_keras`` is called.

    The output file names contain only ``left``, so two runs with the same
    ``left`` overwrite each other's files.

    Parameters
    ----------
    X : array-like
        Inputs, indexed by sample position.
    Y : pandas.DataFrame
        Label table with one column per entry of ``trait_names``.
    embeddings : array-like
        Embedding matrix. NOTE(review): it is not passed on by this
        function; ``train_and_test`` is called with five positional
        arguments, none of which is ``embeddings``.
    num_epoch : int
        Training epochs per model.
    left, right : int
        Half-open range of held-out positions.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(train_scores, test_scores)``, one row per held-out position with
        an ``i`` column and one column per trait. Both are empty frames when
        the range is empty.
    """
    # Rows are accumulated in plain lists and turned into frames on demand:
    # DataFrame.append was removed in pandas 2.0, and growing a frame row by
    # row is quadratic anyway.
    train_rows, test_rows = [], []
    train_scores, test_scores = pd.DataFrame(), pd.DataFrame()

    for i in range(left, right):

        print('Iteration',i, end='  :')

        (X_train, X_test, Y_train, Y_test) = gen_LOOCV_sets(X, Y, i)
        dtrain, dtest = {'i': i}, {'i': i}
        for trait in trait_names:

            print(trait,end=' - ')

            train_score, test_score = train_and_test(
                X_train, X_test, Y_train[trait], Y_test[trait], num_epoch
            )
            dtrain[trait] = train_score
            dtest[trait] = test_score

        train_rows.append(dtrain)
        test_rows.append(dtest)
        train_scores = pd.DataFrame(train_rows)
        test_scores = pd.DataFrame(test_rows)
        print(' Done\n')

        # Checkpoint the scores gathered so far after every held-out sample.
        train_scores.to_csv('/drive/My Drive/Files/CNN_train_score_' + str(left) + '.csv')
        test_scores.to_csv('/drive/My Drive/Files/CNN_test_score_' + str(left) + '.csv')

        reset_keras()
    return train_scores,test_scores

In [None]:
# NOTE(review): this cell only evaluates the literal 1 and assigns nothing;
# it looks like a leftover scratch cell and can probably be removed.
1

1

In [None]:
# Label columns that the cross-validation loop iterates over.
trait_names = ['cEXT', 'cNEU', 'cCON', 'cAGR', 'cOPN']

# Width of each embedding vector.
embedding_dim = 300

# NOTE(review): the three settings below are not referenced by any other
# cell visible in this notebook; confirm whether they are still needed.
num_epochs = 20
batch_size = 100
max_length = 300

In [None]:
# Load the pre-built inputs from the mounted Drive folder.
# NOTE(security): pickle.load can execute arbitrary code while unpickling;
# only load files that you created yourself or otherwise trust.

# `essays` is later passed to cross_val_CNN as X
# (presumably the encoded essay sequences; confirm against the file's producer).
with open( '/drive/My Drive/Files/essays.pickle', 'rb' ) as handle:
    essays = pickle.load(handle)
     
# `embeddings` is later passed to cross_val_CNN as the embedding matrix.
with open( '/drive/My Drive/Files/embeddings.pickle', 'rb') as handle:
    embeddings = pickle.load(handle)
    
# `df_essays` is later passed to cross_val_CNN as Y (the label table).
df_essays = file_reader( '/drive/My Drive/Files/essays.csv' )

In [None]:
# Evaluate held-out positions 1394..1399 (right is exclusive), training each
# model for 80 epochs. NOTE(review): 80 is passed literally here rather than
# through a named configuration constant.
train_score,test_score = cross_val_CNN(essays, df_essays, embeddings, 80, left = 1394, right = 1400)

Iteration 1394  :cEXT - cNEU - cCON - cAGR - cOPN -  Done

63791
Iteration 1395  :cEXT - cNEU - cCON - cAGR - cOPN -  Done

44109
Iteration 1396  :cEXT - cNEU - cCON - cAGR - cOPN -  Done

44109
Iteration 1397  :cEXT - cNEU - cCON - cAGR - cOPN -  Done

44109
Iteration 1398  :cEXT - cNEU - cCON - cAGR - cOPN -  Done

44109
Iteration 1399  :cEXT - cNEU - cCON - cAGR - cOPN -  Done

44109
