# Training GRU model

In [1]:
import os

# Must be set before keras is imported: TensorFlow's native logger reads this
# variable the first time it emits a message (which can already happen while
# the library is loading) and keeps that value afterwards. '2' hides INFO and
# WARNING messages from the native runtime.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import keras
import numpy as np
import pandas as pd
from sklearn.metrics import balanced_accuracy_score, f1_score, precision_score, recall_score

from utils.KerasModels import generateExpData
from utils.KerasModels import buildCharGRUModel

# Directory where the best checkpoint of every run is written.
MODELS_DIR = os.path.join('..', 'models', 'charCNN_GRU')
# Upper bound on training epochs; the training cells also use early stopping.
EPOCHS = 200

In [2]:
def prepare_data(data_directory):
    """Load the train/validation/test CSV splits and convert them to model inputs.

    Reads ``train.csv``, ``validation.csv`` and ``test.csv`` from
    ``data_directory``, keeps the ``screen_name``, ``text`` and
    ``account.type`` columns, and passes each split through
    ``generateExpData``. The first call receives ``tokenizer=None``; the
    tokenizer returned by each call is handed to the next one.
    NOTE(review): presumably this fits the tokenizer on the training split
    only and reuses it afterwards - confirm in utils.KerasModels.

    Args:
        data_directory: Directory containing the three CSV files.

    Returns:
        A 7-tuple ``(train_features, val_features, test_features,
        train_labels, val_labels, test_labels, vocab_size)``. The label
        entries are lists of ints (``human`` -> 0, ``bot`` -> 1) and
        ``vocab_size`` is ``len(tokenizer.word_index)`` of the last
        returned tokenizer.

    Raises:
        KeyError: If a required column is missing or ``account.type`` holds a
            value other than ``"human"`` or ``"bot"``.
    """
    wanted_columns = ["screen_name", "text", "account.type"]
    label_of = {"human": 0, "bot": 1}

    # Read every split first, then narrow each frame to the columns we use.
    raw_frames = [
        pd.read_csv(os.path.join(data_directory, filename))
        for filename in ('train.csv', 'validation.csv', 'test.csv')
    ]
    frames = [frame[wanted_columns] for frame in raw_frames]

    # Thread the tokenizer through the splits in train -> validation -> test order.
    tokenizer = None
    features = []
    for frame in frames:
        encoded, tokenizer = generateExpData(frame, tokenizer=tokenizer)
        features.append(encoded)

    # Map the textual account type to its integer class id.
    labels = [
        [label_of[account_type] for account_type in frame["account.type"]]
        for frame in frames
    ]

    vocab_size = len(tokenizer.word_index)

    train_features, val_features, test_features = features
    train_labels, val_labels, test_labels = labels
    return train_features, val_features, test_features, train_labels, val_labels, test_labels, vocab_size


def proba_to_pred(y_proba, threshold=0.5):
    """Convert predicted probabilities into hard 0/1 class labels.

    Args:
        y_proba: Array-like of probabilities that supports elementwise ``>``
            and ``.astype`` (e.g. a numpy array returned by ``model.predict``).
        threshold: Decision cut-off. Entries strictly greater than this value
            become 1, all others 0. Defaults to 0.5.

    Returns:
        An integer array with the same shape as ``y_proba``.
    """
    return (y_proba > threshold).astype(int)


def calculate_metrics(y_true, y_pred):
    """Compute the classification metrics reported by this notebook.

    Args:
        y_true: Ground-truth class labels.
        y_pred: Predicted class labels.

    Returns:
        A dict with the keys ``balanced_accuracy``, ``f1_score``,
        ``precision`` and ``recall`` (in that order), each holding the value
        of the corresponding scikit-learn scorer called as
        ``scorer(y_true, y_pred)``.
    """
    scorers = (
        ('balanced_accuracy', balanced_accuracy_score),
        ('f1_score', f1_score),
        ('precision', precision_score),
        ('recall', recall_score),
    )
    return {metric_name: scorer(y_true, y_pred) for metric_name, scorer in scorers}

In [3]:
# Load the splits stored under ../data/preprocessed_url_simple and unpack the
# features, labels and vocabulary size returned by prepare_data.
DATA_PATH = os.path.join('..', 'data', 'preprocessed_url_simple')
train_features, val_features, test_features, train_labels, val_labels, test_labels, vocab_size = prepare_data(DATA_PATH)

# Build the model for this dataset. The summary printed below the cell shows
# an input of length 320 and a 32-wide embedding, matching inputSize/embSize.
model = buildCharGRUModel(vocab_size, embSize=32, inputSize=320)

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 320)]             0         
                                                                 
 embedding (Embedding)       (None, 320, 32)           3360      
                                                                 
 bidirectional (Bidirectiona  (None, 1024)             1677312   
 l)                                                              
                                                                 
 dropout (Dropout)           (None, 1024)              0         
                                                                 
 dense (Dense)               (None, 1)                 1025      
                                                                 
Total params: 1,681,697
Trainable params: 1,681,697
Non-trainable params: 0
___________________________________________________

In [4]:
# Train the character-level GRU model on the "simple" dataset.
os.makedirs(MODELS_DIR, exist_ok=True)
# Stop once val_loss has not improved for 10 epochs and roll the in-memory
# model back to the weights of its best epoch.
early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
# Save the model to disk each time val_loss improves. `restore_best_weights`
# is an EarlyStopping option, not a ModelCheckpoint one, so it is not passed here.
save_best = keras.callbacks.ModelCheckpoint(os.path.join(MODELS_DIR, "simple"), monitor='val_loss',
                                            save_best_only=True)
model.fit(np.array(train_features), np.array(train_labels), batch_size=256, epochs=EPOCHS,
          validation_data=(np.array(val_features), np.array(val_labels)),
          callbacks=[early_stopping, save_best])

Epoch 1/200



INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\simple\assets


INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\simple\assets


Epoch 2/200
Epoch 3/200



INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\simple\assets


INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\simple\assets


Epoch 4/200
Epoch 5/200
Epoch 6/200



INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\simple\assets


INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\simple\assets


Epoch 7/200



INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\simple\assets


INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\simple\assets


Epoch 8/200



INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\simple\assets


INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\simple\assets


Epoch 9/200



INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\simple\assets


INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\simple\assets


Epoch 10/200



INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\simple\assets


INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\simple\assets


Epoch 11/200



INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\simple\assets


INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\simple\assets


Epoch 12/200



INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\simple\assets


INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\simple\assets


Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200



INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\simple\assets


INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\simple\assets


Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200



INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\simple\assets


INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\simple\assets


Epoch 21/200



INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\simple\assets


INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\simple\assets


Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200



INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\simple\assets


INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\simple\assets


Epoch 26/200



INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\simple\assets


INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\simple\assets


Epoch 27/200



INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\simple\assets


INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\simple\assets


Epoch 28/200



INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\simple\assets


INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\simple\assets


Epoch 29/200
Epoch 30/200



INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\simple\assets


INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\simple\assets


Epoch 31/200



INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\simple\assets


INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\simple\assets


Epoch 32/200



INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\simple\assets


INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\simple\assets


Epoch 33/200



INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\simple\assets


INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\simple\assets


Epoch 34/200
Epoch 35/200



INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\simple\assets


INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\simple\assets


Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200


<keras.callbacks.History at 0x16aa6626fa0>

In [5]:
# Score the "simple" model on its test split.
probas = model.predict(np.array(test_features))
# Turn the predicted probabilities into 0/1 labels before computing metrics.
y_pred = proba_to_pred(probas)
# `results` is reused by the final cell that assembles the summary CSV.
results = calculate_metrics(test_labels, y_pred)
results



{'balanced_accuracy': 0.8322305359937403,
 'f1_score': 0.8443960826985855,
 'precision': 0.7880839539607312,
 'recall': 0.909375}

# Lemmatized data 

In [6]:
# Load the splits stored under ../data/lemmatized; every variable carries a
# `lem_` prefix so the values of the previous run stay available.
LEMMATIZED_DATA_PATH = os.path.join('..', 'data', 'lemmatized')
lem_train_features, lem_val_features, lem_test_features, lem_train_labels, lem_val_labels, lem_test_labels, lem_vocab_size = prepare_data(
    LEMMATIZED_DATA_PATH)

# Build a separate model instance with the same embSize/inputSize settings.
modelLEM = buildCharGRUModel(lem_vocab_size, embSize=32, inputSize=320)

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 320)]             0         
                                                                 
 embedding_1 (Embedding)     (None, 320, 32)           3360      
                                                                 
 bidirectional_1 (Bidirectio  (None, 1024)             1677312   
 nal)                                                            
                                                                 
 dropout_1 (Dropout)         (None, 1024)              0         
                                                                 
 dense_1 (Dense)             (None, 1)                 1025      
                                                                 
Total params: 1,681,697
Trainable params: 1,681,697
Non-trainable params: 0
_________________________________________________

In [7]:
# Train the character-level GRU model on the lemmatized dataset.
# Stop once val_loss has not improved for 10 epochs and roll the in-memory
# model back to the weights of its best epoch.
early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
# Save the model to disk each time val_loss improves. `restore_best_weights`
# is an EarlyStopping option, not a ModelCheckpoint one, so it is not passed here.
save_best = keras.callbacks.ModelCheckpoint(os.path.join(MODELS_DIR, "lemmatized"), monitor='val_loss',
                                            save_best_only=True)
modelLEM.fit(np.array(lem_train_features), np.array(lem_train_labels), batch_size=256, epochs=EPOCHS,
             validation_data=(np.array(lem_val_features), np.array(lem_val_labels)),
             callbacks=[early_stopping, save_best])

Epoch 1/200



INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\lemmatized\assets


INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\lemmatized\assets


Epoch 2/200



INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\lemmatized\assets


INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\lemmatized\assets


Epoch 3/200



INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\lemmatized\assets


INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\lemmatized\assets


Epoch 4/200



INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\lemmatized\assets


INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\lemmatized\assets


Epoch 5/200
Epoch 6/200
Epoch 7/200



INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\lemmatized\assets


INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\lemmatized\assets


Epoch 8/200



INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\lemmatized\assets


INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\lemmatized\assets


Epoch 9/200



INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\lemmatized\assets


INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\lemmatized\assets


Epoch 10/200



INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\lemmatized\assets


INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\lemmatized\assets


Epoch 11/200



INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\lemmatized\assets


INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\lemmatized\assets


Epoch 12/200



INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\lemmatized\assets


INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\lemmatized\assets


Epoch 13/200



INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\lemmatized\assets


INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\lemmatized\assets


Epoch 14/200



INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\lemmatized\assets


INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\lemmatized\assets


Epoch 15/200



INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\lemmatized\assets


INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\lemmatized\assets


Epoch 16/200
Epoch 17/200



INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\lemmatized\assets


INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\lemmatized\assets


Epoch 18/200



INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\lemmatized\assets


INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\lemmatized\assets


Epoch 19/200



INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\lemmatized\assets


INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\lemmatized\assets


Epoch 20/200



INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\lemmatized\assets


INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\lemmatized\assets


Epoch 21/200



INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\lemmatized\assets


INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\lemmatized\assets


Epoch 22/200



INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\lemmatized\assets


INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\lemmatized\assets


Epoch 23/200



INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\lemmatized\assets


INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\lemmatized\assets


Epoch 24/200



INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\lemmatized\assets


INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\lemmatized\assets


Epoch 25/200



INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\lemmatized\assets


INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\lemmatized\assets


Epoch 26/200



INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\lemmatized\assets


INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\lemmatized\assets


Epoch 27/200



INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\lemmatized\assets


INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\lemmatized\assets


Epoch 28/200
Epoch 29/200
Epoch 30/200



INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\lemmatized\assets


INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\lemmatized\assets


Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200



INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\lemmatized\assets


INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\lemmatized\assets


Epoch 36/200



INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\lemmatized\assets


INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\lemmatized\assets


Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200


<keras.callbacks.History at 0x16abfb43dc0>

In [8]:
# Score the lemmatized-data model on its test split.
probas_lem = modelLEM.predict(np.array(lem_test_features))
# Turn the predicted probabilities into 0/1 labels before computing metrics.
y_pred_lem = proba_to_pred(probas_lem)
# `results_lem` is reused by the final cell that assembles the summary CSV.
results_lem = calculate_metrics(lem_test_labels, y_pred_lem)
results_lem



{'balanced_accuracy': 0.8408340669014085,
 'f1_score': 0.85183836912996,
 'precision': 0.7975460122699386,
 'recall': 0.9140625}

# Stemmed data

In [9]:
# Load the splits stored under ../data/stemmed; every variable carries a
# `stem_` prefix so the values of the previous runs stay available.
STEMMED_DATA_PATH = os.path.join('..', 'data', 'stemmed')
stem_train_features, stem_val_features, stem_test_features, stem_train_labels, stem_val_labels, stem_test_labels, stem_vocab_size = prepare_data(
    STEMMED_DATA_PATH)

# Build a separate model instance with the same embSize/inputSize settings.
modelSTEM = buildCharGRUModel(stem_vocab_size, embSize=32, inputSize=320)

Model: "model_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_3 (InputLayer)        [(None, 320)]             0         
                                                                 
 embedding_2 (Embedding)     (None, 320, 32)           3360      
                                                                 
 bidirectional_2 (Bidirectio  (None, 1024)             1677312   
 nal)                                                            
                                                                 
 dropout_2 (Dropout)         (None, 1024)              0         
                                                                 
 dense_2 (Dense)             (None, 1)                 1025      
                                                                 
Total params: 1,681,697
Trainable params: 1,681,697
Non-trainable params: 0
_________________________________________________

In [10]:
# Train the character-level GRU model on the stemmed dataset.
# Stop once val_loss has not improved for 10 epochs and roll the in-memory
# model back to the weights of its best epoch.
early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
# Save the model to disk each time val_loss improves. `restore_best_weights`
# is an EarlyStopping option, not a ModelCheckpoint one, so it is not passed here.
save_best = keras.callbacks.ModelCheckpoint(os.path.join(MODELS_DIR, "stemmed"), monitor='val_loss',
                                            save_best_only=True)
modelSTEM.fit(np.array(stem_train_features), np.array(stem_train_labels), batch_size=256, epochs=EPOCHS,
              validation_data=(np.array(stem_val_features), np.array(stem_val_labels)),
              callbacks=[early_stopping, save_best])

Epoch 1/200



INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\stemmed\assets


INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\stemmed\assets


Epoch 2/200



INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\stemmed\assets


INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\stemmed\assets


Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200



INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\stemmed\assets


INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\stemmed\assets


Epoch 8/200



INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\stemmed\assets


INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\stemmed\assets


Epoch 9/200



INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\stemmed\assets


INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\stemmed\assets


Epoch 10/200
Epoch 11/200



INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\stemmed\assets


INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\stemmed\assets


Epoch 12/200



INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\stemmed\assets


INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\stemmed\assets


Epoch 13/200
Epoch 14/200



INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\stemmed\assets


INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\stemmed\assets


Epoch 15/200



INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\stemmed\assets


INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\stemmed\assets


Epoch 16/200



INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\stemmed\assets


INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\stemmed\assets


Epoch 17/200



INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\stemmed\assets


INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\stemmed\assets


Epoch 18/200
Epoch 19/200



INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\stemmed\assets


INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\stemmed\assets


Epoch 20/200
Epoch 21/200



INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\stemmed\assets


INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\stemmed\assets


Epoch 22/200



INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\stemmed\assets


INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\stemmed\assets


Epoch 23/200
Epoch 24/200
Epoch 25/200



INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\stemmed\assets


INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\stemmed\assets


Epoch 26/200



INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\stemmed\assets


INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\stemmed\assets


Epoch 27/200



INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\stemmed\assets


INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\stemmed\assets


Epoch 28/200



INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\stemmed\assets


INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\stemmed\assets


Epoch 29/200
Epoch 30/200



INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\stemmed\assets


INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\stemmed\assets


Epoch 31/200
Epoch 32/200



INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\stemmed\assets


INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\stemmed\assets


Epoch 33/200
Epoch 34/200



INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\stemmed\assets


INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\stemmed\assets


Epoch 35/200
Epoch 36/200



INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\stemmed\assets


INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\stemmed\assets


Epoch 37/200



INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\stemmed\assets


INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\stemmed\assets


Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200



INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\stemmed\assets


INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\stemmed\assets


Epoch 43/200



INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\stemmed\assets


INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\stemmed\assets


Epoch 44/200
Epoch 45/200



INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\stemmed\assets


INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\stemmed\assets


Epoch 46/200
Epoch 47/200
Epoch 48/200



INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\stemmed\assets


INFO:tensorflow:Assets written to: ..\models\charCNN_GRU\stemmed\assets


Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200


<keras.callbacks.History at 0x16ab9b9b070>

In [11]:
# Score the stemmed-data model on its test split.
probas_stem = modelSTEM.predict(np.array(stem_test_features))
# Turn the predicted probabilities into 0/1 labels before computing metrics.
y_pred_stem = proba_to_pred(probas_stem)
# `results_stem` is reused by the final cell that assembles the summary CSV.
results_stem = calculate_metrics(stem_test_labels, y_pred_stem)
results_stem



{'balanced_accuracy': 0.8330172877543036,
 'f1_score': 0.8442174388909157,
 'precision': 0.7919233401779603,
 'recall': 0.90390625}

In [12]:
# Collect the test metrics of the three runs into one table and save it as CSV.
# The row order matches the order of the 'dataset' labels assigned below.
results_all = pd.DataFrame([results, results_lem, results_stem])
results_all['model'] = 'CharCNN+GRU'
results_all['dataset'] = ['simple', 'lemmatized', 'stemmed']

# exist_ok=True creates the directory only when it is missing, without a
# separate existence check (same pattern as used for MODELS_DIR).
os.makedirs('results', exist_ok=True)
results_all.to_csv('results/char_cnn_gru.csv', index=False)
results_all

Unnamed: 0,balanced_accuracy,f1_score,precision,recall,model,dataset
0,0.832231,0.844396,0.788084,0.909375,CharCNN+GRU,simple
1,0.840834,0.851838,0.797546,0.914062,CharCNN+GRU,lemmatized
2,0.833017,0.844217,0.791923,0.903906,CharCNN+GRU,stemmed
