In [1]:
import tensorflow as tf
if tf.test.gpu_device_name():
    print('GPU found')
else:
    print("No GPU found")
tf.keras.backend.clear_session()

GPU found


In [2]:
!pip install keras_tuner
!pip install transformers
!pip install datasets

Collecting keras_tuner
  Downloading keras_tuner-1.3.5-py3-none-any.whl (176 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/176.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━[0m [32m92.2/176.1 kB[0m [31m2.5 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m176.1/176.1 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
Collecting kt-legacy (from keras_tuner)
  Downloading kt_legacy-1.0.5-py3-none-any.whl (9.6 kB)
Installing collected packages: kt-legacy, keras_tuner
Successfully installed keras_tuner-1.3.5 kt-legacy-1.0.5
Collecting transformers
  Downloading transformers-4.33.1-py3-none-any.whl (7.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.6/7.6 MB[0m [31m60.5 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.15.1 (from transformers)
  Downloading huggingface_hub-0.16.4-py3-none-any.whl (268 kB)
[2K     [

In [3]:
import pandas as pd
import numpy as np
import gc
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv1D, Bidirectional, LSTM, Dense, Attention, Dropout, MaxPooling1D, Flatten, GRU
from tensorflow.keras.optimizers import Adam
import keras_tuner as kt
from keras_tuner.tuners import BayesianOptimization
from keras_tuner.engine.hyperparameters import HyperParameters
from datasets import Dataset
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score, confusion_matrix, roc_auc_score
from transformers import TFAutoModel, AutoTokenizer, DataCollatorWithPadding, TFAutoModelForSequenceClassification, TFXLMRobertaForSequenceClassification, TFXLMRobertaModel

In [4]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
# data = pd.read_csv("telugu-hate-speech-train.csv")
# test_data = pd.read_csv("telugu-english-test-data-with-labels.csv")

data = pd.read_csv("/content/drive/MyDrive/data/hsd/telugu-hate-speech-train.csv")
test_data = pd.read_csv("/content/drive/MyDrive/data/hsd/telugu-english-test-data-with-labels.csv")

In [6]:
MURIL_MODEL = "google/muril-base-cased"
MBERT_MODEL = "bert-base-multilingual-cased"
XLM_ROBERTA_MODEL = "xlm-roberta-base"
MAXIMUM_SEQUENCE_LENGTH = 80
EMBEDDED_SIZE = 768

PRE_TRAINED_MODELS = {"MURIL_MODEL": "google/muril-base-cased", "MBERT_MODEL": "bert-base-multilingual-cased", "XLM_ROBERTA_MODEL":"xlm-roberta-base"}

In [7]:
data = data.dropna()
data["label"] = data["Label"].map(lambda x: 1 if x=="hate" else 0)
test_data["label"] = test_data["Label"].map(lambda x: 1 if x=="hate" else 0)

In [8]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4000 entries, 0 to 3999
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   S.No    4000 non-null   int64 
 1   text    4000 non-null   object
 2   Label   4000 non-null   object
 3   label   4000 non-null   int64 
dtypes: int64(2), object(2)
memory usage: 125.1+ KB


In [9]:
data["label"].value_counts()

0    2061
1    1939
Name: label, dtype: int64

In [10]:
# Count number of negative and positive reviews
neg_num = pd.value_counts(data['label'])[0]
pos_num = pd.value_counts(data['label'])[1]

print('# negative reviews before: {}'.format(neg_num))
print('# positive reviews before: {}'.format(pos_num))

# Make the data set balanced
balanced_sample_num = np.min([neg_num, pos_num])

# Picks <'balanced_sample_num'> numbers of negative and positive reviews at random
data = (data.groupby('label', as_index = False)
        .apply(lambda x: x.sample(n = 900))
        .reset_index(drop = True))

# Shuffle the rows so that 0's and 1's are mixed
data = data.sample(frac = 1).reset_index(drop = True)

print('\n# negative reviews after: {}'.format(pd.value_counts(data['label'])[0]))
print('# positive reviews after: {}'.format(pd.value_counts(data['label'])[1]))

# negative reviews before: 2061
# positive reviews before: 1939

# negative reviews after: 900
# positive reviews after: 900


In [11]:
data["text"].head()

0    ప్రవీణ్ కుమార్ సర్ garu నిజంగా dynamic police ...
1    "కెజియఫ్" మనిషి ఒంటరివాడైనా ధైర్యంగా పరిస్థితు...
2             Assembly lo thittukodanike miru vachedhi
3                        Valla dhagara bochu vunttundi
4    Samsaram cheyaganey pillalu puttaru sir. Wait ...
Name: text, dtype: object

In [12]:
test_data["text"].head()

0    ఎన్ని సార్లు అయిన వినాలని ఉంది చిట్టి తల్లి సూ...
1              ఫ్యూచర్ లో బాగ work out అవుతుంది సూపర్.
2    ఇది బెండపూడి గవ్నమెంట్ స్టూడెంట్స్ కి మాత్రమే ...
3    తెలుగులో మాట్లాడినప్పుడు చాలా అందంగా వినసొంపుగ...
4    సూపర్ సిస్టర్ ఫ్యూచర్ లో రైల్వే లో జాబ్ రావాలన...
Name: text, dtype: object

In [13]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1800 entries, 0 to 1799
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   S.No    1800 non-null   int64 
 1   text    1800 non-null   object
 2   Label   1800 non-null   object
 3   label   1800 non-null   int64 
dtypes: int64(2), object(2)
memory usage: 56.4+ KB


In [14]:
def get_datasets(data, test_data):
    hsd_dataset = Dataset.from_pandas(data)
    hsd_test_dataset = Dataset.from_pandas(test_data)
    return hsd_dataset, hsd_test_dataset

In [15]:
def get_dataset_with_tokens(tokenizer_name, dataset, tokenizable_col_name, remove_columns):

    tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
    def preprocess(data):
        # return tokenizer(data[tokenizable_col_name], return_tensors='tf', truncation=True)
        return tokenizer(data[tokenizable_col_name], return_token_type_ids=False, return_attention_mask=False, return_tensors='tf', truncation=True, padding=True)

    dataset =dataset.map(
        preprocess,
        batched=True,
        batch_size=16,
        remove_columns=remove_columns
    )
    return dataset, tokenizer


In [16]:
def get_model(model_name: str):
    return TFAutoModel.from_pretrained(model_name)

In [17]:
def get_word_emebeddings(model_name, dataset, max_seq_len=0):
    remove_columns = data.columns.to_list()
    dataset_with_tokens, tokenizer = get_dataset_with_tokens(model_name, dataset, "text", remove_columns)
    if max_seq_len == 0:
      data_collator = DataCollatorWithPadding(tokenizer=tokenizer, return_tensors="tf")
    else:
      data_collator = DataCollatorWithPadding(tokenizer=tokenizer, return_tensors="tf", padding="max_length", max_length=max_seq_len)
    batch = data_collator(dataset_with_tokens.to_list())
    # with tf.device('/GPU:0'):
    model = get_model(model_name)
    # last_hidden_state, pooler_output
    input_ids = batch["input_ids"]
    output = model(input_ids)
    return output


In [18]:
def get_bilstm_model(hp):
    input_layer = Input(shape=(MAXIMUM_SEQUENCE_LENGTH, EMBEDDED_SIZE))

    # Dilated Temporal Convolutional Network with dilation rates of 1, 2, and 4
    # Tune the number of filters in the Conv1D layer

    num_gru_units = hp.Int('gru_units', min_value=32, max_value=256, step=32)
    dropout_rate = hp.Float('dropout_rate', min_value=0.0, max_value=0.5, step=0.1)
    hp_learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])

    # BiLSTM
    gru_layer = Bidirectional(LSTM(units=num_gru_units, dropout=dropout_rate, return_sequences=True))(input_layer)

    # Contextualized Attention Mechanism
    attention_layer = Attention()([gru_layer, gru_layer])


    # Tune the dropout rate for regularization
    dropout_layer = Dropout(rate=dropout_rate)(attention_layer)


    # Output layer for binary classification (hate vs non-hate)
    output_layer = Dense(units=1, activation='sigmoid')(dropout_layer)

    model = tf.keras.Model(inputs=input_layer, outputs=output_layer)
    model.compile(optimizer=Adam(learning_rate=hp_learning_rate),
                  loss='binary_crossentropy',
                  metrics=['accuracy']
                  )
    # print(model.summary())
    return model

In [19]:
def get_lstm_model(hp):
    input_layer = Input(shape=(MAXIMUM_SEQUENCE_LENGTH, EMBEDDED_SIZE))

    # Dilated Temporal Convolutional Network with dilation rates of 1, 2, and 4
    # Tune the number of filters in the Conv1D layer

    num_gru_units = hp.Int('gru_units', min_value=32, max_value=256, step=32)
    dropout_rate = hp.Float('dropout_rate', min_value=0.0, max_value=0.5, step=0.1)
    hp_learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])

    # LSTM
    gru_layer = LSTM(units=num_gru_units, dropout=dropout_rate, return_sequences=True)(input_layer)

    # Contextualized Attention Mechanism
    attention_layer = Attention()([gru_layer, gru_layer])


    # Tune the dropout rate for regularization
    dropout_layer = Dropout(rate=dropout_rate)(attention_layer)


    # Output layer for binary classification (hate vs non-hate)
    output_layer = Dense(units=1, activation='sigmoid')(dropout_layer)

    model = tf.keras.Model(inputs=input_layer, outputs=output_layer)
    model.compile(optimizer=Adam(learning_rate=hp_learning_rate),
                  loss='binary_crossentropy',
                  metrics=['accuracy']
                  )
    # print(model.summary())
    return model

In [20]:
def get_gru_model(hp):
    input_layer = Input(shape=(MAXIMUM_SEQUENCE_LENGTH, EMBEDDED_SIZE))

    # Dilated Temporal Convolutional Network with dilation rates of 1, 2, and 4
    # Tune the number of filters in the Conv1D layer

    num_gru_units = hp.Int('gru_units', min_value=32, max_value=256, step=32)
    dropout_rate = hp.Float('dropout_rate', min_value=0.0, max_value=0.5, step=0.1)
    hp_learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])

    # GRU
    gru_layer = GRU(units=num_gru_units, dropout=dropout_rate, return_sequences=True)(input_layer)

    # Contextualized Attention Mechanism
    attention_layer = Attention()([gru_layer, gru_layer])


    # Tune the dropout rate for regularization
    dropout_layer = Dropout(rate=dropout_rate)(attention_layer)


    # Output layer for binary classification (hate vs non-hate)
    output_layer = Dense(units=1, activation='sigmoid')(dropout_layer)

    model = tf.keras.Model(inputs=input_layer, outputs=output_layer)
    model.compile(optimizer=Adam(learning_rate=hp_learning_rate),
                  loss='binary_crossentropy',
                  metrics=['accuracy']
                  )
    # print(model.summary())
    return model

In [21]:
def get_bigru_model(hp):
    input_layer = Input(shape=(MAXIMUM_SEQUENCE_LENGTH, EMBEDDED_SIZE))

    # Dilated Temporal Convolutional Network with dilation rates of 1, 2, and 4
    # Tune the number of filters in the Conv1D layer

    num_gru_units = hp.Int('gru_units', min_value=32, max_value=256, step=32)
    dropout_rate = hp.Float('dropout_rate', min_value=0.0, max_value=0.5, step=0.1)
    hp_learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])

    # BiGRU
    gru_layer = Bidirectional(GRU(units=num_gru_units, dropout=dropout_rate, return_sequences=True))(input_layer)

    # Contextualized Attention Mechanism
    attention_layer = Attention()([gru_layer, gru_layer])


    # Tune the dropout rate for regularization
    dropout_layer = Dropout(rate=dropout_rate)(attention_layer)


    # Output layer for binary classification (hate vs non-hate)
    output_layer = Dense(units=1, activation='sigmoid')(dropout_layer)

    model = tf.keras.Model(inputs=input_layer, outputs=output_layer)
    model.compile(optimizer=Adam(learning_rate=hp_learning_rate),
                  loss='binary_crossentropy',
                  metrics=['accuracy']
                  )
    # print(model.summary())
    return model

In [22]:
def get_cnn_bigru_model(hp):
    input_layer = Input(shape=(MAXIMUM_SEQUENCE_LENGTH, EMBEDDED_SIZE))

    # Dilated Temporal Convolutional Network with dilation rates of 1, 2, and 4
    # Tune the number of filters in the Conv1D layer

    num_filters = hp.Int('num_filters', min_value=32, max_value=256, step=32)
    kernel_size = hp.Int('kernel_size', min_value=3, max_value=7, step=1)
    num_gru_units = hp.Int('gru_units', min_value=32, max_value=256, step=32)
    dropout_rate = hp.Float('dropout_rate', min_value=0.0, max_value=0.5, step=0.1)
    hp_learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])

    conv1 = Conv1D(filters=num_filters, kernel_size=kernel_size, dilation_rate=1, activation='relu')(input_layer)
    conv2 = Conv1D(filters=num_filters, kernel_size=kernel_size, dilation_rate=2, activation='relu')(conv1)
    conv3 = Conv1D(filters=num_filters, kernel_size=kernel_size, dilation_rate=4, activation='relu')(conv2)

    # BiGRU
    gru_layer = Bidirectional(GRU(units=num_gru_units, dropout=dropout_rate, return_sequences=True))(conv3)

    # Contextualized Attention Mechanism
    attention_layer = Attention()([gru_layer, gru_layer])


    # Tune the dropout rate for regularization
    dropout_layer = Dropout(rate=dropout_rate)(attention_layer)


    # Output layer for binary classification (hate vs non-hate)
    output_layer = Dense(units=1, activation='sigmoid')(dropout_layer)

    model = tf.keras.Model(inputs=input_layer, outputs=output_layer)
    model.compile(optimizer=Adam(learning_rate=hp_learning_rate),
                  loss='binary_crossentropy',
                  metrics=['accuracy']
                  )
    # print(model.summary())
    return model

In [23]:
def get_cnn_gru_model(hp):
    input_layer = Input(shape=(MAXIMUM_SEQUENCE_LENGTH, EMBEDDED_SIZE))

    # Dilated Temporal Convolutional Network with dilation rates of 1, 2, and 4
    # Tune the number of filters in the Conv1D layer

    num_filters = hp.Int('num_filters', min_value=32, max_value=256, step=32)
    kernel_size = hp.Int('kernel_size', min_value=3, max_value=7, step=1)
    num_gru_units = hp.Int('gru_units', min_value=32, max_value=256, step=32)
    dropout_rate = hp.Float('dropout_rate', min_value=0.0, max_value=0.5, step=0.1)
    hp_learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])

    conv1 = Conv1D(filters=num_filters, kernel_size=kernel_size, dilation_rate=1, activation='relu')(input_layer)
    conv2 = Conv1D(filters=num_filters, kernel_size=kernel_size, dilation_rate=2, activation='relu')(conv1)
    conv3 = Conv1D(filters=num_filters, kernel_size=kernel_size, dilation_rate=4, activation='relu')(conv2)

    # GRU
    gru_layer = GRU(units=num_gru_units, dropout=dropout_rate, return_sequences=True)(conv3)

    # Contextualized Attention Mechanism
    attention_layer = Attention()([gru_layer, gru_layer])


    # Tune the dropout rate for regularization
    dropout_layer = Dropout(rate=dropout_rate)(attention_layer)


    # Output layer for binary classification (hate vs non-hate)
    output_layer = Dense(units=1, activation='sigmoid')(dropout_layer)

    model = tf.keras.Model(inputs=input_layer, outputs=output_layer)
    model.compile(optimizer=Adam(learning_rate=hp_learning_rate),
                  loss='binary_crossentropy',
                  metrics=['accuracy']
                  )
    # print(model.summary())
    return model

In [24]:

def get_cnn_model(hp):
    input_layer = Input(shape=(MAXIMUM_SEQUENCE_LENGTH, EMBEDDED_SIZE))

    # Dilated Temporal Convolutional Network with dilation rates of 1, 2, and 4
    # Tune the number of filters in the Conv1D layer

    num_filters = hp.Int('num_filters', min_value=32, max_value=256, step=32)
    kernel_size = hp.Int('kernel_size', min_value=3, max_value=7, step=1)
    dropout_rate = hp.Float('dropout_rate', min_value=0.0, max_value=0.5, step=0.1)
    hp_learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])

    conv1 = Conv1D(filters=num_filters, kernel_size=kernel_size, dilation_rate=1, activation='relu')(input_layer)
    conv2 = Conv1D(filters=num_filters, kernel_size=kernel_size, dilation_rate=2, activation='relu')(conv1)
    conv3 = Conv1D(filters=num_filters, kernel_size=kernel_size, dilation_rate=4, activation='relu')(conv2)

    # Tune the dropout rate for regularization
    dropout_layer = Dropout(rate=dropout_rate)(conv3)


    # Output layer for binary classification (hate vs non-hate)
    output_layer = Dense(units=1, activation='sigmoid')(dropout_layer)

    model = tf.keras.Model(inputs=input_layer, outputs=output_layer)
    model.compile(optimizer=Adam(learning_rate=hp_learning_rate),
                  loss='binary_crossentropy',
                  metrics=['accuracy']
                  )
    # print(model.summary())
    return model

In [25]:
def get_cnn_lstm_model(hp):
    input_layer = Input(shape=(MAXIMUM_SEQUENCE_LENGTH, EMBEDDED_SIZE))

    # Dilated Temporal Convolutional Network with dilation rates of 1, 2, and 4
    # Tune the number of filters in the Conv1D layer

    num_filters = hp.Int('num_filters', min_value=32, max_value=256, step=32)
    kernel_size = hp.Int('kernel_size', min_value=3, max_value=7, step=1)
    lstm_units = hp.Int('lstm_units', min_value=32, max_value=256, step=32)
    dropout_rate = hp.Float('dropout_rate', min_value=0.0, max_value=0.5, step=0.1)
    hp_learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])

    conv1 = Conv1D(filters=num_filters, kernel_size=kernel_size, dilation_rate=1, activation='relu')(input_layer)
    conv2 = Conv1D(filters=num_filters, kernel_size=kernel_size, dilation_rate=2, activation='relu')(conv1)
    conv3 = Conv1D(filters=num_filters, kernel_size=kernel_size, dilation_rate=4, activation='relu')(conv2)

    # LSTM
    lstm_layer = LSTM(units=lstm_units, return_sequences=True)(conv3)

    # Contextualized Attention Mechanism
    attention_layer = Attention()([lstm_layer, lstm_layer])


    # Tune the dropout rate for regularization
    dropout_layer = Dropout(rate=dropout_rate)(attention_layer)


    # Output layer for binary classification (hate vs non-hate)
    output_layer = Dense(units=1, activation='sigmoid')(dropout_layer)

    model = tf.keras.Model(inputs=input_layer, outputs=output_layer)
    model.compile(optimizer=Adam(learning_rate=hp_learning_rate),
                  loss='binary_crossentropy',
                  metrics=['accuracy']
                  )
    # print(model.summary())
    return model

In [26]:
def get_cnn_bilstm_model(hp):
    input_layer = Input(shape=(MAXIMUM_SEQUENCE_LENGTH, EMBEDDED_SIZE))

    # Dilated Temporal Convolutional Network with dilation rates of 1, 2, and 4
    # Tune the number of filters in the Conv1D layer

    num_filters = hp.Int('num_filters', min_value=32, max_value=256, step=32)
    kernel_size = hp.Int('kernel_size', min_value=3, max_value=7, step=1)
    lstm_units = hp.Int('lstm_units', min_value=32, max_value=256, step=32)
    dropout_rate = hp.Float('dropout_rate', min_value=0.0, max_value=0.5, step=0.1)
    hp_learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])

    conv1 = Conv1D(filters=num_filters, kernel_size=kernel_size, dilation_rate=1, activation='relu')(input_layer)
    conv2 = Conv1D(filters=num_filters, kernel_size=kernel_size, dilation_rate=2, activation='relu')(conv1)
    conv3 = Conv1D(filters=num_filters, kernel_size=kernel_size, dilation_rate=4, activation='relu')(conv2)

    # BiLSTM
    lstm_layer = Bidirectional(LSTM(units=lstm_units, return_sequences=True))(conv3)

    # Contextualized Attention Mechanism
    attention_layer = Attention()([lstm_layer, lstm_layer])


    # Tune the dropout rate for regularization
    dropout_layer = Dropout(rate=dropout_rate)(attention_layer)


    # Output layer for binary classification (hate vs non-hate)
    output_layer = Dense(units=1, activation='sigmoid')(dropout_layer)

    model = tf.keras.Model(inputs=input_layer, outputs=output_layer)
    model.compile(optimizer=Adam(learning_rate=hp_learning_rate),
                  loss='binary_crossentropy',
                  metrics=["accuracy"]
                  )
    # print(model.summary())
    return model

In [27]:
def model_tuning(hp_function, emebeddings, labels):
    # Define the hyperparameter search space
    tuner = BayesianOptimization(
        hypermodel=hp_function,
        objective='val_accuracy',
        # objective=kt.Objective('val_custom_eval_fn__f1_score', direction=max),
        max_trials=7,  # Number of hyperparameter combinations
        overwrite=True,
        directory='bayesian_optimization',
        project_name='hate_speech_detection'
    )

    stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)
    X_train, X_val, y_train, y_val = train_test_split(emebeddings, labels, test_size=0.3, random_state=42)

    # Perform hyperparameter tuning
    tuner.search(x=X_train, y=y_train, epochs=10, validation_data=(X_val, y_val),  callbacks=[stop_early])

    # Get the best model
    best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
    # Build the model with the optimal hyperparameters and train it on the data for 50 epochs
    model = tuner.hypermodel.build(best_hps)
    history = model.fit(X_train, y_train, epochs=10, batch_size=16, validation_data=(X_val, y_val))

    val_acc_per_epoch = history.history['val_accuracy']
    best_epoch = val_acc_per_epoch.index(max(val_acc_per_epoch)) + 1
    print('Best epoch: %d' % (best_epoch,))

    hypermodel = tuner.hypermodel.build(best_hps)

    # Retrain the model
    hypermodel.fit(X_train, y_train, epochs=best_epoch, batch_size=32, validation_data=(X_val, y_val))
    return hypermodel

In [28]:
def get_metrics(hypermodel, test_emebeddings, test_labels):
    predictions = hypermodel.predict(test_emebeddings)

    def pred_return(x):
        if x.mean()>=0.50:
            return 1
        else:
            return 0
    accuracy  =  (100*accuracy_score([pred_return(x) for x in predictions], test_labels))
    score_f1  =  (100*f1_score([pred_return(x) for x in predictions], test_labels))
    recall    =  (100*recall_score([pred_return(x) for x in predictions], test_labels))
    precision =  (100*precision_score([pred_return(x) for x in predictions], test_labels))
    losses, test_accuracy = hypermodel.evaluate(test_emebeddings, test_labels)
    print("Metrcis: ", test_accuracy)
    metrics = {"accuracy": accuracy, "score_f1": score_f1, "recall": recall, "precision": precision}
    return metrics, test_accuracy

In [29]:
def train_models_for_hsd():
    hsd_dataset, hsd_test_dataset = get_datasets(data=data, test_data=test_data)
    pre_trained_emed = list()
    cnn_names = list()
    accuracy = list()
    precision = list()
    score_f1 = list()
    recall = list()
    t_accuracy = list()
    for name, model_name in PRE_TRAINED_MODELS.items():
        print(model_name)

        # Embeddings
        emebeddings = get_word_emebeddings(model_name=model_name, dataset=hsd_dataset)
        max_seq_length = emebeddings["last_hidden_state"].shape[-2]
        global MAXIMUM_SEQUENCE_LENGTH
        MAXIMUM_SEQUENCE_LENGTH = max_seq_length

        test_emebeddings = get_word_emebeddings(model_name, hsd_test_dataset, max_seq_len=MAXIMUM_SEQUENCE_LENGTH)

        np_embeddings = emebeddings["last_hidden_state"].numpy()
        np_test_embeddings = test_emebeddings["last_hidden_state"].numpy()
        print(np_test_embeddings.shape)


        #  LSTM
        cnn_names.append("LSTM")
        pre_trained_emed.append(name)
        hypermodel = model_tuning(get_lstm_model, emebeddings=np_embeddings, labels=data["label"])
        metrics, test_accuracy = get_metrics(hypermodel=hypermodel, test_emebeddings=np_test_embeddings, test_labels=test_data["label"])
        accuracy.append(metrics["accuracy"])
        precision.append(metrics["precision"])
        score_f1.append(metrics["score_f1"])
        recall.append(metrics["recall"])
        t_accuracy.append(test_accuracy)
        gc.collect()

        #  BiLSTM
        cnn_names.append("BiLSTM")
        pre_trained_emed.append(name)
        hypermodel = model_tuning(get_bilstm_model, emebeddings=np_embeddings, labels=data["label"])
        metrics, test_accuracy = get_metrics(hypermodel=hypermodel, test_emebeddings=np_test_embeddings, test_labels=test_data["label"])
        accuracy.append(metrics["accuracy"])
        precision.append(metrics["precision"])
        score_f1.append(metrics["score_f1"])
        recall.append(metrics["recall"])
        t_accuracy.append(test_accuracy)
        gc.collect()

        #  BiGRU
        cnn_names.append("BiGRU")
        pre_trained_emed.append(name)
        hypermodel = model_tuning(get_bigru_model, emebeddings=np_embeddings, labels=data["label"])
        metrics, test_accuracy = get_metrics(hypermodel=hypermodel, test_emebeddings=np_test_embeddings, test_labels=test_data["label"])
        accuracy.append(metrics["accuracy"])
        precision.append(metrics["precision"])
        score_f1.append(metrics["score_f1"])
        recall.append(metrics["recall"])
        t_accuracy.append(test_accuracy)
        gc.collect()

        #  GRU
        cnn_names.append("GRU")
        pre_trained_emed.append(name)
        hypermodel = model_tuning(get_gru_model, emebeddings=np_embeddings, labels=data["label"])
        metrics, test_accuracy = get_metrics(hypermodel=hypermodel, test_emebeddings=np_test_embeddings, test_labels=test_data["label"])
        accuracy.append(metrics["accuracy"])
        precision.append(metrics["precision"])
        score_f1.append(metrics["score_f1"])
        recall.append(metrics["recall"])
        t_accuracy.append(test_accuracy)
        gc.collect()

        #  CNN
        cnn_names.append("CNN")
        pre_trained_emed.append(name)
        hypermodel = model_tuning(get_cnn_model, emebeddings=np_embeddings, labels=data["label"])
        metrics, test_accuracy = get_metrics(hypermodel=hypermodel, test_emebeddings=np_test_embeddings, test_labels=test_data["label"])
        accuracy.append(metrics["accuracy"])
        precision.append(metrics["precision"])
        score_f1.append(metrics["score_f1"])
        recall.append(metrics["recall"])
        t_accuracy.append(test_accuracy)
        gc.collect()

        # BiLSTM + CNN
        cnn_names.append("BiLSTM + CNN")
        pre_trained_emed.append(name)
        hypermodel = model_tuning(get_cnn_bilstm_model, emebeddings=np_embeddings, labels=data["label"])
        metrics, test_accuracy = get_metrics(hypermodel=hypermodel, test_emebeddings=np_test_embeddings, test_labels=test_data["label"])
        accuracy.append(metrics["accuracy"])
        precision.append(metrics["precision"])
        score_f1.append(metrics["score_f1"])
        recall.append(metrics["recall"])
        t_accuracy.append(test_accuracy)
        gc.collect()

        # LSTM + CNN
        cnn_names.append("LSTM + CNN")
        pre_trained_emed.append(name)
        hypermodel = model_tuning(get_cnn_lstm_model, emebeddings=np_embeddings, labels=data["label"])
        metrics, test_accuracy = get_metrics(hypermodel=hypermodel, test_emebeddings=np_test_embeddings, test_labels=test_data["label"])
        accuracy.append(metrics["accuracy"])
        precision.append(metrics["precision"])
        score_f1.append(metrics["score_f1"])
        recall.append(metrics["recall"])
        t_accuracy.append(test_accuracy)

        gc.collect()
        tf.keras.backend.clear_session()
        metrics_df = pd.DataFrame(np.array([pre_trained_emed , cnn_names, accuracy, precision, score_f1, recall, t_accuracy])).T
        metrics_df.columns = ["Word Embeddings", "Model Name", "Accuracy", "Precision", "F1-Score", "Recall", "Test Accuracy"]
    return [pre_trained_emed , cnn_names, accuracy, precision, score_f1, recall, t_accuracy], metrics_df

In [30]:
metrics, metrics_df = train_models_for_hsd()
# metrics_df = pd.DataFrame(np.array(metrics)).T
# metrics_df.columns = ["Word Embeddings", "Model Name", "Accuracy", "Precision", "F1-Score", "Recall", "Test Accuracy"]
print(metrics)
print(metrics_df)

Trial 5 Complete [00h 00m 12s]
val_accuracy: 0.5819999575614929

Best val_accuracy So Far: 0.6272726655006409
Total elapsed time: 00h 00m 59s
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Best epoch: 10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Metrcis:  0.49135541915893555
[['MURIL_MODEL', 'MURIL_MODEL', 'MURIL_MODEL', 'MURIL_MODEL', 'MURIL_MODEL', 'MURIL_MODEL', 'MURIL_MODEL', 'MBERT_MODEL', 'MBERT_MODEL', 'MBERT_MODEL', 'MBERT_MODEL', 'MBERT_MODEL', 'MBERT_MODEL', 'MBERT_MODEL', 'XLM_ROBERTA_MODEL', 'XLM_ROBERTA_MODEL', 'XLM_ROBERTA_MODEL', 'XLM_ROBERTA_MODEL', 'XLM_ROBERTA_MODEL', 'XLM_ROBERTA_MODEL', 'XLM_ROBERTA_MODEL'], ['LSTM', 'BiLSTM', 'BiGRU', 'GRU', 'CNN', 'BiLSTM + CNN', 'LSTM + CNN', 'LSTM', 'BiLSTM', 'BiGRU', 'GRU', 'CNN', 'BiLSTM + CNN', 'LSTM + CNN', 'LSTM', 'BiLSTM', 'BiGRU', 'GRU', 'CNN', 'BiLSTM + CNN', 'LSTM + CNN'], [50.0, 42.0, 42.

In [31]:
metrics_df = pd.DataFrame(np.array(metrics)).T
metrics_df.columns = ["Word Embeddings", "Model Name", "Accuracy", "Precision", "F1-Score", "Recall", "Test Accuracy"]

metrics_df

Unnamed: 0,Word Embeddings,Model Name,Accuracy,Precision,F1-Score,Recall,Test Accuracy
0,MURIL_MODEL,LSTM,50.0,0.0,0.0,0.0,0.4839999973773956
1,MURIL_MODEL,BiLSTM,42.0,50.8,46.69117647058824,43.197278911564624,0.4260147213935852
2,MURIL_MODEL,BiGRU,42.0,20.0,25.64102564102565,35.714285714285715,0.3956764936447143
3,MURIL_MODEL,GRU,47.0,8.0,13.114754098360654,36.36363636363637,0.3965588510036468
4,MURIL_MODEL,CNN,38.6,26.4,30.068337129840543,34.92063492063492,0.3892786502838135
5,MURIL_MODEL,BiLSTM + CNN,38.6,29.6,32.527472527472526,36.09756097560975,0.3884262442588806
6,MURIL_MODEL,LSTM + CNN,38.0,29.6,32.314410480349345,35.57692307692308,0.3819999992847442
7,MBERT_MODEL,LSTM,59.0,43.6,51.536643026004725,63.005780346820806,0.6035621762275696
8,MBERT_MODEL,BiLSTM,47.8,22.4,30.02680965147453,45.52845528455284,0.4498706459999084
9,MBERT_MODEL,BiGRU,55.2,45.6,50.4424778761062,56.43564356435643,0.5544776320457458
