In [1]:
import pandas as pd
import numpy as np

import tensorflow as tf
from tensorflow.keras import backend as K

from scipy.sparse import csr_matrix, vstack, hstack
from sklearn.metrics import roc_auc_score

In [2]:
# Read both splits and discard every row that contains a missing value.
train = pd.read_csv("train.csv").dropna()
test = pd.read_csv("test.csv").dropna()

# Feature matrices: every column except the 'Sentiment' label, as NumPy arrays.
X_train = train.drop(columns=['Sentiment']).values
X_test = test.drop(columns=['Sentiment']).values

# Label vectors taken from the 'Sentiment' column.
y_train = np.array(train['Sentiment'])
y_test = np.array(test['Sentiment'])

# List of feature names (not referenced by the other visible cells).
gen_features = [
    'word_count',
    'unigram_count',
    'unique_word_count',
    'unique_unigram_count',
    'stopword_count',
    'mean_word_length',
    'mean_unigram_length',
    'char_count',
    'punctuation_count',
    'number_count',
]

In [30]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.regularizers import l2
from tensorflow.keras.optimizers.legacy import Adam

# Fully connected network: three ReLU hidden layers (16 -> 24 -> 16 units, the
# sizes picked with Keras Tuner) followed by one tanh unit, so every
# prediction lies in [-1, 1]. Each hidden layer carries an L2 kernel penalty.
model = Sequential([
    Dense(
        16,
        activation='relu',
        input_dim=X_train.shape[1],  # one input per feature column
        kernel_regularizer=l2(0.0001),  # author's note: best 0.0001
    ),
    Dense(24, activation='relu', kernel_regularizer=l2(0.0001)),
    Dense(16, activation='relu', kernel_regularizer=l2(0.008176190368614009)),
    Dense(1, activation='tanh'),  # output layer
])


def custom_binary_crossentropy(y_true, y_pred):
    """Binary cross-entropy for targets and predictions expressed in [-1, 1].

    Both tensors are mapped onto [0, 1] with ``(x + 1) / 2`` and cast to
    float64 before the Keras backend binary cross-entropy is applied; the
    element-wise losses are then averaged into a single value.

    Args:
        y_true: Target tensor; -1 maps to 0 and 1 maps to 1.
        y_pred: Prediction tensor (the models in this notebook end in tanh).

    Returns:
        The mean binary cross-entropy over all elements.
    """
    def _rescale(tensor):
        # Affine map [-1, 1] -> [0, 1], then cast to double precision.
        return K.cast((tensor + 1) / 2, dtype='float64')

    targets = _rescale(y_true)
    probabilities = _rescale(y_pred)

    # The rescaled predictions are probabilities, not logits.
    return K.mean(K.binary_crossentropy(targets, probabilities, from_logits=False))

# Adam optimizer (imported from tensorflow.keras.optimizers.legacy) with a
# fixed learning rate of 0.0002; it is passed to model.compile() below.
adam = Adam(learning_rate=0.0002)
def custom_accuracy(y_true, y_pred):
    """Accuracy for labels in {-1, 1} and predictions in the tanh range.

    A prediction strictly greater than 0 is read as class 1; anything else
    (including exactly 0) is read as class -1. The predicted class is then
    compared element-wise with ``y_true``.

    Args:
        y_true: Tensor of true labels, expected to hold -1 or 1.
        y_pred: Tensor of raw model outputs.

    Returns:
        The match indicators averaged over the last axis.
    """
    above_zero = K.greater(y_pred, 0)
    # Boolean -> {0, 1} -> {-1, 1}
    signed_labels = K.cast(above_zero, K.floatx()) * 2 - 1
    matches = K.equal(y_true, signed_labels)
    return K.mean(matches, axis=-1)

# Wire the optimizer, the custom loss and the custom metric into the model.
model.compile(
    optimizer=adam,
    loss=custom_binary_crossentropy,
    metrics=[custom_accuracy],
)

# Train in mini-batches of 8 for up to 300 epochs, holding out 10% of the
# training data for validation. Assumes X_train / y_train are already
# preprocessed.
model.fit(
    X_train,
    y_train,
    epochs=300,
    batch_size=8,
    validation_split=0.1,
)

Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300
Epoch 41/300
Epoch 42/300
Epoch 43/300
Epoch 44/300
Epoch 45/300
Epoch 46/300
Epoch 47/300
Epoch 48/300
Epoch 49/300
Epoch 50/300
Epoch 51/300
Epoch 52/300
Epoch 53/300
Epoch 54/300
Epoch 55/300
Epoch 56/300
Epoch 57/300
Epoch 58/300
Epoch 59/300
Epoch 60/300
Epoch 61/300
Epoch 62/300
Epoch 63/300
Epoch 64/300
Epoch 65/300
Epoch 66/300
Epoch 67/300
Epoch 68/300
Epoch 69/300
Epoch 70/300
Epoch 71/300
Epoch 72/300
Epoch 73/300
Epoch 74/300
Epoch 75/300
Epoch 76/300
Epoch 77/300
Epoch 78

<keras.src.callbacks.History at 0x2f7c63150>

In [4]:
import keras_tuner as kt

def build_model(hp):
    """Build and compile a tunable three-hidden-layer network for Keras Tuner.

    Search space (names as registered on ``hp``):
        layer1 / layer2 / layer3: hidden units, from 8 up to 64 / 32 / 16 in
            steps of 8.
        l2_1 / l2_2 / l2_3: per-layer L2 kernel penalty, 1e-5 to 1e-2 with
            log sampling.
        learning_rate: Adam learning rate, 5e-5 to 2e-4 in steps of 5e-5.

    Args:
        hp: Hyperparameter container supplied by the tuner.

    Returns:
        A compiled Sequential model ending in a single tanh unit, using
        custom_binary_crossentropy as loss and custom_accuracy as metric.
    """
    # (units hyperparameter, L2 hyperparameter, largest allowed unit count)
    hidden_specs = (
        ('layer1', 'l2_1', 64),
        ('layer2', 'l2_2', 32),
        ('layer3', 'l2_3', 16),
    )

    model = Sequential()
    for index, (units_key, l2_key, max_units) in enumerate(hidden_specs):
        # Register the width before the penalty so the hyperparameters keep
        # the order layerN, l2_N.
        width = hp.Int(units_key, min_value=8, max_value=max_units, step=8)
        penalty = hp.Float(l2_key, min_value=1e-5, max_value=1e-2, sampling='LOG')
        # Only the first layer declares the input width.
        first_layer_args = {} if index else {'input_dim': X_train.shape[1]}
        model.add(Dense(units=width,
                        activation='relu',
                        kernel_regularizer=l2(penalty),
                        **first_layer_args))
    # No Dropout layers are used; they could be added between the hidden
    # layers if needed.

    model.add(Dense(1, activation='tanh'))

    tuned_rate = hp.Float('learning_rate', min_value=0.00005, max_value=0.0002, step=0.00005)
    model.compile(optimizer=Adam(learning_rate=tuned_rate),
                  loss=custom_binary_crossentropy,
                  metrics=[custom_accuracy])

    return model

# Hyperband tuner that maximises the validation value of custom_accuracy.
# Individual trials train for at most 50 epochs and the Hyperband schedule
# is run twice.
tuner = kt.Hyperband(
    build_model,
    objective=kt.Objective("val_custom_accuracy", direction="max"),
    max_epochs=50,
    hyperband_iterations=2,
)

# Run the search, validating on 10% of the training data. EarlyStopping ends
# a trial after 5 epochs without improvement in its monitored quantity.
tuner.search(
    X_train,
    y_train,
    epochs=50,
    validation_split=0.1,
    callbacks=[tf.keras.callbacks.EarlyStopping(patience=5)],
)

# Keep the single best hyperparameter set found by the search.
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]


Trial 180 Complete [00h 00m 02s]
val_custom_accuracy: 1.0

Best val_custom_accuracy So Far: 1.0
Total elapsed time: 00h 03m 56s


In [8]:

# Pull the winning value of every tuned hyperparameter out of the result.
best_layer1 = best_hps.get('layer1')
best_layer2 = best_hps.get('layer2')
best_layer3 = best_hps.get('layer3')
best_l2_1 = best_hps.get('l2_1')
best_l2_2 = best_hps.get('l2_2')
best_l2_3 = best_hps.get('l2_3')
best_learning_rate = best_hps.get('learning_rate')
print(best_hps.values)

# Build a fresh, untrained model from the winning hyperparameters.
best_model = tuner.hypermodel.build(best_hps)

# Summary of the best model
best_model.summary()

# Train the best model. Model.fit defaults to epochs=1, which would give the
# rebuilt network a single pass over the data, so the training budget is
# stated explicitly and mirrors the settings used for tuner.search:
# up to 50 epochs, 10% validation split, early stopping with patience 5.
history = best_model.fit(
    X_train,
    y_train,
    epochs=50,
    validation_split=0.1,
    callbacks=[tf.keras.callbacks.EarlyStopping(patience=5)],
)

{'layer1': 16, 'l2_1': 0.0004075712325776104, 'layer2': 24, 'l2_2': 1.0051525819754434e-05, 'layer3': 16, 'l2_3': 0.008176190368614009, 'learning_rate': 0.0002, 'tuner/epochs': 2, 'tuner/initial_epoch': 0, 'tuner/bracket': 3, 'tuner/round': 0}
Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_8 (Dense)             (None, 16)                9488      
                                                                 
 dense_9 (Dense)             (None, 24)                408       
                                                                 
 dense_10 (Dense)            (None, 16)                400       
                                                                 
 dense_11 (Dense)            (None, 1)                 17        
                                                                 
Total params: 10313 (40.29 KB)
Trainable params: 10313 (40.29 KB)
Non-trai

In [32]:
# Score the test split with `model` and report the area under the ROC curve,
# using the raw tanh outputs as ranking scores.
y_pred_test = model.predict(X_test)
roc_auc_test = roc_auc_score(y_true=y_test, y_score=y_pred_test)
print(f'AUC: {roc_auc_test}')

AUC: 0.8129695073672827
