In [47]:
import pandas as pd
import numpy as np

import tensorflow as tf
from scipy.sparse import csr_matrix, vstack, hstack
from sklearn.metrics import roc_auc_score

In [48]:
train = pd.read_csv("train.csv").dropna()
test = pd.read_csv("test.csv").dropna()
y_train = np.array(train['Sentiment'])
y_test = np.array(test['Sentiment'])
X_train = train.drop(['Sentiment'], axis=1)
X_train = X_train.values
X_test = test.drop(['Sentiment'], axis=1).values
gen_features = ['word_count', 'unigram_count', 'unique_word_count', 'unique_unigram_count', 'stopword_count', 'mean_word_length', 'mean_unigram_length', 'char_count', 'punctuation_count', 'number_count']

In [66]:
import keras_tuner as kt

def build_model(hp):
    model = Sequential()
    model.add(Dense(units=hp.Int('units', min_value=16, max_value=256, step=16),
                    activation='relu', input_dim=X_train.shape[1],
                    kernel_regularizer=l2(hp.Float('l2', min_value=1e-4, max_value=1e-2, sampling='LOG'))))
    model.add(Dropout(hp.Float('dropout', min_value=0.0, max_value=0.5, step=0.1)))
    model.add(Dense(1, activation='tanh'))

    adam = Adam(learning_rate=hp.Float('learning_rate', min_value=1e-5, max_value=1e-2, sampling='LOG'))
    model.compile(optimizer=adam, loss='mean_squared_error', metrics=['accuracy'])

    return model

# Create a tuner
tuner = kt.Hyperband(
    build_model,
    objective='val_accuracy',
    max_epochs=50,
    hyperband_iterations=2)

# Perform hyperparameter tuning
tuner.search(X_train, y_train, epochs=50, validation_split=0.1, callbacks=[tf.keras.callbacks.EarlyStopping(patience=5)])

# Get the optimal hyperparameters
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]


Trial 180 Complete [00h 00m 12s]
val_accuracy: 0.0

Best val_accuracy So Far: 0.0
Total elapsed time: 00h 05m 53s


In [62]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.regularizers import l2
from tensorflow.keras.optimizers.legacy import Adam

model = Sequential()
model.add(Dense(32, activation='relu', input_dim=X_train.shape[1], kernel_regularizer=l2(0.01)))
model.add(Dropout(0.5))
model.add(Dense(32, activation='relu', kernel_regularizer=l2(0.01)))
model.add(Dropout(0.5))
model.add(Dense(16, activation='relu', kernel_regularizer=l2(0.01)))
model.add(Dense(1, activation='tanh'))  # Output layer

adam = Adam(learning_rate=0.0001)
# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['accuracy'])
# Fit the model - assuming your data is ready and appropriately preprocessed
model.fit(X_train, y_train, epochs=20, batch_size=4, validation_split=0.1)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
 175/1326 [==>...........................] - ETA: 0s - loss: 0.9919 - accuracy: 0.0000e+00

KeyboardInterrupt: 

In [68]:

# You can now access the best hyperparameters as follows:
best_units = best_hps.get('units')
best_dropout = best_hps.get('dropout')
best_l2 = best_hps.get('l2')
best_learning_rate = best_hps.get('learning_rate')

# Now you can use these best hyperparameters to build the best model
best_model = tuner.hypermodel.build(best_hps)

# Summary of the best model
best_model.summary()

# You can now train the best model with the optimal hyperparameters
history = best_model.fit(X_train, y_train)

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_4 (Dense)             (None, 208)               256880    
                                                                 
 dropout_2 (Dropout)         (None, 208)               0         
                                                                 
 dense_5 (Dense)             (None, 1)                 209       
                                                                 
Total params: 257089 (1004.25 KB)
Trainable params: 257089 (1004.25 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [70]:
y_pred_test = model.predict(X_test)
roc_auc_test = roc_auc_score(y_test, y_pred_test)
print(f'AUC: {roc_auc_test}')

AUC: 0.5
