In [1]:
import pandas as pd
import tensorflow as tf
from keras.layers import Dense, Dropout, Input
from keras.models import Sequential
from keras.optimizers import Adam
from scikeras.wrappers import KerasClassifier
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.preprocessing import StandardScaler

## Load the dataset

In [2]:
# Read the CSV and use the `secret_name` column as the row index.
df = pd.read_csv('GDSI_OpenDataset_Final_cleaned_OH.csv').set_index('secret_name')
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1141 entries, C_1005 to P_992
Columns: 116 entries, bmi_in_cat2_not_overweight to has_comorbidities.2_yes
dtypes: int64(116)
memory usage: 1.0+ MB


In [3]:
# Target columns, listed once so the feature matrix and the label matrix can never drift apart.
TARGET_COLUMNS = ['ms_type2_relapsing_remitting', 'ms_type2_progressive_MS', 'ms_type2_other']

# Every column that is not a target is used as a feature.
X = df.drop(columns=TARGET_COLUMNS)
Y = df[TARGET_COLUMNS]

# train test split: hold out 30% of the rows; the fixed seed keeps the split repeatable.
# NOTE(review): the split is not stratified - consider `stratify=Y` if the classes are imbalanced.
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.3, random_state=123)

# Standardize data: the scaler is fitted on the training rows only and then
# applied unchanged to the test rows, so test data never influences the scaling.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [4]:
# Define model creation function
def create_model(num_layers=1, units=64, dropout_rate=0.0, learning_rate=0.001, meta=None):
    """Build and compile a feed-forward classifier with a 3-way softmax output.

    Parameters
    ----------
    num_layers : int
        Number of hidden ``Dense`` layers. Values below 1 still produce one
        hidden layer.
    units : int
        Width of every hidden layer.
    dropout_rate : float
        Rate of the single ``Dropout`` layer placed after the last hidden layer.
    learning_rate : float
        Learning rate handed to the ``Adam`` optimizer.
    meta : dict, optional
        Fit metadata that scikeras passes to model-building functions that
        accept a ``meta`` parameter. When it carries ``n_features_in_``, that
        value defines the input width; otherwise the width falls back to the
        notebook-level ``X_train``.

    Returns
    -------
    A ``Sequential`` model compiled with categorical cross-entropy loss and
    the accuracy metric.
    """
    # Prefer the feature count reported by the wrapper so the function does not
    # depend on notebook state; keep the global as a fallback for direct calls.
    if meta is not None and "n_features_in_" in meta:
        n_features = meta["n_features_in_"]
    else:
        n_features = X_train.shape[1]

    model = Sequential()
    # Explicit Input layer instead of the deprecated `input_dim=` layer argument.
    model.add(Input(shape=(n_features,)))

    # Always at least one hidden layer, matching the behavior for num_layers <= 1.
    for _ in range(max(num_layers, 1)):
        model.add(Dense(units, activation='relu'))

    model.add(Dropout(dropout_rate))
    model.add(Dense(3, activation='softmax'))

    optimizer = Adam(learning_rate=learning_rate)
    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

    return model

# Wrap the model with KerasClassifier from scikeras.
# `random_state` seeds TensorFlow's random number generators for each fit, so
# scores for the same hyperparameters are repeatable between runs.
model = KerasClassifier(model=create_model, verbose=0, random_state=123)

## Hyperparameter Grid

In [5]:
# Hyperparameter search space.
# Un-prefixed keys are parameters of the KerasClassifier wrapper itself;
# `model__`-prefixed keys are routed to the matching `create_model` argument.
# Size: 4 * 2 * 3 * 3 * 5 * 5 = 1,800 combinations.
param_grid = dict(
    batch_size=[10, 20, 40, 60],
    epochs=[50, 100],
    model__num_layers=[1, 2, 3],
    model__units=[32, 64, 128],
    model__dropout_rate=[0.0, 0.05, 0.1, 0.2, 0.4],
    model__learning_rate=[1e-5, 1e-4, 1e-3, 1e-2, 1e-1],
)


## Grid search over the hyperparameter space

In [6]:


# Exhaustive search over `param_grid` with 3-fold cross-validation.
# n_jobs=1 keeps every fit in this process: with n_jobs=-1 joblib starts one
# worker process per core, and each TensorFlow process tries by default to
# reserve nearly all GPU memory, so the workers contend for the same device.
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=1, cv=3)
grid_result = grid.fit(X_train, y_train)

# Report the best mean cross-validated score and the parameters that produced it.
print(f"Best: {grid_result.best_score_} using {grid_result.best_params_}")



In [15]:
# List the GPU devices visible to TensorFlow; an empty list means none was detected.
tf.config.list_physical_devices('GPU')

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]