In [9]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
from sklearn.preprocessing import StandardScaler
from scikeras.wrappers import KerasClassifier
from sklearn.model_selection import GridSearchCV
import pandas as pd


In [10]:
from tensorflow.keras.metrics import Precision, Recall, AUC

## Load the dataset

Fit the model using **One-hot missing values alone**

In [11]:
df = pd.read_csv('GDSI_OpenDataset_Final_cleaned_OH.csv')
# set secret_name as index
df.set_index('secret_name', inplace=True)
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1141 entries, C_1005 to P_992
Columns: 116 entries, bmi_in_cat2_not_overweight to has_comorbidities.2_yes
dtypes: int64(116)
memory usage: 1.0+ MB


In [12]:
X = df.drop(columns=['ms_type2_relapsing_remitting', 'ms_type2_progressive_MS','ms_type2_other'])
Y = df[['ms_type2_relapsing_remitting', 'ms_type2_progressive_MS','ms_type2_other']]

# train test split
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.3, random_state=123)

# Standardize data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

X_train = X_train.astype('float32')
X_test = X_test.astype('float32')

# Ensure target variables are numpy arrays of type float32
y_train = y_train.values.astype('float32')
y_test = y_test.values.astype('float32')


In [13]:
def create_model(units=[64, 128], dropout_rate=0.2, learning_rate=0.0001, l2_reg=0.01):
    model = Sequential()
    
    # First layer
    model.add(Dense(units[0], input_dim=X_train.shape[1], activation='relu', 
                    kernel_regularizer=l2(l2_reg)))
    model.add(BatchNormalization())
    model.add(Dropout(dropout_rate))
    
    # Second layer
    model.add(Dense(units[1], activation='relu', kernel_regularizer=l2(l2_reg)))
    model.add(BatchNormalization())
    model.add(Dropout(dropout_rate))
    
    # Output layer
    model.add(Dense(3, activation='softmax'))
    
    optimizer = Adam(learning_rate=learning_rate)
    model.compile(optimizer=optimizer, 
                  loss='categorical_crossentropy', 
                  metrics=['accuracy'])
    
    return model
model = KerasClassifier(build_fn=create_model, verbose=0)

## Hyperparameter Grid

In [14]:
from itertools import combinations_with_replacement

def generate_neuron_combinations(min_neurons=128, max_neurons=256):
    possible_units = [16, 32, 64, 128, 256]
    all_combinations = []
    
    for num_layers in range(2, 4):  # Testing for 1 to 3 layers
        for combination in combinations_with_replacement(possible_units, num_layers):
            total_units = sum(combination)
            if min_neurons <= total_units <= max_neurons:
                all_combinations.append(combination)
    
    return all_combinations

# Generate the combinations
neuron_combinations = generate_neuron_combinations()

# Print the generated combinations
for combo in neuron_combinations:
    print(combo)



(16, 128)
(32, 128)
(64, 64)
(64, 128)
(128, 128)
(16, 16, 128)
(16, 32, 128)
(16, 64, 64)
(16, 64, 128)
(32, 32, 64)
(32, 32, 128)
(32, 64, 64)
(32, 64, 128)
(64, 64, 64)
(64, 64, 128)


Early Layers:

- The early layers of a neural network are responsible for learning basic features or patterns from the input data. In the context of tabular data, these features could be simple relationships between input variables or basic statistical properties.
- With fewer neurons, the network is encouraged to focus on the most salient, or important, features rather than trying to capture every possible nuance. This helps prevent the network from learning irrelevant details or noise in the data.
Later Layers:

As the network goes deeper, the layers start to learn more complex, higher-level features by combining the simpler features learned in earlier layers.
Increasing the number of neurons in these layers allows the network to capture more complex patterns and interactions between the features. This is crucial as the later layers are where the network starts to form more abstract representations that are necessary for making accurate predictions.Smaller Neurons in Early Layers, Larger Neurons in Later Layers:
- Feature Extraction: Starting with a smaller number of neurons in the early layers means that the network will begin by focusing on a more compact representation of the input features. This can be beneficial if you believe that your initial input features contain noise or redundant information that you want to filter out before expanding the representation in later layers.
- Dimensionality Expansion: As you increase the number of neurons in the subsequent layers, the network has the capacity to learn more complex relationships and interactions between the features that were identified in the earlier layers. This approach is useful when you think that the complexity of patterns increases as you go deeper into the network.

In [15]:
# Define hyperparameter grid
param_grid = {
    'batch_size': [10, 20, 40, 60],
    'epochs': [50, 75, 100],
    'model__units': neuron_combinations,
    'model__dropout_rate': [0.0, 0.05, 0.1],
    'model__learning_rate': [0.00001, 0.0001, 0.001],
    'model__l2_reg': [0.001, 0.01, 0.1]
}


GridSearch the param space

In [16]:
my_scoring = {
    'accuracy': 'accuracy',
    'precision_micro': 'precision_micro',
    'recall_micro': 'recall_micro',
    'f1_macro': 'f1_macro',
    'f1_micro': 'f1_micro',
    'roc_auc': 'roc_auc'
}

grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=6,scoring=my_scoring, refit='f1_micro')
grid_result = grid.fit(X_train, y_train)

print(f"Best: {grid_result.best_score_} using {grid_result.best_params_}")



Explore the other best metrics

In [13]:
# Access and print best scores for other metrics
best_accuracy = max(grid.cv_results_['mean_test_accuracy'])
best_precision = max(grid.cv_results_['mean_test_precision_micro'])
best_recall = max(grid.cv_results_['mean_test_recall_micro'])
best_f1_micro = max(grid.cv_results_['mean_test_f1_micro'])
best_f1_macro = max(grid.cv_results_['mean_test_f1_macro'])
best_roc_auc = max(grid.cv_results_['mean_test_roc_auc'])

print(f"Best accuracy: {best_accuracy}")
print(f"Best precision: {best_precision}")
print(f"Best recall: {best_recall}")
print(f"Best F1 micro: {best_f1_micro}")
print(f"Best ROC AUC: {best_roc_auc}")

Best accuracy: 0.7958411949685534
Best precision: 0.5584794874064375
Best recall: 0.44230699266114515
Best F1 micro: 0.7958411949685534
Best ROC AUC: 0.6958831705308914


In [16]:
results_df = pd.DataFrame(grid.cv_results_)
# save results to csv
results_df.to_csv('GridSearchCV_results.csv', index=False)


## TODO Fit the model using the best grid search result

In [5]:
# Define a simple model
def create_simple_model():
    model = Sequential()
    model.add(Dense(64, input_dim=X_train.shape[1], activation='relu'))
    model.add(Dense(3, activation='softmax'))
    model.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

# Create the model
model = create_simple_model()

# Fit the model
model.fit(X_train, y_train, epochs=10, batch_size=10, verbose=2)

Epoch 1/10


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


80/80 - 0s - 5ms/step - accuracy: 0.6391 - loss: 0.9568
Epoch 2/10
80/80 - 0s - 600us/step - accuracy: 0.7820 - loss: 0.6173
Epoch 3/10
80/80 - 0s - 600us/step - accuracy: 0.7995 - loss: 0.5463
Epoch 4/10
80/80 - 0s - 588us/step - accuracy: 0.8083 - loss: 0.5048
Epoch 5/10
80/80 - 0s - 575us/step - accuracy: 0.8158 - loss: 0.4747
Epoch 6/10
80/80 - 0s - 598us/step - accuracy: 0.8296 - loss: 0.4508
Epoch 7/10
80/80 - 0s - 594us/step - accuracy: 0.8333 - loss: 0.4316
Epoch 8/10
80/80 - 0s - 597us/step - accuracy: 0.8459 - loss: 0.4084
Epoch 9/10
80/80 - 0s - 588us/step - accuracy: 0.8446 - loss: 0.4004
Epoch 10/10
80/80 - 0s - 576us/step - accuracy: 0.8546 - loss: 0.3814


<keras.src.callbacks.history.History at 0x1cd789952a0>

In [None]:
print(f"X_train type: {type(X_train)}, dtype: {X_train.dtype}")
print(f"y_train type: {type(y_train)}, dtype: {y_train.dtype}")