# Tuning Parameters for Neural Network Models
MSc in Statistical Science\
University of Oxford\
Group-assessed practical\
HT 2024

## Based on file `NN,LDA+NN,PCA+NN.ipynb`

In [1]:
# Import libraries
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model
from search_param.grid_search import read_data, grid_search, rand_search
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import KFold
from keras_tuner import RandomSearch

## Load dataset

In [2]:
X_train_sc, X_val_sc, X_train_pca, X_val_pca, X_train_lda, X_val_lda, y_train, y_val = read_data()
y_train = y_train[0].values
y_val = y_val[0].values

## Neural Network

In [3]:
# Initialize the LabelEncoder
label_encoder = LabelEncoder()

# Fit label encoder and return encoded labels
y_train_encoded = label_encoder.fit_transform(y_train)
y_val_encoded = label_encoder.transform(y_val)

# Convert labels to one-hot encoding
y_train_onehot = to_categorical(y_train_encoded)
y_val_onehot = to_categorical(y_val_encoded)

### Tuning parameters

In [4]:
def create_model(n_layers=3, activation='relu', leanring_rate=0.01, dropout=0.5):
    inputs = Input(shape=(518,))
    x = inputs
    for _ in range(n_layers):
        x = Dense(256, activation=activation)(x)
        x = Dropout(dropout)(x)
    outputs = Dense(8, activation='softmax')(x)  # Assuming 8 classes
    model = Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer=Adam(learning_rate=leanring_rate),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

In [5]:
n_splits = 5  # For example, 5-fold cross-validation
kf = KFold(n_splits=n_splits, shuffle=True, random_state=42)

In [6]:
def build_model(hp):
    model = Sequential()
    model.add(Dense(units=hp.Int('units', min_value=32, max_value=518, step=32),
                    activation=hp.Choice('activation', values=['relu', 'softplus', 'tanh', 'sigmoid']),
                    input_shape=(518,)))  # Ensure this matches your feature size
    # Use hp.Choice to select the dropout rate
    model.add(Dropout(rate=hp.Float('dropout_0', min_value=0.0, max_value=0.5, step=0.05)))

    for i in range(hp.Int('layers', 1, 5)):
        model.add(Dense(units=hp.Int(f'units_{i}', min_value=32, max_value=518, step=32),
                        activation=hp.Choice(f'activation_{i}', values=['relu', 'softplus', 'tanh', 'sigmoid'])))
        # Use hp.Choice for each layer's dropout rate
        model.add(Dropout(rate=hp.Float(f'dropout_{i+1}',  min_value=0.0, max_value=0.5, step=0.05)))

    model.add(Dense(8, activation='softmax'))  # Adjust the number of units based on your number of classes
    model.compile(optimizer="adam", loss='categorical_crossentropy', metrics=['accuracy'])
    return model


tuner = RandomSearch(
    build_model,
    objective='val_accuracy',
    max_trials=5,  # Set smaller during experimentation
    executions_per_trial=1,  # Increase for more robust results
    directory='my_dir',
    project_name='hparam_tuning'
)

Reloading Tuner from my_dir\hparam_tuning\tuner0.json


In [8]:
# Use a lower number of epochs for the search phase
tuner.search(X_train_sc, y_train_onehot, epochs=20, validation_data=(X_val_sc, y_val_onehot))

Trial 5 Complete [00h 00m 14s]
val_accuracy: 0.5774999856948853

Best val_accuracy So Far: 0.590833306312561
Total elapsed time: 00h 06m 24s


In [10]:
# Get the best model
sc_best = tuner.get_best_models(num_models=1)[0]
sc_best.summary()

In [13]:
# Save the best model to a file
sc_best.save('search_nn/sc_best.keras')

In [14]:
for layer in sc_best.layers:
    config = layer.get_config()  # Get the layer's configuration dict
    # The 'activation' key in the config dict contains the activation function name
    if 'activation' in config:
        print(f"Layer: {layer.name}, Activation Function: {config['activation']}")
    else:
        print(f"Layer: {layer.name}, No activation function")


Layer: dense, Activation Function: relu
Layer: dropout, No activation function
Layer: dense_1, Activation Function: sigmoid
Layer: dropout_1, No activation function
Layer: dense_2, Activation Function: relu
Layer: dropout_2, No activation function
Layer: dense_3, Activation Function: softplus
Layer: dropout_3, No activation function
Layer: dense_4, Activation Function: softmax


In [15]:
for layer in sc_best.layers:
    config = layer.get_config()  # Extract the layer configuration as a dictionary
    layer_type = config['name'].split('_')[0]  # Get the type of layer (e.g., "dense", "dropout")
    
    # Print layer type and configuration
    print(f"Layer Type: {layer_type.upper()}")
    if layer_type == 'dense':
        print(f"  - Units: {config['units']}")
        print(f"  - Activation: {config['activation']}")
    elif layer_type == 'dropout':
        print(f"  - Rate: {config['rate']}")

Layer Type: DENSE
  - Units: 192
  - Activation: relu
Layer Type: DROPOUT
  - Rate: 0.25
Layer Type: DENSE
  - Units: 32
  - Activation: sigmoid
Layer Type: DROPOUT
  - Rate: 0.35000000000000003
Layer Type: DENSE
  - Units: 480
  - Activation: relu
Layer Type: DROPOUT
  - Rate: 0.45
Layer Type: DENSE
  - Units: 512
  - Activation: softplus
Layer Type: DROPOUT
  - Rate: 0.05
Layer Type: DENSE
  - Units: 8
  - Activation: softmax


In [16]:
test_loss, test_acc = best_model.evaluate(X_val_sc, y_val_onehot)
print(f"Test Accuracy: {test_acc:.4f}")

[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.6166 - loss: 1.2509  
Test Accuracy: 0.5908


***
Here

## LDA with NN

In [17]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.model_selection import cross_val_score
# Split the dataset into training and validation sets (using y_train_encoded since LDA does not use one-hot encoding)
X_training_l, X_val_l, y_training_l, y_val_l = train_test_split(X_scaled, y_train_encoded, test_size=0.2, random_state=42)

# Apply LDA for dimensionality reduction
lda = LDA(n_components=None)  # n_components=None for using the maximum number of components less than the number of classes
X_training_lda = lda.fit_transform(X_training_l, y_training_l)
X_val_lda = lda.transform(X_val_l)

# Assuming y_train_encoded is your original label-encoded target array
y_training_l_onehot = to_categorical(y_training_l, num_classes=8)
y_val_l_onehot = to_categorical(y_val_l, num_classes=8)



In [18]:
def build_model(hp):
    model = Sequential()
    # Dynamically set the input shape based on LDA's output
    input_shape = (X_training_lda.shape[1],)  # Use the feature size from LDA transformation

    model.add(Dense(units=hp.Int('units', min_value=32, max_value=518, step=32),
                    activation=hp.Choice('activation', values=['relu', 'softplus', 'tanh', 'sigmoid']),
                    input_shape=input_shape))  # Adjusted to the dynamic input shape

    model.add(Dropout(rate=hp.Float('dropout_0', min_value=0.0, max_value=0.5, step=0.05)))

    for i in range(hp.Int('layers', 1, 5)):
        model.add(Dense(units=hp.Int(f'units_{i}', min_value=32, max_value=518, step=32),
                        activation=hp.Choice(f'activation_{i}', values=['relu', 'softplus', 'tanh', 'sigmoid'])))
        model.add(Dropout(rate=hp.Float(f'dropout_{i+1}',  min_value=0.0, max_value=0.5, step=0.05)))

    model.add(Dense(8, activation='softmax'))  # Assuming 8 classes, adjust as necessary
    model.compile(optimizer="adam", loss='categorical_crossentropy', metrics=['accuracy'])
    return model


import shutil

# Be careful with this operation to avoid deleting important data
shutil.rmtree('my_dir/hparam_tuning')

# Now, create a new tuner instance as before
tuner1 = RandomSearch(
    build_model,
    objective='val_accuracy',
    executions_per_trial=1,
    directory='my_dir',
    project_name='hparam_tuning'  # The same project name can be reused after deletion
)



tuner1.search(X_training_lda, y_training_l_onehot, epochs=20, validation_data=(X_val_lda, y_val_l_onehot))  # Use a lower number of epochs for the search phase

Trial 10 Complete [00h 00m 10s]
val_accuracy: 0.54666668176651

Best val_accuracy So Far: 0.5616666674613953
Total elapsed time: 00h 02m 03s


In [19]:
# Get the best model
best_model_lda= tuner1.get_best_models(num_models=1)[0]
best_model_lda.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 96)                768       
                                                                 
 dropout (Dropout)           (None, 96)                0         
                                                                 
 dense_1 (Dense)             (None, 224)               21728     
                                                                 
 dropout_1 (Dropout)         (None, 224)               0         
                                                                 
 dense_2 (Dense)             (None, 352)               79200     
                                                                 
 dropout_2 (Dropout)         (None, 352)               0         
                                                                 
 dense_3 (Dense)             (None, 32)                1

In [20]:
test_loss, test_acc = best_model_lda.evaluate(X_val_lda,y_val_l_onehot)
print(f"Test Accuracy: {test_acc:.4f}")

Test Accuracy: 0.5617


In [54]:
# Fit LDA as a classifier
#lda_classifier = LDA()

#scores = cross_val_score(lda, X_training_l, y_training_l, cv=5)
#lda_classifier.fit(X_training_l, y_training_l)
#lda.fit(X_training_l, y_training_l)

# Make predictions on the validation set
#y_pred = lda_classifier.predict(X_val_l)

# Evaluate the classifier
#from sklearn.metrics import accuracy_score#
#print("LDA Classification Accuracy:", accuracy_score(y_val_l, y_pred))

LDA Classification Accuracy: 0.555


In [21]:
for layer in best_model_lda.layers:
    config = layer.get_config()  # Extract the layer configuration as a dictionary
    layer_type = config['name'].split('_')[0]  # Get the type of layer (e.g., "dense", "dropout")
    
    # Print layer type and configuration
    print(f"Layer Type: {layer_type.upper()}")
    if layer_type == 'dense':
        print(f"  - Units: {config['units']}")
        print(f"  - Activation: {config['activation']}")
    elif layer_type == 'dropout':
        print(f"  - Rate: {config['rate']}")

Layer Type: DENSE
  - Units: 96
  - Activation: softplus
Layer Type: DROPOUT
  - Rate: 0.1
Layer Type: DENSE
  - Units: 224
  - Activation: tanh
Layer Type: DROPOUT
  - Rate: 0.45
Layer Type: DENSE
  - Units: 352
  - Activation: softplus
Layer Type: DROPOUT
  - Rate: 0.35000000000000003
Layer Type: DENSE
  - Units: 32
  - Activation: relu
Layer Type: DROPOUT
  - Rate: 0.0
Layer Type: DENSE
  - Units: 32
  - Activation: relu
Layer Type: DROPOUT
  - Rate: 0.0
Layer Type: DENSE
  - Units: 32
  - Activation: relu
Layer Type: DROPOUT
  - Rate: 0.0
Layer Type: DENSE
  - Units: 8
  - Activation: softmax


## PCA with NN

In [22]:
from sklearn.decomposition import PCA
# Apply PCA for dimensionality reduction
pca = PCA(n_components=100)  # Select top 100 components
X_training_pca = pca.fit_transform(X_training)
X_val_pca = pca.transform(X_val)


def build_model_pca(hp):
    model = Sequential()
    model.add(Dense(units=hp.Int('units', min_value=32, max_value=518, step=32),
                    activation=hp.Choice('activation', values=['relu', 'softplus', 'tanh', 'sigmoid']),
                    input_shape=(100,)))  # Adjusted to match PCA output
    model.add(Dropout(rate=hp.Float('dropout_0', min_value=0.0, max_value=0.5, step=0.05)))

    for i in range(hp.Int('layers', 1, 5)):
        model.add(Dense(units=hp.Int(f'units_{i}', min_value=32, max_value=518, step=32),
                        activation=hp.Choice(f'activation_{i}', values=['relu', 'softplus', 'tanh', 'sigmoid'])))
        model.add(Dropout(rate=hp.Float(f'dropout_{i+1}',  min_value=0.0, max_value=0.5, step=0.05)))

    model.add(Dense(8, activation='softmax'))  # Assuming 8 classes, adjust as necessary
    model.compile(optimizer="adam", loss='categorical_crossentropy', metrics=['accuracy'])
    return model


import shutil

# Be careful with this operation to avoid deleting important data
shutil.rmtree('my_dir/hparam_tuning')

# Now, create a new tuner instance as before
tuner2 = RandomSearch(
    build_model_pca,
    objective='val_accuracy',
    executions_per_trial=1,
    directory='my_dir',
    project_name='hparam_tuning'  # The same project name can be reused after deletion
)



tuner2.search(X_training_pca, y_training, epochs=20, validation_data=(X_val_pca, y_val))    

Trial 10 Complete [00h 00m 08s]
val_accuracy: 0.596666693687439

Best val_accuracy So Far: 0.596666693687439
Total elapsed time: 00h 02m 01s


In [23]:
# Get the best model
best_model_pca = tuner2.get_best_models(num_models=1)[0]
best_model_pca.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 320)               32320     
                                                                 
 dropout (Dropout)           (None, 320)               0         
                                                                 
 dense_1 (Dense)             (None, 96)                30816     
                                                                 
 dropout_1 (Dropout)         (None, 96)                0         
                                                                 
 dense_2 (Dense)             (None, 8)                 776       
                                                                 
Total params: 63,912
Trainable params: 63,912
Non-trainable params: 0
_________________________________________________________________


In [24]:
test_loss, test_acc = best_model_pca.evaluate(X_val_pca,y_val)
print(f"Test Accuracy: {test_acc:.4f}")

Test Accuracy: 0.5967


In [33]:
best_trial = tuner2.oracle.get_best_trials(num_trials=1)[0]
print("Best trial ID:", best_trial.trial_id)

Best trial ID: 09


In [39]:
#best_hps = tuner2.oracle.get_best_trials(num_trials=1)[0].hyperparameters.values
best_hps = tuner2.get_best_hyperparameters(num_trials=1)[0]
best_trial = tuner2.oracle.get_best_trials(num_trials=1)[0]
performance_scores= tuner2.oracle.get_trial(best_trial.trial_id).score
performance_scores


0.596666693687439

## Wrapper Method with NN (只是试一下 不能用)

In [25]:
def build_model_wrapper(hp, input_shape):
    model = Sequential()
    model.add(Dense(units=hp.Int('units', min_value=32, max_value=518, step=32),
                    activation=hp.Choice('activation', values=['relu', 'softplus', 'tanh', 'sigmoid']),
                    input_shape=input_shape))
    model.add(Dropout(rate=hp.Float('dropout_0', min_value=0.0, max_value=0.5, step=0.05)))
    for i in range(hp.Int('layers', 1, 5)):
        model.add(Dense(units=hp.Int(f'units_{i}', min_value=32, max_value=518, step=32),
                        activation=hp.Choice(f'activation_{i}', values=['relu', 'softplus', 'tanh', 'sigmoid'])))
        model.add(Dropout(rate=hp.Float(f'dropout_{i+1}',  min_value=0.0, max_value=0.5, step=0.05)))
    model.add(Dense(8, activation='softmax'))
    model.compile(optimizer="adam", loss='categorical_crossentropy', metrics=['accuracy'])
    return model


In [45]:
# Assuming X_training, y_training, X_val, and y_val are available
performance_scores = {}
n_features = X_training.shape[1]

for i in range(n_features):
    # Create a new training set omitting the i-th feature
    X_train_reduced = np.delete(X_training, i, axis=1)
    X_val_reduced = np.delete(X_val, i, axis=1)
    
    # Update your tuner or model-building function here to accommodate the new shape
    tuner_w = RandomSearch(
        lambda hp: build_model_wrapper(hp, input_shape=(X_train_reduced.shape[1],)),
        objective='val_accuracy',
        max_trials=5,
        executions_per_trial=1,
        directory='my_dir',
        project_name=f'hparam_tuning_feature_{i}'
    )
    
    tuner_w.search(X_train_reduced, y_training, epochs=20, validation_data=(X_val_reduced, y_val))
    
    # Store the best score obtained by omitting the i-th feature
    best_hps = tuner_w.get_best_hyperparameters(num_trials=1)[0]
    best_trial = tuner_w.oracle.get_best_trials(num_trials=1)[0]
    performance_scores[i]= tuner_w.oracle.get_trial(best_trial.trial_id).score  

# Evaluate performance scores to determine feature importance


Trial 5 Complete [00h 00m 16s]
val_accuracy: 0.590833306312561

Best val_accuracy So Far: 0.5950000286102295
Total elapsed time: 00h 01m 22s

Search: Running Trial #1

Value             |Best Value So Far |Hyperparameter
256               |256               |units
sigmoid           |sigmoid           |activation
0.05              |0.05              |dropout_0
5                 |5                 |layers
96                |96                |units_0
relu              |relu              |activation_0
0.45              |0.45              |dropout_1

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20

KeyboardInterrupt: 

In [44]:
performance_scores

{1: 0.6066666841506958,
 2: 0.5991666913032532,
 3: 0.6016666889190674,
 4: 0.6025000214576721,
 5: 0.6058333516120911,
 6: 0.6066666841506958,
 7: 0.6050000190734863}

In [None]:
#from mlxtend.feature_selection import SequentialFeatureSelector as SFS
#from sklearn.linear_model import LogisticRegression



#sfs = SFS(LogisticRegression(), 
#          k_features=7,  # 'best' or an integer for a fixed number of features
 #         forward=True, 
 #         floating=False, 
 #         scoring='accuracy',
 #         cv=5)  # 5-fold cross-validation
#
#sfs = sfs.fit(X_training, y_training)

# Now select the features from your training and validation set
#X_training_selected = sfs.transform(X_training)
#X_val_selected = sfs.transform(X_val)

# Then you would use these selected features to define the input shape of your NN model
#def build_nn_model(input_shape):
##    model = Sequential()
    # Define the model with the input shape as determined by the number of selected features
 #   model.add(Dense(units=64, activation='relu', input_shape=input_shape))
 #   # Add more layers as needed...
 #   model.add(Dense(8, activation='softmax'))  # For 8 classes
  #  model.compile(optimizer="adam", loss='categorical_crossentropy', metrics=['accuracy'])
 #   return model

# Create the NN model using the selected features
#nn_model = build_nn_model(input_shape=(X_training_selected.shape[1],))

# Train the NN model on the selected features
#nn_model.fit(X_training_selected, y_training, epochs=20, validation_data=(X_val_selected, y_val))


## Classification with knn and prediction on the test set

In [None]:
# Example with a k-nearest neighbours classifier
clf = KNeighborsClassifier(n_neighbors = 5)
clf.fit(X_train, y_train)
print('Accuracy of 5-nn on the training set: ', clf.score(X_train, y_train)) # evaluate the accuracy on the training set

Accuracy of 5-nn on the training set:  0.5468333333333333


In [None]:
y_pred = clf.predict(X_test) # compute knn predictions on the test inputs
y_pred

array(['Folk', 'Hip-Hop', 'International', ..., 'Experimental', 'Pop',
       'Folk'], dtype=object)

## Export in csv format

In [None]:
# Export the predictions on the test data in csv format
prediction = pd.DataFrame(y_pred, columns=['Genre'])
prediction.index.name='Id'
prediction.to_csv('myprediction.csv') # export to csv file

# The csv file should be of the form
#Id, Genre
#0, Folk
#1, Hip-Hop
#2, International
#...
#1998, Experimental
#1999, Pop