# Aaron, MLP

In [7]:
from perform_kickstarer_eda import X_train, y_train, X_test, y_test


In [8]:
import tensorflow as tf
from sklearn.model_selection import train_test_split
print(tf.__version__)

# Hold out 20% of the training data as a validation set; the fixed
# random_state keeps the partition identical across re-runs.
split_parts = train_test_split(
    X_train,
    y_train,
    test_size=0.2,
    random_state=42,
)
X_train_small, X_val, y_train_small, y_val = split_parts

2.16.2


In [9]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.optimizers import SGD, Adam
from scikeras.wrappers import KerasRegressor

def build_model(hidden_units=(64,), activation='relu', optimizer='adam', learning_rate=0.001, meta=None):
    """Build and compile a fully connected Keras network with one linear output.

    Parameters
    ----------
    hidden_units : iterable of int
        Width of each hidden Dense layer, in order. An empty iterable gives
        a model whose single output unit is connected straight to the inputs.
    activation : str
        Activation identifier applied to every hidden Dense layer.
    optimizer : str
        'sgd' selects SGD; any other value selects Adam.
    learning_rate : float
        Learning rate handed to the chosen optimizer.
    meta : dict or None
        Optional metadata dict. When it contains "n_features_in_", that value
        is used as the input width. scikeras passes such a dict to build
        functions that declare a `meta` parameter, which removes the need to
        read the notebook-level `X_train` here. When absent, the width falls
        back to `X_train.shape[1]` as before.

    Returns
    -------
    A compiled Keras Model using mean-squared-error loss and reporting MAE.
    """
    # Prefer the feature count supplied by the wrapper over global state.
    if meta is not None and "n_features_in_" in meta:
        n_features = meta["n_features_in_"]
    else:
        n_features = X_train.shape[1]

    inputs = Input(shape=(n_features,))
    x = inputs
    for units in hidden_units:
        x = Dense(units, activation=activation)(x)
    # No activation on the output layer: the model emits an unbounded value.
    outputs = Dense(1)(x)

    model = Model(inputs, outputs)

    # Anything other than 'sgd' deliberately falls back to Adam.
    if optimizer == 'sgd':
        opt = SGD(learning_rate=learning_rate)
    else:
        opt = Adam(learning_rate=learning_rate)

    model.compile(loss='mean_squared_error', optimizer=opt, metrics=['mae'])
    return model

# Report the dimensions of the reduced training split (features, then target).
for caption, part in (("X_train shape:", X_train_small), ("y_train shape:", y_train_small)):
    print(caption, part.shape)



X_train shape: (13200, 66)
y_train shape: (13200,)


In [None]:
# Use GridSearchCV to find the best hyperparameters
from sklearn.model_selection import GridSearchCV

# Wrap the Keras build function so scikit-learn's search utilities can drive it.
regressor = KerasRegressor(
    model=build_model,
    verbose=0
)

# Search space: `model__*` entries are forwarded to build_model, while
# `epochs` and `batch_size` are set on the wrapper itself.
# NOTE: 9 * 4 * 2 * 2 * 3 * 3 = 1296 candidates, each fitted on 5 folds
# (6480 fits plus the final refit), so this cell is very expensive to run.
param_grid = {
    'model__hidden_units': [(16,), (32,), (64,), (128,), (256,), (32,16), (64, 32), (128, 64),(64, 32, 16)],
    # Keras activation identifiers. 'logistic' and 'identity' are the
    # scikit-learn MLP names for these and are not recognised by Keras.
    'model__activation': ['relu', 'tanh', 'sigmoid', 'linear'],
    'model__optimizer': ['adam', 'sgd'],
    'model__learning_rate': [0.001, 0.01],
    'epochs': [50, 100, 200],
    'batch_size': [16, 32, 64]
}

# Score candidates by negated MSE so that -best_score_ below really is an MSE.
# Without `scoring`, GridSearchCV uses the estimator's own score() method,
# which for a regressor is R^2, not an error measure.
grid = GridSearchCV(
    estimator=regressor,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=5
)
grid_result = grid.fit(X_train, y_train) # Using X_train instead of X_train_small as GridSearchCV handles validation split


print("Best Params:", grid_result.best_params_)
print("Best MSE:", -grid_result.best_score_)

# Evaluate on test set
# model_ is the fitted Keras model of the refitted best estimator; evaluate()
# returns the loss (MSE) followed by the compiled metric (MAE).
best_model = grid_result.best_estimator_.model_
mse, mae = best_model.evaluate(X_test, y_test, verbose=0)
print(f"Test MSE: {mse:.4f}, MAE: {mae:.4f}")


In [None]:
# from pandas import DataFrame
# import matplotlib.pyplot as plt
# from sklearn.model_selection import KFold

# # Test multiple architectures
# architectures = [
#     [16],
#     [32],
#     [64],
#     [128],
#     [256],
#     [32, 16],
#     [64, 32],
#     [128, 64],
#     [64, 32, 16],
# ]

# results = []
# for arch in architectures:
#     print(f"Testing architecture: {arch}")
#     result = create_and_evaluate_model(arch, X_train_small, y_train_small, X_val, y_val, X_test, y_test)
#     results.append({
#         "architecture": str(arch),
#         "mse": result["mse"],
#         "mae": result["mae"],
#         "r2": result["r2"]
#     })

# # Display results as a table
# results_df = DataFrame(results)
# print("\nResults comparison:")
# print(results_df)

# # Plot comparison - only include rows without NaN values
# valid_results = results_df.dropna()
# if len(valid_results) > 0:
#     plt.figure(figsize=(12, 5))
#     plt.subplot(1, 2, 1)
#     plt.bar(valid_results["architecture"], valid_results["r2"])
#     plt.title("R² Score by Architecture")
#     plt.xlabel("Architecture")
#     plt.ylabel("R² Score")
#     plt.xticks(rotation=45)

#     plt.subplot(1, 2, 2)
#     plt.bar(valid_results["architecture"], valid_results["mse"])
#     plt.title("MSE by Architecture")
#     plt.xlabel("Architecture")
#     plt.ylabel("Mean Squared Error")
#     plt.xticks(rotation=45)

#     plt.tight_layout()
#     plt.show()
# else:
#     print("No valid results to plot comparisons")

# # Cross-validation of the MLP (no Linear Regression model is fitted below; lr_cv_scores stays empty)
# print("\nPerforming cross-validation comparison...")
# kf = KFold(n_splits=5, shuffle=True, random_state=42)

# mlp_cv_scores = []
# lr_cv_scores = []

# for train_idx, val_idx in kf.split(X_scaled):
#     # Get fold data
#     X_fold_train, X_fold_val = X_scaled[train_idx], X_scaled[val_idx]
#     y_fold_train, y_fold_val = y.iloc[train_idx], y.iloc[val_idx]

#     # Train and evaluate MLP with best architecture
#     tf.keras.backend.clear_session()
#     inputs = tf.keras.layers.Input(shape=(X_fold_train.shape[1],))
#     x = tf.keras.layers.Dense(64, activation="relu")(inputs)
#     x = tf.keras.layers.Dense(32, activation="relu")(x)
#     outputs = tf.keras.layers.Dense(1)(x)

#     fold_mlp = tf.keras.models.Model(inputs=inputs, outputs=outputs)
#     fold_mlp.compile(loss="mean_squared_error", optimizer="sgd")
#     fold_mlp.fit(X_fold_train, y_fold_train, epochs=50, verbose=0)

#     try:
#         y_pred_fold_mlp = fold_mlp.predict(X_fold_val).flatten()
#         # Check for NaN values
#         if isnan(y_pred_fold_mlp).any():
#             print("Warning: NaN values in MLP predictions for this fold")
#             # Use only non-NaN predictions
#             mask = ~isnan(y_pred_fold_mlp)
#             if mask.sum() > 0:
#                 mlp_cv_scores.append(r2_score(y_fold_val.iloc[mask], y_pred_fold_mlp[mask]))
#             else:
#                 mlp_cv_scores.append(nan)
#         else:
#             mlp_cv_scores.append(r2_score(y_fold_val, y_pred_fold_mlp))
#     except Exception as e:
#         print(f"Error in MLP cross-validation: {e}")
#         mlp_cv_scores.append(nan)


# # Filter out NaN scores
# mlp_cv_scores_filtered = [score for score in mlp_cv_scores if not isnan(score)]

# if mlp_cv_scores_filtered:
#     print(f"MLP Cross-Validation R² scores: {mlp_cv_scores}")
#     print(f"MLP Cross-Validation Mean R²: {mean(mlp_cv_scores_filtered):.4f}")
# else:
#     print(f"MLP Cross-Validation R² scores: {mlp_cv_scores}")
#     print("No valid MLP cross-validation scores")

