# Aaron, MLP

In [1]:
from perform_kickstarer_eda import X_train, y_train, X_test, y_test


ks_data shape: (20632, 68)
Categorical columns: ['country', 'currency', 'deadline', 'state_changed_at', 'created_at', 'launched_at', 'category', 'deadline_weekday', 'state_changed_at_weekday', 'created_at_weekday', 'launched_at_weekday', 'launch_to_deadline', 'launch_to_state_change']
Converted boolean column: staff_pick
Dropped 'state_changed_at_weekday' column
Dropping original column: category
Encoded column: category → 25 features
Dropping original column: deadline_weekday
Encoded column: deadline_weekday → 7 features
Dropping original column: created_at_weekday
Encoded column: created_at_weekday → 7 features
Dropping original column: launched_at_weekday
Encoded column: launched_at_weekday → 7 features
Total categorical columns after encoding: 54


In [3]:
import tensorflow as tf
from sklearn.model_selection import train_test_split
# Record the TensorFlow build used for this run
print(tf.__version__)

# Hold out 20% of the training data as a validation set (fixed seed for a repeatable split)
X_train_small, X_val, y_train_small, y_val = train_test_split(
    X_train,
    y_train,
    test_size=0.2,
    random_state=42,
)

2.16.2


In [None]:
from scikeras.wrappers import KerasClassifier
from sklearn.model_selection import GridSearchCV
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam

def create_model(hidden_layers=1, neurons=64, activation='relu', learning_rate=0.001):
    """Build and compile a fully connected binary classifier.

    The input width is read from the notebook-level ``X_train`` (its second
    dimension), so that variable must exist before this function is called.

    Args:
        hidden_layers: Number of hidden Dense layers. ``0`` yields a model with
            only the sigmoid output layer.
        neurons: Units in every hidden layer.
        activation: Activation used by every hidden layer.
        learning_rate: Learning rate handed to the Adam optimizer.

    Returns:
        A compiled ``Sequential`` model with a single sigmoid output, trained
        with binary cross-entropy and reporting accuracy.
    """
    # Local import: the surrounding cell only imports Dense from keras.layers.
    from tensorflow.keras.layers import Input

    model = Sequential()
    # Declare the input with an explicit Input layer instead of passing
    # input_shape= to the first Dense layer, which Keras 3 warns about.
    model.add(Input(shape=(X_train.shape[1],)))
    # Exactly `hidden_layers` hidden layers (the previous version always added
    # at least one, even when 0 was requested).
    for _ in range(hidden_layers):
        model.add(Dense(neurons, activation=activation))
    model.add(Dense(1, activation='sigmoid'))  # For binary classification
    model.compile(optimizer=Adam(learning_rate=learning_rate),
                  loss='binary_crossentropy', metrics=['accuracy'])
    return model

# Expose the Keras builder through the scikit-learn estimator interface
model = KerasClassifier(model=create_model, verbose=0)

# Search space: the `model__*` keys mirror create_model's keyword arguments.
# 2 * 2 * 2 * 2 * 2 * 1 = 32 combinations, each fitted on 3 folds.
param_grid = dict(
    model__hidden_layers=[1, 2],
    model__neurons=[32, 64],
    model__activation=['relu', 'tanh'],
    model__learning_rate=[0.001, 0.01],
    epochs=[30, 50],
    validation_split=[0.1],  # or use validation_data manually in a custom loop
)

# Exhaustive 3-fold search scored by accuracy
grid = GridSearchCV(model, param_grid, cv=3, scoring='accuracy')
grid_result = grid.fit(X_train, y_train)

# Report the winning combination and its mean cross-validated accuracy
print("Best Params:", grid_result.best_params_)
print(f"Best Accuracy: {grid_result.best_score_:.4f}")

In [None]:
from numpy import isnan, mean, nan
from sklearn.metrics import mean_squared_error, r2_score


def create_and_evaluate_model(hidden_units, X_train, y_train, X_valid, y_valid, X_test, y_test):
    """Train a ReLU MLP with a single linear output and score it on the test split.

    The network is compiled with mean-squared-error loss and plain SGD, trained
    for 50 epochs with ``(X_valid, y_valid)`` as validation data, then evaluated
    on ``(X_test, y_test)``.

    Args:
        hidden_units: Iterable of layer widths, one entry per hidden layer.
        X_train, y_train: Training features and targets.
        X_valid, y_valid: Validation data passed to ``fit``.
        X_test, y_test: Data used for the reported metrics.

    Returns:
        dict with keys ``"mae"``, ``"mse"``, ``"r2"`` and ``"history"`` (the
        Keras training history dict). Metrics are NaN when evaluation fails or
        every prediction is NaN. When only some predictions are NaN, all three
        metrics are computed on the non-NaN subset.
    """
    # Local import: the notebook imports only isnan/mean/nan from numpy by name.
    import numpy as np

    tf.keras.backend.clear_session()

    # Input layer
    inputs = tf.keras.layers.Input(shape=(X_train.shape[1],))
    x = inputs

    # Hidden layers
    for units in hidden_units:
        x = tf.keras.layers.Dense(units, activation="relu")(x)

    # Output layer (no activation: raw regression-style output)
    outputs = tf.keras.layers.Dense(1)(x)

    # Create and compile model
    model = tf.keras.models.Model(inputs=inputs, outputs=outputs)
    model.compile(loss="mean_squared_error", optimizer="sgd", metrics=["mae"])

    # Train
    history = model.fit(
        X_train, y_train,
        epochs=50,
        validation_data=(X_valid, y_valid),
        verbose=0
    )

    # Evaluate - training can diverge to NaN, so guard the metric computation
    try:
        _, mae = model.evaluate(X_test, y_test, verbose=0)
        # verbose=0 keeps the per-architecture output free of progress bars,
        # matching the fit/evaluate calls above.
        y_pred = model.predict(X_test, verbose=0).flatten()
        # Flat float array so boolean masking is positional regardless of
        # whether y_test is a Series, DataFrame column, ndarray or list.
        y_true = np.asarray(y_test, dtype=float).ravel()

        valid = ~isnan(y_pred)
        if valid.all():
            mse = mean_squared_error(y_true, y_pred)
            r2 = r2_score(y_true, y_pred)
        else:
            print(f"Warning: NaN predictions found in architecture {hidden_units}")
            if valid.any():
                # Score only the usable predictions; recompute MAE on the same
                # subset so all three metrics describe the same samples.
                mse = mean_squared_error(y_true[valid], y_pred[valid])
                r2 = r2_score(y_true[valid], y_pred[valid])
                mae = float(np.mean(np.abs(y_true[valid] - y_pred[valid])))
            else:
                mse = nan
                r2 = nan
    except Exception as e:
        print(f"Error evaluating model with architecture {hidden_units}: {e}")
        mae = nan
        mse = nan
        r2 = nan

    return {
        "mae": mae,
        "mse": mse,
        "r2": r2,
        "history": history.history
    }



In [None]:
from pandas import DataFrame
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold

# Candidate layouts: each inner list gives the unit count of every hidden layer
architectures = [
    [16], [32], [64], [128], [256],
    [32, 16], [64, 32], [128, 64],
    [64, 32, 16],
]

# Train and score every candidate on the same train/validation/test data
results = []
for arch in architectures:
    print(f"Testing architecture: {arch}")
    result = create_and_evaluate_model(
        arch, X_train_small, y_train_small, X_val, y_val, X_test, y_test
    )
    # Keep only the scalar metrics; the training history is not tabulated
    row = {"architecture": str(arch)}
    row.update((metric, result[metric]) for metric in ("mse", "mae", "r2"))
    results.append(row)

# One row per architecture
results_df = DataFrame(results)
print("\nResults comparison:")
print(results_df)

# Side-by-side bar charts of R² and MSE; rows containing any NaN metric are left out
valid_results = results_df.dropna()
if len(valid_results) == 0:
    print("No valid results to plot comparisons")
else:
    plt.figure(figsize=(12, 5))

    # (metric column, panel title, y-axis label) for each subplot, left to right
    panels = (
        ("r2", "R² Score by Architecture", "R² Score"),
        ("mse", "MSE by Architecture", "Mean Squared Error"),
    )
    for position, (column, title, ylabel) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.bar(valid_results["architecture"], valid_results[column])
        plt.title(title)
        plt.xlabel("Architecture")
        plt.ylabel(ylabel)
        plt.xticks(rotation=45)

    plt.tight_layout()
    plt.show()

# 5-fold cross-validation of the [64, 32] MLP on the training data.
# NOTE(review): the original header promised a Linear Regression comparison,
# but no linear model is fitted anywhere in this cell.
import numpy as np  # asarray is needed below; the notebook otherwise imports numpy names individually

print("\nPerforming cross-validation comparison...")
kf = KFold(n_splits=5, shuffle=True, random_state=42)

mlp_cv_scores = []
lr_cv_scores = []  # never populated in this cell; kept in case a later cell reads it - TODO confirm or remove

# The previous version iterated over `X_scaled` / `y`, which this notebook never
# defines or imports, so the cell failed on a fresh kernel. Use the imported
# training split instead, as plain float arrays so fold indexing is positional.
# NOTE(review): assumes X_train is already preprocessed the way X_scaled was - confirm.
X_cv = np.asarray(X_train, dtype=float)
y_cv = np.asarray(y_train, dtype=float).ravel()

for train_idx, val_idx in kf.split(X_cv):
    # Get fold data
    X_fold_train, X_fold_val = X_cv[train_idx], X_cv[val_idx]
    y_fold_train, y_fold_val = y_cv[train_idx], y_cv[val_idx]

    # Train a fresh [64, 32] MLP for this fold
    tf.keras.backend.clear_session()
    inputs = tf.keras.layers.Input(shape=(X_fold_train.shape[1],))
    x = tf.keras.layers.Dense(64, activation="relu")(inputs)
    x = tf.keras.layers.Dense(32, activation="relu")(x)
    outputs = tf.keras.layers.Dense(1)(x)

    fold_mlp = tf.keras.models.Model(inputs=inputs, outputs=outputs)
    fold_mlp.compile(loss="mean_squared_error", optimizer="sgd")
    fold_mlp.fit(X_fold_train, y_fold_train, epochs=50, verbose=0)

    try:
        y_pred_fold_mlp = fold_mlp.predict(X_fold_val, verbose=0).flatten()
        # Score only the non-NaN predictions; with no NaNs the mask selects everything
        mask = ~isnan(y_pred_fold_mlp)
        if not mask.all():
            print("Warning: NaN values in MLP predictions for this fold")
        if mask.any():
            mlp_cv_scores.append(r2_score(y_fold_val[mask], y_pred_fold_mlp[mask]))
        else:
            mlp_cv_scores.append(nan)
    except Exception as e:
        print(f"Error in MLP cross-validation: {e}")
        mlp_cv_scores.append(nan)


# Filter out NaN scores before averaging
mlp_cv_scores_filtered = [score for score in mlp_cv_scores if not isnan(score)]

# The raw per-fold scores are always shown; the mean only when at least one fold is valid
print(f"MLP Cross-Validation R² scores: {mlp_cv_scores}")
if mlp_cv_scores_filtered:
    print(f"MLP Cross-Validation Mean R²: {mean(mlp_cv_scores_filtered):.4f}")
else:
    print("No valid MLP cross-validation scores")

