# Q5: Learn Feature Maps

In [None]:
import random
import warnings

import numpy as np

# Silence all library warnings so they do not clutter the notebook output.
warnings.filterwarnings("ignore")

# Seed Python's and NumPy's global RNGs. Seeding `random` alone leaves any
# NumPy-based randomness unseeded.
# NOTE(review): confirm which RNG the project's MLP / train code draws from;
# if it uses another library, that library needs its own seed as well.
random.seed(1234)
np.random.seed(1234)

In [None]:
import csv
from pathlib import Path

import pandas as pd
import plotly.express as px
from models.mlp_model import MLP
from sklearn.kernel_approximation import RBFSampler
from sklearn.metrics import accuracy_score
from training_testing.rbf_testing import plot_decision_boundary
from training_testing.rbf_training import perform_hyperparameter_search, train
from utilities import load_data

## Loading Center Surround

In [None]:
# Dataset tag and the CSV file that collects the grid-search results.
dataset = "center_surround"
csv_filename = "results/center_surround/rbf_hyperparameter_results.csv"

# Read the train / validation / test splits of the center-surround data.
X_train, y_train = load_data("data/center_surround_train.csv")
X_valid, y_valid = load_data("data/center_surround_valid.csv")
X_test, y_test = load_data("data/center_surround_test.csv")

# Turn every label array into a single-column 2-D array.
y_train, y_valid, y_test = (
    labels.reshape(-1, 1) for labels in (y_train, y_valid, y_test)
)

## Finding hyperparameters

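This section grid-searches the RBF `gamma` together with the MLP hyperparameters, transforming the data with each candidate `gamma`. Note: skip the cells in this section if you do not want to re-run the hyperparameter search; running them overwrites the existing results CSV.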

In [None]:
# Candidate values explored by the grid search.
# RBF feature-map parameter:
gamma_values = [0.01, 0.1, 1, 10]
# Network size:
hidden_layer_sizes = [15]
# Optimisation settings:
learning_rates = [0.1, 0.13, 0.15]
batch_sizes = [16, 32, 64]
epoch_values = [1000, 1500, 2000]

In [None]:
# Start a fresh results file that contains only the header row. Opening with
# mode="w" discards anything a previous search saved at this path.
# open() does not create missing parent directories, so make sure the results
# folder exists first; otherwise a fresh checkout fails with FileNotFoundError.
Path(csv_filename).parent.mkdir(parents=True, exist_ok=True)
with open(csv_filename, mode="w", newline="") as file:
    writer = csv.writer(file)
    # Column names of the results table; the reporting cells further down sort
    # on "Last Validation Loss".
    writer.writerow(
        [
            "Gamma",
            "Hidden Layers",
            "Batch Size",
            "Learning Rate",
            "Epochs",
            "Last Train Loss",
            "Last Validation Loss",
            "Last Train Accuracy",
            "Last Validation Accuracy",
        ]
    )

In [None]:
# Run the grid search: the candidate grids, then the train and validation
# splits, then the results CSV path and the dataset tag.
perform_hyperparameter_search(
    hidden_layer_sizes, batch_sizes, learning_rates, epoch_values, gamma_values,
    X_train, y_train,
    X_valid, y_valid,
    csv_filename, dataset,
)

### Results from Grid Search

In [None]:
# Load the grid-search results from the same path the results file was written
# to (`csv_filename`), so the read and write locations cannot drift apart.
center_surround_results_df = pd.read_csv(csv_filename)

In [None]:
# Show the ten configurations with the lowest final validation loss.
center_surround_results_df.sort_values("Last Validation Loss").head(n=10)

### Visualizing New Features

Using the best gamma (`gamma=0.1`) with three RBF components, so the transformed features can be plotted in 3D:

In [None]:
# Map the training inputs to three RBF features (gamma fixed at 0.1) so they
# can be shown in a 3-D scatter plot.
rbf_sampler = RBFSampler(
    n_components=3,
    gamma=0.1,
    random_state=1234,
)
X_train_transformed = rbf_sampler.fit_transform(X_train)

In [None]:
# Tabulate the three transformed features, one column per feature.
df_train_transformed = pd.DataFrame(
    data=X_train_transformed,
    columns=["Feature 1", "Feature 2", "Feature 3"],
)

# Attach the class labels, then store them as integer-valued strings so they
# match the string keys of the colour map used by the plot.
df_train_transformed["Label"] = y_train
df_train_transformed["Label"] = (
    df_train_transformed["Label"].astype(int).astype(str)
)

In [None]:
# Interactive 3-D scatter of the three RBF features, coloured by class label
# (label "0" in blue, label "1" in red).
fig = px.scatter_3d(
    df_train_transformed,
    x="Feature 1",
    y="Feature 2",
    z="Feature 3",
    color="Label",
    color_discrete_map={"0": "blue", "1": "red"},
)
fig.update_layout(
    title="3D Scatter Plot of X_train_transformed",
    # Keep a top margin: the title is drawn inside it, so t=0 leaves no room
    # for the title set above.
    margin=dict(l=0, r=0, b=0, t=40),
)
fig.show()

## Testing various hyperparameters with their decision boundaries

In [None]:
# Configuration used for the final model in this section.
gamma = 0.1  # RBF feature-map parameter
k = 15  # hidden-layer size passed to the MLP
# Optimisation settings passed to train():
lr = 0.15
batch_size = 16
epochs = 1500

In [None]:
# Fit the RBF feature map on the training split only, then apply that same
# fitted map to the validation and test splits.
rbf_sampler = RBFSampler(n_components=3, gamma=gamma, random_state=1234)
X_train_transformed = rbf_sampler.fit_transform(X_train)
X_valid_transformed, X_test_transformed = (
    rbf_sampler.transform(split) for split in (X_valid, X_test)
)

In [None]:
# Build the MLP: its input width is the number of transformed features and
# its hidden size is k.
model = MLP(
    hidden_size=k,
    input_size=X_train_transformed.shape[1],
)

In [None]:
# Train the model on the transformed training split, passing the transformed
# validation split alongside. The four values returned by train() are not
# needed here and are discarded.
_, _, _, _ = train(
    model,
    X_train_transformed,
    y_train,
    X_valid_transformed,
    y_valid,
    batch_size=batch_size,
    epochs=epochs,
    lr=lr,
)

In [None]:
# Predict on the transformed test split and score against the true labels
# (squeezed back from their single-column shape).
test_pred = model.predict(X_test_transformed)
test_accuracy = accuracy_score(y_true=y_test.squeeze(), y_pred=test_pred)

# Report the score together with the configuration that produced it.
print(
    f"Test accuracy for k={k}, Batch={batch_size}, LR={lr}, Gamma={gamma}: {test_accuracy}"
)

In [None]:
# Hand the transformed test inputs, true labels, predictions and dataset tag
# to the project's decision-boundary plotting helper.
plot_decision_boundary(
    X_test_transformed,
    y_test,
    test_pred,
    dataset,
)