# In [None]:
# Import necessary libraries
import pandas as pd
from sklearn.metrics import classification_report
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier

# Read the training table from disk (file name suggests KNN-imputed data).
data_path = 'train_data_knn_imputed.csv'
data = pd.read_csv(data_path)

# Input columns for the model; 'ResponseValue' is the label to predict.
features = [
    "CurrentSessionLength",
    "CurrentGameMode_LabelEncoded",
    "CurrentTask_TargetEncoded",
    "LastTaskCompleted_TargetEncoded",
    "LevelProgressionAmount",
    "Month",
    "WeekendFlag",
    "PeriodOfDay_Night",
    "QuestionTiming_System Initiated",
    "UserAvgResponse",
]
X, y = data[features], data['ResponseValue']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

#-----------------------------------------------------------

# Configure the multi-layer perceptron classifier.
mlp = MLPClassifier(
    hidden_layer_sizes=(200, 70, 140),  # three hidden layers: 200, 70 and 140 units
    solver='sgd',  # stochastic gradient descent; solver='adam' is an alternative optimizer
    learning_rate='constant',  # keep the step size fixed during training
    alpha=0.0001,  # L2 penalty (regularization) strength
    max_iter=50000,  # upper bound on training iterations
    tol=1e-9,  # tolerance for the stopping criterion
    random_state=21,  # fixed seed for reproducibility
)

# Train on the training split.
mlp.fit(X_train, y_train)

# Predict labels for the held-out test rows with the trained classifier.
y_pred_mlp = mlp.predict(X_test)


#-------------------------


# Score the held-out predictions.
# mean_squared_error, r2_score and mean_absolute_error come from
# sklearn.metrics and must be imported at the top of the file; without that
# import this section fails with NameError after training has finished.
# NOTE(review): these are regression metrics applied to classifier output, so
# they are only meaningful if ResponseValue labels are numeric — TODO confirm.
mse = mean_squared_error(y_test, y_pred_mlp)
r2 = r2_score(y_test, y_pred_mlp)
mae = mean_absolute_error(y_test, y_pred_mlp)

print(f"Mean Squared Error (MSE): {mse}")
print(f"R-squared (R²): {r2}")
print(f"Mean Absolute Error (MAE): {mae}")

# Optional per-class precision/recall/F1 summary, left disabled:
#print(classification_report(y_test, y_pred_mlp))