In [3]:
import numpy as np
import pandas as pd
from sklearn.model_selection import cross_val_score, cross_val_predict, KFold
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error, r2_score



In [4]:
# Load dataset
df = pd.read_csv("lasso_selected.csv")

# Assume the target variable is in the last column
X = df.iloc[:, :-1]  # Features
y = df.iloc[:, -1]   # Target variable

# Define cross-validation strategy
cv = KFold(n_splits=5, shuffle=True, random_state=42)

# Function to evaluate model with cross-validation
def evaluate_model(model, X, y, cv):
    y_pred = cross_val_predict(model, X, y, cv=cv)
    rmse = np.sqrt(mean_squared_error(y, y_pred))
    r2 = r2_score(y, y_pred)
    return rmse, r2

# Initialize models
rf = RandomForestRegressor(n_estimators=100, random_state=42)
xgb = XGBRegressor(n_estimators=100, learning_rate=0.1, max_depth=5, random_state=42)
mlp = MLPRegressor(hidden_layer_sizes=(64, 32), activation='relu', solver='adam', max_iter=1000, random_state=42)

# Evaluate each model
models = {"Random Forest": rf, "XGBoost": xgb, "Neural Network": mlp}
results = {}

for name, model in models.items():
    rmse, r2 = evaluate_model(model, X, y, cv)
    results[name] = {"RMSE": rmse, "R²": r2}



In [7]:
print(results_df)

# If using Jupyter Notebook, display it nicely
from IPython.display import display
display(results_df)


                        RMSE        R²
Random Forest   1.066802e+06  0.390357
XGBoost         1.308340e+06  0.083043
Neural Network  1.372350e+06 -0.008876


Unnamed: 0,RMSE,R²
Random Forest,1066802.0,0.390357
XGBoost,1308340.0,0.083043
Neural Network,1372350.0,-0.008876
