In [4]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.metrics import r2_score, mean_squared_error
import statsmodels.api as sm

df = sm.datasets.get_rdataset("Hitters", "ISLR").data
df = df.dropna()
for col in df.select_dtypes(include=['object']).columns:
    df[col] = pd.Categorical(df[col]).codes

y = df['Salary']
X = df.drop(columns=['Salary'])

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled  = scaler.transform(X_test)

lr = LinearRegression().fit(X_train_scaled, y_train)
ridge = Ridge(alpha=0.5748).fit(X_train_scaled, y_train)
lasso = Lasso(alpha=0.5748, max_iter=10000).fit(X_train_scaled, y_train)

y_pred_lr = lr.predict(X_test_scaled)
y_pred_ridge = ridge.predict(X_test_scaled)
y_pred_lasso = lasso.predict(X_test_scaled)

def eval_model(name, y_true, y_pred):
    mse = mean_squared_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)
    print(f"{name:<20} →  MSE: {mse:.3f}   |   R²: {r2:.3f}")
    return r2

r2_lr = eval_model("Linear Regression", y_test, y_pred_lr)
r2_ridge = eval_model("Ridge Regression", y_test, y_pred_ridge)
r2_lasso = eval_model("Lasso Regression", y_test, y_pred_lasso)

best_model = max([(r2_lr, "Linear Regression"), (r2_ridge, "Ridge Regression"), (r2_lasso, "Lasso Regression")])[1]
print(f"\nBest Performing Model: {best_model}")

Linear Regression    →  MSE: 128284.345   |   R²: 0.291
Ridge Regression     →  MSE: 126603.903   |   R²: 0.300
Lasso Regression     →  MSE: 126678.115   |   R²: 0.300

Best Performing Model: Ridge Regression


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col] = pd.Categorical(df[col]).codes
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col] = pd.Categorical(df[col]).codes
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col] = pd.Categorical(df[col]).codes
  ret = a @ b
  ret = a @ b
  ret = a @ b
