In [1]:
# Lasso Regression Example with California Housing Dataset
from sklearn.linear_model import Lasso
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.datasets import fetch_california_housing
import pandas as pd

# Load sample dataset
data = fetch_california_housing(as_frame=True)
X = data.data
y = data.target

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train Lasso Regression model
lasso = Lasso(alpha=0.1)
lasso.fit(X_train, y_train)

# Predictions
y_pred = lasso.predict(X_test)

# Evaluation
print("R^2 Score:", r2_score(y_test, y_pred))
print("MSE:", mean_squared_error(y_test, y_pred))
print("Coefficients:", lasso.coef_)

R^2 Score: 0.5318167610318159
MSE: 0.6135115198058131
Coefficients: [ 3.92693362e-01  1.50810624e-02 -0.00000000e+00  0.00000000e+00
  1.64168387e-05 -3.14918929e-03 -1.14291203e-01 -9.93076483e-02]


In [4]:
from sklearn.linear_model import Lasso
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import LabelEncoder
from sklearn.datasets import fetch_openml
import pandas as pd

# Load dataset (Ames Housing example)
data = fetch_openml(name="house_prices", as_frame=True)
df = data.frame

# Encode categorical columns
for col in df.select_dtypes(include=['category', 'object']).columns:
    df[col] = LabelEncoder().fit_transform(df[col].astype(str))

# Fill missing values with column mean
X = df.drop(columns=["SalePrice"])
y = df["SalePrice"]
X = X.fillna(X.mean())

# Train/test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train Lasso
lasso = Lasso(alpha=0.1, max_iter=10000)
lasso.fit(X_train, y_train)

# Predictions
y_pred = lasso.predict(X_test)

# Evaluation
print("R^2 Score:", r2_score(y_test, y_pred))
print("MSE:", mean_squared_error(y_test, y_pred))
print("Coefficients:", lasso.coef_)

R^2 Score: 0.8430888598177659
MSE: 1203559630.7394028
Coefficients: [-2.42333875e+00 -1.38647871e+02 -2.10636577e+03 -2.61297312e+02
  3.52442864e-01  1.97209584e+04  4.34580988e+03 -1.10002128e+03
  3.10787746e+03 -5.19573691e+04 -1.81126449e+01  1.14788483e+04
  4.38413243e+02 -7.59962912e+02 -1.10517999e+04 -2.24393707e+03
 -8.58568450e+02  1.05088548e+04  5.23916032e+03  2.46837916e+02
  3.59294874e+01  2.54947964e+03  5.70601465e+03 -6.05467321e+02
  1.92969043e+02  3.87035108e+03  2.58865199e+01 -8.23356011e+03
  1.05290591e+02  1.03214372e+03 -9.56103016e+03  3.50284451e+03
 -3.61760045e+03 -4.99655669e+02  4.45507825e+00  1.01695707e+03
  5.23639665e+00 -3.00666248e+00 -1.67070327e+00  2.30152359e+02
 -4.02578769e+02  3.96197053e+02 -4.56619830e+02  4.52706470e+01
  3.69094046e+01 -1.22120290e+00  4.87048552e+00  6.92907951e+03
 -4.62692477e+03  2.91016546e+03 -2.02355015e+02 -1.99381767e+03
 -1.04576782e+04 -9.54611591e+03  3.75519605e+03  4.66142452e+03
  3.99867399e+03 -1.05

  model = cd_fast.enet_coordinate_descent(


In [5]:
from sklearn.linear_model import Lasso
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import LabelEncoder
from sklearn.datasets import fetch_openml
import pandas as pd

# -----------------------------
# Choose dataset here:
# Options: "house_prices", "wine-quality-red", "Concrete_Compressive"
dataset_name = "house_prices"
# -----------------------------

# Load dataset
data = fetch_openml(name=dataset_name, as_frame=True)
df = data.frame

# Separate target based on dataset
if dataset_name == "house_prices":
    target_col = "SalePrice"
elif dataset_name == "wine-quality-red":
    target_col = "quality"
elif dataset_name == "Concrete_Compressive":
    target_col = "Concrete_compressive_strength"
else:
    raise ValueError("Dataset not supported!")

# Encode categorical columns (if any)
for col in df.select_dtypes(include=['category', 'object']).columns:
    df[col] = LabelEncoder().fit_transform(df[col].astype(str))

# Separate features and target
X = df.drop(columns=[target_col])
y = df[target_col]

# Fill missing values
X = X.fillna(X.mean())

# Split into train/test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train Lasso Regression
lasso = Lasso(alpha=0.1, max_iter=10000)
lasso.fit(X_train, y_train)

# Predictions
y_pred = lasso.predict(X_test)

# Evaluation
print(f"Dataset: {dataset_name}")
print("R^2 Score:", r2_score(y_test, y_pred))
print("MSE:", mean_squared_error(y_test, y_pred))
print("Coefficients:", lasso.coef_)

Dataset: house_prices
R^2 Score: 0.8430888598177659
MSE: 1203559630.7394028
Coefficients: [-2.42333875e+00 -1.38647871e+02 -2.10636577e+03 -2.61297312e+02
  3.52442864e-01  1.97209584e+04  4.34580988e+03 -1.10002128e+03
  3.10787746e+03 -5.19573691e+04 -1.81126449e+01  1.14788483e+04
  4.38413243e+02 -7.59962912e+02 -1.10517999e+04 -2.24393707e+03
 -8.58568450e+02  1.05088548e+04  5.23916032e+03  2.46837916e+02
  3.59294874e+01  2.54947964e+03  5.70601465e+03 -6.05467321e+02
  1.92969043e+02  3.87035108e+03  2.58865199e+01 -8.23356011e+03
  1.05290591e+02  1.03214372e+03 -9.56103016e+03  3.50284451e+03
 -3.61760045e+03 -4.99655669e+02  4.45507825e+00  1.01695707e+03
  5.23639665e+00 -3.00666248e+00 -1.67070327e+00  2.30152359e+02
 -4.02578769e+02  3.96197053e+02 -4.56619830e+02  4.52706470e+01
  3.69094046e+01 -1.22120290e+00  4.87048552e+00  6.92907951e+03
 -4.62692477e+03  2.91016546e+03 -2.02355015e+02 -1.99381767e+03
 -1.04576782e+04 -9.54611591e+03  3.75519605e+03  4.66142452e+03


  model = cd_fast.enet_coordinate_descent(
