In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import SGDRegressor
from sklearn.metrics import mean_squared_error

# Tune an SGDRegressor on the California housing data with a cv=4 grid search
# scored by negative MSE, then report the test-set MSE of the best model.

# Load the California housing dataset
data = fetch_california_housing()
X = pd.DataFrame(data.data, columns=data.feature_names)
y = pd.Series(data.target)

# Split the dataset into training and testing sets (70:30 ratio)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1)

# Initialize the SGDRegressor
sgd_regressor = SGDRegressor(random_state=1)

# Chain the scaler and the regressor so GridSearchCV re-fits the scaler on the
# training part of every fold. Scaling X_train once up front would let each
# validation fold contribute to the mean/std used to transform it (leakage).
pipeline = Pipeline([
    ('scaler', StandardScaler(with_mean=True, with_std=True)),
    ('model', sgd_regressor),
])

# Hyperparameter grid; the 'model__' prefix routes each entry to the regressor step
param_grid = {
    'model__loss': ['squared_error', 'huber'],
    'model__penalty': ['l1', 'l2'],
    'model__alpha': [0.1, 0.01, 0.001],
    'model__max_iter': [1000, 2000, 5000]
}

# Set up GridSearchCV over the whole pipeline
grid_search = GridSearchCV(pipeline, param_grid, cv=4, scoring='neg_mean_squared_error')

# Train on the unscaled training features; scaling happens inside the pipeline
grid_search.fit(X_train, y_train)

# Unpack the best pipeline: its scaler and regressor were refit on all of X_train
best_pipeline = grid_search.best_estimator_
scaler = best_pipeline.named_steps['scaler']
best_model = best_pipeline.named_steps['model']

# Scaled feature matrices produced by the refit scaler
X_train_norm = scaler.transform(X_train)
X_test_norm = scaler.transform(X_test)

# Predict on the test data
y_pred = best_model.predict(X_test_norm)

# Compute the score (Mean Squared Error)
mse = mean_squared_error(y_test, y_pred)

# Drop the pipeline step prefix so the report shows plain estimator parameter names
best_params = {name.split('__', 1)[1]: value for name, value in grid_search.best_params_.items()}
print(f"Best Parameters: {best_params}")
print(f"Test Mean Squared Error: {mse:.4f}")


Best Parameters: {'alpha': 0.01, 'loss': 'squared_error', 'max_iter': 1000, 'penalty': 'l1'}
Test Mean Squared Error: 0.5323


  _data = np.array(data, dtype=dtype, copy=copy,


In [2]:
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error

# Tune a Ridge regressor on the California housing data with a cv=4 grid search
# scored by negative MSE, then report the test-set MSE of the best model.

# Load the California housing dataset
data = fetch_california_housing()
X = pd.DataFrame(data.data, columns=data.feature_names)
y = pd.Series(data.target)

# Split the dataset into training and testing sets (70:30 ratio)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1)

# Initialize the Ridge regression model
ridge_model = Ridge()

# Chain the scaler and the regressor so GridSearchCV re-fits the scaler on the
# training part of every fold. Scaling X_train once up front would let each
# validation fold contribute to the mean/std used to transform it (leakage).
pipeline = Pipeline([
    ('scaler', StandardScaler(with_mean=True, with_std=True)),
    ('model', ridge_model),
])

# Hyperparameter grid; the 'model__' prefix routes each entry to the regressor step
param_grid = {
    'model__alpha': [0.5, 0.1, 0.05, 0.01, 0.005, 0.001],
    'model__fit_intercept': [True, False]  # With or without intercept
}

# Set up GridSearchCV over the whole pipeline
grid_search = GridSearchCV(pipeline, param_grid, cv=4, scoring='neg_mean_squared_error')

# Train on the unscaled training features; scaling happens inside the pipeline
grid_search.fit(X_train, y_train)

# Unpack the best pipeline: its scaler and regressor were refit on all of X_train
best_pipeline = grid_search.best_estimator_
scaler = best_pipeline.named_steps['scaler']
best_model = best_pipeline.named_steps['model']

# Scaled feature matrices produced by the refit scaler
X_train_norm = scaler.transform(X_train)
X_test_norm = scaler.transform(X_test)

# Predict on the test data
y_pred = best_model.predict(X_test_norm)

# Compute the score (Mean Squared Error)
mse = mean_squared_error(y_test, y_pred)

# Output the results (pipeline step prefix dropped from the parameter names)
best_params = {name.split('__', 1)[1]: value for name, value in grid_search.best_params_.items()}
print(f"Best Parameters: {best_params}")
print(f"Test Mean Squared Error: {mse:.4f}")


Best Parameters: {'alpha': 0.5, 'fit_intercept': True}
Test Mean Squared Error: 0.5296


In [3]:
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Lasso
from sklearn.metrics import mean_squared_error

# Tune a Lasso regressor on the California housing data with a cv=6 grid search
# scored by negative MSE, then report the test-set MSE of the best model.

# Load the California housing dataset
data = fetch_california_housing()
X = pd.DataFrame(data.data, columns=data.feature_names)
y = pd.Series(data.target)

# Split the dataset into training and testing sets (60:40 ratio)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=1)

# Initialize the Lasso regression model
lasso_model = Lasso()

# Chain the scaler and the regressor so GridSearchCV re-fits the scaler on the
# training part of every fold. Scaling X_train once up front would let each
# validation fold contribute to the mean/std used to transform it (leakage).
pipeline = Pipeline([
    ('scaler', StandardScaler(with_mean=True, with_std=True)),
    ('model', lasso_model),
])

# Hyperparameter grid; the 'model__' prefix routes each entry to the regressor step
param_grid = {
    'model__alpha': [0.5, 0.1, 0.05, 0.01, 0.005, 0.001],
    'model__fit_intercept': [True, False]  # With or without intercept
}

# Set up GridSearchCV over the whole pipeline
grid_search = GridSearchCV(pipeline, param_grid, cv=6, scoring='neg_mean_squared_error')

# Train on the unscaled training features; scaling happens inside the pipeline
grid_search.fit(X_train, y_train)

# Unpack the best pipeline: its scaler and regressor were refit on all of X_train
best_pipeline = grid_search.best_estimator_
scaler = best_pipeline.named_steps['scaler']
best_model = best_pipeline.named_steps['model']

# Scaled feature matrices produced by the refit scaler
X_train_norm = scaler.transform(X_train)
X_test_norm = scaler.transform(X_test)

# Predict on the test data
y_pred = best_model.predict(X_test_norm)

# Compute the score (Mean Squared Error)
mse = mean_squared_error(y_test, y_pred)

# Output the results (pipeline step prefix dropped from the parameter names)
best_params = {name.split('__', 1)[1]: value for name, value in grid_search.best_params_.items()}
print(f"Best Parameters: {best_params}")
print(f"Test Mean Squared Error: {mse:.4f}")


Best Parameters: {'alpha': 0.005, 'fit_intercept': True}
Test Mean Squared Error: 0.5240


In [4]:
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Lasso
from sklearn.metrics import r2_score

# Tune a Lasso regressor on the California housing data with a cv=6 grid search
# scored by R², then report the test-set R² of the best model.

# Load the California housing dataset
data = fetch_california_housing()
X = pd.DataFrame(data.data, columns=data.feature_names)
y = pd.Series(data.target)

# Split the dataset into training and testing sets (60:40 ratio)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=1)

# Initialize the Lasso regression model
lasso_model = Lasso()

# Chain the scaler and the regressor so GridSearchCV re-fits the scaler on the
# training part of every fold. Scaling X_train once up front would let each
# validation fold contribute to the mean/std used to transform it (leakage).
pipeline = Pipeline([
    ('scaler', StandardScaler(with_mean=True, with_std=True)),
    ('model', lasso_model),
])

# Hyperparameter grid; the 'model__' prefix routes each entry to the regressor step
param_grid = {
    'model__alpha': [0.5, 0.1, 0.05, 0.01, 0.005, 0.001],
    'model__fit_intercept': [True, False]  # With or without intercept
}

# Set up GridSearchCV over the whole pipeline
grid_search = GridSearchCV(pipeline, param_grid, cv=6, scoring='r2')

# Train on the unscaled training features; scaling happens inside the pipeline
grid_search.fit(X_train, y_train)

# Unpack the best pipeline: its scaler and regressor were refit on all of X_train
best_pipeline = grid_search.best_estimator_
scaler = best_pipeline.named_steps['scaler']
best_model = best_pipeline.named_steps['model']

# Scaled feature matrices produced by the refit scaler
X_train_norm = scaler.transform(X_train)
X_test_norm = scaler.transform(X_test)

# Predict on the test data
y_pred = best_model.predict(X_test_norm)

# Compute the R² score
r2 = r2_score(y_test, y_pred)

# Output the results (pipeline step prefix dropped from the parameter names)
best_params = {name.split('__', 1)[1]: value for name, value in grid_search.best_params_.items()}
print(f"Best Parameters: {best_params}")
print(f"Test R² Score: {r2:.4f}")


Best Parameters: {'alpha': 0.005, 'fit_intercept': True}
Test R² Score: 0.6048


In [5]:
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge
from sklearn.metrics import r2_score

# Tune a Ridge regressor on the California housing data with a cv=4 grid search
# scored by R², then report the test-set R² of the best model.

# Load the California housing dataset
data = fetch_california_housing()
X = pd.DataFrame(data.data, columns=data.feature_names)
y = pd.Series(data.target)

# Split the dataset into training and testing sets (70:30 ratio)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1)

# Initialize the Ridge regression model
ridge_model = Ridge()

# Chain the scaler and the regressor so GridSearchCV re-fits the scaler on the
# training part of every fold. Scaling X_train once up front would let each
# validation fold contribute to the mean/std used to transform it (leakage).
pipeline = Pipeline([
    ('scaler', StandardScaler(with_mean=True, with_std=True)),
    ('model', ridge_model),
])

# Hyperparameter grid; the 'model__' prefix routes each entry to the regressor step
param_grid = {
    'model__alpha': [0.5, 0.1, 0.05, 0.01, 0.005, 0.001],
    'model__fit_intercept': [True, False]  # With or without intercept
}

# Set up GridSearchCV over the whole pipeline
grid_search = GridSearchCV(pipeline, param_grid, cv=4, scoring='r2')

# Train on the unscaled training features; scaling happens inside the pipeline
grid_search.fit(X_train, y_train)

# Unpack the best pipeline: its scaler and regressor were refit on all of X_train
best_pipeline = grid_search.best_estimator_
scaler = best_pipeline.named_steps['scaler']
best_model = best_pipeline.named_steps['model']

# Scaled feature matrices produced by the refit scaler
X_train_norm = scaler.transform(X_train)
X_test_norm = scaler.transform(X_test)

# Predict on the test data
y_pred = best_model.predict(X_test_norm)

# Compute the R² score
r2 = r2_score(y_test, y_pred)

# Output the results (pipeline step prefix dropped from the parameter names)
best_params = {name.split('__', 1)[1]: value for name, value in grid_search.best_params_.items()}
print(f"Best Parameters: {best_params}")
print(f"Test R² Score: {r2:.4f}")


Best Parameters: {'alpha': 0.5, 'fit_intercept': True}
Test R² Score: 0.5971


In [6]:
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import SGDRegressor
from sklearn.metrics import r2_score

# Tune an SGDRegressor on the California housing data with a cv=4 grid search
# scored by R², then report the test-set R² of the best model.

# Load the California housing dataset
data = fetch_california_housing()
X = pd.DataFrame(data.data, columns=data.feature_names)
y = pd.Series(data.target)

# Split the dataset into training and testing sets (70:30 ratio)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1)

# Initialize the SGDRegressor model
sgd_regressor = SGDRegressor(random_state=1)

# Chain the scaler and the regressor so GridSearchCV re-fits the scaler on the
# training part of every fold. Scaling X_train once up front would let each
# validation fold contribute to the mean/std used to transform it (leakage).
pipeline = Pipeline([
    ('scaler', StandardScaler(with_mean=True, with_std=True)),
    ('model', sgd_regressor),
])

# Hyperparameter grid; the 'model__' prefix routes each entry to the regressor step
param_grid = {
    'model__loss': ['squared_error', 'huber'],
    'model__penalty': ['l1', 'l2'],
    'model__alpha': [0.1, 0.01, 0.001],
    'model__max_iter': [1000, 2000, 5000]
}

# Set up GridSearchCV over the whole pipeline
grid_search = GridSearchCV(pipeline, param_grid, cv=4, scoring='r2')

# Train on the unscaled training features; scaling happens inside the pipeline
grid_search.fit(X_train, y_train)

# Unpack the best pipeline: its scaler and regressor were refit on all of X_train
best_pipeline = grid_search.best_estimator_
scaler = best_pipeline.named_steps['scaler']
best_model = best_pipeline.named_steps['model']

# Scaled feature matrices produced by the refit scaler
X_train_norm = scaler.transform(X_train)
X_test_norm = scaler.transform(X_test)

# Predict on the test data
y_pred = best_model.predict(X_test_norm)

# Compute the R² score
r2 = r2_score(y_test, y_pred)

# Output the results (pipeline step prefix dropped from the parameter names)
best_params = {name.split('__', 1)[1]: value for name, value in grid_search.best_params_.items()}
print(f"Best Parameters: {best_params}")
print(f"Test R² Score: {r2:.4f}")


Best Parameters: {'alpha': 0.01, 'loss': 'squared_error', 'max_iter': 1000, 'penalty': 'l1'}
Test R² Score: 0.5951


  _data = np.array(data, dtype=dtype, copy=copy,
