Use three regression algorithms and conclude which one performs best:
1. Build a regression model using scikit-learn. You can use a dataset like the California Housing dataset
▪ Load the dataset.
▪ Preprocess and explore the data.
▪ Split the data into training and testing sets.
▪ Use regression algorithms (e.g., Linear Regression, Decision Trees, Random Forest).
▪ Tune hyperparameters for better performance (use GridSearchCV).

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV

In [None]:
# Load the California Housing dataset through scikit-learn's fetch helper.
# as_frame=True is passed so the returned bunch exposes pandas objects
# (see the scikit-learn docs for fetch_california_housing).
california_housing = fetch_california_housing(as_frame=True)

In [None]:
# Feature matrix (X) and regression target (y) taken from the loaded dataset.
X, y = california_housing.data, california_housing.target

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Model 1: linear regression with default settings, fitted on the
# training split.
linear_regression = LinearRegression()
linear_regression.fit(X=X_train, y=y_train)

In [None]:
# Model 2: decision tree with default hyperparameters; random_state is
# fixed so repeated runs build the same tree.
decision_tree_regressor = DecisionTreeRegressor(random_state=42)
decision_tree_regressor.fit(X=X_train, y=y_train)

In [None]:
# Model 3: random forest with default hyperparameters (the untuned
# baseline); random_state is fixed for reproducibility.
random_forest_regressor = RandomForestRegressor(random_state=42)
random_forest_regressor.fit(X=X_train, y=y_train)

In [None]:
# Predict on the held-out test features with each of the three fitted
# models, in the order: linear regression, decision tree, random forest.
(
    linear_regression_predictions,
    decision_tree_predictions,
    random_forest_predictions,
) = (
    fitted_model.predict(X_test)
    for fitted_model in (
        linear_regression,
        decision_tree_regressor,
        random_forest_regressor,
    )
)

In [None]:
# Mean squared error of each model's test-set predictions against y_test
# (lower is better).
linear_regression_mse, decision_tree_mse, random_forest_mse = (
    mean_squared_error(y_test, predictions)
    for predictions in (
        linear_regression_predictions,
        decision_tree_predictions,
        random_forest_predictions,
    )
)

In [None]:
# Hyperparameter grid for the random forest search: 4 forest sizes x 4 depth
# limits = 16 candidate combinations (max_depth=None means no depth limit).
param_grid = dict(
    n_estimators=[10, 50, 100, 150],
    max_depth=[None, 10, 20, 30],
)

In [None]:
# Exhaustive 5-fold cross-validated search over param_grid for the random
# forest. Scoring is negated MSE because GridSearchCV maximises its score.
# n_jobs=-1 spreads the candidate/fold fits across all available CPU cores;
# the estimator's fixed random_state and the deterministic cv=5 split mean
# the selected model is the same as with a serial search.
grid_search = GridSearchCV(
    RandomForestRegressor(random_state=42),
    param_grid,
    scoring='neg_mean_squared_error',
    cv=5,
    n_jobs=-1,
)
grid_search.fit(X_train, y_train)

In [None]:
# Take the best estimator found by the grid search and score it on the
# held-out test set.
best_random_forest = grid_search.best_estimator_
best_random_forest_predictions = best_random_forest.predict(X_test)
best_random_forest_mse = mean_squared_error(
    y_true=y_test,
    y_pred=best_random_forest_predictions,
)

In [None]:
# Parallel lists of model labels and their test-set MSEs.
# NOTE: the 'Random Forest' entry uses the tuned model's score
# (best_random_forest_mse); the untuned random_forest_mse is not included.
models, mse_scores = map(
    list,
    zip(
        ('Linear Regression', linear_regression_mse),
        ('Decision Tree', decision_tree_mse),
        ('Random Forest', best_random_forest_mse),
    ),
)

In [None]:
# The best model is the label whose MSE is the smallest.
best_model = models[np.asarray(mse_scores).argmin()]

In [None]:
# Report each model's test-set MSE, one per line, rounded to two decimals.
print(
    f'Linear Regression MSE: {linear_regression_mse:.2f}',
    f'Decision Tree MSE: {decision_tree_mse:.2f}',
    f'Best Random Forest MSE: {best_random_forest_mse:.2f}',
    sep='\n',
)


Linear Regression MSE: 0.56
Decision Tree MSE: 0.50
Best Random Forest MSE: 0.26


In [None]:
# Announce the label of the model with the lowest test-set MSE.
print(f'The best model is: {best_model}')

The best model is: Random Forest
