In [1]:
import numpy as np
import pandas as pd
from joblib import dump, load
from sklearn.datasets import fetch_california_housing
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import ElasticNet, Lasso, Ridge
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVR

In [2]:
# Load the California housing bunch through scikit-learn.
dataset = fetch_california_housing()

# Build one frame holding the feature columns plus the target as "MedHouseVal".
df = pd.DataFrame(dataset.data, columns=dataset.feature_names).assign(
    MedHouseVal=dataset.target
)

In [3]:
# Separate the target column from the predictor columns.
y = df["MedHouseVal"]
x = df.drop(columns=["MedHouseVal"])

# train_test_split is called without random_state here, so it draws from
# NumPy's global RNG; seeding it immediately beforehand keeps the 80/20
# train/test split repeatable whenever this cell is run.
np.random.seed(8)
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2)

In [4]:
# Ridge Regression (default hyperparameters)
# fit() returns the estimator itself, so construction and training are chained.
model = Ridge().fit(X_train, y_train)
# Cell output: the estimator's score on the held-out test split.
model.score(X_test, y_test)

0.6179212072163109

In [5]:
# Lasso Regression (default hyperparameters)
# fit() returns the estimator itself, so construction and training are chained.
model = Lasso().fit(X_train, y_train)
# Cell output: the estimator's score on the held-out test split.
model.score(X_test, y_test)

0.2822949106689715

In [6]:
# ElasticNet Regression (default hyperparameters)
# fit() returns the estimator itself, so construction and training are chained.
model = ElasticNet().fit(X_train, y_train)
# Cell output: the estimator's score on the held-out test split.
model.score(X_test, y_test)

0.41708242189778755

In [7]:
# Ensemble Regression
# A random forest is stochastic. With no explicit random_state the estimator
# draws from NumPy's global RNG, so its score would depend on which cells ran
# before it and would change every time this cell is re-run. Pinning the seed
# (same value used for the train/test split) makes this cell reproducible on
# its own, independent of execution order.
# NOTE: a score recorded before the seed was pinned will differ slightly;
# re-run the cell to refresh the displayed value.
model = RandomForestRegressor(random_state=8)
model.fit(X_train, y_train)

# To persist the fitted model once its score is acceptable, call
# dump(model, "./models/california-housing-model.joblib").

# Cell output: the forest's score on the held-out test split. `model` is left
# bound to this fitted forest for any later cell that reads it.
model.score(X_test, y_test)

0.8113318348400477

In [8]:
# Manual model scoring
# Recompute R^2 by hand from the predictions of whatever estimator `model`
# currently refers to, then apply the adjusted-R^2 correction for the number
# of predictor columns.

predictions = model.predict(X_test)
labels = y_test
# Rows are the test samples, columns are the predictors.
n_samples, n_features = X_test.shape
mean = labels.mean()

# Element-wise squared errors (not yet summed): prediction residuals, and
# deviations of the labels from their mean.
ssr = np.square(predictions - labels)
sst = np.square(labels - mean)

r2 = 1 - ssr.sum() / sst.sum()
# Same operation order as the textbook formula:
# 1 - (1 - R^2) * (n - 1) / (n - p - 1)
adjusted_r2 = 1 - (1 - r2) * (n_samples - 1) / (n_samples - n_features - 1)

adjusted_r2

0.810965399947773