In [174]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split

In [175]:
housing_data = fetch_california_housing()

x = housing_data.data
y = housing_data.target

X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.1, train_size=0.9, random_state=75)

In [176]:
# Normalize: put values in the same range
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler(feature_range=(0,1))

X_train = scaler.fit_transform(X_train)
X_test = scaler.fit_transform(X_test)

y_train = y_train.reshape(-1, 1)
y_train = scaler.fit_transform(y_train)

In [177]:
# multiple Linear Regression
from sklearn.linear_model import LinearRegression
linear_regressor = LinearRegression()
linear_regressor.fit(X_train, y_train)

predicted = linear_regressor.predict(X_test)

# Transform predicted from normalized to real values
predicted = scaler.inverse_transform(predicted)

In [178]:
def mean_absolute_percentage_error(y_true, y_pred):
    y_true, y_pred = (np.array(y_true), np.array(y_pred))
    return np.mean(np.abs((y_true - y_pred)/y_true)) * 100

In [179]:
# Evaluation Metrics
import math
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

mae = mean_absolute_error(y_true=y_test, y_pred=predicted)
mse = mean_squared_error(y_true=y_test, y_pred=predicted)
rme = math.sqrt(mse)
r2 = r2_score(y_true=y_test, y_pred=predicted)
mape = mean_absolute_percentage_error(y_true=y_test, y_pred=predicted)


In [180]:
# Plynomial Linear regression
# For polynomial Linear regression you should use only one feature
from sklearn.preprocessing import PolynomialFeatures

X2 = x[:,2]
y2 = y

X2_train, X2_test, y2_train, y2_test = train_test_split(X2, y2, test_size=0.25, train_size=0.75, random_state=75)


In [181]:
poly_p = PolynomialFeatures(degree=2)
X2_train = X2_train.reshape(-1, 1) #Normalize
X2_train_poly = poly_p.fit_transform(X2_train)

X2_test = X2_test.reshape(-1, 1) #Normalize
X2_test_poly = poly_p.fit_transform(X2_test)

In [182]:
linear_regressor2 = LinearRegression()
poly_lr = linear_regressor2.fit(X2_train_poly, y2_train)
predicted2 = poly_lr.predict(X2_test_poly)

In [183]:
from sklearn.metrics import r2_score
r2 = r2_score(y2_test, predicted2)
r2

0.038811049674085774

In [198]:
# Random Forest Model
from sklearn.ensemble import RandomForestRegressor
rfr = RandomForestRegressor(random_state=33, n_estimators=500)
rfr.fit(X2_train, y2_train)
rfr_prediction = rfr.predict(X2_test)

In [199]:
rfr_prediction = rfr_prediction.reshape(-1, 1)
rfr_prediction = scaler.inverse_transform(rfr_prediction)
mae2 = mean_absolute_error(y_true=y2_test, y_pred=rfr_prediction)
mse2 = mean_squared_error(y_true=y2_test, y_pred=rfr_prediction)
print(mae2)
print(np.sqrt(mse2))

8.094749475380478
9.049078182475752


In [200]:
# Support Vector Regression SVR
from sklearn.svm import SVR
svr_regressor = SVR(kernel="rbf")
svr_regressor.fit(X2_train, y2_train)
svr_prediction = svr_regressor.predict(X2_test)
svr_prediction = svr_prediction.reshape(-1, 1)
svr_prediction = scaler.inverse_transform(svr_prediction)

In [201]:
mae2 = mean_absolute_error(y_true=y2_test, y_pred=svr_prediction)
mse2 = mean_squared_error(y_true=y2_test, y_pred=svr_prediction)
print(mae2)
print(np.sqrt(mse2))

7.340526704232625
7.730326277906202
