In [1]:
import numpy as np
import pandas as pd

In [2]:
from sklearn.datasets import fetch_california_housing

# Fetch the California housing data and assemble a single frame:
# one column per feature name, plus the regression target as 'MedHouseVal'.
california = fetch_california_housing()
df = pd.DataFrame(
    california.data,
    columns=california.feature_names,
).assign(MedHouseVal=california.target)

In [3]:
# Show the assembled frame (feature columns plus the MedHouseVal target) as the cell output.
df

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude,MedHouseVal
0,8.3252,41.0,6.984127,1.023810,322.0,2.555556,37.88,-122.23,4.526
1,8.3014,21.0,6.238137,0.971880,2401.0,2.109842,37.86,-122.22,3.585
2,7.2574,52.0,8.288136,1.073446,496.0,2.802260,37.85,-122.24,3.521
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945,37.85,-122.25,3.413
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467,37.85,-122.25,3.422
...,...,...,...,...,...,...,...,...,...
20635,1.5603,25.0,5.045455,1.133333,845.0,2.560606,39.48,-121.09,0.781
20636,2.5568,18.0,6.114035,1.315789,356.0,3.122807,39.49,-121.21,0.771
20637,1.7000,17.0,5.205543,1.120092,1007.0,2.325635,39.43,-121.22,0.923
20638,1.8672,18.0,5.329513,1.171920,741.0,2.123209,39.43,-121.32,0.847


In [4]:
from sklearn.preprocessing import StandardScaler
# Check for missing values. A bare expression in the middle of a cell is never
# displayed, so enforce the check instead of silently discarding its result.
missing_counts = df.isnull().sum()
assert not missing_counts.any(), f"Missing values found:\n{missing_counts[missing_counts > 0]}"

# Split features and target variable
x = df.drop('MedHouseVal', axis=1)
y = df['MedHouseVal']

# Standardize the features.
# NOTE(review): the scaler statistics here come from every row of x, so any
# train/test split taken from x_scaled leaks held-out information into the
# features. Prefer fitting a scaler on the training rows only.
scaler = StandardScaler()
x_scaled = scaler.fit_transform(x)


In [5]:
from sklearn.model_selection import train_test_split

# Split the *raw* features into training and testing sets first, so that no
# statistic computed from the test rows can influence the preprocessing.
x_train_raw, x_test_raw, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)

# Fit the scaler on the training rows only, then apply that same transform to
# both splits. x_train / x_test are the standardized arrays used downstream.
scaler = StandardScaler().fit(x_train_raw)
x_train = scaler.transform(x_train_raw)
x_test = scaler.transform(x_test_raw)

In [6]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Baseline: linear regression fitted on the training split and
# used to predict the held-out test split.
lr_model = LinearRegression().fit(x_train, y_train)
y_pred_lr = lr_model.predict(x_test)

# Test-set mean squared error and coefficient of determination.
mse_lr, r2_lr = (
    metric(y_test, y_pred_lr) for metric in (mean_squared_error, r2_score)
)

print(
    f"Linear Regression MSE: {mse_lr}",
    f"Linear Regression R2 Score: {r2_lr}",
    sep="\n",
)

Linear Regression MSE: 0.5558915986952444
Linear Regression R2 Score: 0.5757877060324508


In [7]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam

In [8]:
# Neural network model
from tensorflow.keras.utils import set_random_seed

# Seed the Python, NumPy and TensorFlow random generators before the model is
# built, so weight initialisation and batch shuffling repeat each time this
# cell runs. Without this the metrics printed below change on every run.
# (Some GPU kernels may still introduce small run-to-run differences.)
set_random_seed(42)  # same value as the random_state used for the train/test split

# Two hidden ReLU layers (64 and 32 units) followed by a single linear output
# unit for the regression target; the input width is taken from x_train.
tf_model = Sequential()
tf_model.add(Dense(64, input_dim=x_train.shape[1], activation='relu'))
tf_model.add(Dense(32, activation='relu'))
tf_model.add(Dense(1))

# Mean squared error loss, to match the metric reported for the linear baseline.
tf_model.compile(optimizer=Adam(learning_rate=0.01), loss='mse')

tf_model.fit(x_train, y_train, epochs=20, batch_size=32, verbose=1)

# Evaluate on the held-out test split with the same metrics as the baseline.
y_pred_tf = tf_model.predict(x_test)

mse_tf = mean_squared_error(y_test, y_pred_tf)
r2_tf = r2_score(y_test, y_pred_tf)

print(f"TensorFlow Model MSE: {mse_tf}")
print(f"TensorFlow Model R2 Score: {r2_tf}")


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
TensorFlow Model MSE: 0.28263993340021193
TensorFlow Model R2 Score: 0.7843116629286001
