<h2>Assignment 2 - Using a Neural Network to fit the California Housing data</h2>

# Data Preparation

In [50]:
# California Housing dataset
import urllib.request
import pandas as pd
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer

import torch
from torch import save, load
import torch.nn as nn
from sklearn.metrics import r2_score

# load data from csv file
urllib.request.urlretrieve("https://raw.githubusercontent.com/ageron/handson-ml2/master/datasets/housing/housing.csv", "housing.csv")
housing = pd.read_csv('housing.csv')

# Using the setting inplace=False, drop() creates a copy of the data and does not affect housing dataset
housing_data = housing.drop("median_house_value", axis=1, inplace=False)
housing_target = housing["median_house_value"].copy()
feature_names = list(housing_data.columns)

#  Transformation pipeline at https://scikit-learn.org/stable/modules/generated/sklearn.pipeline.Pipeline.html
num_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy="median")),
    ('std_scaler', StandardScaler()),
])

full_pipeline = ColumnTransformer([
    ('num', num_pipeline, feature_names[:-1]),
    ('cat', OneHotEncoder(), [feature_names[-1]]),
])

housing_preprocessed = full_pipeline.fit_transform(housing_data)

print(housing_preprocessed.shape)


(20640, 13)


In [3]:
X = housing_preprocessed
y = housing_target.to_numpy()

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# (for comparsion) Using scikit-learn's Linear Regression model to fit the data

In [4]:
from sklearn.linear_model import LinearRegression

# documentation at https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LinearRegression.html
lr = LinearRegression()

lr.fit(X_train, y_train)
print("model training error : %.3f" % lr.score(X_train, y_train))
print("model testing error: %.3f" % lr.score(X_test, y_test))

model training error : 0.646
model testing error: 0.642


In [7]:
# Get cpu, gpu or mps device for training.
device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)
print(f"Using {device} device")

Using cpu device


Initializing tensors

In [94]:
# Converting datasets to tensors

X_train = torch.tensor(X_train)
X_test = torch.tensor(X_test)
y_train = torch.tensor(y_train)
y_test = torch.tensor(y_test)


  X_train = torch.tensor(X_train)
  X_test = torch.tensor(X_test)
  y_train = torch.tensor(y_train)
  y_test = torch.tensor(y_test)


# (Task 1 of Assignment 2): Using PyTorch nn.Sequential() to build a neural network to fit the data

In [103]:

# 1) Design model - input, output, forward pass with different layers

class NeuralNet(nn.Module):
    def __init__(self, input_size):
        super().__init__()
        self.model = nn.Sequential(
            nn.Linear(input_size,input_size // 2),
            nn.ReLU(),
            nn.Linear(input_size //2 ,input_size // 4),
            nn.ReLU(),
            nn.Linear(input_size // 4 , 1),
        )
        self.double()
    
    def forward(self, x):
        return self.model(x)



model = NeuralNet(13)

# 2) Create loss and optimizer

learning_rate = 0.01
epochs = 10000

loss_fn = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr = learning_rate)

# 3) Training Loop
#        Forward (compute predictions and loss)
#        Backward (compute gradients)
#        Update weights


for epoch in range (epochs):
    # predict y (forward pass)
    y_pred = model(X_train)

    # Compute loss
    loss = loss_fn(y_train, y_pred)

    # Clear gradients for next loop
    optimizer.zero_grad()

    # Calculate gradients (backward pass)
    loss.backward()

    # Update weights
    optimizer.step()

    # Print training progress every x number of epochs
    if ((epoch+1) % 100 == 0):
        print(f'Epoch: {epoch+1} loss: {loss.item()}')






KeyboardInterrupt: 

In [51]:
# save model

with open('model_state.pt', 'wb') as f:
    save(model.state_dict(), f)

with open('model_state.pt', 'rb') as f:
    model.load_state_dict(load(f))

# (Task 2 of Assignment 2): Subclassing nn.Module to build a neural network (the same network structure as Task 1) to fit the data

In [101]:

# 1) Design model - input, output, forward pass with different layers

class NeuralNet(nn.Module):
    def __init__(self, input_size):
        super().__init__()
        self.fc1 = nn.Linear(input_size, input_size // 2)  # First hidden layer
        self.relu1 = nn.ReLU()  # ReLU activation function
        self.fc2 = nn.Linear(input_size // 2, input_size // 4)  # Second hidden layer
        self.relu2 = nn.ReLU()  # ReLU activation function
        self.output_layer = nn.Linear(input_size //4 , 1)  # Output layer with linear activation
        self.double()
    
    def forward(self, x):
        x = self.relu1(self.fc1(x))
        x = self.relu2(self.fc2(x))
        x = self.output_layer(x)
        return x


model = NeuralNet(13)

# 2) Create loss and optimizer

learning_rate = 0.01
epochs = 10000

loss_fn = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr = learning_rate)

# 3) Training Loop
#        Forward (compute predictions and loss)
#        Backward (compute gradients)
#        Update weights



for epoch in range (epochs):
    # predict y (forward pass)
    y_pred = model(X_train)

    # Compute loss
    loss = loss_fn(y_train, y_pred)

    # Clear gradients for next loop
    optimizer.zero_grad()

    # Calculate gradients (backward pass)
    loss.backward()

    # Update weights
    optimizer.step()

    # Print training progress every x number of epochs
    if ((epoch+1) % 100 == 0):
        print(f'Epoch: {epoch+1} loss: {loss.item()}')




Epoch: 10 loss: 43095924076.973465
Epoch: 20 loss: 33209436758.258667
Epoch: 30 loss: 26609139011.62903
Epoch: 40 loss: 22202727620.021893
Epoch: 50 loss: 19260972248.152504
Epoch: 60 loss: 17297032910.93914
Epoch: 70 loss: 15985891353.372066


KeyboardInterrupt: 