Working with Neural Network Models

© Hans Nieminen, Satakunta University of Applied Sciences

# Exercise 8.1

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from sklearn.datasets import fetch_california_housing
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import numpy as np

In [None]:
# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
device

'cuda'

In [None]:
# Fetch the California Housing data, then separate the features from the target
california = fetch_california_housing()
X = california.data
y = california.target

In [None]:
# Standardize the feature matrix
# NOTE(review): the scaler is fitted on the full dataset, i.e. before the
# train/test split below, so test-set statistics influence the scaling.
# Consider fitting on the training split only — confirm against the exercise.
scaler = StandardScaler()
scaler.fit(X)
X = scaler.transform(X)

In [None]:
# Hold out 20% of the samples for testing; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=827, test_size=0.2
)

In [None]:
# Number of samples (rows) in the training split
train_size = X_train.shape[0]
train_size

16512

In [None]:
# Convert the splits to float32 PyTorch tensors;
# the targets are additionally reshaped into column vectors of shape (N, 1)
X_train, X_test = (
    torch.tensor(features, dtype=torch.float32) for features in (X_train, X_test)
)
y_train, y_test = (
    torch.tensor(target, dtype=torch.float32).view(-1, 1) for target in (y_train, y_test)
)

In [None]:
# Preview the first three rows of the training features
X_train[:3]

tensor([[-6.7543e-01,  1.8562e+00, -8.6558e-01, -1.6128e-01, -7.0333e-01,
         -5.0463e-02,  1.0151e+00, -1.4177e+00],
        [ 8.6717e-01, -2.8919e-01,  5.9201e-01, -2.0400e-01,  2.5390e-01,
          2.9020e-02, -6.7974e-01,  3.1430e-01],
        [ 1.8051e-01, -2.0973e-01, -2.9593e-01,  7.4929e-02,  1.3451e-03,
         -9.8246e-02,  7.9505e-01, -1.1931e+00]])

In [None]:
# Preview the first three training targets (one value per row)
y_train[:3]

tensor([[5.0000],
        [2.4210],
        [2.8830]])

In [None]:
class CustomDataset(Dataset):
    """Map-style dataset pairing feature rows with their targets.

    Both tensors are moved to ``device`` once at construction time, so
    indexing returns items that already live on that device.

    Args:
        X: Feature tensor, indexed along its first dimension.
        y: Target tensor with the same first-dimension length as ``X``.
        device: Device passed to ``Tensor.to`` for both tensors (default ``'cpu'``).

    Raises:
        ValueError: If ``X`` and ``y`` do not contain the same number of samples.
    """

    def __init__(self, X, y, device='cpu'):
        # The dataset length is taken from y, so without this check a longer X
        # would be silently truncated and a shorter X would only fail later
        # with an IndexError while iterating.
        if len(X) != len(y):
            raise ValueError(
                f"X and y must have the same number of samples, "
                f"got {len(X)} and {len(y)}"
            )
        self.X = X.to(device)
        self.y = y.to(device)

    def __len__(self):
        """Return the number of samples."""
        return len(self.y)

    def __getitem__(self, idx):
        """Return the ``(features, target)`` pair stored at position ``idx``."""
        return self.X[idx], self.y[idx]

In [None]:
# Wrap the training tensors in a dataset whose tensors live on the selected device
train_dataset = CustomDataset(X=X_train, y=y_train, device=device)

In [None]:
# Serve the training data in mini-batches of 64 samples.
# shuffle=False keeps the samples in their original order every epoch.
train_loader = DataLoader(dataset=train_dataset, shuffle=False, batch_size=64)

In [None]:
# Define the neural network
class NeuralNetwork(nn.Module):
    """Fully connected network: 8 inputs -> 16 -> 8 -> 1 output.

    Both hidden layers are followed by ReLU and use Kaiming-normal weights with
    zeroed biases; the output layer keeps the default ``nn.Linear`` initialization.
    """

    @staticmethod
    def _hidden_linear(in_features, out_features):
        """Build a linear layer with Kaiming-normal weights and a zero bias."""
        linear = nn.Linear(in_features, out_features)
        nn.init.kaiming_normal_(linear.weight, nonlinearity='relu')
        nn.init.zeros_(linear.bias)
        return linear

    def __init__(self):
        super().__init__()
        # Each layer is created and initialized before the next one is built,
        # so the random number stream is consumed in layer order.
        self.layer1 = self._hidden_linear(8, 16)
        self.layer1_act = nn.ReLU()
        self.layer2 = self._hidden_linear(16, 8)
        self.layer2_act = nn.ReLU()
        self.layer3 = nn.Linear(8, 1)

    def forward(self, x):
        """Apply the two ReLU hidden layers and the linear output layer to ``x``."""
        hidden1 = self.layer1_act(self.layer1(x))
        hidden2 = self.layer2_act(self.layer2(hidden1))
        return self.layer3(hidden2)

In [None]:
# Create the model
torch.manual_seed(41)  # fixed seed -> reproducible initial weights
model = NeuralNetwork()
model = model.to(device)

In [None]:
# Print each parameter's name, then its tensor, then an empty line
for name, parameter in model.named_parameters():
    print(name, parameter, "", sep="\n")

layer1.weight
Parameter containing:
tensor([[ 0.0508, -1.3878,  0.1322,  0.0497, -0.0540,  0.9404, -0.0139,  0.4344],
        [ 0.8615,  0.8887, -0.0219,  0.1717, -0.8935, -1.0431, -0.2809,  0.6737],
        [-0.4215,  0.1566,  0.1858, -0.1949, -0.6550, -0.2162, -0.3111,  0.7438],
        [ 0.5936, -0.0174,  0.3227,  0.2821,  0.6985, -0.6584,  1.3765,  0.5606],
        [ 0.3991,  0.0611,  0.8148,  0.6636,  0.4213, -0.0461,  0.1776, -0.2930],
        [-0.8136, -1.1033, -0.3882,  0.5658,  0.1748, -0.4435, -0.0138,  0.5217],
        [ 0.4322,  0.2795, -0.4324,  0.4571, -0.1680,  0.3608, -0.8031,  0.1989],
        [ 0.5094,  0.6617,  0.7274, -0.4670, -0.0923, -0.3678, -0.2564, -0.5330],
        [ 0.5693, -0.9604,  0.5437,  0.1676, -0.2021,  0.1903, -0.0110, -0.4085],
        [ 0.2700, -0.0329, -1.0266,  0.1208, -0.1724,  0.3901, -0.5774,  0.2017],
        [ 0.7566, -0.8163, -0.0906, -0.7853,  0.1471, -0.2997,  0.6958, -0.1618],
        [ 0.6385, -0.2078,  0.2607,  0.8029,  0.0780, -0.4595,

Question 1: What is the maximum value for the weights in the second hidden layer? Give the answer rounded to two decimals.

In [None]:
# Largest weight of the second hidden layer, read back on the CPU as a NumPy scalar
l2_max_weight = model.layer2.weight.detach().cpu().numpy().max()

In [None]:
# Show the unrounded maximum
l2_max_weight

1.1930813

In [None]:
# Report the maximum rounded to two decimals (the answer to Question 1)
print(f'The second hidden layer has the weight maximum of {l2_max_weight:.2f}')

The second hidden layer has the weight maximum of 1.19


Question 2: What is the Loss value after the 10th epoch? Give the answer rounded to three decimals.

In [None]:
# Loss: mean squared error. Optimizer: SGD with momentum.
criterion = nn.MSELoss()
optimizer = optim.SGD(params=model.parameters(), momentum=0.9, lr=0.015)

In [None]:
# Train the model
num_epochs = 10
model.train()
for epoch in range(num_epochs):
    running_loss = 0.0  # summed per-sample loss for this epoch
    for batch_X, batch_y in train_loader:
        # Clear the gradients left over from the previous step
        optimizer.zero_grad()

        # Forward pass: predictions and their mean loss over the batch
        outputs = model(batch_X)
        loss = criterion(outputs, batch_y)

        # Backpropagate, then update the parameters
        loss.backward()
        optimizer.step()

        # loss.item() is the batch mean, so weight it by the batch size to
        # accumulate a total over individual samples
        running_loss += batch_X.size(0) * loss.item()

    # Mean loss per training sample over the whole epoch
    epoch_loss = running_loss / train_size
    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}')

Epoch 1/10, Loss: 2.2895
Epoch 2/10, Loss: 0.4662
Epoch 3/10, Loss: 7.2828
Epoch 4/10, Loss: 1.3288
Epoch 5/10, Loss: 1.3288
Epoch 6/10, Loss: 1.3299
Epoch 7/10, Loss: 1.3292
Epoch 8/10, Loss: 1.3292
Epoch 9/10, Loss: 1.3292
Epoch 10/10, Loss: 1.3292


Question 3: What is the value of the test loss? Give the answer rounded to three decimals.

In [None]:
# Evaluate the model on the test set
model.eval()
with torch.inference_mode():  # evaluation needs no autograd tracking
    predictions = model(X_test.to(device))
    test_loss = criterion(predictions, y_test.to(device))
print(f'Test Loss: {test_loss.item():.3f}')

Test Loss: 1.351


Solution without explicit bias initialization: the biases keep PyTorch's default `nn.Linear` initialization instead of being set to zero.

In [None]:
# Define the neural network (no explicit initialization of biases)
class NeuralNetworkV2(nn.Module):
    """Fully connected network: 8 inputs -> 16 -> 8 -> 1 output.

    Both hidden layers are followed by ReLU and use Kaiming-normal weights.
    No bias is re-initialized here, so every bias keeps the value that
    ``nn.Linear`` assigned at construction; the same holds for the whole
    output layer.
    """

    @staticmethod
    def _hidden_linear(in_features, out_features):
        """Build a linear layer whose weights are Kaiming-normal initialized."""
        linear = nn.Linear(in_features, out_features)
        nn.init.kaiming_normal_(linear.weight, nonlinearity='relu')
        return linear

    def __init__(self):
        super().__init__()
        # Each layer is created and initialized before the next one is built,
        # so the random number stream is consumed in layer order.
        self.layer1 = self._hidden_linear(8, 16)
        self.layer1_act = nn.ReLU()
        self.layer2 = self._hidden_linear(16, 8)
        self.layer2_act = nn.ReLU()
        self.layer3 = nn.Linear(8, 1)

    def forward(self, x):
        """Apply the two ReLU hidden layers and the linear output layer to ``x``."""
        hidden1 = self.layer1_act(self.layer1(x))
        hidden2 = self.layer2_act(self.layer2(hidden1))
        return self.layer3(hidden2)

In [None]:
# Create the second model, using the same seed as the first one
torch.manual_seed(41)
model2 = NeuralNetworkV2()
model2 = model2.to(device)

In [None]:
# Loss: mean squared error. Optimizer: SGD with momentum, bound to model2's parameters.
criterion2 = nn.MSELoss()
optimizer2 = optim.SGD(params=model2.parameters(), momentum=0.9, lr=0.015)

In [None]:
# Train the second model
num_epochs = 10
# Nothing inside the loop leaves training mode, so setting it once is enough
model2.train()
for epoch in range(num_epochs):
    running_loss = 0.0  # summed per-sample loss for this epoch
    for batch_X, batch_y in train_loader:
        # Clear the gradients left over from the previous step
        optimizer2.zero_grad()

        # Forward pass: predictions and their mean loss over the batch
        outputs = model2(batch_X)
        loss = criterion2(outputs, batch_y)

        # Backpropagate, then update the parameters
        loss.backward()
        optimizer2.step()

        # loss.item() is the batch mean, so weight it by the batch size to
        # accumulate a total over individual samples
        running_loss += batch_X.size(0) * loss.item()

    # Mean loss per training sample over the whole epoch
    epoch_loss = running_loss / train_size
    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}')

Epoch 1/10, Loss: 0.9138
Epoch 2/10, Loss: 0.4199
Epoch 3/10, Loss: 0.3932
Epoch 4/10, Loss: 0.4008
Epoch 5/10, Loss: 0.3961
Epoch 6/10, Loss: 0.3694
Epoch 7/10, Loss: 0.3619
Epoch 8/10, Loss: 0.3575
Epoch 9/10, Loss: 0.3529
Epoch 10/10, Loss: 0.3492


In [None]:
# Evaluate the second model on the test set
model2.eval()
with torch.inference_mode():  # evaluation needs no autograd tracking
    predictions = model2(X_test.to(device))
    test_loss = criterion2(predictions, y_test.to(device))
print(f'Test Loss: {test_loss.item():.3f}')

Test Loss: 0.390
