# 0. Import Libraries

In [2]:
pip install -q torch

Note: you may need to restart the kernel to use updated packages.


In [24]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import torch
from torch import nn
from torch import optim

# 1. Load Dataset

In [25]:
possum = pd.read_csv("../datasets/possum.csv")
possum.head()

Unnamed: 0,case,site,Pop,sex,age,hdlngth,skullw,totlngth,taill,footlgth,earconch,eye,chest,belly
0,1,1,Vic,m,8.0,94.1,60.4,89.0,36.0,74.5,54.5,15.2,28.0,36.0
1,2,1,Vic,f,6.0,92.5,57.6,91.5,36.5,72.5,51.2,16.0,28.5,33.0
2,3,1,Vic,f,6.0,94.0,60.0,95.5,39.0,75.4,51.9,15.5,30.0,34.0
3,4,1,Vic,f,6.0,93.2,57.1,92.0,38.0,76.1,52.2,15.2,28.0,34.0
4,5,1,Vic,f,2.0,91.5,56.3,85.5,36.0,71.0,53.2,15.1,28.5,33.0


## 1.1 Inspect the dataset

In [26]:
possum.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 104 entries, 0 to 103
Data columns (total 14 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   case      104 non-null    int64  
 1   site      104 non-null    int64  
 2   Pop       104 non-null    object 
 3   sex       104 non-null    object 
 4   age       102 non-null    float64
 5   hdlngth   104 non-null    float64
 6   skullw    104 non-null    float64
 7   totlngth  104 non-null    float64
 8   taill     104 non-null    float64
 9   footlgth  103 non-null    float64
 10  earconch  104 non-null    float64
 11  eye       104 non-null    float64
 12  chest     104 non-null    float64
 13  belly     104 non-null    float64
dtypes: float64(10), int64(2), object(2)
memory usage: 11.5+ KB


## 1.2 Data Cleaning and Preparation

### We will start by deleting the columns we do not need for training. The columns are: 'case', 'site', 'pop', and 'sex'.

In [27]:
drop_columns = ['case', 'site', 'Pop', 'sex']
possum = possum.drop(columns=drop_columns, axis=1)
possum.head()

Unnamed: 0,age,hdlngth,skullw,totlngth,taill,footlgth,earconch,eye,chest,belly
0,8.0,94.1,60.4,89.0,36.0,74.5,54.5,15.2,28.0,36.0
1,6.0,92.5,57.6,91.5,36.5,72.5,51.2,16.0,28.5,33.0
2,6.0,94.0,60.0,95.5,39.0,75.4,51.9,15.5,30.0,34.0
3,6.0,93.2,57.1,92.0,38.0,76.1,52.2,15.2,28.0,34.0
4,2.0,91.5,56.3,85.5,36.0,71.0,53.2,15.1,28.5,33.0


### There are also some columns that have missing values, We will delete them for now.

In [28]:
possum.dropna(subset=['age'], inplace=True)
possum.dropna(subset=['footlgth'], inplace=True)
possum.info()

<class 'pandas.core.frame.DataFrame'>
Index: 101 entries, 0 to 103
Data columns (total 10 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   age       101 non-null    float64
 1   hdlngth   101 non-null    float64
 2   skullw    101 non-null    float64
 3   totlngth  101 non-null    float64
 4   taill     101 non-null    float64
 5   footlgth  101 non-null    float64
 6   earconch  101 non-null    float64
 7   eye       101 non-null    float64
 8   chest     101 non-null    float64
 9   belly     101 non-null    float64
dtypes: float64(10)
memory usage: 8.7 KB


### Convert all datatypes of columns to float.

In [29]:
for column in possum.columns:
    possum[column] = possum[column].astype(float)

## 1.3 Train-Test Split

### We want to predict age from the rest of the features. X are the input features and y is the age(target).

In [30]:
features = possum.drop(['age'], axis=1).columns # No change in possum dataframe will happen.
X = possum[features]
y = possum['age']

### We use sklearn to split X and y into training and testing datasets. The training set should use 80% of the data.

In [31]:
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    train_size=0.80,
                                                    test_size=0.20,
                                                    random_state=2) # set a random seed

print("Training size:", "Rows:", X_train.shape[0], ", Columns:", X_train.shape[1])
print("Training size:", "Rows:", X_test.shape[0], ", Columns:", X_test.shape[1])

Training size: Rows: 80 , Columns: 9
Training size: Rows: 21 , Columns: 9


## 1.4 Create a Linear Regression Baseline

### Later, if our model performs worse than Baseline, then it is not a good sign! After all, if a basic linear regression works just as well, there's no need for the neural network!

In [32]:
linear_model = LinearRegression()
linear_model.fit(X_train, y_train)

linear_model_test_predictions = linear_model.predict(X_test)
linear_test_mse = mean_squared_error(y_test, linear_model_test_predictions)

print("Linear Regression - Test Set MSE:", linear_test_mse)
print("Linear Regression - Test Set MAE:", linear_test_mse ** (1 / 2))

Linear Regression - Test Set MSE: 4.488469496595208
Linear Regression - Test Set MAE: 2.1186008346536656


### The mean squared error is around 4.49. This is squared error. If we take the square root, we have about 2.12. One way of interpreting this is to say that the linear regression, on average, is off by 2.12 Years.

# 2. Train a Neural Network

## 2.1 Convert dataset into Tensors

In [33]:
# Convert training set
X_train_tensor = torch.tensor(X_train.values, dtype=torch.float)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float).view(-1,1)

# Convert testing set
X_test_tensor = torch.tensor(X_test.values, dtype=torch.float)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float).view(-1,1)

## 2.2 Create a Neural Network

In [34]:
model = nn.Sequential(
    nn.Linear(9, 18),
    nn.ReLU(),
    nn.Linear(18, 9),
    nn.ReLU(),
    nn.Linear(9, 1)
)

## 2.3 Create a loss function

In [35]:
loss = nn.MSELoss()

## 2.4 Create an Optimizer

In [36]:
learning_rate = 0.001
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

## 2.4 Create a training Loop

In [45]:
num_epochs = 5000 # Number of training iterations
torch.manual_seed(42) # Set Random seed for reproducibility

for epoch in range(num_epochs):
    outputs = model(X_train_tensor) # Forward pass or Feed Forward
    mse = loss(outputs, y_train_tensor) # Calculate the loss 
    mse.backward() # Backward pass
    optimizer.step() # Update the weights and biases
    optimizer.zero_grad() # Reset the gradients to zero

    # Keeping track of the loss during training
    if (epoch + 1) % 1000 == 0:
        print(f'Epoch [{epoch + 1}/{num_epochs}], MSE Loss: {mse.item()}')

Epoch [1000/5000], MSE Loss: 3.6773438453674316
Epoch [2000/5000], MSE Loss: 3.6773438453674316
Epoch [3000/5000], MSE Loss: 3.6773440837860107
Epoch [4000/5000], MSE Loss: 3.6773440837860107
Epoch [5000/5000], MSE Loss: 3.6773438453674316


## 2.5 Evaluate the Neural Network on test dataset

In [47]:
model.eval() # set the model to evaluation mode

with torch.no_grad(): # Disable gradient calculations
    predictions = model(X_test_tensor) # Generate predictions
    nn_test_loss = loss(predictions, y_test_tensor) # Calculate testing set MSE loss
    
print('Neural Network - Test Set MSE:', nn_test_loss.item()) # print NN testing set MSE
print('Neural Network - Test Set MAE:', nn_test_loss.item() ** (1 / 2)) # print NN testing set MSE

# We calculated these before:
print("Linear Regression - Test Set MSE:", linear_test_mse)
print("Linear Regression - Test Set MAE:", linear_test_mse ** (1 / 2))

print("% Change in MAE:", nn_test_loss.item() ** (1 / 2) / linear_test_mse ** (1 / 2) - 1)

Neural Network - Test Set MSE: 3.4577858448028564
Neural Network - Test Set MAE: 1.8595122599227079
Linear Regression - Test Set MSE: 4.488469496595208
Linear Regression - Test Set MAE: 2.1186008346536656
% Change in MAE: -0.12229230277505854
