In [12]:
# As you can see we now require a lot of different modules
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from sklearn.preprocessing import StandardScaler
from sklearn.compose import ColumnTransformer

from sklearn.model_selection import train_test_split
import sklearn.metrics as metrics
from torch.utils.data import TensorDataset, DataLoader 

import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import time, copy

  from .autonotebook import tqdm as notebook_tqdm
  Referenced from: <FF2AA480-7137-3398-BD60-7957E5890536> /Users/fadominguez/opt/anaconda3/envs/NeuralNetworks/lib/python3.9/site-packages/torchvision/image.so
  warn(


In [21]:
# Import the dataset into pandas; the file is read with header=None, so the
# column names are supplied explicitly.
column_names = ["Sex", "Length", "Diameter", "Height", "Whole weight",
                "Shucked weight", "Viscera weight", "Shell weights", "Rings"]
df_abalone = pd.read_csv('abalone.data', header=None, names=column_names)

# Ring count at or above which an abalone is labelled Old
OLD_RINGS_THRESHOLD = 10

# By default, abalone is Young (0)
df_abalone['Old'] = 0

# Assign rings >= OLD_RINGS_THRESHOLD as Old (1); note the comparison is inclusive
df_abalone.loc[df_abalone['Rings'] >= OLD_RINGS_THRESHOLD, 'Old'] = 1

# Class labels indexed by the value of the 'Old' column: 0 -> Young (negative), 1 -> Old (positive)
class_labels = ['Young', 'Old']

# Select only the first four numerical features: Length up to and including Whole weight
num_features_columns = column_names[1:5]

# Name of the target column
label_column = 'Old'
# Misspelled name kept as an alias because existing cells refer to it
label_cokumn = label_column

df_abalone.head()

Unnamed: 0,Sex,Length,Diameter,Height,Whole weight,Shucked weight,Viscera weight,Shell weights,Rings,Old
0,M,0.455,0.365,0.095,0.514,0.2245,0.101,0.15,15,1
1,M,0.35,0.265,0.09,0.2255,0.0995,0.0485,0.07,7,0
2,F,0.53,0.42,0.135,0.677,0.2565,0.1415,0.21,9,0
3,M,0.44,0.365,0.125,0.516,0.2155,0.114,0.155,10,1
4,I,0.33,0.255,0.08,0.205,0.0895,0.0395,0.055,7,0


In [34]:
# Split the dataset into three subsets: train, validation and test.
# The validation subset is carved out of the initial train portion; train remains the largest subset.
# Additionally, the features are standardized (StandardScaler).
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler

# ct = ColumnTransformer
# The ColumnTransformer standardizes the feature data. It is created outside the function and passed in as an argument.
def train_test_val_split(df, feature_columns, label_column, ct, *,
                         train_val_frac=0.8, train_frac=0.75, random_state=42):
    """Split ``df`` into train / validation / test ``TensorDataset`` objects.

    The rows are first sampled into a combined train+validation portion
    (``train_val_frac`` of ``df``); the remaining rows form the test subset.
    The combined portion is then sampled again: ``train_frac`` of it becomes
    the train subset and the rest becomes the validation subset.  With the
    defaults this is an 80/20 split followed by a 75/25 split.

    Args:
        df: DataFrame holding both the feature columns and the label column.
        feature_columns: list of column names used as model inputs.
        label_column: name of the column holding the labels.
        ct: transformer object exposing ``fit`` and ``transform``.  It is
            fitted in place on the train features only and then applied to
            all three subsets, so validation/test statistics never influence
            the fit.
        train_val_frac: fraction of ``df`` sampled for train+validation.
        train_frac: fraction of the train+validation portion kept for train.
        random_state: seed passed to both ``DataFrame.sample`` calls.

    Returns:
        Tuple ``(train_dataset, val_dataset, test_dataset)``.  Features are
        converted with ``.float()``; labels are passed to the tensor
        untransformed.
    """
    # Sample the combined train+validation portion; whatever is left is test data
    train_val_data = df.sample(frac=train_val_frac, random_state=random_state)
    test_data = df.drop(train_val_data.index)

    # Sample the train subset out of the combined portion; the remainder is validation data
    train_data = train_val_data.sample(frac=train_frac, random_state=random_state)
    val_data = train_val_data.drop(train_data.index)

    # Fit the transformer on the train features only
    ct.fit(train_data[feature_columns])

    def _to_dataset(subset):
        # Transform the features and pair them with the untransformed labels
        features = torch.from_numpy(ct.transform(subset[feature_columns])).float()
        labels = torch.from_numpy(subset[label_column].values)
        return TensorDataset(features, labels)

    train_dataset = _to_dataset(train_data)
    val_dataset = _to_dataset(val_data)
    test_dataset = _to_dataset(test_data)

    return train_dataset, val_dataset, test_dataset
    

In [35]:
# Build the column transformer: a StandardScaler applied to the numerical feature columns
ct = ColumnTransformer(
    transformers=[('numerical_features', StandardScaler(), num_features_columns)],
    remainder='passthrough',
)

# Fit the transformer inside the split helper and collect the three tensor datasets
train_dataset, val_dataset, test_dataset = train_test_val_split(
    df=df_abalone,
    feature_columns=num_features_columns,
    label_column=label_cokumn,
    ct=ct,
)

In [47]:
# Select the compute device: use the MPS backend when it is available,
# otherwise CUDA, otherwise the CPU, so the notebook also runs on machines
# without MPS support.
_mps_backend = getattr(torch.backends, 'mps', None)
if _mps_backend is not None and _mps_backend.is_available():
    device = torch.device('mps')
elif torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Network dimensions: one input unit per selected feature column, two output classes
input_size = len(num_features_columns)
num_classes = 2

# Width of each hidden layer
hidden_size1, hidden_size2, hidden_size3 = 64, 64, 64

# External training parameters (not part of the model definition)
num_epochs = 50
batch_size = 10
learning_rate = 0.001

In [46]:
# A simple classifier model with two hidden layers
class SimpleClassifier2Layer(nn.Module):
    """Feed-forward classifier: two ReLU-activated hidden layers and a linear output layer.

    Args:
        input_size: number of input features.
        hidden_size1: width of the first hidden layer.
        hidden_size2: width of the second hidden layer.
        num_classes: number of output units.
    """

    def __init__(self, input_size, hidden_size1, hidden_size2, num_classes):
        super().__init__()
        # Consecutive layer widths; each adjacent pair becomes one Linear layer
        widths = [input_size, hidden_size1, hidden_size2, num_classes]
        stack = []
        for position, (fan_in, fan_out) in enumerate(zip(widths[:-1], widths[1:])):
            if position:
                # A ReLU sits between every pair of Linear layers (none after the last)
                stack.append(nn.ReLU())
            stack.append(nn.Linear(fan_in, fan_out))
        self.layers = nn.Sequential(*stack)

    def forward(self, x):
        """Pass ``x`` through the layer stack and return the final Linear layer's output."""
        scores = self.layers(x)
        return scores
    
# A simple classifier model with three hidden layers
class SimpleClassifier3Layer(nn.Module):
    """Feed-forward classifier: three ReLU-activated hidden layers and a linear output layer.

    Args:
        input_size: number of input features.
        hidden_size1: width of the first hidden layer.
        hidden_size2: width of the second hidden layer.
        hidden_size3: width of the third hidden layer.
        num_classes: number of output units.
    """

    def __init__(self, input_size, hidden_size1, hidden_size2, hidden_size3, num_classes):
        super().__init__()
        # Linear layers are created in input-to-output order
        first_hidden = nn.Linear(input_size, hidden_size1)
        second_hidden = nn.Linear(hidden_size1, hidden_size2)
        third_hidden = nn.Linear(hidden_size2, hidden_size3)
        output_layer = nn.Linear(hidden_size3, num_classes)
        # Each hidden layer is followed by a ReLU; the output layer is not
        self.layers = nn.Sequential(
            first_hidden, nn.ReLU(),
            second_hidden, nn.ReLU(),
            third_hidden, nn.ReLU(),
            output_layer,
        )

    def forward(self, x):
        """Pass ``x`` through the layer stack and return the final Linear layer's output."""
        scores = self.layers(x)
        return scores

In [48]:
# Instantiate the two-hidden-layer classifier and print its layer structure
two_layer_model = SimpleClassifier2Layer(
    input_size=input_size,
    hidden_size1=hidden_size1,
    hidden_size2=hidden_size2,
    num_classes=num_classes,
)
print(two_layer_model)

SimpleClassifier2Layer(
  (layers): Sequential(
    (0): Linear(in_features=4, out_features=64, bias=True)
    (1): ReLU()
    (2): Linear(in_features=64, out_features=64, bias=True)
    (3): ReLU()
    (4): Linear(in_features=64, out_features=2, bias=True)
  )
)


In [49]:
# Print the built-in help text for nn.Module, the base class the classifiers above subclass.
# NOTE(review): this looks like leftover exploration and produces a very long output;
# consider removing the cell or clearing its output before sharing the notebook.
help(nn.Module)

Help on class Module in module torch.nn.modules.module:

class Module(builtins.object)
 |  Module() -> None
 |  
 |  Base class for all neural network modules.
 |  
 |  Your models should also subclass this class.
 |  
 |  Modules can also contain other Modules, allowing to nest them in
 |  a tree structure. You can assign the submodules as regular attributes::
 |  
 |      import torch.nn as nn
 |      import torch.nn.functional as F
 |  
 |      class Model(nn.Module):
 |          def __init__(self):
 |              super().__init__()
 |              self.conv1 = nn.Conv2d(1, 20, 5)
 |              self.conv2 = nn.Conv2d(20, 20, 5)
 |  
 |          def forward(self, x):
 |              x = F.relu(self.conv1(x))
 |              return F.relu(self.conv2(x))
 |  
 |  Submodules assigned in this way will be registered, and will have their
 |  parameters converted too when you call :meth:`to`, etc.
 |  
 |  .. note::
 |      As per the example above, an ``__init__()`` call to the parent c