In [232]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [233]:
df = pd.read_csv('iris_data.csv', delimiter=',')

In [234]:
# Remove ID column
data = df.drop(columns=['Id'])

In [235]:
"""
Input Columns: SepalLengthCm, SepalWidthCm, PetalLengthCm, PetalWidthCm
Target Columns: SetosaScore, VersicolorScore, VirginicaScore
"""
x = data.drop(columns=['SetosaScore', 'VersicolorScore', 'VirginicaScore']).values
y = df.loc[0:, ['SetosaScore', 'VersicolorScore', 'VirginicaScore']].values

In [236]:
# pip install torch scikit-learn
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch.utils.data import DataLoader, TensorDataset

In [237]:
"""

Additional guidelines are as follows:

Ensure the training loss and the test loss are less than 0.27. 
Use only Linear layers in the neural network.
Do not use any activation functions like ReLU or others
Do not use any regularization
Use Adam optimizer
Use MAE (Mean Absolute Loss)
Split the dataset to train-test with a ratio of 80% for training and 20% for testing.
"""

'\n\nAdditional guidelines are as follows:\n\nEnsure the training loss and the test loss are less than 0.27. \nUse only Linear layers in the neural network.\nDo not use any activation functions like ReLU or others\nDo not use any regularization\nUse Adam optimizer\nUse MAE (Mean Absolute Loss)\nSplit the dataset to train-test with a ratio of 80% for training and 20% for testing.\n'

In [238]:
# Split the data into training and test sets (80/20 split)]
x_train, x_test, y_train, y_test = \
train_test_split(x, y, test_size=0.2, random_state=42)
#  Specifies the proportion of the dataset that 
# should be allocated to the test set. 
# Here, 0.2 means 20% of the data will be used
# for testing, and the remaining 80% will be 
# used for training.

In [239]:
# Standarize the data
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

# StandardScaler: This is a class from 
# Scikit-learn used to standardize 
# features by removing the mean and 
# scaling to unit variance.
# Standardization: This process involves
# rescaling the features so that they have 
# a mean of 0 and a standard deviation of 1.
# This is important for many machine learning
# algorithms that perform better when 
# features are on a similar scale.

# This process is crucial for ensuring 
# that the model performs consistently,
# as many machine learning algorithms 
# assume or perform better when the 
# input features are on a similar scale.

In [240]:
# Convert the data to PyTorch sensors
x_train_tensor = torch.tensor(
    x_train, dtype=torch.float32
)
# torch.tensor(X_train): Converts 
# the X_train data (which is likely 
# a NumPy array or a Pandas 
# DataFrame) into a PyTorch tensor.
# dtype=torch.float32: Specifies that 
# the data type of the tensor should 
# be float32, which is a common 
# choice for numerical data in 
# deep learning models.
# Result: X_train_tensor is now a 
# PyTorch tensor containing the 
# standardized training input data, 
# ready for use in a neural network.

y_train_tensor = torch.tensor(
    y_train, dtype=torch.float32)

x_test_tensor = torch.tensor(
    x_test, dtype=torch.float32
)

y_test_tensor = torch.tensor(
    y_test, dtype=torch.float32)

In [241]:
# Create DataLoader for batching
train_dataset = TensorDataset(
    x_train_tensor, y_train_tensor
)
train_loader = DataLoader(
    train_dataset, batch_size=15, shuffle=True
)

In [242]:
# Define the neural network model
#  defines a new class NeuralNetwork 
# that inherits from nn.Module. 
# By inheriting from nn.Module, 
# the NeuralNetwork class gains access 
# to all the functionalities provided 
# by PyTorch for building and managing neural networks.
class NeuralNetwork(nn.Module):
    # Start by calling the constructor of 
    # the parent class (nn.Module), ensuring 
    # that the class is properly initialized.
    def __init__(self) -> None: # Constructor
        super(NeuralNetwork, self).__init__() 
        # nn.Linear creates a fully connected (linear) layer.
        self.fc1 = nn.Linear(x_train.shape[1], 128)
        self.fc2 = nn.Linear(128, 256)
        self.fc3 = nn.Linear(256, 512)
        self.fc4 = nn.Linear(512, 3)

    # The forward method defines the forward pass of 
    # the network. It specifies how the input tensor
    # x should flow through the layers of the network.
    def forward(self, x):
        x = self.fc1(x) # No activation after the first layer
        x = self.fc2(x) # No activation afer the second layer
        x = self.fc3(x) # No activation afer the second layer
        x = self.fc4(x) # Output layer

        return x

In [243]:
# Initialize the model, loss function, and optimizer
model = NeuralNetwork()
criterion = nn.L1Loss() # for mean absolute loss

optimizer = optim.Adam(
    model.parameters(), lr= 0.001 #learning rate of 0.01
)

In [244]:
# Training loop
epochs = 300
for epoch in range(epochs):
    # Set the model to training mode. This is 
    # important because certain layers, such 
    # as dropout or batch normalization, behave
    # differently during training than during 
    # evaluation. model.train() ensures that 
    # these layers are in training mode.
    model.train()
    total_loss = 0 # Initialize total loss for the epoch

    for batch_x, batch_y in train_loader:
        # Resets the gradients of all model parameters
        # to zero before starting the backpropagation
        # process for the current batch. This is 
        # important because gradients are accumulated
        # by default in PyTorch, so they need to be
        # cleared out before calculating the gradients
        # for the current batch
        optimizer.zero_grad()
        predictions = model(batch_x)
        # Compute the loss between the model's 
        # predictions and the actual target values 
        # (batch_y).
        loss = criterion(predictions ,batch_y)
        # Compute the gradients of the loss with 
        # respect to each model parameter using 
        # backpropagation. These gradients are 
        # used to update the model parameters 
        # in the next step.
        loss.backward()

        optimizer.step()

        total_loss += loss.item() # Accumulate the loss for each batch

    average_loss = total_loss / len(train_loader) # Compute the average loss for the epoch    
    print(f'Epoch: {epoch+1}, Average Loss: {average_loss:.4f}')



Epoch: 1, Average Loss: 0.3954
Epoch: 2, Average Loss: 0.3397
Epoch: 3, Average Loss: 0.2996
Epoch: 4, Average Loss: 0.2821
Epoch: 5, Average Loss: 0.2900
Epoch: 6, Average Loss: 0.2753
Epoch: 7, Average Loss: 0.2826
Epoch: 8, Average Loss: 0.2900
Epoch: 9, Average Loss: 0.2839
Epoch: 10, Average Loss: 0.2738
Epoch: 11, Average Loss: 0.2701
Epoch: 12, Average Loss: 0.2764
Epoch: 13, Average Loss: 0.2742
Epoch: 14, Average Loss: 0.2695
Epoch: 15, Average Loss: 0.2758
Epoch: 16, Average Loss: 0.2746
Epoch: 17, Average Loss: 0.2747
Epoch: 18, Average Loss: 0.2738
Epoch: 19, Average Loss: 0.2759
Epoch: 20, Average Loss: 0.2744
Epoch: 21, Average Loss: 0.2691
Epoch: 22, Average Loss: 0.2739
Epoch: 23, Average Loss: 0.2769
Epoch: 24, Average Loss: 0.2713
Epoch: 25, Average Loss: 0.2707
Epoch: 26, Average Loss: 0.2708
Epoch: 27, Average Loss: 0.2748
Epoch: 28, Average Loss: 0.2850
Epoch: 29, Average Loss: 0.2796
Epoch: 30, Average Loss: 0.2697
Epoch: 31, Average Loss: 0.2739
Epoch: 32, Averag

In [245]:
# Evaluate the model on the test set
# Switch to Evaluation Mode. In this mode, 
# certain layers like dropout and batch normalization, 
# which behave differently during training, will 
# operate in evaluation mode, meaning they won't 
# apply dropout or update running statistics.
# Why Use eval()?: This ensures that the 
# model's behavior is consistent during 
# testing and that the evaluation reflects
# the true performance on unseen data.
model.eval()

# Disabling Gradient Calculation.
# The torch.no_grad() context manager 
# temporarily disables gradient computation. 
# Since gradients are only necessary during 
# training (when you need to update the 
# model's parameters), disabling them 
# during evaluation saves memory and 
# computational resources because
# pytorch will not track the operations
# for that it might need later for gradient
# computation.
with torch.no_grad():
    test_predictions = model(x_test_tensor)
    test_loss = criterion(
        test_predictions, y_test_tensor 
    )
    print(f'Test loss: {test_loss.item():.4f}')

Test loss: 0.2024


In [246]:
[test_predictions, y_test_tensor]

[tensor([[ 9.2261e-01, -2.0012e-01,  1.3715e-01],
         [-5.7178e-02,  3.7860e-02,  1.0062e+00],
         [-2.1481e-01,  4.2299e-01,  7.7030e-01],
         [ 1.1083e+00, -2.9851e-01,  3.6696e-02],
         [-1.7011e-01,  4.8103e-01,  6.5104e-01],
         [ 9.7009e-02,  2.7910e-01,  5.5344e-01],
         [-7.6437e-02,  4.9686e-01,  5.4010e-01],
         [ 8.3636e-01,  1.8970e-01, -1.9212e-01],
         [ 9.9615e-01, -2.4849e-01,  1.1810e-01],
         [ 1.3644e-01,  4.3973e-01,  3.8008e-01],
         [ 2.8519e-02,  6.3237e-01,  2.5237e-01],
         [-5.3527e-01,  8.5261e-01,  7.0272e-01],
         [ 1.0133e+00, -2.1706e-01,  5.7759e-02],
         [-1.6452e-01,  1.2091e-01,  1.0574e+00],
         [ 1.1543e+00, -3.7642e-01,  7.9668e-02],
         [-1.1076e-01,  7.5625e-01,  3.0102e-01],
         [-1.7739e-01,  1.9051e-01,  9.8405e-01],
         [ 8.7685e-01, -1.2942e-02, -6.8184e-03],
         [ 8.9077e-01, -1.6516e-01,  1.4084e-01],
         [ 8.2416e-01,  1.8838e-01, -1.7378e-01],


In [247]:
def print_weights_biases(model):
    for name, param in model.named_parameters():
        if param.requires_grad:
            print(f'Layer: {name}')
            print(f'Values:\n{param.data}\n')

print_weights_biases(model=model)

Layer: fc1.weight
Values:
tensor([[-4.2568e-01, -1.0566e-01,  2.7220e-01,  8.6577e-02],
        [-2.2175e-01,  3.5688e-04,  1.4625e-01,  4.2069e-01],
        [-3.6319e-01,  1.9249e-01,  2.2800e-01,  1.1494e-01],
        [-1.1437e-01, -3.3795e-01, -4.3651e-01,  4.9292e-01],
        [-6.0700e-02,  1.4398e-01, -4.2095e-01, -1.9919e-01],
        [ 6.4240e-03,  2.2123e-01,  2.1355e-01,  3.2669e-01],
        [ 3.4908e-01,  3.3746e-01, -4.2802e-02,  1.2463e-02],
        [-4.6346e-01, -1.7136e-01,  3.7534e-01,  2.0889e-01],
        [-1.7853e-01,  1.8727e-01,  2.7845e-01, -2.4135e-01],
        [-3.0662e-01, -1.7082e-01, -1.2800e-01,  8.9629e-02],
        [ 5.3531e-02,  4.0967e-01, -2.0708e-01,  2.7043e-01],
        [ 4.9612e-01,  2.2916e-01, -2.6490e-01, -2.7210e-01],
        [-3.6065e-01, -1.8019e-01, -1.6448e-01,  3.7999e-01],
        [ 1.5343e-01,  2.3378e-01, -5.4191e-02, -3.4199e-02],
        [-5.5395e-02, -3.4059e-01, -3.8765e-01,  2.6986e-01],
        [-1.5118e-01, -2.4179e-01,  2.2245e-

In [248]:
my_test_data = [
            [0.05, 0.05, 0.05, 0.05],
            [8, 4, 7, 4],
            [6.99, 2.8, 5, 1],
            [5.9, 3.5, 2, 1],
            [6.3, 3.2, 6, 2.2]

            ]

# Transform data uring scaler
my_data_transformed = scaler.transform(my_test_data)

# Turn transformed data into tensor
my_data_tensor = torch.tensor(
    my_data_transformed, dtype=torch.float32
)

In [249]:
model.eval()
with torch.no_grad():
    predictions = model(my_data_tensor)

for i, prediction in enumerate(predictions):
    print(f'Prediction {i + 1}:')
    print(f'- Setosa Score: {prediction[0]:.4f}')
    print(f'- Versicolor Score: {prediction[1]:.4f}')
    print(f'- Virginia Score: {prediction[2]:.4f}')

Prediction 1:
- Setosa Score: 0.2507
- Versicolor Score: 1.2478
- Virginia Score: -0.7687
Prediction 2:
- Setosa Score: -0.2857
- Versicolor Score: -0.7227
- Virginia Score: 2.1146
Prediction 3:
- Setosa Score: -0.0084
- Versicolor Score: 0.9134
- Virginia Score: 0.0331
Prediction 4:
- Setosa Score: 0.7945
- Versicolor Score: -0.1490
- Virginia Score: 0.2609
Prediction 5:
- Setosa Score: -0.1996
- Versicolor Score: 0.1764
- Virginia Score: 1.0047


In [250]:
my_test_data

[[0.05, 0.05, 0.05, 0.05],
 [8, 4, 7, 4],
 [6.99, 2.8, 5, 1],
 [5.9, 3.5, 2, 1],
 [6.3, 3.2, 6, 2.2]]

In [251]:
my_data_transformed

array([[-7.28511354, -7.19626564, -2.32016138, -1.66656262],
       [ 2.66050512,  2.45850343,  1.86546395,  3.77761852],
       [ 1.39697369, -0.47459097,  0.66096745, -0.3572026 ],
       [ 0.03336057,  1.23638076, -1.14577729, -0.3572026 ],
       [ 0.53376905,  0.50310716,  1.2632157 ,  1.29672585]])