# Iris Classification in PyTorch 

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import pandas as pd

# Load the Iris dataset as a pandas DataFrame
* A DataFrame is a tabular (rows and columns) data structure

In [2]:
# Raw Iris CSV file served from the UCI archive
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
# Column names are supplied here because the file is read with header=None below
columns = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'class']

# header=None: the first line is treated as data; names=columns labels the five fields
iris_data = pd.read_csv(url, header=None, names=columns)
# Show the first rows as a quick sanity check
iris_data.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,class
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


# Map class labels to numerical values

In [3]:
# Integer id assigned to each species name stored in the 'class' column
class_mapping = {
    'Iris-setosa': 0, 
    'Iris-versicolor': 1, 
    'Iris-virginica': 2
}

# Replace the string labels with their integer ids.
# NOTE(review): Series.map returns NaN for values that are not keys of the mapping,
# so executing this cell a second time on already-mapped data would turn every
# label into NaN — confirm before re-running.
iris_data['class'] = iris_data['class'].map(class_mapping)

In [4]:
# Check that 'class' now holds integer ids instead of species names
iris_data.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,class
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


# Split the data into features and labels

In [5]:
# features: every column except 'class', extracted via .values
X = iris_data.drop('class', axis=1).values
# labels: the integer-encoded 'class' column, extracted via .values
y = iris_data['class'].values

# Train/Test Split

In [6]:
# The four splits are still NumPy arrays here; tensors are only created inside the dataset class.
# test_size=0.2 holds out 20% of the rows; random_state=42 fixes the shuffle so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Feature Scaling
* Standard scaler -> zero mean, unit variance (the statistics are fitted on the training split only)

In [7]:
scaler = StandardScaler()

# Fit the scaling statistics on the training split only, then reuse the same
# statistics for the test split so it is transformed exactly like the training data.
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [8]:
# Mean and variance over all entries of each split. The training split comes out
# at ~0 / ~1; the test split is only close to that because it was scaled with the
# statistics fitted on the training split.
print(X_train.mean(), X_train.var())
print(X_test.mean(), X_test.var())

7.401486830834377e-18 0.9999999999999997
0.09149145344393068 0.996861259412538


# Create Custom Dataset

In [9]:
class IrisDataset(torch.utils.data.Dataset):
    """Map-style dataset that pairs each feature row with its class label.

    Both inputs are converted to tensors once, at construction time, so that
    indexing afterwards only slices tensors that already exist.
    """

    def __init__(self, features, labels):
        """Convert ``features`` to a float32 tensor and ``labels`` to an int64 tensor."""
        self.features = torch.tensor(features, dtype=torch.float32)
        # torch.long is the 64-bit integer dtype used for class-index targets.
        self.labels = torch.tensor(labels, dtype=torch.long)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at position ``idx``."""
        return self.features[idx], self.labels[idx]

    def __len__(self):
        """Return the number of samples, i.e. the length of the feature tensor."""
        return len(self.features)

In [10]:
# Wrap the scaled training split; IrisDataset converts the arrays to tensors
train_dataset = IrisDataset(
    features=X_train, 
    labels=y_train
)

# Same wrapping for the held-out test split
test_dataset = IrisDataset(
    features=X_test, 
    labels=y_test
)

In [11]:
# Indexing the dataset goes through __getitem__ and returns a (features, label) tuple
sample_data = train_dataset[0]
sample_data

(tensor([-1.4739,  1.2204, -1.5640, -1.3095]), tensor(0))

In [12]:
# features: 1-D float tensor with the sample's four scaled measurements
print(sample_data[0])
print(sample_data[0].shape)

tensor([-1.4739,  1.2204, -1.5640, -1.3095])
torch.Size([4])


In [13]:
# label: 0-dimensional (scalar) integer tensor holding the class id
print(sample_data[1])
print(sample_data[1].shape)

tensor(0)
torch.Size([])


# Make Batches of Data

In [14]:
batch_size = 32

# Reshuffle the training samples at the start of every epoch. DataLoader defaults
# to shuffle=False, which would feed SGD the exact same batches in the exact same
# order on every epoch.
train_dataloader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
)

# Evaluation needs no shuffling; a fixed order keeps the reported batches repeatable.
test_dataloader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=batch_size,
)

In [15]:
# Pull the first batch from the training loader to inspect its structure
sample_batch = next(iter(train_dataloader))

In [16]:
# features: one row per sample -> (batch_size, number of features)
print(sample_batch[0].shape)

# labels: one class id per sample -> (batch_size,)
print(sample_batch[1].shape)

torch.Size([32, 4])
torch.Size([32])


# Define the MLP Model

In [17]:
class IrisMLP(nn.Module):
    """Two-layer perceptron: Linear -> ReLU -> Linear.

    The forward pass returns the output of the second linear layer directly;
    no softmax or other normalisation is applied inside the model.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        """Create the layers.

        Args:
            input_size: Number of input features per sample.
            hidden_size: Number of units in the hidden layer.
            num_classes: Number of output scores per sample.
        """
        super().__init__()

        self.fc1 = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(in_features=hidden_size, out_features=num_classes)

    def forward(self, x):
        """Return the class scores computed from ``x``."""
        hidden = self.relu(self.fc1(x))
        return self.fc2(hidden)

# Set hyperparameters

In [18]:
input_size = 4  # Number of features
hidden_size = 8  # Number of units in the hidden layer
num_classes = 3 # iris dataset has 3 classes
learning_rate = 0.01  # Step size handed to the optimizer
# NOTE(review): the training cell visible in this notebook defines its own
# NUM_EPOCHS = 250 and does not read num_epochs — confirm which value is intended.
num_epochs = 100

# Initialize the model

In [19]:
# Instantiate the MLP with the sizes chosen in the hyperparameter cell
model = IrisMLP(input_size, hidden_size, num_classes)

# Loss & Optimizer

In [20]:
# Cross-entropy between the model's class scores and the integer class labels
criterion = nn.CrossEntropyLoss()
# Plain stochastic gradient descent over all parameters of the model
optimizer = optim.SGD(model.parameters(), lr=learning_rate)

# Training the model

### Training iteration function that loops over all the batches

In [21]:
def train_iter(model, dataloader, criterion, optimizer):
    """Run one training pass over ``dataloader``, updating ``model`` in place.

    Args:
        model: Module to optimise; it is switched to training mode.
        dataloader: Iterable yielding ``(features, labels)`` batches.
        criterion: Callable ``criterion(predictions, labels)`` returning a
            scalar loss tensor.
        optimizer: Optimizer that updates the model's parameters.

    Returns:
        The unweighted mean of the per-batch loss values.

    Raises:
        ZeroDivisionError: If ``dataloader`` yields no batches.
    """
    model.train()

    batch_losses = []
    for inputs, targets in dataloader:
        # Gradients accumulate across backward() calls, so reset them first.
        optimizer.zero_grad()

        batch_loss = criterion(model(inputs), targets)
        batch_loss.backward()
        optimizer.step()

        # .item() keeps a plain Python number instead of a tensor.
        batch_losses.append(batch_loss.item())

    return sum(batch_losses) / len(batch_losses)

### Evaluation (testing) iteration function that loops over all the batches

In [22]:
# DISABLE GRADIENT COMPUTATION
# NOT REQUIRED FOR TESTING 
# WE ARE NOT GOING TO TRAING THE MODEL IN THIS FUNCTION
@torch.no_grad()
def test_iter(model, dataloader, criterion):
    model.eval()

    loss_history = []
    acc_history = []

    for features_batch, labels_batch in dataloader:
        # Forward pass
        preds = model(features_batch)
        
        # Compute error
        loss = criterion(preds, labels_batch)
        # Compute accuracy
        _, predicted = torch.max(preds, 1)
        accuracy = (predicted == labels_batch).sum().item() / labels_batch.size(0)
    
        loss_history.append(loss.item())
        acc_history.append(accuracy)

    avg_loss = sum(loss_history)/len(loss_history)
    avg_acc = sum(acc_history)/len(acc_history)
    return avg_loss, avg_acc

### Start training

In [23]:
def start_training(model, train_dataloader, test_dataloader, optimizer, criterion, num_epochs, print_interval):
    """Train ``model`` for ``num_epochs`` epochs, evaluating after each one.

    Every epoch runs ``train_iter`` on ``train_dataloader`` followed by
    ``test_iter`` on ``test_dataloader``. Metrics are printed whenever the
    number of completed epochs is a multiple of ``print_interval``.

    Args:
        model: Module to train.
        train_dataloader: Batches used for the parameter updates.
        test_dataloader: Batches used for the per-epoch evaluation.
        optimizer: Optimizer that updates the model's parameters.
        criterion: Loss function shared by training and evaluation.
        num_epochs: Number of passes over ``train_dataloader``.
        print_interval: Print the metrics every this many epochs.
    """
    # Epochs are numbered 1..num_epochs, so `epoch` is already the number of
    # completed epochs when the metrics are reported; no extra +1 is needed.
    for epoch in range(1, num_epochs + 1):
        avg_train_loss = train_iter(model, train_dataloader, criterion, optimizer)
        avg_test_loss, avg_test_acc = test_iter(model, test_dataloader, criterion)

        if epoch % print_interval == 0:
            print(f'Epoch: [{epoch}/{num_epochs}], Avg train loss: {avg_train_loss:.4f}, test loss: {avg_test_loss:.4f}, test_acc: {avg_test_acc*100.0:.2f}%')

In [24]:
# Number of epochs handed to start_training
NUM_EPOCHS = 250
# Metrics are printed once every print_interval epochs
print_interval = 20 

# Train the model, evaluating on the test loader after every epoch
start_training(
    model,
    train_dataloader,
    test_dataloader,
    optimizer,
    criterion,
    NUM_EPOCHS,
    print_interval
)

Epoch: [20/250], Avg train loss: 0.8737, test loss: 0.8588, test_acc: 63.33%
Epoch: [40/250], Avg train loss: 0.6962, test loss: 0.6763, test_acc: 80.00%
Epoch: [60/250], Avg train loss: 0.5744, test loss: 0.5442, test_acc: 96.67%
Epoch: [80/250], Avg train loss: 0.4905, test loss: 0.4507, test_acc: 93.33%
Epoch: [100/250], Avg train loss: 0.4341, test loss: 0.3873, test_acc: 93.33%
Epoch: [120/250], Avg train loss: 0.3944, test loss: 0.3428, test_acc: 93.33%
Epoch: [140/250], Avg train loss: 0.3648, test loss: 0.3097, test_acc: 93.33%
Epoch: [160/250], Avg train loss: 0.3413, test loss: 0.2837, test_acc: 93.33%
Epoch: [180/250], Avg train loss: 0.3219, test loss: 0.2625, test_acc: 93.33%
Epoch: [200/250], Avg train loss: 0.3052, test loss: 0.2447, test_acc: 93.33%
Epoch: [220/250], Avg train loss: 0.2904, test loss: 0.2296, test_acc: 96.67%
Epoch: [240/250], Avg train loss: 0.2767, test loss: 0.2162, test_acc: 96.67%
