# 1 Classification of Iris Dataset

### David Nicolay 26296918

In [1]:
# imports
import pandas as pd
import numpy as np
import torch
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.optim as optim

# import my utils
import sys
sys.path.append('../')
from src import data_utils, models

## Data pre-processing

In [2]:
# Read the Iris CSV (path is relative to the notebook's working directory)
# and preview the first five rows.
df = pd.read_csv("../datasets/iris.csv")
df.head()

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa


In [3]:
# Count missing values per column; all zeros means no imputation is needed.
df.isnull().sum()

Id               0
SepalLengthCm    0
SepalWidthCm     0
PetalLengthCm    0
PetalWidthCm     0
Species          0
dtype: int64

In [4]:
# (rows, columns) of the frame as loaded, before any column is dropped.
df.shape

(150, 6)

### 1. Drop ID column

In [5]:
# Remove the Id column (1, 2, 3, ... in the preview above) so it is not used as a feature.
# NOTE: `df` is reassigned, so running this cell a second time raises KeyError
# because "Id" no longer exists.
df = df.drop(columns=["Id"])

### 2. One-hot encode target variable

In [6]:
# One-hot encode the target: "Species" is replaced by one boolean column per
# class, appended after the four measurement columns (see the preview below).
# Later cells rely on those three columns being the last ones in the frame.
df = pd.get_dummies(df, columns=["Species"])
df.head()

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species_Iris-setosa,Species_Iris-versicolor,Species_Iris-virginica
0,5.1,3.5,1.4,0.2,True,False,False
1,4.9,3.0,1.4,0.2,True,False,False
2,4.7,3.2,1.3,0.2,True,False,False
3,4.6,3.1,1.5,0.2,True,False,False
4,5.0,3.6,1.4,0.2,True,False,False


### 3. Scale features with z-score normalisation

In [7]:
# Inspection aid: list the names exposed by the local data_utils module
# (confirms scale_z_score is available). Not needed for the analysis itself.
print(dir(data_utils))

['__builtins__', '__cached__', '__doc__', '__file__', '__loader__', '__name__', '__package__', '__spec__', 'scale_z_score']


In [8]:
# Standardise the feature columns with data_utils.scale_z_score (z-score
# normalisation). The result is NOT confined to [-1, 1]: the preview below
# contains values such as -1.50.
feature_cols = df.columns[:-3]  # all columns except the last 3 (one-hot encoded target)

# Overwrites the raw measurements in `df` with their scaled values.
df[feature_cols] = data_utils.scale_z_score(df[feature_cols])
df.head()

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species_Iris-setosa,Species_Iris-versicolor,Species_Iris-virginica
0,-0.897674,1.028611,-1.336794,-1.308593,True,False,False
1,-1.1392,-0.12454,-1.336794,-1.308593,True,False,False
2,-1.380727,0.33672,-1.39347,-1.308593,True,False,False
3,-1.50149,0.10609,-1.280118,-1.308593,True,False,False
4,-1.018437,1.259242,-1.336794,-1.308593,True,False,False


### 4. Prepare X and Y matrices and tensors


In [9]:
# Pull the scaled features and the one-hot targets out of the frame as NumPy arrays
X = df[feature_cols].to_numpy()
y = df[
    ["Species_Iris-setosa", "Species_Iris-versicolor", "Species_Iris-virginica"]
].to_numpy()

# Build float32 torch tensors from both arrays
X_tensor = torch.tensor(X, dtype=torch.float32)
y_tensor = torch.tensor(y, dtype=torch.float32)

## Modelling

### Passive learning with stochastic gradient descent

#### Find best control parameters

In [10]:
# Hold out 20% of the samples for testing. Stratifying on the one-hot label
# array keeps the class proportions the same in both partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X_tensor,
    y_tensor,
    test_size=0.2,
    random_state=12,
    stratify=y,
)

# Hyperparameter search function
def train_and_evaluate(hidden_size, learning_rate, weight_decay, epochs=1000):
    """Train one IrisNet with plain SGD and report its accuracy.

    Reads the module-level splits ``X_train``, ``y_train``, ``X_test`` and
    ``y_test``. Every epoch performs a single update computed on the whole
    training split.

    Args:
        hidden_size: Forwarded to ``models.IrisNet`` as ``hidden_size``.
        learning_rate: Step size handed to ``optim.SGD``.
        weight_decay: Weight-decay coefficient handed to ``optim.SGD``.
        epochs: Number of update steps to run.

    Returns:
        A tuple ``(model, train_acc, test_acc, losses)``: the trained network,
        its accuracy on the training and test splits, and the list of
        per-epoch training-loss values.
    """
    net = models.IrisNet(input_size=4, hidden_size=hidden_size, output_size=3)

    # CrossEntropyLoss bundles LogSoftmax with NLLLoss
    loss_fn = nn.CrossEntropyLoss()
    sgd = optim.SGD(net.parameters(), lr=learning_rate, weight_decay=weight_decay)

    # The loss is given integer class labels, so collapse the one-hot rows
    train_labels = y_train.argmax(dim=1)
    test_labels = y_test.argmax(dim=1)

    # Optimisation loop
    loss_history = []
    for _ in range(epochs):
        sgd.zero_grad()
        step_loss = loss_fn(net(X_train), train_labels)
        step_loss.backward()
        sgd.step()
        loss_history.append(step_loss.item())

    # Score the trained network on both splits without tracking gradients
    net.eval()
    with torch.no_grad():
        train_pred = net(X_train).argmax(dim=1)
        test_pred = net(X_test).argmax(dim=1)

    train_acc = accuracy_score(train_labels.numpy(), train_pred.numpy())
    test_acc = accuracy_score(test_labels.numpy(), test_pred.numpy())

    return net, train_acc, test_acc, loss_history

# Hyperparameter grid search: train one model per combination of the three
# lists below and remember the combination with the highest test accuracy.
hidden_sizes = [64, 128, 256, 512]
learning_rates = [0.01, 0.05, 0.1, 0.5]
weight_decays = [0.0, 0.001, 0.01, 0.1]

# Seed torch's global RNG so that any random initialisation performed inside
# train_and_evaluate repeats on every Restart & Run All; without this the
# table printed below can change from run to run.
SEARCH_SEED = 12
torch.manual_seed(SEARCH_SEED)

best_params = {}
best_test_acc = 0
results = []

# NOTE(review): the winner is chosen by test accuracy, so the reported best
# test score is optimistically biased. Consider selecting on a separate
# validation split and keeping the test split for the final evaluation only.
print("Searching for best hyperparameters...")
for hidden_size in hidden_sizes:
    for lr in learning_rates:
        for wd in weight_decays:
            model, train_acc, test_acc, losses = train_and_evaluate(hidden_size, lr, wd)

            # Build the record once and reuse it for both the log and the best-so-far
            record = {
                'hidden_size': hidden_size,
                'learning_rate': lr,
                'weight_decay': wd,
                'train_acc': train_acc,
                'test_acc': test_acc
            }
            results.append(record)

            # Strict '>' keeps the first combination that reaches a given score
            if test_acc > best_test_acc:
                best_test_acc = test_acc
                best_params = dict(record)  # copy so later edits don't alias results

            print(f"Hidden: {hidden_size}, LR: {lr}, WD: {wd}, Train Acc: {train_acc:.4f}, Test Acc: {test_acc:.4f}")

print(f"\nBest parameters: {best_params}")



Searching for best hyperparameters...
Hidden: 64, LR: 0.01, WD: 0.0, Train Acc: 0.9500, Test Acc: 0.9667
Hidden: 64, LR: 0.01, WD: 0.001, Train Acc: 0.9500, Test Acc: 0.9667
Hidden: 64, LR: 0.01, WD: 0.01, Train Acc: 0.9500, Test Acc: 0.9333
Hidden: 64, LR: 0.01, WD: 0.1, Train Acc: 0.8917, Test Acc: 0.9000
Hidden: 64, LR: 0.05, WD: 0.0, Train Acc: 0.9750, Test Acc: 0.9667
Hidden: 64, LR: 0.05, WD: 0.001, Train Acc: 0.9750, Test Acc: 0.9667
Hidden: 64, LR: 0.05, WD: 0.01, Train Acc: 0.9667, Test Acc: 0.9667
Hidden: 64, LR: 0.05, WD: 0.1, Train Acc: 0.9167, Test Acc: 0.9333
Hidden: 64, LR: 0.1, WD: 0.0, Train Acc: 0.9833, Test Acc: 0.9667
Hidden: 64, LR: 0.1, WD: 0.001, Train Acc: 0.9833, Test Acc: 0.9667
Hidden: 64, LR: 0.1, WD: 0.01, Train Acc: 0.9667, Test Acc: 0.9667
Hidden: 64, LR: 0.1, WD: 0.1, Train Acc: 0.9167, Test Acc: 0.9333
Hidden: 64, LR: 0.5, WD: 0.0, Train Acc: 0.9833, Test Acc: 0.9667
Hidden: 64, LR: 0.5, WD: 0.001, Train Acc: 0.9833, Test Acc: 0.9667
Hidden: 64, LR: 0.5

#### Run 50 trials with best control parameters

### Active learning with output sensitivity analysis

### Active learning with uncertainty sampling

## Evaluation