In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import numpy as np

# Define the neural network model
class MultiClassModel(nn.Module):
    def __init__(self, input_dim, num_classes):
        super(MultiClassModel, self).__init__()
        self.layer1 = nn.Linear(input_dim, 64)
        self.relu1 = nn.ReLU()
        self.layer2 = nn.Linear(64, 32)
        self.relu2 = nn.ReLU()
        self.output = nn.Linear(32, num_classes)
        self.softmax = nn.Softmax(dim=1)
    
    def forward(self, x):
        x = self.relu1(self.layer1(x))
        x = self.relu2(self.layer2(x))
        x = self.softmax(self.output(x))
        return x

# Load and preprocess Iris dataset
iris = load_iris()
X = iris.data
y = iris.target

# Standardize features
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Convert to PyTorch tensors
X = torch.FloatTensor(X)
# nn.CrossEntropyLoss expects the target tensor to have the torch.int64 (long) data type
y = torch.tensor(y, dtype=torch.int64)

# Split dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Model parameters
input_dim = X.shape[1]
num_classes = len(np.unique(y))

# Initialize model, loss, and optimizer
model = MultiClassModel(input_dim, num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 100
batch_size = 32
n_batches = len(X_train) // batch_size

for epoch in range(num_epochs):
    model.train()
    for i in range(0, len(X_train), batch_size):
        batch_X = X_train[i:i+batch_size]
        batch_y = y_train[i:i+batch_size]
        
        # Forward pass
        outputs = model(batch_X)
        loss = criterion(outputs, batch_y)
        
        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    
    # Print progress every 20 epochs
    if (epoch + 1) % 20 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

# Print model architecture
print('\nModel Architecture:')
print(model)

Epoch [20/100], Loss: 0.7266
Epoch [40/100], Loss: 0.6070
Epoch [60/100], Loss: 0.5695
Epoch [80/100], Loss: 0.5610
Epoch [100/100], Loss: 0.5574

Model Architecture:
MultiClassModel(
  (layer1): Linear(in_features=4, out_features=64, bias=True)
  (relu1): ReLU()
  (layer2): Linear(in_features=64, out_features=32, bias=True)
  (relu2): ReLU()
  (output): Linear(in_features=32, out_features=3, bias=True)
  (softmax): Softmax(dim=1)
)


In [3]:
# Evaluate model
model.eval()
with torch.no_grad():
    test_outputs = model(X_test)
    _, predicted = torch.max(test_outputs, 1)
    accuracy = (predicted == y_test).float().mean()
    print(f'\nTest Accuracy: {accuracy:.4f}')


Test Accuracy: 1.0000


In [4]:
print("The first prediction", test_outputs[0].tolist())

The first prediction [0.000702839985024184, 0.9973917007446289, 0.001905460492707789]


In [5]:
predicted

tensor([1, 0, 2, 1, 1, 0, 1, 2, 1, 1, 2, 0, 0, 0, 0, 1, 2, 1, 1, 2, 0, 2, 0, 2,
        2, 2, 2, 2, 0, 0])