# Use Transformer model to classify Iris data

In [23]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import numpy as np
import warnings
warnings.filterwarnings("ignore", category=UserWarning, message="enable_nested_tensor is True.*")

In [24]:
# Data load and process
iris = load_iris()
X = iris['data']
y = iris['target']

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.3, random_state=42)

# Convert to tensor
X_train = torch.FloatTensor(X_train)
X_test = torch.FloatTensor(X_test)
y_train = torch.LongTensor(y_train)
y_test = torch.LongTensor(y_test)

# Create DataLoader
train_dataset = TensorDataset(X_train, y_train)
test_dataset = TensorDataset(X_test, y_test)
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)

In [25]:
class TransformerModel(nn.Module):
    def __init__(self, input_dim, num_classes, dim_feedforward=512, nhead=4, num_layers=1, dropout=0.5):
        super(TransformerModel, self).__init__()
        self.input_embedding = nn.Linear(input_dim, dim_feedforward)
        self.pos_encoder = nn.Parameter(torch.zeros(dim_feedforward))
        transformer_layer = nn.TransformerEncoderLayer(d_model=dim_feedforward, nhead=nhead, dropout=dropout)
        self.transformer_encoder = nn.TransformerEncoder(transformer_layer, num_layers=num_layers)
        self.fc_out = nn.Linear(dim_feedforward, num_classes)
    
    def forward(self, x):
        x = self.input_embedding(x) + self.pos_encoder
        x = x.unsqueeze(1)  # increase 1 dimension to conform to the expected data input of Transformer
        x = self.transformer_encoder(x)
        x = x.squeeze(1)  # undo 1 dimension added before
        x = self.fc_out(x)
        return x

In [26]:
model = TransformerModel(input_dim=4, num_classes=3)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [27]:
# 训练模型
def train_epoch(model, train_loader, criterion, optimizer):
    model.train()
    total_loss = 0
    for data, targets in train_loader:
        optimizer.zero_grad()
        outputs = model(data)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    return total_loss / len(train_loader)

In [28]:
# 测试模型
def evaluate(model, test_loader):
    model.eval()
    correct = total = 0
    with torch.no_grad():
        for data, targets in test_loader:
            outputs = model(data)
            _, predicted = torch.max(outputs, 1)
            total += targets.size(0)
            correct += (predicted == targets).sum().item()
    return correct / total

In [29]:
# 训练和测试循环
for epoch in range(10):
    train_loss = train_epoch(model, train_loader, criterion, optimizer)
    test_accuracy = evaluate(model, test_loader)
    print(f"Epoch {epoch+1}: Train Loss = {train_loss:.4f}, Test Accuracy = {test_accuracy:.4f}")

Epoch 1: Train Loss = 0.5873, Test Accuracy = 0.8444
Epoch 2: Train Loss = 0.2598, Test Accuracy = 0.9111
Epoch 3: Train Loss = 0.3777, Test Accuracy = 0.9556
Epoch 4: Train Loss = 0.1623, Test Accuracy = 0.9778
Epoch 5: Train Loss = 0.2404, Test Accuracy = 1.0000
Epoch 6: Train Loss = 0.2139, Test Accuracy = 1.0000
Epoch 7: Train Loss = 0.2108, Test Accuracy = 1.0000
Epoch 8: Train Loss = 0.2228, Test Accuracy = 0.9556
Epoch 9: Train Loss = 0.2916, Test Accuracy = 1.0000
Epoch 10: Train Loss = 0.1647, Test Accuracy = 1.0000
