In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

In [2]:
# Run on the GPU when CUDA is available; otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [3]:
class TransformerClassifier(nn.Module):
    """Sequence classifier: embedding -> Transformer encoder -> mean pool -> linear.

    The input is a 2-D tensor of integer token indices laid out as
    (batch, sequence). Each index is looked up in an embedding table, the
    embedded sequence is run through a stack of Transformer encoder layers,
    the encoder output is averaged over the sequence axis, and a linear layer
    maps the pooled vector to class logits.

    Args:
        input_dim: Number of rows in the embedding table, i.e. the number of
            distinct token indices; inputs must lie in ``[0, input_dim)``.
        output_dim: Number of output classes (size of the logit vector).
        hidden_dim: Embedding / model width. Must be divisible by ``nhead``.
        num_layers: Number of stacked ``nn.TransformerEncoderLayer`` blocks.
        nhead: Number of attention heads per encoder layer. Defaults to 8,
            the value that used to be hard-coded.
    """

    def __init__(self, input_dim, output_dim, hidden_dim, num_layers, nhead=8):
        super().__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.nhead = nhead

        self.embedding = nn.Embedding(input_dim, hidden_dim)
        self.transformer = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(hidden_dim, nhead=nhead),
            num_layers=num_layers
        )
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Return class logits for a batch of index sequences.

        Args:
            x: Integer tensor of token indices with layout (batch, sequence).

        Returns:
            Tensor of shape (batch, output_dim) holding unnormalised logits.
        """
        x = self.embedding(x)
        # The encoder layer is built with its default (sequence-first) layout,
        # so move the sequence axis in front of the batch axis.
        x = x.permute(1, 0, 2)
        output = self.transformer(x)
        output = output.mean(dim=0)  # Average over the sequence length
        output = self.fc(output)
        return output

In [None]:
# Seed PyTorch so the model's random weight initialisation is repeatable on a
# fresh kernel; the train/test split below is already seeded via random_state.
torch.manual_seed(42)

iris = load_iris()

X_train, X_test, y_train, y_test = train_test_split(
    iris.data, iris.target, test_size=0.2, random_state=42)

# NOTE: .long() truncates every measurement to its integer part so that the
# values can be used as indices into the model's nn.Embedding table. This
# discards the fractional part of each feature.
X_train = torch.from_numpy(X_train).long().to(device)
y_train = torch.from_numpy(y_train).long().to(device)

X_test = torch.from_numpy(X_test).long().to(device)
y_test = torch.from_numpy(y_test).long().to(device)

input_dim = 10  # embedding table size: every feature index must be < input_dim
output_dim = 3
hidden_dim = 32
num_layers = 2

# Fail early with a readable message instead of an opaque embedding lookup
# error if any index would fall outside the embedding table.
all_tokens = torch.cat([X_train, X_test])
min_token = int(all_tokens.min())
max_token = int(all_tokens.max())
if min_token < 0 or max_token >= input_dim:
    raise ValueError(
        "Feature indices span [{}, {}] but input_dim is {}".format(
            min_token, max_token, input_dim))

model = TransformerClassifier(input_dim, output_dim, hidden_dim, num_layers)
model.to(device)

In [5]:
NUM_EPOCHS = 500  # number of full-batch optimisation steps
LOG_EVERY = 10    # print the training loss every LOG_EVERY epochs

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Put the model back into training mode explicitly. The evaluation cell calls
# model.eval(); without this line, re-running this cell afterwards would
# silently train with the model still in evaluation mode.
model.train()

# Each epoch is a single gradient step on the whole training set (no batching).
for epoch in range(NUM_EPOCHS):
    optimizer.zero_grad()
    output = model(X_train)
    loss = criterion(output, y_train)
    loss.backward()
    optimizer.step()

    if epoch % LOG_EVERY == 0:
        print("Epoch {}: Loss = {}".format(epoch, loss.item()))


Epoch 0: Loss = 1.090861439704895
Epoch 10: Loss = 0.32804256677627563
Epoch 20: Loss = 0.21982432901859283
Epoch 30: Loss = 0.18347498774528503
Epoch 40: Loss = 0.17048922181129456
Epoch 50: Loss = 0.16094906628131866
Epoch 60: Loss = 0.1555875986814499
Epoch 70: Loss = 0.15837770700454712
Epoch 80: Loss = 0.14791248738765717
Epoch 90: Loss = 0.13117173314094543
Epoch 100: Loss = 0.14329764246940613
Epoch 110: Loss = 0.12824688851833344
Epoch 120: Loss = 0.14189033210277557
Epoch 130: Loss = 0.1323809176683426
Epoch 140: Loss = 0.12141406536102295
Epoch 150: Loss = 0.1225205734372139
Epoch 160: Loss = 0.11050143092870712
Epoch 170: Loss = 0.11342358589172363
Epoch 180: Loss = 0.11312638968229294
Epoch 190: Loss = 0.11380262672901154
Epoch 200: Loss = 0.12031599134206772
Epoch 210: Loss = 0.10529523342847824
Epoch 220: Loss = 0.11727757751941681
Epoch 230: Loss = 0.11196236312389374
Epoch 240: Loss = 0.11746649444103241
Epoch 250: Loss = 0.11588415503501892
Epoch 260: Loss = 0.12332228

In [6]:
# Switch the model to evaluation mode before scoring the held-out split.
model.eval()

# Gradients are not needed for evaluation, so disable autograd tracking.
with torch.no_grad():
    y_pred = model(X_test)

    # Cross-entropy of the logits against the true labels.
    loss_fn = nn.CrossEntropyLoss()
    test_loss = loss_fn(y_pred, y_test)

    # Accuracy: fraction of samples whose highest-scoring class is the label.
    predicted_labels = y_pred.argmax(dim=1)
    test_acc = predicted_labels.eq(y_test).float().mean()

print(f"Test loss: {test_loss:.4f}, Test accuracy: {test_acc:.4f}")

Test loss: 0.1061, Test accuracy: 0.9667
