<a href="https://colab.research.google.com/github/eatprotein/draughts/blob/main/ML_transformer_drybean.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Step 1: Download the dataset

In [None]:
# Colab-only setup: attach Google Drive and expose the shared resources folder
# to the import system. Change the resources path to match your own Drive layout.
import sys

from google.colab import drive

# Mount Google Drive under /content/drive/
drive.mount('/content/drive/')

# Let Python import modules stored in the resources folder
sys.path.append('/content/drive/My Drive/NLE Notebooks/resources/')


In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch.utils.data import DataLoader, TensorDataset

# Location of the Dry Bean spreadsheet. Despite the variable name this is a local
# file path, not a URL: this notebook does not download anything, so the file has
# to be present at this path before the data is read with pd.read_excel.
dataset_url = "/content/Dry_Bean_Dataset.xlsx"



# Step 2: Preprocess the data


In [None]:
# Read the spreadsheet into a DataFrame
df = pd.read_excel(dataset_url)

# Force every column except the label to float; cells that cannot be parsed
# as numbers are turned into NaN by errors='coerce'.
for col in filter(lambda name: name != 'Class', df.columns):
    df[col] = pd.to_numeric(df[col], errors='coerce').astype(float)

# Preview the first rows
df.head()

Unnamed: 0,Area,Perimeter,MajorAxisLength,MinorAxisLength,AspectRation,Eccentricity,ConvexArea,EquivDiameter,Extent,Solidity,roundness,Compactness,ShapeFactor1,ShapeFactor2,ShapeFactor3,ShapeFactor4,Class
0,28395.0,610.291,208.178117,173.888747,1.197191,0.549812,28715.0,190.141097,0.763923,0.988856,0.958027,0.913358,0.007332,0.003147,0.834222,0.998724,SEKER
1,28734.0,638.018,200.524796,182.734419,1.097356,0.411785,29172.0,191.27275,0.783968,0.984986,0.887034,0.953861,0.006979,0.003564,0.909851,0.99843,SEKER
2,29380.0,624.11,212.82613,175.931143,1.209713,0.562727,29690.0,193.410904,0.778113,0.989559,0.947849,0.908774,0.007244,0.003048,0.825871,0.999066,SEKER
3,30008.0,645.884,210.557999,182.516516,1.153638,0.498616,30724.0,195.467062,0.782681,0.976696,0.903936,0.928329,0.007017,0.003215,0.861794,0.994199,SEKER
4,30140.0,620.134,201.847882,190.279279,1.060798,0.33368,30417.0,195.896503,0.773098,0.990893,0.984877,0.970516,0.006697,0.003665,0.9419,0.999166,SEKER


In [None]:
# Device selection.
# Set this flag to False to force CPU execution, e.g. to compare CPU and GPU speed.
request_GPU = True

# CUDA is used only when it is both available and requested; otherwise the CPU is used.
device = torch.device('cuda' if torch.cuda.is_available() and request_GPU else 'cpu')
print(torch.cuda.get_device_name(0) if device.type == 'cuda' else 'Using the CPU')

Tesla T4


In [None]:
from sklearn.preprocessing import LabelEncoder

# Replace the 'Class' column by integer ids produced by a LabelEncoder.
# The fitted encoder stays available as `le`.
le = LabelEncoder()
df['Class'] = le.fit(df['Class']).transform(df['Class'])


# Step 3: Split the dataset 

In [None]:
# Mini-batch size passed to the DataLoader objects that are created in a later cell.
# (No DataLoader is built in this cell; it only defines the dataset wrapper below.)
batch_size = 32

class BeanDataset(TensorDataset):
    """Dataset of (features, target) pairs stored as float32 NumPy arrays.

    The inputs are copied into float32 arrays in positional order, so a pandas
    Series/DataFrame with a shuffled index can be passed directly.

    Args:
        features: array-like whose first axis indexes the samples.
        targets: array-like with one entry per sample.

    Raises:
        ValueError: if `features` and `targets` do not contain the same number
            of samples.
    """

    def __init__(self, features, targets):
        self.features = np.array(features, dtype=np.float32)
        self.targets = np.array(targets, dtype=np.float32)

        # Fail at construction time instead of with an IndexError in the middle
        # of an epoch (or by silently ignoring surplus targets).
        if len(self.features) != len(self.targets):
            raise ValueError(
                f"features and targets must have the same length, "
                f"got {len(self.features)} and {len(self.targets)}"
            )

        # Initialise the TensorDataset base so that inherited state (`self.tensors`)
        # exists. from_numpy shares memory with the arrays above, so no extra copy.
        super().__init__(torch.from_numpy(self.features), torch.from_numpy(self.targets))

    def __len__(self):
        """Return the number of samples."""
        return len(self.features)

    def __getitem__(self, idx):
        """Return sample `idx` as a (features, target) pair of float32 tensors."""
        # torch.tensor copies, so callers never alias the backing arrays
        return torch.tensor(self.features[idx]), torch.tensor(self.targets[idx])


In [None]:
# Separate the feature columns from the encoded class label
X = df.drop('Class', axis=1)
y = df['Class']

# Split first: 20% is held out as the test set, then 25% of the remainder becomes
# the validation set. Both splits are stratified on the label and seeded.
X_temp, X_test, y_temp, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_temp, y_temp, test_size=0.25, stratify=y_temp, random_state=42)

# Fit the scaler on the training split only and reuse its statistics for the
# validation and test splits. Fitting on the full dataset would leak information
# about the held-out samples into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)

print('Training set:', X_train.shape, y_train.shape)
print('Validation set:', X_val.shape, y_val.shape)
print('Test set:', X_test.shape, y_test.shape)

# Wrap each split in a dataset and a DataLoader; only the training data is shuffled
batch_size = 32

train_dataset = BeanDataset(X_train, y_train)
val_dataset = BeanDataset(X_val, y_val)
test_dataset = BeanDataset(X_test, y_test)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

Training set: (8166, 16) (8166,)
Validation set: (2722, 16) (2722,)
Test set: (2723, 16) (2723,)


# Step 4: Build the Transformer model


In [None]:
import torch
import torch.nn as nn
from torch.nn import TransformerEncoder, TransformerEncoderLayer


class TransformerModel(nn.Module):
    """Transformer-encoder classifier for fixed-length feature vectors.

    Each sample is projected to `hidden_dim`, treated as a sequence of length
    one, passed through a stack of Transformer encoder layers and mapped to
    `output_dim` logits.

    Args:
        input_dim: number of input features per sample.
        output_dim: number of output logits (classes).
        hidden_dim: embedding width; also used as the feed-forward width.
        num_layers: number of stacked encoder layers.
        num_heads: number of attention heads.
        dropout: dropout probability inside the encoder layers.
    """

    def __init__(self, input_dim, output_dim, hidden_dim, num_layers, num_heads, dropout):
        super(TransformerModel, self).__init__()
        self.embedding = nn.Linear(input_dim, hidden_dim)
        # batch_first=True is required: forward() builds (batch, seq=1, hidden).
        # With the default (seq, batch, hidden) layout the batch axis would be read
        # as the sequence axis and the samples of a batch would attend to each other.
        encoder_layer = TransformerEncoderLayer(
            hidden_dim,
            num_heads,
            dim_feedforward=hidden_dim,
            dropout=dropout,
            batch_first=True,
        )
        self.transformer_encoder = TransformerEncoder(encoder_layer, num_layers)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Map a (batch, input_dim) tensor to (batch, output_dim) logits."""
        x = self.embedding(x)
        x = x.unsqueeze(1)  # (batch, hidden) -> (batch, seq=1, hidden)
        x = self.transformer_encoder(x)
        x = x.mean(dim=1)  # average over the sequence axis -> (batch, hidden)
        output = self.fc(x)
        return output



In [None]:
# Architecture hyperparameters for the baseline model.
# (The model itself is constructed in a later cell from these values.)
hidden_dim = 64
num_layers = 2
num_heads = 4
dropout = 0.1

# Step 5: Train the model

In [None]:
# Problem dimensions taken from the data
input_dim = X_train.shape[1]                    # one input per feature column
output_dim = df['Class'].nunique(dropna=False)  # one output per distinct class value

# Training hyperparameters
hidden_dim = 64
num_epochs = 20
learning_rate = 0.001

# Baseline model built from the hyperparameters defined so far
model = TransformerModel(
    input_dim=input_dim,
    output_dim=output_dim,
    hidden_dim=hidden_dim,
    num_layers=num_layers,
    num_heads=num_heads,
    dropout=dropout,
)

# Cross-entropy loss, optimised with Adam over all model parameters
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [None]:
import matplotlib.pyplot as plt

def train_model(model, criterion, optimizer, train_loader, val_loader, num_epochs=10):
    """Train `model`, validating after every epoch, with early stopping.

    Training stops at the first epoch whose validation loss does not improve on
    the best value seen so far. In that case the weights of the best epoch are
    restored, so the returned model is the one with the lowest validation loss
    rather than the one from the epoch that got worse.

    One summary line is printed per epoch and the train/validation loss curves
    are plotted when training ends.

    Args:
        model: the nn.Module to train (modified in place).
        criterion: loss called as criterion(outputs, integer_labels).
        optimizer: optimizer bound to the model's parameters.
        train_loader: iterable of (inputs, labels) training batches.
        val_loader: iterable of (inputs, labels) validation batches.
        num_epochs: maximum number of epochs.

    Returns:
        The same `model` object that was passed in.
    """
    # Run on whichever device the model already lives on, so a model moved to
    # the GPU is not fed CPU tensors.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    train_losses = []
    val_losses = []
    best_val_loss = float('inf')
    best_state = None  # copy of the weights at the best validation loss

    for epoch in range(num_epochs):
        model.train()
        train_loss = 0.0
        for inputs, labels in train_loader:
            inputs = inputs.to(device)
            labels = labels.to(device).long()
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            # Weight the batch loss by batch size so the epoch average is per sample
            train_loss += loss.item() * inputs.size(0)

        avg_train_loss = train_loss / len(train_loader.dataset)
        train_losses.append(avg_train_loss)

        model.eval()
        val_loss = 0.0
        correct = 0
        total = 0
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs = inputs.to(device)
                labels = labels.to(device).long()
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item() * inputs.size(0)
                predicted = outputs.argmax(dim=1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        avg_val_loss = val_loss / len(val_loader.dataset)
        val_losses.append(avg_val_loss)
        val_accuracy = correct / total

        print(f"Epoch {epoch+1}/{num_epochs} | Train Loss: {avg_train_loss:.4f} | Val Loss: {avg_val_loss:.4f} | Val Acc: {val_accuracy:.4f}")

        # Early stopping
        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            # clone() is needed: state_dict() tensors alias the live parameters
            best_state = {
                name: tensor.detach().clone()
                for name, tensor in model.state_dict().items()
            }
        else:
            print("Early stopping")
            # The current epoch is worse than the best one: roll back to it
            if best_state is not None:
                model.load_state_dict(best_state)
            break

    # Plotting the losses
    plt.figure(figsize=(10, 5))
    plt.plot(train_losses, label='Train Loss')
    plt.plot(val_losses, label='Validation Loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.show()

    return model


In [None]:
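# NOTE: legacy draft of train_model without an epoch loop, kept commented out for
# reference only. It is superseded by the train_model defined above and is never executed.
# def train_model(model, criterion, optimizer, train_loader, num_epochs=10):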

#     model.train()
#     train_loss = 0.0
#     for inputs, labels in train_loader:
#       optimizer.zero_grad()
#       outputs = model(inputs)
#       loss = criterion(outputs, labels.long())
#       loss.backward()
#       optimizer.step()
#       train_loss += loss.item() * inputs.size(0)
#     model.eval()
#     val_loss = 0.0
#     correct = 0
#     total = 0
#     with torch.no_grad():
#       for inputs, labels in val_loader:
#         outputs = model(inputs)
#         loss = criterion(outputs, labels.long())
#         val_loss += loss.item() * inputs.size(0)
#         _, predicted = torch.max(outputs.data, 1)
#         total += labels.size(0)
#         correct += (predicted == labels).sum().item()
#     avg_train_loss = train_loss / len(train_loader.dataset)
#     avg_val_loss = val_loss / len(val_loader.dataset)
#     val_accuracy = correct / total

#     print(f"Epoch {epoch+1}/{num_epochs} | Train Loss: {avg_train_loss:.4f} | Val Loss: {avg_val_loss:.4f} | Val Acc: {val_accuracy:.4f}")
#   return model


# Step 6: Evaluate the model on the test set


In [None]:
def evaluate_model(model, criterion, test_loader):
    """Compute the average loss and accuracy of `model` over `test_loader`.

    The result is printed with a "Test" label and the accuracy is returned.

    Args:
        model: the nn.Module to evaluate (switched to eval mode).
        criterion: loss called as criterion(outputs, integer_labels).
        test_loader: iterable of (inputs, labels) batches to evaluate on.

    Returns:
        Fraction of correctly classified samples, between 0 and 1.
    """
    # Evaluate on whichever device the model already lives on
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    model.eval()
    test_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        # Iterate the loader that was passed in, not a global one
        for inputs, labels in test_loader:
            inputs = inputs.to(device)
            labels = labels.to(device).long()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            # Weight the batch loss by batch size so the average is per sample
            test_loss += loss.item() * inputs.size(0)
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    # Normalise by the number of samples actually seen
    avg_test_loss = test_loss / total
    test_accuracy = correct / total
    print(f"Test Loss: {avg_test_loss:.4f} | Test Acc: {test_accuracy:.4f}")
    return test_accuracy


# Step 7: Iterate and fine-tune if necessary



In [None]:
# Based on the evaluation results, adjust hyperparameters, change model architecture, 
# or try different preprocessing techniques.

In [None]:
# Grid of hyperparameter values to try (every combination is trained once)
hidden_dims = [64, 128, 256]
num_layers_list = [2, 3, 4]
num_heads_list = [4, 8, 16]
dropouts = [0.1, 0.2, 0.3]

# Best model found so far and its validation accuracy
best_model = None
best_accuracy = 0.0

# The loss is shared by all runs; a fresh optimizer is created per model
criterion = nn.CrossEntropyLoss()

# Iterate over all combinations of hyperparameters
for hidden_dim in hidden_dims:
    for num_layers in num_layers_list:
        for num_heads in num_heads_list:
            for dropout in dropouts:
                # Create and train a new model with this set of hyperparameters
                model = TransformerModel(input_dim, output_dim, hidden_dim, num_layers, num_heads, dropout)
                optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
                # train_model needs the validation loader for early stopping
                model = train_model(model, criterion, optimizer, train_loader, val_loader)

                # Select on the validation set: choosing hyperparameters on the test
                # set would make the final test score optimistically biased.
                # (evaluate_model labels its printout "Test" whatever loader it gets.)
                accuracy = evaluate_model(model, criterion, val_loader)

                # If this model is better than the current best, update the best model
                if accuracy > best_accuracy:
                    best_model = model
                    best_accuracy = accuracy

# Touch the held-out test set exactly once, for the selected model only
if best_model is not None:
    test_accuracy = evaluate_model(best_model, criterion, test_loader)


Epoch 1/10 | Train Loss: 0.3763 | Val Loss: 0.2195 | Val Acc: 0.9206
Epoch 2/10 | Train Loss: 0.2379 | Val Loss: 0.1956 | Val Acc: 0.9298
Epoch 3/10 | Train Loss: 0.2257 | Val Loss: 0.1956 | Val Acc: 0.9317
Epoch 4/10 | Train Loss: 0.2227 | Val Loss: 0.2046 | Val Acc: 0.9306
Epoch 5/10 | Train Loss: 0.2134 | Val Loss: 0.1885 | Val Acc: 0.9331
Epoch 6/10 | Train Loss: 0.2146 | Val Loss: 0.1792 | Val Acc: 0.9357
Epoch 7/10 | Train Loss: 0.2072 | Val Loss: 0.1971 | Val Acc: 0.9339
Epoch 8/10 | Train Loss: 0.2106 | Val Loss: 0.1816 | Val Acc: 0.9372
Epoch 9/10 | Train Loss: 0.2018 | Val Loss: 0.1865 | Val Acc: 0.9320
Epoch 10/10 | Train Loss: 0.2033 | Val Loss: 0.2010 | Val Acc: 0.9284
Test Loss: 0.2009 | Test Acc: 0.9284
Epoch 1/10 | Train Loss: 0.4067 | Val Loss: 0.2013 | Val Acc: 0.9317
Epoch 2/10 | Train Loss: 0.2469 | Val Loss: 0.1933 | Val Acc: 0.9342
Epoch 3/10 | Train Loss: 0.2311 | Val Loss: 0.1991 | Val Acc: 0.9298
Epoch 4/10 | Train Loss: 0.2260 | Val Loss: 0.1891 | Val Acc: 0.9