In [None]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torch.utils.data.sampler import SubsetRandomSampler
from torchvision import datasets
from torchvision import transforms
from torchvision.io import decode_image

# from google.colab import drive
# drive.mount('/content/drive')

In [None]:
# Class index -> human-readable class name (position in the list is the index).
labels_map = dict(enumerate([
    "Anthracnose",
    "Banana Fruit-Scarring Beetle",
    "Banana Skipper Damage",
    'Banana Split Peel',
    "Black and Yellow Sigatoka",
    "Chewing insect damage on banana leaf",
    "Healthy Banana",
    "Healthy Banana leaf",
    "Panama Wilt Disease",
]))

# Run on the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
# Data locations under the Colab Google Drive mount point (/content/drive).
# Change both paths to match your own environment.
# dataset_path is read with '/train', '/validation' and '/test' appended;
# build_path is read as a single image folder.
dataset_path = '/content/drive/MyDrive/bananadata/AUGMENTED/data'
build_path = '/content/drive/MyDrive/bananadata/build/augs'

In [None]:

# Hyperparameters for the Vision Transformer experiment.
BATCH_SIZE = 64 # mini-batch size
EPOCHS = 10 # TODO: try increasing to 30
LEARNING_RATE = 0.01 # learning rate handed to the optimizer
PATCH_SIZE = 16 # side length of each square patch (conv kernel and stride in PatchEmbedding)
NUM_CLASSES = 9 # output size of the classification head; labels_map has 9 entries
IMAGE_SIZE = 224 # side length the positional embedding is sized for; the transform resizes images to 224x224
CHANNELS = 3 # input channels of the patch-projection convolution
EMBED_DIM = 256 # width of every token inside the transformer
NUM_HEADS = 8 # TODO: try increasing; nn.MultiheadAttention requires EMBED_DIM to be divisible by NUM_HEADS
DEPTH = 6 # number of stacked TransformerEncoderLayer blocks
MLP_DIM = 512 # hidden width of the MLP inside each encoder block
DROP_RATE = 0.1 # dropout probability used in attention and in the MLP

In [None]:
# Preprocessing applied to every image before it reaches the model.
# Normalising with mean 0.5 / std 0.5 per channel maps ToTensor's [0, 1]
# output to [-1, 1].
normalize = transforms.Normalize(
        mean=[0.5, 0.5, 0.5],
        std=[0.5, 0.5, 0.5],
    )
transform = transforms.Compose([
            # Resize to IMAGE_SIZE rather than a literal 224: the model's
            # positional embedding is sized from IMAGE_SIZE, so the two must
            # always agree or the forward pass fails on a shape mismatch.
            transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
            transforms.ToTensor(),
            normalize
    ])

In [None]:
# One ImageFolder per split, all sharing the same preprocessing pipeline.
dataset_train = datasets.ImageFolder(
    root=f"{dataset_path}/train",
    transform=transform,
)
dataset_val = datasets.ImageFolder(
    root=f"{dataset_path}/validation",
    transform=transform,
)
dataset_test = datasets.ImageFolder(
    root=f"{dataset_path}/test",
    transform=transform,
)
# Separate image folder read from build_path (evaluated on its own later on).
build_train = datasets.ImageFolder(
    root=build_path,
    transform=transform,
)

In [None]:
# Batch iterators for each dataset. The batch size comes from the BATCH_SIZE
# constant (the lowercase `batch_size` name was never defined).
# Only the training loader is shuffled: evaluation metrics do not depend on
# sample order, and a fixed order keeps the collected predictions reproducible.
load_train = DataLoader(dataset_train, batch_size=BATCH_SIZE, shuffle=True)
load_val = DataLoader(dataset_val, batch_size=BATCH_SIZE, shuffle=False)
load_test = DataLoader(dataset_test, batch_size=BATCH_SIZE, shuffle=False)
load_build = DataLoader(build_train, batch_size=BATCH_SIZE, shuffle=False)

# VISION TRANSFORMER

In [None]:
class PatchEmbedding(nn.Module):
    """Convert a batch of images into a sequence of patch tokens.

    A convolution whose kernel size and stride both equal ``patch_size``
    projects each non-overlapping patch to ``embed_dim`` channels. A learnable
    class token is prepended to the patch sequence and a learnable positional
    embedding is added to every token.

    Args:
        img_size: Image side length used to size the positional embedding
            (``(img_size // patch_size) ** 2`` patch positions plus one).
        patch_size: Side length of each square patch.
        in_channels: Number of channels in the input images.
        embed_dim: Width of each output token.
    """

    def __init__(self,
                 img_size,
                 patch_size,
                 in_channels,
                 embed_dim):
        super().__init__()
        self.patch_size = patch_size
        # kernel == stride, so each patch is seen exactly once by the projection.
        self.proj = nn.Conv2d(in_channels, embed_dim,
                              kernel_size=patch_size, stride=patch_size)
        grid_side = img_size // patch_size
        seq_len = grid_side * grid_side + 1  # +1 slot for the class token
        self.cls_token = nn.Parameter(torch.randn(1, 1, embed_dim))
        self.pos_embed = nn.Parameter(torch.randn(1, seq_len, embed_dim))

    def forward(self, x: torch.Tensor):
        """Return tokens of shape (B, 1 + N, E) for an image batch ``x``."""
        batch = x.shape[0]
        patches = self.proj(x)  # (B, E, H/P, W/P)
        tokens = patches.flatten(start_dim=2).permute(0, 2, 1)  # (B, N, E)
        cls = self.cls_token.expand(batch, -1, -1)  # one class token per sample
        return torch.cat([cls, tokens], dim=1) + self.pos_embed

In [None]:
class MLP(nn.Module):
    """Two-layer feed-forward network: Linear -> GELU -> Dropout -> Linear -> Dropout.

    Args:
        in_features: Width of the input; the output has the same width.
        hidden_features: Width of the hidden layer.
        drop_rate: Dropout probability applied after each linear layer.
    """

    def __init__(self,
                 in_features,
                 hidden_features,
                 drop_rate):
        super().__init__()
        self.fc1 = nn.Linear(in_features=in_features,
                             out_features=hidden_features)
        self.fc2 = nn.Linear(in_features=hidden_features,
                             out_features=in_features)
        self.dropout = nn.Dropout(drop_rate)

    def forward(self, x):
        """Apply the feed-forward network to the last dimension of ``x``."""
        # Use nn.functional directly: the notebook never imports an `F` alias,
        # so the previous `F.gelu` call raised NameError on the first forward pass.
        x = self.dropout(nn.functional.gelu(self.fc1(x)))
        x = self.dropout(self.fc2(x))
        return x

In [None]:
class TransformerEncoderLayer(nn.Module):
    """Pre-norm transformer encoder block.

    Applies LayerNorm -> multi-head self-attention and LayerNorm -> MLP, each
    wrapped in a residual connection.

    Args:
        embed_dim: Token width.
        num_heads: Number of attention heads.
        mlp_dim: Hidden width of the feed-forward MLP.
        drop_rate: Dropout probability for attention and the MLP.
    """

    def __init__(self, embed_dim, num_heads, mlp_dim, drop_rate):
        super().__init__()
        self.norm1 = nn.LayerNorm(embed_dim)
        self.attn = nn.MultiheadAttention(embed_dim, num_heads, dropout=drop_rate, batch_first=True)
        self.norm2 = nn.LayerNorm(embed_dim)
        self.mlp = MLP(embed_dim, mlp_dim, drop_rate)

    def forward(self, x):
        """Return the transformed tokens; input and output are (B, N, E)."""
        # Normalise once and reuse the same tensor as query, key and value
        # instead of running the identical LayerNorm three times.
        normed = self.norm1(x)
        # The attention weights are never used, so skip computing them.
        attn_out, _ = self.attn(normed, normed, normed, need_weights=False)
        x = x + attn_out
        x = x + self.mlp(self.norm2(x))
        return x

In [None]:
class VisionTransformer(nn.Module):
    """Vision Transformer classifier.

    Pipeline: patch embedding -> ``depth`` encoder blocks -> LayerNorm ->
    linear head applied to the class token (sequence position 0).

    Args:
        img_size: Image side length passed to the patch embedding.
        patch_size: Side length of each square patch.
        in_channels: Number of input image channels.
        num_classes: Number of output logits.
        embed_dim: Token width.
        depth: Number of encoder blocks.
        num_heads: Attention heads per block.
        mlp_dim: Hidden width of each block's MLP.
        drop_rate: Dropout probability inside each block.
    """

    def __init__(self, img_size, patch_size, in_channels, num_classes, embed_dim, depth, num_heads, mlp_dim, drop_rate):
        super().__init__()
        self.patch_embed = PatchEmbedding(img_size, patch_size, in_channels, embed_dim)
        blocks = []
        for _ in range(depth):
            blocks.append(TransformerEncoderLayer(embed_dim, num_heads, mlp_dim, drop_rate))
        self.encoder = nn.Sequential(*blocks)
        self.norm = nn.LayerNorm(embed_dim)
        self.head = nn.Linear(embed_dim, num_classes)

    def forward(self, x):
        """Return class logits of shape (B, num_classes) for an image batch."""
        tokens = self.encoder(self.patch_embed(x))
        # Normalise every token, then keep only the class token at position 0.
        cls_repr = self.norm(tokens)[:, 0]
        return self.head(cls_repr)


In [None]:
# Build the ViT from the notebook-level hyperparameters and move it to `device`.
model = VisionTransformer(
    img_size=IMAGE_SIZE,
    patch_size=PATCH_SIZE,
    in_channels=CHANNELS,
    num_classes=NUM_CLASSES,
    embed_dim=EMBED_DIM,
    depth=DEPTH,
    num_heads=NUM_HEADS,
    mlp_dim=MLP_DIM,
    drop_rate=DROP_RATE,
)
model = model.to(device)

# Per-epoch history, filled in by the training loop.
train_losses, val_losses = [], []
train_accuracies, val_accuracies = [], []

# Loss to minimise and the optimizer that updates the model's parameters.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

# Number of batches in one pass over the training loader.
total_step = len(load_train)

In [None]:
# Train for EPOCHS epochs, validating after each one.
# Appends one entry per epoch to train_losses, train_accuracies, val_losses
# and val_accuracies.
for epoch in range(EPOCHS):
    # ---- Training ----
    model.train()  # enable dropout (validation below switches it off)
    running_loss = 0.0
    correct = 0
    total = 0
    for i, (images, labels) in enumerate(load_train):
        # Move tensors to the configured device
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Weight the batch-mean loss by batch size so the epoch average is
        # exact even when the last batch is smaller.
        running_loss += loss.item() * labels.size(0)
        correct += (outputs.argmax(dim=1) == labels).sum().item()
        total += labels.size(0)

    train_loss = running_loss / len(load_train.dataset)
    train_losses.append(train_loss)
    train_accuracies.append(100 * correct / total)

    # Report the epoch-average loss rather than the (noisy) last-batch loss.
    print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                   .format(epoch+1, EPOCHS, i+1, total_step, train_loss))

    # ---- Validation ----
    model.eval()  # disable dropout so validation metrics are deterministic
    running_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in load_val:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            # Compute the loss on the validation outputs themselves; the
            # training `loss` tensor must not be reused here.
            running_loss += criterion(outputs, labels).item() * labels.size(0)
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    val_loss = running_loss / len(load_val.dataset)
    val_losses.append(val_loss)
    val_acc = 100 * correct / total
    val_accuracies.append(val_acc)
    # Use the counted number of samples instead of a hard-coded dataset size.
    print('Accuracy of the network on the {} validation images: {} %'.format(total, val_acc))

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix
import numpy as np

In [None]:
# Training curves: loss on the left, accuracy on the right.
fig, (ax_loss, ax_acc) = plt.subplots(1, 2, figsize=(12, 4))

ax_loss.plot(train_losses, label='Training Loss')
ax_loss.plot(val_losses, label='Validation Loss')
ax_loss.set_xlabel('Epoch')
ax_loss.set_ylabel('Loss')
ax_loss.set_title('Training and Validation Loss')
ax_loss.legend()

# The 1x2 layout previously left this panel empty even though the
# per-epoch accuracies are collected during training.
if train_accuracies:  # only drawn when the training loop recorded it
    ax_acc.plot(train_accuracies, label='Training Accuracy')
ax_acc.plot(val_accuracies, label='Validation Accuracy')
ax_acc.set_xlabel('Epoch')
ax_acc.set_ylabel('Accuracy (%)')
ax_acc.set_title('Accuracy per Epoch')
ax_acc.legend()

fig.tight_layout()
plt.show()

In [None]:
# Evaluate on the test split. Leaves `correct` / `total` set to the test
# counts and fills test_y_true / test_y_pred with one class index per image.
model.eval()
test_y_true = []
test_y_pred = []

correct = 0
total = 0
with torch.no_grad():
    # ImageFolder batches are (images, labels) pairs, not dicts, so unpack
    # them positionally.
    for images, labels in load_test:
        images = images.to(device)
        labels = labels.to(device)
        predicted = model(images).argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        # .tolist() yields plain Python ints, which keeps these lists
        # JSON-serialisable when the results are saved later.
        test_y_true.extend(labels.cpu().tolist())
        test_y_pred.extend(predicted.cpu().tolist())

In [None]:
# Test accuracy, using the sample count accumulated by the evaluation loop.
print('Accuracy of the network on the {} test images: {} %'.format(total, 100 * correct / total))

# Pin the matrix to every known class index so it is always
# len(labels_map) x len(labels_map) and the tick labels line up, even if a
# class never appears in the test labels or predictions.
# NOTE(review): assumes labels_map indices match the dataset's class indices - confirm.
class_ids = list(labels_map.keys())
class_names = list(labels_map.values())
cm = confusion_matrix(test_y_true, test_y_pred, labels=class_ids)

# Plot Confusion Matrix
plt.figure(figsize=(10, 8))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=class_names, yticklabels=class_names)
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.title('Confusion Matrix')
plt.show()

In [None]:
# Evaluate on the build image folder and collect per-image labels and
# predictions so they can be saved with the other results.
model.eval()
build_y_true = []
build_y_pred = []

correct = 0
total = 0
with torch.no_grad():
    for images, labels in load_build:
        images = images.to(device)
        labels = labels.to(device)
        predicted = model(images).argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        # Plain Python ints keep these lists JSON-serialisable.
        build_y_true.extend(labels.cpu().tolist())
        build_y_pred.extend(predicted.cpu().tolist())

# Report the counted number of images and name the set that was evaluated.
print('Accuracy of the network on the {} build images: {} %'.format(total, 100 * correct / total))

In [None]:
import json
import os

# Define the directory to save the results
results_dir = "/content/drive/MyDrive/bananadata/results/vit"
os.makedirs(results_dir, exist_ok=True)

# Record the configuration that was actually used. The optimizer, loss and
# architecture names are read from the live objects so the saved metadata
# cannot drift from the code.
hyperparameters = {
    "learning_rate": LEARNING_RATE,
    "epoch": EPOCHS,
    "batch_size": BATCH_SIZE,
    "optimizer": type(optimizer).__name__,
    "loss_function": type(criterion).__name__,
    "model_architecture": type(model).__name__,
    "image_size": IMAGE_SIZE,
    "patch_size": PATCH_SIZE,
    "num_classes": NUM_CLASSES,
    "embed_dim": EMBED_DIM,
    "num_heads": NUM_HEADS,
    "depth": DEPTH,
    "mlp_dim": MLP_DIM,
    "drop_rate": DROP_RATE,
}

# Per-epoch curves plus per-image labels/predictions. Label lists are coerced
# to plain ints because json cannot serialise numpy integer scalars.
results = {
    "train_losses": train_losses,
    "val_losses": val_losses,
    "train_accuracies": train_accuracies,
    "val_accuracies": val_accuracies,
    "test_y_true": [int(v) for v in test_y_true],
    "test_y_pred": [int(v) for v in test_y_pred],
    "build_y_true": [int(v) for v in build_y_true],
    "build_y_pred": [int(v) for v in build_y_pred],
}

# Suffix for the output file names; bump it to keep earlier runs.
test_index = 1
# Save hyperparameters
hyperparameters_path = os.path.join(results_dir, f"hyperparameters{test_index}.json")
with open(hyperparameters_path, "w") as f:
    json.dump(hyperparameters, f, indent=4)
print(f"Hyperparameters saved to {hyperparameters_path}")

# Save results
results_path = os.path.join(results_dir, f"results{test_index}.json")
with open(results_path, "w") as f:
    json.dump(results, f)
print(f"Results saved to {results_path}")