### Finding Waldo using AI
#### Where is Waldo?

In [1]:
import os
import torch
import random
import cupy as np
from PIL import Image
from tqdm import tqdm
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
from torchvision import transforms
from IPython.display import clear_output
import torch.optim.lr_scheduler as lr_scheduler
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader, SubsetRandomSampler

from AI_model import WaldoRecognizer #Local file

# Seed PyTorch's RNG; the same seed is reused for the train/valid/test split.
seed = 300900
torch.manual_seed(seed)

# Pick the compute device. `device` must exist on every machine because later
# cells call `.to(device)`; previously it was only defined when CUDA was found,
# which raised NameError on CPU-only hosts.
if torch.cuda.is_available():
    torch.set_default_device("cuda:0")
    torch.cuda.set_device("cuda:0")
    device = "cuda:0"
else:
    device = "cpu"

In [None]:
class WaldoDataset(Dataset):
    """Binary image dataset: label 1 for Waldo images, label 0 for the rest.

    Every entry found directly inside ``waldo_folder`` is labelled 1 and every
    entry inside ``not_waldo_folder`` is labelled 0.

    Args:
        waldo_folder: Directory holding the positive (Waldo) images.
        not_waldo_folder: Directory holding the negative images.
        transform: Optional callable applied to the RGB PIL image. When
            omitted, ``ToTensor`` followed by an identity ``Normalize`` is
            built the first time an item is requested.
    """

    def __init__(self, waldo_folder="Data/Waldo", not_waldo_folder="Data/NotWaldo", transform=None):
        self.waldo_folder = waldo_folder
        self.notWaldo_folder = not_waldo_folder
        self.transform = transform
        # (path, label) pairs: all Waldo files first, then all non-Waldo files.
        self.image_files = (
            self._list_folder(self.waldo_folder, 1)
            + self._list_folder(self.notWaldo_folder, 0)
        )

    @staticmethod
    def _list_folder(folder, label) -> list:
        """Return ``(path, label)`` for every entry of *folder*, sorted by name."""
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # index -> file mapping (and therefore the seeded split) reproducible.
        return [(os.path.join(folder, name), label) for name in sorted(os.listdir(folder))]

    @staticmethod
    def _default_transform():
        """Build the default PIL-image -> tensor pipeline."""
        # Augmentations tried previously and currently disabled: random
        # horizontal/vertical flip, rotation up to 30 degrees, random grayscale
        # (p=0.2), random perspective, random resized crop to 64 px, random
        # Gaussian blur, and normalisation with the ImageNet means/stds.
        return transforms.Compose([
            transforms.ToTensor(),
            # mean 0 / std 1 leaves the values produced by ToTensor unchanged.
            transforms.Normalize(mean=[0] * 3, std=[1] * 3),
        ])

    def __len__(self) -> int:
        """Return the total number of images (both classes)."""
        return len(self.image_files)

    def __getitem__(self, idx) -> tuple:
        """Load image *idx* and return ``(transformed_image, label)``."""
        path, label = self.image_files[idx]

        # The context manager closes the file handle once the pixels are read.
        # Converting to RGB guarantees the 3 channels Normalize expects, even
        # for grayscale, palette or RGBA files.
        with Image.open(path) as image:
            image = image.convert("RGB")

        # Build the default pipeline once instead of on every item.
        if self.transform is None:
            self.transform = self._default_transform()

        return self.transform(image), label

    def getLabel(self, idx):
        """Return the label (1 = Waldo, 0 = not Waldo) without loading the image."""
        return self.image_files[idx][1]

In [None]:
def show_images(images):
    """Display dataset samples side by side, one subplot per sample.

    Args:
        images: Sequence of ``(image_tensor, label)`` pairs, such as items
            returned by ``WaldoDataset``. An empty sequence draws nothing.
    """
    if not images:
        return

    count = len(images)
    # One column per sample (the grid used to be fixed at 5, which overflowed
    # for longer inputs). squeeze=False keeps `axes` two-dimensional even when
    # there is a single sample. 3 inches per column matches the old 15x6 figure.
    fig, axes = plt.subplots(1, count, figsize=(3 * count, 6), squeeze=False)
    to_image = transforms.ToPILImage()

    for ax, sample in zip(axes[0], images):
        ax.imshow(to_image(sample[0]))
        ax.set_title(f"Waldo : {sample[1]}")
        ax.axis("off")

    plt.tight_layout()
    plt.show()

In [None]:
# Index every image under the dataset's default Waldo / NotWaldo folders.
dataset = WaldoDataset()

In [None]:
# Preview up to 5 distinct random samples. The standard-library RNG is enough
# for a handful of indices (no GPU array round-trip), sampling without
# replacement avoids showing the same image twice, and min() keeps this
# working for datasets with fewer than 5 images.
random_array = random.sample(range(len(dataset)), min(5, len(dataset)))
images = [dataset[i] for i in random_array]
show_images(images)

In [None]:
# Mini-batch size shared by the three loaders.
batch_size = 2 ** 6
print(batch_size)

# Hold out 20% of the indices for testing, then split 40% of what is left off
# for validation (roughly 48% train / 32% validation / 20% test overall).
train_indices, test_indices = train_test_split(
    range(len(dataset)), test_size=0.2, random_state=seed
)
train_indices, valid_indices = train_test_split(
    train_indices, test_size=0.4, random_state=seed
)

# One sampler per subset; each draws only from its own indices.
train_sampler, valid_sampler, test_sampler = (
    SubsetRandomSampler(subset)
    for subset in (train_indices, valid_indices, test_indices)
)

# All loaders wrap the same dataset object; the sampler decides what they see.
train_loader, valid_loader, test_loader = (
    DataLoader(dataset, batch_size=batch_size, sampler=sampler)
    for sampler in (train_sampler, valid_sampler, test_sampler)
)

In [None]:
# Instantiate the classifier imported from the local AI_model module and move
# it to the selected device.
network = WaldoRecognizer().to(device)
# Cross-entropy loss applied to the network outputs and the dataset's 0/1 labels.
loss_function = nn.CrossEntropyLoss()

In [None]:
# Early-stopping state, read and updated by the training loop.
patience = 5  # stop once this many consecutive epochs bring no new best validation loss
counter = 0  # epochs elapsed since the last improvement
best_val_loss = float('inf')  # lowest validation loss seen so far

In [None]:
# --- Optimiser and learning-rate schedule ---------------------------------
lr = 1e-3
weight_decay = 1e-4
# momentum = 0.95 # Used with SGD

optimizer = optim.Adam(network.parameters(), lr=lr, weight_decay=weight_decay)

# Multiply the learning rate by `gamma` every `step_size` epochs.
step_size = 2
gamma = 0.75
scheduler = lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=gamma)


def _plot_training_progress(losses, accuracies):
    """Redraw the per-batch loss (left axis) and accuracy (right axis) curves."""
    clear_output(wait=True)
    fig, ax1 = plt.subplots()

    color = 'tab:red'
    ax1.set_xlabel('Iterations')
    ax1.set_ylabel('Loss', color=color)
    ax1.plot(losses, label='Training Loss', color=color)
    ax1.tick_params(axis='y', labelcolor=color)

    ax2 = ax1.twinx()
    color = 'tab:blue'
    ax2.set_ylabel('Accuracy', color=color)
    ax2.plot(accuracies, label='Training Accuracy', color=color)
    ax2.tick_params(axis='y', labelcolor=color)

    fig.tight_layout()
    plt.title('Dynamic Loss and Accuracy Plot')
    plt.show()
    # A new figure is created for every batch; close it so figures do not
    # pile up in memory on backends that keep them open after show().
    plt.close(fig)


epochs = 20
train_losses = []      # one loss value per training batch
train_accuracies = []  # one accuracy value per training batch
for epoch in range(epochs):

    # ---- Training pass ----
    network.train()
    with tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs} - Training") as t:
        for data, target in t:
            # Move both inputs and labels to the model's device explicitly
            # instead of relying on the global default device.
            data = data.to(device)
            target = target.to(device)

            optimizer.zero_grad()
            output = network(data)
            loss = loss_function(output, target)
            loss.backward()
            optimizer.step()

            # Accuracy of this batch only (the curves are per-iteration).
            predicted = output.argmax(dim=1)
            accuracy = (predicted == target).sum().item() / target.size(0)

            train_losses.append(loss.item())
            train_accuracies.append(accuracy)

            t.set_postfix(current_loss=loss.item(), accuracy=accuracy)
            _plot_training_progress(train_losses, train_accuracies)

    # ---- Validation pass ----
    network.eval()
    val_loss = 0.0
    # Start from zero: these counters previously carried over the last
    # training batch, which skewed the reported validation accuracy.
    total = 0
    correct = 0
    with torch.no_grad(), tqdm(valid_loader, desc=f"Epoch {epoch+1}/{epochs} - Validation") as v:
        for data, target in v:
            data = data.to(device)
            target = target.to(device)
            output = network(data)
            loss = loss_function(output, target)
            # The loss is a batch mean; weight it by the batch size so a
            # short final batch does not count as much as a full one.
            val_loss += loss.item() * target.size(0)
            predicted = output.argmax(dim=1)
            total += target.size(0)
            correct += (predicted == target).sum().item()
            v.set_postfix(val_accuracy=(correct / total))

    # Average over the validation samples actually seen. (valid_loader.dataset
    # is the whole dataset, not the validation subset, so its length is the
    # wrong denominator.)
    val_loss /= max(total, 1)
    val_accuracy = correct / total if total else 0.0

    # ---- Early stopping / checkpointing ----
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        counter = 0
        torch.save(network.state_dict(), 'best_model.pth')
    else:
        counter += 1

    if counter >= patience:
        print(f'Early stopping at epoch {epoch+1}, best validation loss: {best_val_loss}')
        break
    scheduler.step()

    print(f'Epoch {epoch+1}/{epochs}, '
          f'Val Accuracy: {(100 * val_accuracy):.2f}%')

In [None]:
# Evaluate on the held-out test split and keep every prediction for display.
# NOTE(review): this scores the weights currently in memory (the last epoch),
# not the checkpoint written to 'best_model.pth' — load it first if the best
# checkpoint is what should be reported.
good_guess = 0
total = 0
predictions = []  # (image as H x W x C array, predicted label, true label)

# Make sure layers such as dropout/batch-norm run in inference mode even when
# this cell is executed on its own.
network.eval()
with torch.no_grad():
    for data, target in tqdm(test_loader):
        data = data.to(device)
        target = target.to(device)
        predicted = network(data).argmax(dim=1)

        total += target.size(0)
        good_guess += (predicted == target).sum().item()

        for img, guess, truth in zip(data.cpu(), predicted.cpu().numpy(), target.cpu().numpy()):
            # Channels-first tensor -> channels-last array for matplotlib.
            predictions.append((img.numpy().transpose((1, 2, 0)), guess, truth))

# The message ends with a percent sign, so report a percentage rather than the
# raw 0-1 fraction that was printed before.
test_accuracy = 100 * good_guess / total if total else 0.0
print("%.4f%% of accuracy for this model." % test_accuracy)

In [None]:
def display_images(images):
    """Show test images in one row, titled with predicted and true labels.

    Args:
        images: Sequence of ``(image, prediction, target)`` triples, where
            ``image`` is anything ``Axes.imshow`` accepts. An empty sequence
            draws nothing.
    """
    if not images:
        return

    num_images = len(images)
    # squeeze=False keeps `axes` a 2-D array, so a single image no longer
    # breaks the indexing below (subplots returns a bare Axes otherwise).
    fig, axes = plt.subplots(1, num_images, figsize=(20, 5), squeeze=False)

    for ax, (image, prediction, target) in zip(axes[0], images):
        ax.imshow(image)
        ax.set_title(f"Predicted: {prediction} \n Target: {target}")
        ax.axis('off')

    plt.show()

In [None]:
# Show up to 10 random test predictions. random.sample raises ValueError when
# asked for more items than exist, so cap the request at what is available.
random_samples = random.sample(predictions, min(10, len(predictions)))

# Nothing to draw when the test split produced no predictions.
if random_samples:
    display_images(random_samples)