In [None]:
# Imports
import os
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, random_split
import torchvision
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
from PIL import Image
import pillow_avif

In [None]:
# Load dataset
# Root directory of the image collection; later cells walk this tree and hand
# it to torchvision's ImageFolder.
image_path = r"C:\Users\jodis\ML\Project3\Images"

# Report the configured location and whether it is reachable before any file
# work starts, so a wrong path is obvious from the cell output.
path_exists = os.path.exists(image_path)
print("Image root path:", image_path)
print("Does the path exist?", path_exists)

In [None]:
# Convert type and transform image size
#
# Walk the image tree and re-encode every supported image as an RGB JPEG so the
# whole dataset shares a single on-disk format. An original file is deleted
# only after its JPEG replacement has been written successfully.

valid_extensions = {".avif", ".png", ".webp", ".jpg", ".jpeg"}

for folder, _subfolders, files in os.walk(image_path):
    for file in files:
        file_lower = file.lower()
        file_ext = os.path.splitext(file_lower)[1]

        if file_ext not in valid_extensions:
            continue

        full_path = os.path.join(folder, file)

        # New filename with .jpg extension
        new_filename = os.path.splitext(file)[0] + ".jpg"
        new_path = os.path.join(folder, new_filename)

        # If the .jpg already exists, skip to avoid redoing work. This is also
        # the branch taken by files that already are "<name>.jpg".
        if os.path.exists(new_path):
            continue

        try:
            # The context manager closes the source file as soon as the JPEG is
            # written; an open handle would otherwise leak and can prevent the
            # delete below on Windows.
            with Image.open(full_path) as img:
                img.convert("RGB").save(new_path, "JPEG", quality=95)
        except Exception as e:
            print(f"Failed to convert {full_path}: {e}")
            # new_path did not exist before this attempt, so anything there now
            # is a partial write. Remove it, otherwise the next run would treat
            # the broken file as "already converted" and skip the original.
            if os.path.exists(new_path):
                try:
                    os.remove(new_path)
                except OSError as cleanup_error:
                    print(f"Could not remove partial file {new_path}: {cleanup_error}")
            continue

        print(f"Converted: {full_path} -> {new_path}")

        # Delete original file only after successful save
        if full_path != new_path:
            try:
                os.remove(full_path)
                print(f"Deleted original file: {full_path}")
            except OSError as e:
                # The conversion itself succeeded; report the delete problem
                # separately instead of labelling it a failed conversion.
                print(f"Failed to delete original file {full_path}: {e}")

print("Image conversion step completed.\n")

# Per-channel statistics used to normalize the tensors (these are the values
# commonly quoted for ImageNet-pretrained torchvision models).
channel_mean = [0.485, 0.456, 0.406]
channel_std = [0.229, 0.224, 0.225]

# Preprocessing applied to every image read through full_dataset:
# random left/right flip -> fixed 500x500 size -> tensor -> normalization.
# Note: the random flip is part of this pipeline, so every sample drawn
# directly from full_dataset is randomly augmented.
preprocessing_steps = [
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.Resize((500, 500)),
    transforms.ToTensor(),
    transforms.Normalize(mean=channel_mean, std=channel_std),
]
transform = transforms.Compose(preprocessing_steps)

# ImageFolder takes the class label of each image from its sub-folder name.
full_dataset = datasets.ImageFolder(root=image_path, transform=transform)

print("Classes found:", full_dataset.classes)
print("Total images:", len(full_dataset))

In [None]:
# Train/valid/test split
#
# 70/20/10 random split of the image indices. The training subset keeps the
# augmenting `transform`; the validation and test subsets read the same files
# through a second ImageFolder whose transform omits the random flip, so
# evaluation metrics are deterministic and measured on un-augmented images.
torch.manual_seed(1)

dataset_size = len(full_dataset)
valid_size = int(0.2 * dataset_size)
test_size = int(0.1 * dataset_size)
# Whatever the integer truncation above leaves over goes to the training set.
train_size = dataset_size - valid_size - test_size

train_dataset, valid_split, test_split = random_split(full_dataset, [train_size, valid_size, test_size])

# Same preprocessing as `transform`, minus the random augmentation step.
eval_transform = transforms.Compose([
    step for step in transform.transforms
    if not isinstance(step, transforms.RandomHorizontalFlip)
])
eval_dataset = datasets.ImageFolder(root=image_path, transform=eval_transform)

# The split indices are only meaningful if both views list the files in the
# same order; fail loudly if the folder changed between the two scans.
assert eval_dataset.samples == full_dataset.samples, \
    "Image folder changed between scans; re-run the dataset cell."

valid_dataset = torch.utils.data.Subset(eval_dataset, valid_split.indices)
test_dataset = torch.utils.data.Subset(eval_dataset, test_split.indices)

print(f"Train size: {train_size}")
print(f"Validation size: {valid_size}")
print(f"Test size: {test_size}")

# DataLoaders
# Only the training loader shuffles; evaluation order does not matter.
batch_size = 32
train_dl = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
valid_dl = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False)
test_dl = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [None]:
# CNN architecture
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device:", device)

# The classifier head needs one output per class listed by the dataset.
num_classes = len(full_dataset.classes)
print("Number of classes:", num_classes)

def build_cnn(dropout_p=0.5):
    """Build the four-block convolutional classifier and move it to `device`.

    Each block is Conv2d(3x3, padding=1) -> ReLU -> MaxPool2d(2); the first
    two blocks are additionally followed by Dropout(p=dropout_p). The channel
    widths are 3 -> 32 -> 64 -> 128 -> 256. For the 500x500 inputs produced
    by this notebook's Resize step, the pooling layers shrink the spatial
    size 500 -> 250 -> 125 -> 62 (floor) -> 31. Global average pooling then
    reduces the 256 feature maps to a 256-vector, which a single linear layer
    maps to `num_classes` raw scores (no softmax is applied here).

    Args:
        dropout_p: Drop probability used by both dropout layers.

    Returns:
        An nn.Sequential with named sub-modules, placed on the module-level
        `device`.
    """
    # (in_channels, out_channels) for blocks 1..4
    channel_plan = [(3, 32), (32, 64), (64, 128), (128, 256)]
    # Only the first two blocks are regularized with dropout.
    blocks_with_dropout = (1, 2)

    net = nn.Sequential()
    for block_idx, (c_in, c_out) in enumerate(channel_plan, start=1):
        net.add_module(
            f'conv{block_idx}',
            nn.Conv2d(in_channels=c_in, out_channels=c_out, kernel_size=3, padding=1),
        )
        net.add_module(f'relu{block_idx}', nn.ReLU())
        net.add_module(f'pool{block_idx}', nn.MaxPool2d(kernel_size=2))
        if block_idx in blocks_with_dropout:
            net.add_module(f'dropout{block_idx}', nn.Dropout(p=dropout_p))

    # Global average pooling: 256 x H x W -> 256 x 1 x 1, then flatten to 256
    net.add_module('gap', nn.AdaptiveAvgPool2d((1, 1)))
    net.add_module('flatten', nn.Flatten())

    # Fully connected layer to num_classes
    net.add_module('fc', nn.Linear(channel_plan[-1][1], num_classes))

    return net.to(device)

# Check forward pass shape
# Push a dummy batch of four 500x500 RGB images (all ones) through a freshly
# built network; the printed shape should be [4, num_classes].
tmp_model = build_cnn(dropout_p=0.5)
x_dummy = torch.ones((4, 3, 500, 500))
x_dummy = x_dummy.to(device)

# No gradients are needed for a shape check.
with torch.no_grad():
    out_dummy = tmp_model(x_dummy)

print("Output shape (batch_size=4):", out_dummy.shape)

In [None]:
# Training function
def _run_epoch(model, data_loader, loss_fn, batch_device, optimizer=None):
    """Run one pass over `data_loader` and return (mean loss, accuracy).

    When `optimizer` is given the model is put in training mode and updated
    after every batch; otherwise the model is put in eval mode and gradients
    are disabled. Both metrics are averaged over len(data_loader.dataset).
    """
    is_training = optimizer is not None
    model.train(is_training)

    loss_sum = 0.0
    correct_sum = 0.0
    with torch.set_grad_enabled(is_training):
        for x_batch, y_batch in data_loader:
            x_batch = x_batch.to(batch_device)
            y_batch = y_batch.to(batch_device)

            pred = model(x_batch)  # shape: [batch_size, num_classes]
            loss = loss_fn(pred, y_batch)

            if is_training:
                # Clear gradients first so nothing left over from earlier
                # work leaks into this update.
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            # CrossEntropyLoss returns the batch mean; weight it by the batch
            # size so a smaller last batch is averaged correctly.
            loss_sum += loss.item() * y_batch.size(0)
            is_correct = (torch.argmax(pred, dim=1) == y_batch).float()
            correct_sum += is_correct.sum().item()

    num_samples = len(data_loader.dataset)
    return loss_sum / num_samples, correct_sum / num_samples


def train_model(model, num_epochs, train_dl, valid_dl, lr=0.001):
    """Train `model` with Adam and cross-entropy loss, validating every epoch.

    Batches are moved to the device the model's parameters live on, so the
    function does not depend on any module-level device variable.

    Args:
        model: Network mapping an input batch to per-class scores (logits).
        num_epochs: Number of full passes over `train_dl`.
        train_dl: DataLoader yielding (inputs, integer class labels) for training.
        valid_dl: DataLoader yielding (inputs, integer class labels) for validation.
        lr: Adam learning rate.

    Returns:
        Tuple of four lists, each of length `num_epochs`:
        (train loss, validation loss, train accuracy, validation accuracy).
    """
    loss_fn = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    # Follow the model's own placement rather than a global setting.
    model_device = next(model.parameters()).device

    loss_hist_train = []
    accuracy_hist_train = []
    loss_hist_valid = []
    accuracy_hist_valid = []

    for epoch in range(num_epochs):
        # Train
        train_loss, train_acc = _run_epoch(model, train_dl, loss_fn, model_device, optimizer)
        loss_hist_train.append(train_loss)
        accuracy_hist_train.append(train_acc)

        # Validation
        valid_loss, valid_acc = _run_epoch(model, valid_dl, loss_fn, model_device)
        loss_hist_valid.append(valid_loss)
        accuracy_hist_valid.append(valid_acc)

        print(f"Epoch {epoch+1}/{num_epochs}")
        print(f"Train_acc: {accuracy_hist_train[epoch]:.4f}")
        print(f"Val_acc: {accuracy_hist_valid[epoch]:.4f}")

    return loss_hist_train, loss_hist_valid, accuracy_hist_train, accuracy_hist_valid

In [None]:
# Hyperparameter tuning (multiple candidate CNNs)
#
# Train one fresh CNN per (learning rate, dropout) candidate and keep the one
# whose weights score highest on the validation set.
torch.manual_seed(1)

candidate_configs = [
    {"name": "model_lr_1e-3_drop_0.5", "lr": 1e-3, "dropout": 0.5},
    {"name": "model_lr_5e-4_drop_0.5", "lr": 5e-4, "dropout": 0.5},
    {"name": "model_lr_1e-3_drop_0.3", "lr": 1e-3, "dropout": 0.3},
    {"name": "model_lr_5e-4_drop_0.3", "lr": 5e-4, "dropout": 0.3},
    {"name": "model_lr_1e-4_drop_0.3", "lr": 1e-4, "dropout": 0.3},
]

num_epochs_tune = 10

history_by_name = {}
# Start below any achievable accuracy so the first candidate is always
# recorded, even if every candidate scores 0.0 on validation.
best_val_acc = float("-inf")
best_config = None
best_model_state = None

for config in candidate_configs:
    print(f"Training candidate: {config['name']}")

    # Build a fresh model for this config
    model = build_cnn(dropout_p=config["dropout"])

    # Train model
    hist = train_model(
        model=model,
        num_epochs=num_epochs_tune,
        train_dl=train_dl,
        valid_dl=valid_dl,
        lr=config["lr"]
    )

    history_by_name[config["name"]] = hist

    # Use final validation accuracy as score. hist[3] is the per-epoch
    # validation accuracy; the weights stored below are those of the last
    # epoch, so the last epoch's accuracy is the number that describes them.
    val_acc_final = hist[3][-1]
    print(f"Final validation accuracy for {config['name']}: {val_acc_final:.4f}")

    if val_acc_final > best_val_acc:
        best_val_acc = val_acc_final
        best_config = config
        # state_dict() hands back references to the live parameter tensors;
        # clone them so the stored snapshot cannot change afterwards.
        best_model_state = {
            key: tensor.detach().clone()
            for key, tensor in model.state_dict().items()
        }

print("\nBest candidate configuration:", best_config)
print(f"Best validation accuracy: {best_val_acc:.4f}")

In [None]:
# Rebuild best model and fine tune
# Recreate the winning architecture and restore the weights captured during
# the hyperparameter search.
best_dropout = best_config["dropout"]
final_model = build_cnn(dropout_p=best_dropout)
final_model.load_state_dict(best_model_state)

# Optionally, train a few more epochs on the same train/valid split
num_epochs_final = 0  # set > 0 if you want further training

# With the default of 0 this branch is skipped and the restored weights are
# used unchanged.
if num_epochs_final > 0:
    print("\nFine-tuning best model further...")
    _ = train_model(
        final_model,
        num_epochs_final,
        train_dl,
        valid_dl,
        lr=best_config["lr"],
    )

In [None]:
# Test-set evaluation
# Count how many test images final_model labels correctly and report the
# fraction over the whole test set.
final_model.eval()
test_correct = 0

# Inference only: no gradients are required.
with torch.no_grad():
    for x_batch, y_batch in test_dl:
        x_batch, y_batch = x_batch.to(device), y_batch.to(device)

        pred = final_model(x_batch)
        # The predicted class is the index of the largest score in each row.
        predicted_labels = pred.argmax(dim=1)

        is_correct = predicted_labels.eq(y_batch).float()
        test_correct += is_correct.sum().item()

test_accuracy = test_correct / len(test_dl.dataset)
print(f"Test accuracy: {test_accuracy:.4f}")

In [None]:
# Plot training history
# Show the loss and accuracy curves of the selected candidate side by side,
# training versus validation.
best_name = best_config["name"]
best_hist = history_by_name[best_name]

loss_hist_train, loss_hist_valid, acc_hist_train, acc_hist_valid = best_hist

# Epochs are numbered from 1 on the x-axis.
epochs_arr = np.arange(1, len(loss_hist_train) + 1)

fig, axes = plt.subplots(1, 2, figsize=(12, 4))

# (axis, train series, train label, validation series, validation label, y label)
panels = [
    (axes[0], loss_hist_train, 'Train loss', loss_hist_valid, 'Validation loss', 'Loss'),
    (axes[1], acc_hist_train, 'Train acc.', acc_hist_valid, 'Validation acc.', 'Accuracy'),
]

for ax, train_series, train_label, valid_series, valid_label, y_label in panels:
    ax.plot(epochs_arr, train_series, '-o', label=train_label)
    ax.plot(epochs_arr, valid_series, '--<', label=valid_label)
    ax.set_xlabel('Epoch')
    ax.set_ylabel(y_label)
    ax.legend()

plt.show()

In [None]:
# Save model
models_dir = "models"
# Create the output folder if needed; exist_ok avoids the separate
# check-then-create step and is a no-op when the folder is already there.
os.makedirs(models_dir, exist_ok=True)

save_path = os.path.join(models_dir, "Group_26_CNN_FullModel.ph")

# Save entire model (architecture + weights).
# This pickles the whole module object rather than just its state_dict, so
# loading it back unpickles arbitrary objects: only load files you trust.
torch.save(final_model, save_path)

print("Saved final model to:", save_path)

### Question 1
How did you create your dataset and determine split of official logo versus not official logo?

I built the dataset by collecting images from Google Images, including images from the school’s website, Etsy, and Pinterest. I then sorted the images into folders based on whether or not they were official logos. The unofficial folder contained other Arkansas logos that weren’t the official logo, Arkansas merchandise, and even pictures of pigs. I tried to include various colorways of the official logos and a good mix of different images in the unofficial folder.

### Question 2
Why did you choose the specific architecture for the final model?

For our project, we designed a model architecture tailored to binary logo classification: deciding whether or not an image contains the official logo. We implemented a convolutional neural network (CNN) and trained it with a cross-entropy loss function, which works directly with logits. Because the loss function applies the necessary normalization internally, our model does not require a final softmax layer. This setup allows the network to output raw prediction scores while still enabling stable and effective training. The architecture uses multiple convolution and pooling layers to extract increasingly abstract visual features of logos.

### Question 3
How did you monitor and mitigate overfitting?

Overfitting was monitored using training vs. validation accuracy and loss curves, as well as final test accuracy on previously unseen images. The model mitigates overfitting through dropout layers in the CNN, proper train/validation/test splitting, shuffling of training batches, and hyperparameter tuning of learning rate and dropout strength. These combined approaches ensure that the selected model is the one that generalizes best, not just the one that performs well on the training data.

### Question 4
What future efforts do you recommend to improve model performance?

To improve our project in the future, we could gather a bigger and more diverse dataset of logo and non-logo images to strengthen the model’s ability to generalize. Trying more advanced or specialized image-recognition architectures might also help boost accuracy. Additionally, fine-tuning our model on higher-quality images and removing the backgrounds could help the model focus on the logo itself.