In [1]:
import torch
import torchvision.models as models
from PIL import Image
import os
from torch.utils.data import DataLoader, Dataset
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt


In [None]:
vgg16 = models.vgg16(pretrained=True)

for param in vgg16.features.parameters():
    param.requires_grad = False

num_features = vgg16.classifier[-1].in_features

vgg16.classifier[-2] = nn.Linear(num_features, 2048)
vgg16.classifier[-1] = nn.Linear(2048, 2)



In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
vgg16.to(device)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [4]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(vgg16.classifier.parameters(), lr=1e-6)

In [5]:
class ImageDataset(Dataset):
    def __init__(self, root_dir, categories, transform=None):
        self.root_dir = root_dir
        self.categories = categories
        self.transform = transform
        self.data = []

        for label, category in enumerate(categories):
            category_path = os.path.join(root_dir, category)
            for img_name in os.listdir(category_path):
                img_path = os.path.join(category_path, img_name)
                self.data.append((img_path, label))
    
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        img_path, label = self.data[idx]
        image = Image.open(img_path).convert("RGB")

        if self.transform:
            image = self.transform(image)
        
        return image, label

In [6]:
transform = transforms.Compose([
    transforms.Resize((224, 224)), 
    transforms.ToTensor(), 
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

In [7]:
catagories = ["black", "no_black"]

train_dataset = ImageDataset("augmented_data/train", catagories, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

val_dataset = ImageDataset("augmented_data/val", catagories, transform=transform)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

test_dataset = ImageDataset("augmented_data/test", catagories, transform=transform)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [8]:
num_epochs = 100
train_losses = []
val_losses = []

for epoch in range(num_epochs):
    vgg16.train()
    running_loss = 0.0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = vgg16(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    avg_train_loss = running_loss / len(train_loader)
    train_losses.append(avg_train_loss)

    vgg16.eval()
    val_loss = 0.0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = vgg16(images)
            loss = criterion(outputs, labels)
            val_loss += loss.item()

    avg_val_loss = val_loss / len(val_loader)
    val_losses.append(avg_val_loss)

    print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {avg_train_loss:.4f}, Val Loss: {avg_val_loss:.4f}")

plt.figure(figsize=(10, 5))
plt.plot(range(1, num_epochs + 1), train_losses, label="Train Loss", marker='o')
plt.plot(range(1, num_epochs + 1), val_losses, label="Validation Loss", marker='s')

plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.title("Training and Validation Loss Over Epochs")
plt.legend()
plt.grid()
plt.show()

Epoch 1/100, Train Loss: 0.6858, Val Loss: 0.6689
Epoch 2/100, Train Loss: 0.6320, Val Loss: 0.6344
Epoch 3/100, Train Loss: 0.5843, Val Loss: 0.6051
Epoch 4/100, Train Loss: 0.5476, Val Loss: 0.5805
Epoch 5/100, Train Loss: 0.5105, Val Loss: 0.5580
Epoch 6/100, Train Loss: 0.4784, Val Loss: 0.5372
Epoch 7/100, Train Loss: 0.4483, Val Loss: 0.5194
Epoch 8/100, Train Loss: 0.4168, Val Loss: 0.5031
Epoch 9/100, Train Loss: 0.3916, Val Loss: 0.4877
Epoch 10/100, Train Loss: 0.3676, Val Loss: 0.4746
Epoch 11/100, Train Loss: 0.3465, Val Loss: 0.4606
Epoch 12/100, Train Loss: 0.3266, Val Loss: 0.4489
Epoch 13/100, Train Loss: 0.3128, Val Loss: 0.4380
Epoch 14/100, Train Loss: 0.2912, Val Loss: 0.4267
Epoch 15/100, Train Loss: 0.2739, Val Loss: 0.4152
Epoch 16/100, Train Loss: 0.2604, Val Loss: 0.4047
Epoch 17/100, Train Loss: 0.2474, Val Loss: 0.3949
Epoch 18/100, Train Loss: 0.2326, Val Loss: 0.3846
Epoch 19/100, Train Loss: 0.2216, Val Loss: 0.3751
Epoch 20/100, Train Loss: 0.2108, Val Lo

KeyboardInterrupt: 

In [None]:
vgg16.eval()

y_true = []
y_pred = []

with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)

        outputs = vgg16(images)
        _, predicted = torch.max(outputs, 1)

        y_true.extend(labels.cpu().numpy())
        y_pred.extend(predicted.cpu().numpy())

acc = accuracy_score(y_true, y_pred)
print(acc)
result = classification_report(y_true, y_pred)
print(result)


In [None]:
cm = confusion_matrix(y_true, y_pred)

plt.figure(figsize=(6, 5))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=["Class 0", "Class 1"], yticklabels=["Class 0", "Class 1"])

plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.title("Confusion Matrix")
plt.show()