In [None]:
import os

from collections import Counter

import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
from PIL import Image
from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix
from torch.utils.data import DataLoader, random_split
from torchinfo import summary
from torchvision import datasets, transforms
from tqdm.notebook import tqdm

torch.backends.cudnn.deterministic = True

In [None]:
# Pick the compute device. Fall back to the CPU so that `device` is always
# defined, even on machines without a CUDA-capable GPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

In [None]:
# Display the entries found directly under the data root folder.
os.listdir("sea_creatures")

In [None]:
train_dir = os.path.join("sea_creatures", "train")

# Keep only sub-directories and sort them by name. `os.listdir` returns entries
# in arbitrary order and also lists stray files (e.g. `.DS_Store`), whereas
# `ImageFolder` assigns class indices to the *sorted* sub-directory names.
# Building the list this way keeps `classes[i]` aligned with class index `i`.
classes = sorted(entry.name for entry in os.scandir(train_dir) if entry.is_dir())
print(classes)

In [None]:
# Target image size (in pixels) used by the preprocessing pipelines.
height, width = 224, 224


class ConvertToRGB:
    """Callable transform that makes sure an image is in RGB mode."""

    def __call__(self, img):
        """Return `img` itself when its mode is "RGB", otherwise `img.convert("RGB")`."""
        return img if img.mode == "RGB" else img.convert("RGB")


# Preprocessing pipeline for the un-normalized datasets.
transform = transforms.Compose([
    ConvertToRGB(),                             # Ensure the image is in RGB format
    transforms.Resize((height, width)),         # Resize takes (height, width), in that order
    transforms.PILToTensor(),                   # Convert to a PyTorch tensor
    transforms.ConvertImageDtype(torch.float),  # Convert to float (rescales values, no mean/std normalization)
])

print(transform)

In [None]:
sample_file = "sea_creatures/train/Dolphin/10004986625_0f786ab86b_b.jpg"

# Open the image in a context manager so the underlying file handle is closed
# as soon as the transform has produced its tensor.
with Image.open(sample_file) as image:
    transformed_image = transform(image)

print(transformed_image.shape)

In [None]:
# Build the training dataset from the class sub-folders of `train_dir`,
# running every image through the `transform` pipeline.
dataset = datasets.ImageFolder(train_dir, transform=transform)

# Look at the first (image, label) pair as a sanity check.
first_image, first_label = dataset[0]
print("Image size", first_image.shape)
print("Label", first_label)

In [None]:
# `ImageFolder` stores the class index of every sample in `dataset.targets`,
# so the labels can be counted without loading and decoding each image.
# Note that the keys are the class indices, not the class names.
counts = Counter(dataset.targets)
print("The counts dictionary:", counts)

# This dictionary maps class names to their index.
print("The class_to_idx dictionary:", dataset.class_to_idx)

# Combine both to get a {class name: number of images} dictionary. Iterating
# over `class_to_idx` guarantees every name has a valid index.
class_distribution = {
    class_name: counts[class_idx]
    for class_name, class_idx in dataset.class_to_idx.items()
}
print(class_distribution)

In [None]:
batch_size = 32

# Use the `batch_size` variable (not a second literal) so that changing it in
# one place affects every loader and the model summaries.
dataset_loader = DataLoader(dataset, batch_size=batch_size)

# Get one batch
first_batch = next(iter(dataset_loader))

print(f"Shape of one batch: {first_batch[0].shape}")
print(f"Shape of labels: {first_batch[1].shape}")

In [None]:
def get_mean_std(loader):
    """Computes the mean and standard deviation of image data.

    Input: a `DataLoader` producing tensors of shape [batch_size, channels, pixels_x, pixels_y]
    Output: the mean of each channel as a tensor, the standard deviation of each channel as a tensor
            formatted as a tuple (means[channels], std[channels])"""

    channels_sum, channels_squared_sum, num_batches = 0, 0, 0
    for data, _ in tqdm(loader):
        channels_sum += torch.mean(data, dim=[0, 2, 3])
        channels_squared_sum += torch.mean(data**2, dim=[0, 2, 3])
        num_batches += 1
    # Compute the mean from the channels_sum and num_batches
    mean = channels_sum / num_batches
    # Compute the standard deviation form channels_squared_sum, num_batches,
    # and the mean.
    std = torch.sqrt((channels_squared_sum / num_batches) - (mean ** 2))

    return mean, std


# Run the statistics pass over every batch of `dataset_loader`.
mean, std = get_mean_std(dataset_loader)

for label, value in (("Mean", mean), ("Standard deviation", std)):
    print(f"{label}: {value}")

In [None]:
# Same preprocessing as `transform`, followed by per-channel normalization
# with the statistics computed from the training images.
transform_norm = transforms.Compose([
    ConvertToRGB(),                            # Ensure the image is in RGB format, like `transform` does
    transforms.Resize((height, width)),        # Resize using the shared size constants
    transforms.ToTensor(),                     # Convert image to a float tensor (C, H, W)
    transforms.Normalize(mean=mean, std=std),  # Normalize the image channels
])

print(transform_norm)

In [None]:
# Same training folder as `dataset`, but read through the normalizing pipeline.
norm_dataset = datasets.ImageFolder(train_dir, transform=transform_norm)

# Look at the first (image, label) pair as a sanity check.
first_norm_image, first_norm_label = norm_dataset[0]
print("Image size", first_norm_image.shape)
print("Label", first_norm_label)

In [None]:
# Seeded generator so the 80/20 split is the same on every run.
g = torch.Generator()
g.manual_seed(42)

# NOTE(review): this splits the un-normalized `dataset`; `norm_dataset` is
# built earlier in the notebook but never used. The test loader also uses the
# un-normalized `transform`, so the two are consistent -- if normalization is
# wanted, switch both this split and the test dataset together.
train_dataset, val_dataset = random_split(dataset, lengths=[0.8, 0.2], generator=g)

print("Training data set size:", len(train_dataset))
print("Validation data set size:", len(val_dataset))

In [None]:
# Training batches are reshuffled each epoch; validation order stays fixed.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
)
val_loader = DataLoader(
    dataset=val_dataset,
    batch_size=batch_size,
    shuffle=False,
)

In [None]:
# First convolutional stage: 3 -> 16 channels, ReLU, then 4x4 max pooling.
layers = [
    nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=4, stride=4),
]
model = nn.Sequential(*layers)

# Print the layer-by-layer output shapes for one batch of images.
summary(model, input_size=(batch_size, 3, height, width))

In [None]:
# Two identical Conv -> ReLU -> MaxPool stages, widening 3 -> 16 -> 32 channels.
layers = []
for in_ch, out_ch in [(3, 16), (16, 32)]:
    layers.append(nn.Conv2d(in_ch, out_ch, kernel_size=3, stride=1, padding=1))
    layers.append(nn.ReLU())
    layers.append(nn.MaxPool2d(kernel_size=4, stride=4))
model = nn.Sequential(*layers)

# Print the layer-by-layer output shapes for one batch of images.
summary(model, input_size=(batch_size, 3, height, width))

In [None]:
# Three Conv -> ReLU -> MaxPool stages (3 -> 16 -> 32 -> 64 channels),
# followed by a Flatten so the output can feed fully connected layers.
layers = []
for in_ch, out_ch in [(3, 16), (16, 32), (32, 64)]:
    layers.append(nn.Conv2d(in_ch, out_ch, kernel_size=3, stride=1, padding=1))
    layers.append(nn.ReLU())
    layers.append(nn.MaxPool2d(kernel_size=4, stride=4))
layers.append(nn.Flatten())
model = nn.Sequential(*layers)

# Print the layer-by-layer output shapes for one batch of images.
summary(model, input_size=(batch_size, 3, height, width))

In [None]:
# The 3x3 convolutions with padding=1 keep the spatial size; each 4x4 max-pool
# with stride 4 floor-divides it by 4. Three pools therefore floor-divide by
# 64: for 224x224 inputs that is 224 -> 56 -> 14 -> 3, so Flatten produces
# 64 channels * 3 * 3 = 576 features.
flat_features = 64 * (height // 64) * (width // 64)

# One output logit per class found in the training folder.
num_classes = len(dataset.classes)

model = nn.Sequential(
    nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, stride=1, padding=1),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=4, stride=4),
    nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, stride=1, padding=1),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=4, stride=4),
    nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=4, stride=4),
    nn.Flatten(),
    nn.Dropout(p=0.5),
    nn.Linear(flat_features, 500),
    nn.ReLU(),
    nn.Dropout(p=0.5),
    nn.Linear(500, num_classes),
)

# Print the layer-by-layer output shapes for one batch of images.
summary(model, input_size=(batch_size, 3, height, width))

In [None]:
# Loss for multi-class classification on raw logits.
loss_fn = nn.CrossEntropyLoss()

# Adam optimizer over all model parameters; the learning rate can be adjusted.
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

# Use the GPU when one is available, otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Send the model to the selected device.
model.to(device)

In [None]:
# Import the train and predict functions from `training.py`, instead of typing them out!
from training import train, predict

# Number of passes over the training data. Passed by name below so that
# changing it here actually changes the training run.
epochs = 10

train(model, optimizer, loss_fn, train_loader, val_loader, epochs=epochs, device=device)
    


In [None]:
# Run the model over the validation set; `predict` is used here as returning
# one row of class probabilities per image.
probabilities = predict(model, val_loader, device=device)

# The predicted class is the column holding the largest probability.
predictions = probabilities.argmax(dim=1)

print("Number of predictions:", predictions.shape)

In [None]:
# Read the true labels straight from the split instead of iterating the
# loader, which would load and decode every validation image again.
# `val_dataset` is the `Subset` returned by `random_split`; `val_loader` is not
# shuffled, so it yields samples in the order of `val_dataset.indices`.
targets = [val_dataset.dataset.targets[i] for i in val_dataset.indices]

In [None]:
# Show both shapes together: a cell only displays its last expression, so a
# bare first line would be silently discarded. The two sizes should match.
torch.tensor(targets).shape, predictions.shape

In [None]:
# Number of validation predictions (one class index per image).
predictions.size()

In [None]:
# Don't change this
fig, ax = plt.subplots(figsize=(10, 6))

# Take the class names from the dataset itself: `ImageFolder` numbers classes
# by sorted folder name, so `dataset.classes[i]` is the name of class index `i`.
class_names = dataset.classes

# Pass the full list of label indices so the matrix always has one row/column
# per class, even if some class is absent from the validation split.
cm = confusion_matrix(
    targets,
    predictions.cpu().numpy(),
    labels=list(range(len(class_names))),
)

# Display the confusion matrix (don't change this)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_names)
disp.plot(cmap=plt.cm.Blues, xticks_rotation="vertical", ax=ax)

In [None]:
test_dir = os.path.join("sea_creatures", "test")

# Same preprocessing as the data the model was trained on.
test_dataset = datasets.ImageFolder(root=test_dir, transform=transform)

print("Number of test images:", len(test_dataset))

# Keep the original order (no shuffling) so predictions line up with
# `test_dataset.samples`; reuse the notebook-wide `batch_size`.
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [None]:
# Predict the probabilities for each test image
test_probabilities = predict(model, test_loader, device=device)

# Get the index associated with the largest probability for each test image
test_predictions = torch.argmax(probabilities,axis=1)

print("Number of predictions:", test_predictions.shape)

In [None]:
# Translate predicted class indices into class names. The names come from the
# training dataset, whose index -> name order is the one the model was trained
# with; `.tolist()` yields plain Python ints for indexing.
test_classes = [dataset.classes[i] for i in test_predictions.tolist()]

print("Number of class predictions:", len(test_classes))

In [None]:
import random

# (path, class index) pairs of the test images, in loader order.
test_samples = test_loader.dataset.samples

# Sample up to 12 random indices; `random.sample` raises if asked for more
# items than exist, so cap the count at the number of test images.
n_shown = min(12, len(test_samples))
sample_indices = random.sample(range(len(test_samples)), n_shown)

# Create a grid of 4x3 subplots
fig, axes = plt.subplots(4, 3, figsize=(20, 10))
flat_axes = axes.flatten()

# Iterate over the sampled indices and plot the corresponding images
for ax, idx in zip(flat_axes, sample_indices):
    image_path = test_samples[idx][0]

    # Display the image on the axis; close the file as soon as it is drawn.
    with Image.open(image_path) as img:
        ax.imshow(img)
    ax.axis('off')

    # Get the predicted class for this image
    predicted_class = test_classes[idx]

    # Set the title of the subplot to the predicted class
    ax.set_title(f"Predicted: {predicted_class}", fontsize=14)

# Hide any panels left empty when fewer than 12 images are available.
for ax in flat_axes[n_shown:]:
    ax.axis('off')

plt.tight_layout()