# MECH 3465 Robotics & Machine Intelligence - Coursework 1

Dan Nehushtan - 201594650
<br> Louie Burns - 201588498

### Introduction
This coursework applies Convolutional Neural Networks (CNNs) to classify aircraft in remote sensing images, optimising performance through hyperparameter tuning and pre-processing. The evaluation uses the weighted F1-score to assess model improvements.

### Tasks
1. Design a convolutional neural network (CNN) for aircraft classification, justifying its suitability for the task.
2. Document the hyperparameter tuning process and analyse its impact on model performance.
3. Optimise the CNN through pre-processing techniques or alternative network structures, using weighted F1-score for evaluation.
4. Provide a detailed evaluation, comparing the initial and optimised models, including metrics and a critical reflection on results.


 ## Task 1 - CNN
In this task you wil train the Aircraft Recognition Dataset using pytorch and train a Convolutional Neural Network (CNN) to classify the Aircraft Recognition Dataset using weighted F1 score. You should train the model using a GPU if available.

##### Import libraries and read the folder (inluding the text file titles)

In [1]:
# INSTALL MISSING LIBRARIES (This depends on device and GPU used - some stuff may already be satisfied, some may not)
%pip install datasets
%pip install pandas
%pip install scikit-learn

#Import libraries used 
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import tarfile
from torchvision.datasets.utils import download_url
from torch.utils.data import random_split, Dataset, DataLoader
from torchvision import transforms, models
from torchvision.datasets import ImageFolder
from torchvision.transforms import ToTensor
from pathlib import Path
from tqdm import tqdm
from PIL import Image
import pandas as pd
from datasets import Dataset, DatasetDict

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


## Prepare the device and load the model

In [2]:
## check devices

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# If CUDA is available, print the GPU name and the device being used
if torch.cuda.is_available():
    # Get the name of the current GPU
    gpu_name = torch.cuda.get_device_name(device)
    print(f"CUDA is available. Using GPU: {gpu_name}")
else:
    print("CUDA is not available. Using CPU.")

# print(device)

CUDA is not available. Using CPU.


## Load Data Set
*   Load the AircraftRecognition Dataset
*   Split into training and testing segments
  * The training segment is used for training the model, while the testing portion of the data is used to evaluate the accuracy of the model.

The dataset is extracted to the directory /AircraftRecognitionDataset. You need to create a local dirctory data folder. It contains 1 folder, images, containing all train and test set (10,000 images). Also it contains 19 text files which categroise the images. Let's verify this using os.listdir.

In [4]:
import zipfile

# Define the path to the zip file
zip_file_path = 'AircraftRecognitionDataset.zip'
extracted_folder_path = 'AircraftRecognitionDataset'  # Folder where you want to extract

# Check if the folder exists, if not, create it
if not os.path.exists(extracted_folder_path):
    os.makedirs(extracted_folder_path)

# Extract the zip file
with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
    zip_ref.extractall(extracted_folder_path)

print(f'Zip file extracted to: {extracted_folder_path}')

# Set the path to the extracted 'dataoriginal' folder
data_dir = os.path.join(extracted_folder_path, 'dataoriginal')  # Path to the dataset folder inside extracted

# List the text files in 'dataoriginal'
txt_files = [f for f in os.listdir(data_dir) if f.endswith(".txt")]
print("Text files found:", txt_files)

FileNotFoundError: [Errno 2] No such file or directory: 'AircraftRecognitionDataset.zip'

## Load Dataset
We can use the ImageFolder class from torchvision to load the data as PyTorch tensors.

In [3]:
# Define the base directory path
base_dir = Path('./AircraftRecognitionDataset/dataoriginal')

# Initialize empty lists to store file names and labels for training, validation, and testing
train_file_names, train_labels = [], []
val_file_names, val_labels = [], []
test_file_names, test_labels = [], []

# Define the directory path for images
directory_path = base_dir / 'images'

# Define the paths to the text files containing the label to manufacturer mapping
mapping_files = {
    "train": base_dir / 'images_manufacturer_train.txt',
    "val": base_dir / 'images_manufacturer_val.txt',
    "test": base_dir / 'images_manufacturer_test.txt'
}

# Initialize dictionaries to store the mappings
label_to_manufacturer = {"train": {}, "val": {}, "test": {}}

# Read the mappings from the text files
for split, file_path in mapping_files.items():
    with open(file_path, 'r') as file:
        for line in file:
            parts = line.strip().split()
            label = parts[0]
            manufacturer = ' '.join(parts[1:])
            label_to_manufacturer[split][label] = manufacturer

# Iterate through all image files in the specified directory
for file in tqdm(sorted(directory_path.glob('*.*g'))):  # Adjust the pattern if needed
    label = file.stem  # Extract the label from the file name (without extension)
    for split in ["train", "val", "test"]:
        if label in label_to_manufacturer[split]:
            manufacturer = label_to_manufacturer[split].get(label, 'Unknown')
            eval(f"{split}_labels").append(manufacturer)
            eval(f"{split}_file_names").append(str(file))

# Create pandas dataframes from the collected file names and labels
train_df = pd.DataFrame({"image": train_file_names, "label": train_labels})
val_df = pd.DataFrame({"image": val_file_names, "label": val_labels})
test_df = pd.DataFrame({"image": test_file_names, "label": test_labels})

# Print dataframe shapes
print(f"Train Data: {train_df.shape}")
print(f"Validation Data: {val_df.shape}")
print(f"Test Data: {test_df.shape}")

# Combine the dataframes into a single dataset
dataset_dict = DatasetDict({
    'train': Dataset.from_pandas(train_df),
    'validation': Dataset.from_pandas(val_df),
    'test': Dataset.from_pandas(test_df)
})

# Print the dataset in a table format
print("\nDatasetDict:")
print(f"{'Split':<12} {'Features':<20} {'Num Rows':<10}")
print(f"{'-'*12} {'-'*20} {'-'*10}")
for split, dataset in dataset_dict.items():
    print(f"{split:<12} {str(dataset.features):<20} {dataset.num_rows:<10}")

# Load in the families, manufacturers, variants files
files = {
    "families": base_dir / 'families.txt',
    "manufacturers": base_dir / 'manufacturers.txt',
    "variants": base_dir / 'variants.txt'
}

# Initialize lists to store the contents
contents = {key: [] for key in files}

# Read the files
for key, file_path in files.items():
    with open(file_path, "r") as f:
        contents[key] = [line.strip() for line in f]

# Print the first few entries to verify
print("\nFamilies, Manufacturers, Variants:")
print(f"{'File':<15} {'First 5 Entries':<50}")
print(f"{'-'*15} {'-'*50}")
for key in contents:
    print(f"{key.capitalize():<15} {contents[key][:5]}")

# Load in all the test files
test_files = {
    "family": base_dir / 'images_family_test.txt',
    "manufacturer": base_dir / 'images_manufacturer_test.txt',
    "test": base_dir / 'images_test.txt',
    "variant": base_dir / 'images_variant_test.txt'
}

# Initialize dictionaries to store the mappings
test_mappings = {key: {} for key in test_files if key != "test"}
test_image_ids = []

# Read the test files
for key, file_path in test_files.items():
    with open(file_path, "r") as f:
        if key == "test":
            test_image_ids = [line.strip() for line in f]
        else:
            for line in f:
                parts = line.strip().split(' ', 1)
                if len(parts) == 2:
                    image_id, label = parts
                    test_mappings[key][image_id] = label

# Print the first few entries to verify
print("\nTest:")
print(f"{'File':<20} {'First 5 Entries':<50}")
print(f"{'-'*20} {'-'*50}")
for i, (key, mapping) in enumerate(test_mappings.items(), 1):
    print(f"{i}. {key.capitalize():<20} {list(mapping.items())[:5]}")
print(f"{len(test_mappings) + 1}. Image IDs {'':<10} {test_image_ids[:5]}")

# Load in all the train files
train_files = {
    "family": base_dir / 'images_family_train.txt',
    "manufacturer": base_dir / 'images_manufacturer_train.txt',
    "train": base_dir / 'images_train.txt',
    "variant": base_dir / 'images_variant_train.txt'
}

# Initialize dictionaries to store the mappings
train_mappings = {key: {} for key in train_files if key != "train"}
train_image_ids = []

# Read the train files
for key, file_path in train_files.items():
    with open(file_path, "r") as f:
        if key == "train":
            train_image_ids = [line.strip() for line in f]
        else:
            for line in f:
                parts = line.strip().split(' ', 1)
                if len(parts) == 2:
                    image_id, label = parts
                    train_mappings[key][image_id] = label

# Print the first few entries to verify
print("\nTrain:")
print(f"{'File':<20} {'First 5 Entries':<50}")
print(f"{'-'*20} {'-'*50}")
for i, (key, mapping) in enumerate(train_mappings.items(), 1):
    print(f"{i}. {key.capitalize():<20} {list(mapping.items())[:5]}")
print(f"{len(train_mappings) + 1}. Image IDs {'':<10} {train_image_ids[:5]}")

# Load in the validation (val) files
val_files = {
    "family": base_dir / 'images_family_val.txt',
    "manufacturer": base_dir / 'images_manufacturer_val.txt',
    "val": base_dir / 'images_val.txt',
    "variant": base_dir / 'images_variant_val.txt'
}

# Initialize dictionaries to store the mappings
val_mappings = {key: {} for key in val_files if key != "val"}
val_image_ids = []

# Read the val files
for key, file_path in val_files.items():
    with open(file_path, "r") as f:
        if key == "val":
            val_image_ids = [line.strip() for line in f]
        else:
            for line in f:
                parts = line.strip().split(' ', 1)
                if len(parts) == 2:
                    image_id, label = parts
                    val_mappings[key][image_id] = label

# Print the first few entries to verify
print("\nValidation:")
print(f"{'File':<20} {'First 5 Entries':<50}")
print(f"{'-'*20} {'-'*50}")
for i, (key, mapping) in enumerate(val_mappings.items(), 1):
    print(f"{i}. {key.capitalize():<20} {list(mapping.items())[:5]}")
print(f"{len(val_mappings) + 1}. Image IDs {'':<10} {val_image_ids[:5]}")

# Load in the trainval files
trainval_files = {
    "family": base_dir / 'images_family_trainval.txt',
    "manufacturer": base_dir / 'images_manufacturer_trainval.txt',
    "variant": base_dir / 'images_variant_trainval.txt',
}

# Initialize dictionaries to store the mappings
trainval_mappings = {key: {} for key in trainval_files if key != "trainval"}
trainval_image_ids = []

# Read the trainval files
for key, file_path in trainval_files.items():
    with open(file_path, "r") as f:
        if key == "trainval":
            trainval_image_ids = [line.strip() for line in f]
        else:
            for line in f:
                parts = line.strip().split(' ', 1)
                if len(parts) == 2:
                    image_id, label = parts
                    trainval_mappings[key][image_id] = label

# Print the first few entries to verify
print("\nTrainval:")
print(f"{'File':<20} {'First 5 Entries':<50}")
print(f"{'-'*20} {'-'*50}")
for i, (key, mapping) in enumerate(trainval_mappings.items(), 1):
    print(f"{i}. {key.capitalize():<20} {list(mapping.items())[:5]}")

# Load in the box file
box_file = base_dir / 'images_box.txt'
box_mappings = {}

# Read the box file
with open(box_file, "r") as f:
    for line in f:
        parts = line.strip().split(' ', 1)
        if len(parts) == 2:
            image_id, box = parts
            box_mappings[image_id] = box

# Print the first few entries to verify
print("\nBox:")
print(f"{'File':<20} {'First 5 Entries':<50}")
print(f"{'-'*20} {'-'*50}")
print(f"1. Box {'':<20} {list(box_mappings.items())[:5]}")

100%|██████████| 10000/10000 [00:00<00:00, 93503.68it/s]

Train Data: (3334, 2)
Validation Data: (3333, 2)
Test Data: (3333, 2)

DatasetDict:
Split        Features             Num Rows  
------------ -------------------- ----------
train        {'image': Value(dtype='string', id=None), 'label': Value(dtype='string', id=None)} 3334      
validation   {'image': Value(dtype='string', id=None), 'label': Value(dtype='string', id=None)} 3333      
test         {'image': Value(dtype='string', id=None), 'label': Value(dtype='string', id=None)} 3333      

Families, Manufacturers, Variants:
File            First 5 Entries                                   
--------------- --------------------------------------------------
Families        ['A300', 'A310', 'A320', 'A330', 'A340']
Manufacturers   ['ATR', 'Airbus', 'Antonov', 'Beechcraft', 'Boeing']
Variants        ['707-320', '727-200', '737-200', '737-300', '737-400']

Test:
File                 First 5 Entries                                   
-------------------- -------------------------------------




Bounding box logic:

We have text files that map each image ID to a bounding box. We want to use just the cropped region, therefore:
*   Parse the bounding box (e.g., x1, y1, x2, y2).
*   Open the image and crop based on those coordinates before feeding it to the model.

Check the input data shape and labels.  Each element is a tuple, containing an image tensor and a label. Since the data consists of 256x256 px color images with 3 channels (RGB), each image tensor has the shape (3, 256, 256).

In [4]:
# 1) Parse bounding boxes:
parsed_box_mappings = {}
for img_id, coords_str in box_mappings.items():
    coords = list(map(int, coords_str.split()))
    if len(coords) == 4:
        x1, y1, x2, y2 = coords
        parsed_box_mappings[img_id] = (x1, y1, x2, y2)

def add_box_data(df):
    df['image_id'] = df['image'].apply(lambda p: Path(p).stem)
    df[['x1','y1','x2','y2']] = df['image_id'].apply(
        lambda i: pd.Series(parsed_box_mappings.get(i, (0,0,0,0)))
    )
    return df

train_df = add_box_data(train_df)
val_df   = add_box_data(val_df)
test_df  = add_box_data(test_df)

# 2) Combine label info from "trainval" file:
# Example: use trainval_mappings["manufacturer"] to gather all labels
all_manufacturers = list(trainval_mappings["manufacturer"].values())
unique_manufacturers = sorted(set(all_manufacturers))
label_map = {m: i for i, m in enumerate(unique_manufacturers)}

# 3) Convert string labels to numeric in each DataFrame:
train_df['label_int'] = train_df['label'].map(label_map)
val_df['label_int']   = val_df['label'].map(label_map)
test_df['label_int']  = test_df['label'].map(label_map)

print(train_df.head())

                                               image              label  \
0  AircraftRecognitionDataset\dataoriginal\images...  McDonnell Douglas   
1  AircraftRecognitionDataset\dataoriginal\images...             Boeing   
2  AircraftRecognitionDataset\dataoriginal\images...           Canadair   
3  AircraftRecognitionDataset\dataoriginal\images...         Beechcraft   
4  AircraftRecognitionDataset\dataoriginal\images...             Boeing   

  image_id   x1   y1   x2   y2  label_int  
0  0038598  117    2  969  724         21  
1  0038671    1  113  743  526          4  
2  0043892   25  115  969  726          7  
3  0048340   42  115  936  393          3  
4  0054367    2  180  839  538          4  


## Preprocess the dataset and augment
Preprocess the dataset by normalizing the pixel values between 0 and 1. 


In [11]:
random_seed = 42
torch.manual_seed(random_seed);

In [14]:
train_size = len(dataset_dict) - val_df

train_ds, val_ds = random_split(dataset_dict, [train_size, val_df])
len(train_ds), len(val_ds)

TypeError: unsupported operand type(s) for -: 'int' and 'str'

We can now create data loaders for training and validation, to load the data in batches

In [None]:
from torch.utils.data.dataloader import DataLoader

batch_size=128
epochs = 10

In [None]:
# test_loader is the portion within in test dataset
train_loader = DataLoader(train_ds, batch_size, shuffle=True, num_workers=4, pin_memory=True)
test_loader = DataLoader(val_ds, batch_size*2, num_workers=4, pin_memory=True)

## Creating the model

In [7]:
# Define a simple CNN model
class SimpleCNN(nn.Module):
    def __init__(self, num_classes):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.fc1 = nn.Linear(64 * 8 * 8, 512)
        self.fc2 = nn.Linear(512, num_classes)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 64 * 8 * 8)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Define the number of classes (e.g., number of manufacturers)
num_classes = len(set([label[0] for label in train_dataset.image_labels.values()]))
model = SimpleCNN(num_classes)

In [None]:
# Define evaluation function
def evaluate(model, data_loader):
    model.eval()
    total, correct, total_loss = 0, 0, 0
    with torch.no_grad():
        for images, labels in data_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            total_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    return correct / total, total_loss / len(data_loader)


In [None]:
# Initialize the model, loss function, and optimizer
model = CNNNet().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

## train the model and keep track of the loss and accuracy over epochs

In [9]:
# Collect all unique labels
all_labels = set()
for labels in train_ds.image_labels.values():
    all_labels.update(labels)

# Create a mapping from label to index
label_to_index = {label: idx for idx, label in enumerate(sorted(all_labels))}

class CustomImageDataset(Dataset):
    def __init__(self, image_dir, annotations_file, label_to_index, transform=None):
        self.image_dir = image_dir
        self.transform = transform
        self.label_to_index = label_to_index
        self.image_labels = self.load_annotations(annotations_file)
        self.image_paths = list(self.image_labels.keys())

    def load_annotations(self, annotations_file):
        image_labels = {}
        with open(annotations_file, 'r') as file:
            for line in file:
                parts = line.strip().split()
                image_name = parts[0]
                labels = parts[1:]  # Collect all labels (manufacturer, variant, etc.)
                image_labels[image_name] = labels
        return image_labels

    def __len__(self):
        return len(self.image_labels)

    def __getitem__(self, idx):
        image_name = self.image_paths[idx]
        img_path = os.path.join(self.image_dir, image_name + '.jpg')  # Assuming images have .jpg extension
        image = Image.open(img_path).convert("RGB")
        labels = self.image_labels[image_name]
        labels = [self.label_to_index[label] for label in labels]  # Convert labels to indices
        if self.transform:
            image = self.transform(image)
        return image, labels

# Define the base directory path
base_dir = Path('./AircraftRecognitionDataset/dataoriginal')

# Define the paths to the text files containing the label to manufacturer mapping
mapping_files = {
    "train": base_dir / 'images_manufacturer_train.txt',
    "val": base_dir / 'images_manufacturer_val.txt',
    "test": base_dir / 'images_manufacturer_test.txt'
}

# Define the transformations
transform = Compose([
    Resize((32, 32)),  # Resize all images to 32x32
    ToTensor(),
    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalize with mean and std
])

# Create datasets for train, validation, and test splits
train_ds = CustomImageDataset(base_dir / 'images', mapping_files['train'], label_to_index, transform=transform)
val_ds = CustomImageDataset(base_dir / 'images', mapping_files['val'], label_to_index, transform=transform)
test_ds = CustomImageDataset(base_dir / 'images', mapping_files['test'], label_to_index, transform=transform)

# Define batch size
batch_size = 32

# Create DataLoaders for each dataset
train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True, num_workers=4, pin_memory=True)
val_loader = DataLoader(val_ds, batch_size=batch_size*2, shuffle=False, num_workers=4, pin_memory=True)
test_loader = DataLoader(test_ds, batch_size=batch_size*2, shuffle=False, num_workers=4, pin_memory=True)

# Define the CNN model
import torch
import torch.nn as nn
import torch.nn.functional as F

class SimpleCNN(nn.Module):
    def __init__(self, num_classes):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.fc1 = nn.Linear(64 * 8 * 8, 512)
        self.fc2 = nn.Linear(512, num_classes)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 64 * 8 * 8)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Define the number of classes (e.g., number of manufacturers)
num_classes = len(label_to_index)
model = SimpleCNN(num_classes)

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for images, labels in train_loader:
        labels = [label[0] for label in labels]  # Use the first label (manufacturer)
        labels = torch.tensor(labels)  # Convert labels to tensor
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    print(f"Epoch {epoch+1}, Loss: {running_loss/len(train_loader)}")

    # Validation
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in val_loader:
            labels = [label[0] for label in labels]  # Use the first label (manufacturer)
            labels = torch.tensor(labels)  # Convert labels to tensor
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    print(f"Validation Accuracy: {100 * correct / total}%")

NameError: name 'train_ds' is not defined

In [None]:
from torchsummary import summary
# Summarize the model
print(summary(model, (3, 32, 32)))

In [None]:
# Train the model
history = train_model(model, train_loader, test_loader, epochs=10)

## visualize the training and test accuracy and loss

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline
# Plot the training and test accuracy and loss
plt.figure(figsize=(12, 5))

# Plot Accuracy
plt.subplot(1, 2, 1)
plt.plot(history['train_acc'], label='Train Accuracy')
plt.plot(history['test_acc'], label='Test Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.title('Train vs Test Accuracy')
plt.legend()

# Plot Loss
plt.subplot(1, 2, 2)
plt.plot(history['train_loss'], label='Train Loss')
plt.plot(history['test_loss'], label='Test Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Train vs Test Loss')
plt.legend()

plt.show()

## Task 2
In this task you wil train the MSTR dataset using pytorch and train a Convolutional Neural Network (CNN) to classify the MSTR dataset using weighted F1 score. You should train the model using a GPU if available.

In [21]:
import os
import zipfile
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms, datasets
!pip install scikit-learn
from sklearn.metrics import f1_score
from torch.utils.data import DataLoader

# Paths 
dataset_path = './AircraftRecognitionDataset'  # Path to the dataset file
labels_path = './AircraftRecognitionDataset'  # Path to the labels file

[0m

In [22]:
# Preprocess data
transform = transforms.Compose([
    transforms.Resize((128, 128)),  # Resize images
    transforms.ToTensor(),  # Convert to tensor
    transforms.Normalize([0.5], [0.5])  # Normalize
])

# Load dataset
dataset = datasets.ImageFolder(dataset_path, transform=transform)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

In [23]:
# Custom CNN Model
class MSTRCNN(nn.Module):
    def __init__(self, num_classes):
        super(MSTRCNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.fc1 = nn.Linear(64 * 32 * 32, 128)  # Adjust based on input size
        self.fc2 = nn.Linear(128, num_classes)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.pool(self.relu(self.conv1(x)))
        x = self.pool(self.relu(self.conv2(x)))
        x = x.view(x.size(0), -1)  # Flatten
        x = self.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [24]:
# Initialize model
num_classes = len(dataset.classes)
model = MSTRCNN(num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
epochs = 5
model.train()
for epoch in range(epochs):
    running_loss = 0.0
    for inputs, labels in dataloader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    print(f"Epoch {epoch+1}/{epochs}, Loss: {running_loss/len(dataloader)}")

# Evaluation loop
model.eval()
true_labels = []
predicted_labels = []
with torch.no_grad():
    for inputs, labels in dataloader:
        outputs = model(inputs)
        _, preds = torch.max(outputs, 1)
        true_labels.extend(labels.cpu().numpy())
        predicted_labels.extend(preds.cpu().numpy())

Epoch 1/5, Loss: 0.0


KeyboardInterrupt: 

In [None]:
# Compute F1-score
average_f1 = f1_score(true_labels, predicted_labels, average='weighted')
print(f"Average F1-Score: {average_f1}")