In [1]:
####################################


#THIS FIRST SECTION WILL IMPORT THE TRAINING DATA AND SET UP THE INFORMATION FOR TRAINING


####################################

In [None]:
# Import the libraries used in this notebook
import os
import pandas as pd
import zipfile
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import tarfile
from PIL import Image
from torchvision.datasets.utils import download_url
from torch.utils.data import random_split
from torchvision import transforms
import shutil
from sklearn.metrics import f1_score


## Locate the image folder and list the JPEG files it contains
image_dir = '/Users/louieburns/Library/CloudStorage/OneDrive-UniversityofLeeds/Year 3/AI and Machine Learning/Term 1/Coursework 1/Actual Coursework/dataoriginal/images'
image_files = list(filter(lambda name: name.endswith('.jpg'), os.listdir(image_dir)))


## BUILD A TABLE OF THE TRAINING IMAGES THAT BELONG TO THE CHOSEN CLASSES

# Classes to keep; each one is numbered by its position in this list
classification_mapping = {
    family: index
    for index, family in enumerate(
        ["Boeing 707", "Boeing 747", "A310", "Beechcraft 1900", "Gulfstream"]
    )
}

# Label file: the first whitespace-separated field of a line is used as the
# image code, everything after it as the classification name
file_path = 'images_family_train.txt'

data = []
with open(file_path, 'r') as file:
    for line_number, raw_line in enumerate(file, start=1):
        fields = raw_line.strip().split()
        if len(fields) < 2:
            # A usable record needs both a code and a classification
            print(f"Skipping invalid line {line_number}: {raw_line.strip()}")
            continue
        number, *family_words = fields
        data.append({"Number": number, "Classification": ' '.join(family_words)})

# One row per parsed record
df = pd.DataFrame(data)

# Normalise whitespace and capitalisation so names match the mapping keys
df['Classification'] = df['Classification'].str.strip().str.title()

# Attach numeric labels; rows outside the mapping get NaN and are removed
df['Label'] = df['Classification'].map(classification_mapping)
df = df[df['Label'].notna()].reset_index(drop=True)

# Show the filtered table
print(df)

from collections import Counter

# Gather the classification of every well-formed line in the label file
with open(file_path, 'r') as file:
    classifications = [
        ' '.join(fields[1:]).strip()
        for fields in (line.strip().split() for line in file)
        if len(fields) >= 2  # a record needs a code plus a classification
    ]

# Tally how often each classification appears in the raw file
classification_counts = Counter(classifications)

# Report the raw tallies, in order of first appearance
print("Classification Counts:")
for family_name, occurrences in classification_counts.items():
    print(f"{family_name}: {occurrences}")

# Tally the classifications that survived filtering into the DataFrame
filtered_classification_counts = df['Classification'].value_counts()

# Report the filtered tallies
print("Filtered Classification Counts:")
print(filtered_classification_counts)




## Load the training images in the same order as the rows of `df`

# Path to your folder containing images
image_folder_path = '/Users/louieburns/Library/CloudStorage/OneDrive-UniversityofLeeds/Year 3/AI and Machine Learning/Term 1/Coursework 1/Actual Coursework/dataoriginal/images'


relevant_codes = set(df['Number'])  # Convert to a set for faster lookup

# Define the image transformation (resize and convert to tensor)
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize all images to a fixed size (e.g., 224x224)
    transforms.ToTensor()  # Convert image to PyTorch tensor
])

# Initialize a list to hold image tensors, plus the positions of the rows
# whose image was actually found on disk
image_tensors = []
loaded_row_positions = []

# Walk the label table row by row instead of walking os.listdir():
# directory order is arbitrary, so stacking in that order would pair
# tensor i with the label of some other image once the labels are read
# from df['Label'].
for row_position, code in enumerate(df['Number']):
    image_file = f"{code}.jpg"
    image_path = os.path.join(image_folder_path, image_file)
    if not os.path.isfile(image_path):
        # Without the image the row cannot be used for training
        print(f"Skipping label row with no image on disk: {image_file}")
        continue

    # Load the image; the context manager closes the file handle promptly
    with Image.open(image_path) as img:
        tensor = transform(img.convert('RGB'))  # Ensure 3 color channels (RGB)

    image_tensors.append(tensor)
    loaded_row_positions.append(row_position)
    print(f"Loaded and transformed: {image_file}")

# Keep only the label rows that have a tensor, preserving order, so that
# row i of df describes tensor i of the stacked batch
df = df.iloc[loaded_row_positions].reset_index(drop=True)

# Stack all tensors into a single tensor
if image_tensors:
    image_tensor_batch = torch.stack(image_tensors)
    assert image_tensor_batch.shape[0] == len(df), "images and labels are out of step"
    print(f"Created a tensor of shape: {image_tensor_batch.shape}")
else:
    print("No images matched the criteria!")

# Example tensor shape: [num_images, channels, height, width]

In [None]:
####################################


#THIS SECOND SECTION WILL IMPORT THE TESTING DATA AND SET UP THE INFORMATION FOR F1 TESTING


####################################

In [None]:
# Classes to evaluate on, each mapped to its numeric label
classification_mapping = dict(
    zip(
        ("Boeing 707", "Boeing 747", "A310", "Beechcraft 1900", "Gulfstream"),
        range(5),
    )
)

# Location of the label file for the testing split
test_file_path = '/Users/louieburns/Library/CloudStorage/OneDrive-UniversityofLeeds/Year 3/AI and Machine Learning/Term 1/Coursework 1/Actual Coursework/dataoriginal/images_family_test.txt'

# Parse the label file: the first field is used as the image code and the
# remaining fields are joined into the classification name
test_data = []
with open(test_file_path, 'r') as file:
    for line_number, raw_line in enumerate(file, start=1):
        cleaned_line = raw_line.strip()
        fields = cleaned_line.split()
        if len(fields) >= 2:
            test_data.append(
                {"Number": fields[0], "Classification": ' '.join(fields[1:])}
            )
            continue
        # Anything with fewer than two fields cannot be labelled
        print(f"Skipping invalid line {line_number}: {cleaned_line}")

# Build the table of parsed records
df_test = pd.DataFrame(test_data)

# Normalise whitespace and capitalisation so names match the mapping keys
df_test['Classification'] = df_test['Classification'].str.strip().str.title()

# Attach numeric labels; rows outside the mapping get NaN and are removed
df_test['Label'] = df_test['Classification'].map(classification_mapping)
df_test = df_test[df_test['Label'].notna()].reset_index(drop=True)

# Show the filtered table
print(df_test)

# Path to the testing images
test_image_folder_path = '/Users/louieburns/Library/CloudStorage/OneDrive-UniversityofLeeds/Year 3/AI and Machine Learning/Term 1/Coursework 1/Actual Coursework/dataoriginal/images'

# Relevant test image codes
relevant_test_codes = set(df_test['Number'])  # Convert to a set for faster lookup

# Define the image transformation (resize and convert to tensor)
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize all images to a fixed size (e.g., 224x224)
    transforms.ToTensor()  # Convert image to PyTorch tensor
])

# Initialize a list to hold testing image tensors, plus the positions of
# the label rows whose image was actually found on disk
test_image_tensors = []
loaded_test_row_positions = []

# Walk the label table row by row instead of walking os.listdir():
# directory order is arbitrary, so stacking in that order would score each
# prediction against the label of some other image once the labels are
# read from df_test['Label'].
for row_position, code in enumerate(df_test['Number']):
    image_file = f"{code}.jpg"
    image_path = os.path.join(test_image_folder_path, image_file)
    if not os.path.isfile(image_path):
        # Without the image the row cannot be evaluated
        print(f"Skipping label row with no image on disk: {image_file}")
        continue

    # Load the image; the context manager closes the file handle promptly
    with Image.open(image_path) as img:
        tensor = transform(img.convert('RGB'))  # Ensure 3 color channels (RGB)

    test_image_tensors.append(tensor)
    loaded_test_row_positions.append(row_position)
    print(f"Loaded and transformed: {image_file}")

# Keep only the label rows that have a tensor, preserving order, so that
# row i of df_test describes tensor i of the stacked batch
df_test = df_test.iloc[loaded_test_row_positions].reset_index(drop=True)

# Stack all tensors into a single tensor
if test_image_tensors:
    test_image_tensor_batch = torch.stack(test_image_tensors)
    assert test_image_tensor_batch.shape[0] == len(df_test), "images and labels are out of step"
    print(f"Created a testing tensor of shape: {test_image_tensor_batch.shape}")
else:
    print("No testing images matched the criteria!")

# Example tensor shape: [num_test_images, channels, height, width]


In [None]:
####################################


#THIS 3rd SECTION WILL SET UP AND TRAIN THE NEURAL NETWORK

####################################

In [None]:
import torch.nn as nn
import torch.nn.functional as F

class BasicCNN(nn.Module):
    """Small convolutional classifier: two conv+ReLU+max-pool stages, then two linear layers.

    Args:
        num_classes: Number of output scores produced per image (default 5).
        input_size: Spatial size of the images the model will receive, either
            a single int for square images or a ``(height, width)`` pair.
            Defaults to ``(224, 224)``, which yields the original
            ``64 * 56 * 56`` input width for the first linear layer.

    Raises:
        ValueError: If ``input_size`` is so small that nothing is left after
            the two pooling stages.
    """

    def __init__(self, num_classes=5, input_size=(224, 224)):
        super().__init__()

        if isinstance(input_size, int):
            input_size = (input_size, input_size)
        height, width = input_size

        # The 3x3 convolutions use padding=1/stride=1 and keep the spatial
        # size; each 2x2/stride-2 pooling floors it in half, so two poolings
        # leave floor(size / 4) along each axis.
        pooled_height, pooled_width = height // 4, width // 4
        if pooled_height < 1 or pooled_width < 1:
            raise ValueError(
                f"input_size {input_size} is too small: at least 4x4 pixels are required"
            )

        # Convolutional layers (3 input channels, i.e. RGB images)
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)  # Max pooling

        # Fully connected layers; the input width follows the pooled feature map
        self.fc1 = nn.Linear(64 * pooled_height * pooled_width, 128)
        self.fc2 = nn.Linear(128, num_classes)  # Output layer

    def forward(self, x):
        """Return one raw score per class for each image in the batch ``x``.

        ``x`` must have 3 channels and the spatial size given to the
        constructor, otherwise the flattened features will not match ``fc1``.
        """
        # Apply convolutional layers with ReLU activation and pooling
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))

        # Flatten everything except the leading (batch) dimension
        x = x.flatten(start_dim=1)

        # Fully connected layers; no softmax is applied to the output
        x = F.relu(self.fc1(x))
        return self.fc2(x)

# Fix PyTorch's global random seed before any weights are created so that
# the initial parameters (and any later shuffling driven by the same global
# generator) are the same on every Restart & Run All
torch.manual_seed(42)

# Instantiate the CNN model with one output per class in the mapping
cnn_model = BasicCNN(num_classes=len(classification_mapping))
print(cnn_model)

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(cnn_model.parameters(), lr=0.001)

# Updated training function for CNN
def train_cnn_model(model, train_loader, criterion, optimizer, num_epochs=10):
    """Train ``model`` in place and report the mean batch loss of every epoch.

    Args:
        model: Module called as ``model(images)``.
        train_loader: Iterable yielding ``(images, labels)`` batches; it is
            iterated once per epoch.
        criterion: Callable returning the loss tensor for ``(outputs, labels)``.
        optimizer: Optimizer whose ``zero_grad``/``step`` update the model.
        num_epochs: Number of passes over ``train_loader`` (default 10).

    Returns:
        A list with one float per epoch: the mean of the batch losses.

    Raises:
        ValueError: If ``train_loader`` yields no batches.
    """
    model.train()  # Set the model to training mode
    epoch_losses = []
    for epoch in range(num_epochs):
        running_loss = 0.0
        # Count batches while iterating rather than calling len(): this also
        # works for loaders that do not define a length.
        num_batches = 0
        for images, labels in train_loader:
            # Zero the parameter gradients
            optimizer.zero_grad()

            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Backward pass and optimization
            loss.backward()
            optimizer.step()

            # Track loss
            running_loss += loss.item()
            num_batches += 1

        if num_batches == 0:
            # Fail with a clear message instead of a ZeroDivisionError below
            raise ValueError("train_loader yielded no batches; there is nothing to train on")

        epoch_loss = running_loss / num_batches
        epoch_losses.append(epoch_loss)
        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss}")

    return epoch_losses

# Example DataLoader creation
from torch.utils.data import DataLoader, TensorDataset

# Wrap the image tensors and their integer labels in a dataset, then serve
# it in shuffled mini-batches of 32
train_labels = torch.tensor(df['Label'].values, dtype=torch.long)
train_dataset = TensorDataset(image_tensor_batch, train_labels)
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)

# Run ten training epochs on the CNN
train_cnn_model(
    model=cnn_model,
    train_loader=train_loader,
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=10,
)




In [None]:
####################################


#THIS 4th SECTION COMPLETES AN F1 TEST ON THE TESTING DATA

####################################

In [None]:
# Define the testing function for CNN
def evaluate_cnn_model(model, test_loader):
    """Print and return the weighted F1-score of ``model`` over ``test_loader``.

    The model is switched to evaluation mode and every ``(images, labels)``
    batch is scored with gradient tracking disabled. The predicted class of
    an image is the index of its highest output score.
    """
    model.eval()
    targets, predictions = [], []

    with torch.no_grad():
        for batch_images, batch_labels in test_loader:
            # Index of the largest score along the class dimension
            scores = model(batch_images)
            predicted_classes = scores.max(dim=1).indices

            # Collect results on the CPU for the metric computation
            targets.extend(batch_labels.cpu().numpy())
            predictions.extend(predicted_classes.cpu().numpy())

    # 'weighted' averages the per-class F1 values by class support
    f1 = f1_score(targets, predictions, average='weighted')
    print(f"Weighted F1-Score: {f1}")

    return f1

# Wrap the testing tensors and their integer labels in a dataset, served
# in fixed-order mini-batches of 32
test_labels = torch.tensor(df_test['Label'].values, dtype=torch.long)
test_dataset = TensorDataset(test_image_tensor_batch, test_labels)
test_loader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=False)

# Score the trained CNN on the testing split
f1_score_test = evaluate_cnn_model(model=cnn_model, test_loader=test_loader)

