In [29]:
####################################


#THIS FIRST SECTION WILL IMPORT THE TRAINING DATA AND SETUP THE INFORMATION FOR TRAINING


####################################

In [30]:
#import libraries you used 
import os
import pandas as pd
import zipfile
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import tarfile
from PIL import Image
from torchvision.datasets.utils import download_url
from torch.utils.data import random_split
from torchvision import transforms
import shutil
from sklearn.metrics import f1_score


## Import the data into the document
image_dir = '/Users/louieburns/Library/CloudStorage/OneDrive-UniversityofLeeds/Year 3/AI and Machine Learning/Term 1/Coursework 1/Actual Coursework/dataoriginal/images'
image_files = [f for f in os.listdir(image_dir) if f.endswith('.jpg')]


## CREATE A FILE WITH THE DESIRED IMAGES ONLY

# Define the classifications to filter and their numeric mappings
classification_mapping = {
    "Boeing 707": 0,
    "Boeing 747": 1,
    "A310": 2,
    "Beechcraft 1900": 3,
    "Gulfstream": 4
}

# Define the file path
file_path = 'images_family_train.txt'
# Read the file and handle inconsistent line structures
data = []
with open(file_path, 'r') as file:
    for i, line in enumerate(file):
        parts = line.strip().split()
        if len(parts) >= 2:  # Ensure at least two fields exist
            number = parts[0]
            classification = ' '.join(parts[1:])  # Combine remaining parts
            data.append({"Number": number, "Classification": classification})
        else:
            print(f"Skipping invalid line {i + 1}: {line.strip()}")  # Debugging invalid lines

# Convert to a DataFrame
df = pd.DataFrame(data)

# Standardise the Classification column
df['Classification'] = df['Classification'].str.strip()  # Remove extra spaces
df['Classification'] = df['Classification'].str.title()  # Standardise capitalisation

# Add numeric labels and filter invalid classifications
df['Label'] = df['Classification'].map(classification_mapping)
df = df.dropna(subset=['Label']).reset_index(drop=True)  # Remove rows with invalid classifications

# Display the resulting DataFrame
print(df)

from collections import Counter

# Read the file and collect classifications
classifications = []
with open(file_path, 'r') as file:
    for line in file:
        parts = line.strip().split()
        if len(parts) >= 2:  # Ensure there are at least two fields
            classification = ' '.join(parts[1:]).strip()  # Combine fields after the first
            classifications.append(classification)

# Count the occurrences of each classification
classification_counts = Counter(classifications)

# Display the results
print("Classification Counts:")
for classification, count in classification_counts.items():
    print(f"{classification}: {count}")

# Count the occurrences of each classification in the filtered DataFrame
filtered_classification_counts = df['Classification'].value_counts()

# Display the results
print("Filtered Classification Counts:")
print(filtered_classification_counts)




## Create a folder with only the photos to keep

# Path to your folder containing images
image_folder_path = '/Users/louieburns/Library/CloudStorage/OneDrive-UniversityofLeeds/Year 3/AI and Machine Learning/Term 1/Coursework 1/Actual Coursework/dataoriginal/images'


relevant_codes = set(df['Number'])  # Convert to a set for faster lookup

# Initialize a list to hold image tensors
image_tensors = []

# Define the image transformation (resize and convert to tensor)
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize all images to a fixed size (e.g., 224x224)
    transforms.ToTensor()  # Convert image to PyTorch tensor
])

# Iterate over files in the folder
for image_file in os.listdir(image_folder_path):
    # Extract the numeric part of the file name (assuming it's the code)
    code = os.path.splitext(image_file)[0]
    if code in relevant_codes and image_file.endswith('.jpg'):
        # Load the image
        image_path = os.path.join(image_folder_path, image_file)
        img = Image.open(image_path).convert('RGB')  # Ensure 3 color channels (RGB)
        
        # Transform the image and append to the list
        tensor = transform(img)
        image_tensors.append(tensor)
        print(f"Loaded and transformed: {image_file}")

# Stack all tensors into a single tensor
if image_tensors:
    image_tensor_batch = torch.stack(image_tensors)
    print(f"Created a tensor of shape: {image_tensor_batch.shape}")
else:
    print("No images matched the criteria!")

# Example tensor shape: [num_images, channels, height, width]

      Number Classification  Label
0    1025794     Boeing 707    0.0
1    1340192     Boeing 707    0.0
2    0056978     Boeing 707    0.0
3    0698580     Boeing 707    0.0
4    0450014     Boeing 707    0.0
..       ...            ...    ...
295  1857543     Gulfstream    4.0
296  0517785     Gulfstream    4.0
297  1311419     Gulfstream    4.0
298  2106202     Gulfstream    4.0
299  0497885     Gulfstream    4.0

[300 rows x 3 columns]
Classification Counts:
Boeing 707: 34
Boeing 727: 33
Boeing 737: 267
Boeing 747: 133
Boeing 757: 67
Boeing 767: 100
Boeing 777: 66
A300: 34
A310: 33
A320: 133
A330: 67
A340: 133
A380: 34
ATR-42: 33
ATR-72: 33
An-12: 34
BAE 146: 66
BAE-125: 34
Beechcraft 1900: 33
Boeing 717: 33
C-130: 34
C-47: 33
CRJ-200: 33
CRJ-700: 67
Cessna 172: 33
Cessna 208: 34
Cessna Citation: 66
Challenger 600: 34
DC-10: 33
DC-3: 33
DC-6: 34
DC-8: 33
DC-9: 33
DH-82: 34
DHC-1: 33
DHC-6: 33
Dash 8: 67
DR-400: 33
Dornier 328: 34
Embraer E-Jet: 100
EMB-120: 33
Embraer ERJ 145: 67
E

In [31]:
####################################


#THIS SECOND SECTION WILL IMPORT THE TESTING DATA AND SETUP THE INFORMATION FOR F1 TESTING


####################################

In [32]:
# Define the classifications and their numeric mappings
classification_mapping = {
    "Boeing 707": 0,
    "Boeing 747": 1,
    "A310": 2,
    "Beechcraft 1900": 3,
    "Gulfstream": 4
}

# File path for the testing data classification file
test_file_path = '/Users/louieburns/Library/CloudStorage/OneDrive-UniversityofLeeds/Year 3/AI and Machine Learning/Term 1/Coursework 1/Actual Coursework/dataoriginal/images_family_test.txt'

# Read and process the testing data file
test_data = []
with open(test_file_path, 'r') as file:
    for i, line in enumerate(file):
        parts = line.strip().split()
        if len(parts) >= 2:  # Ensure there are at least two fields
            number = parts[0]
            classification = ' '.join(parts[1:])  # Combine remaining parts
            test_data.append({"Number": number, "Classification": classification})
        else:
            print(f"Skipping invalid line {i + 1}: {line.strip()}")  # Debugging invalid lines

# Convert to a DataFrame
df_test = pd.DataFrame(test_data)

# Standardise the Classification column
df_test['Classification'] = df_test['Classification'].str.strip()  # Remove extra spaces
df_test['Classification'] = df_test['Classification'].str.title()  # Standardise capitalisation

# Add numeric labels and filter invalid classifications
df_test['Label'] = df_test['Classification'].map(classification_mapping)
df_test = df_test.dropna(subset=['Label']).reset_index(drop=True)  # Remove rows with invalid classifications

# Display the resulting DataFrame
print(df_test)

# Path to the testing images
test_image_folder_path = '/Users/louieburns/Library/CloudStorage/OneDrive-UniversityofLeeds/Year 3/AI and Machine Learning/Term 1/Coursework 1/Actual Coursework/dataoriginal/images'

# Relevant test image codes
relevant_test_codes = set(df_test['Number'])  # Convert to a set for faster lookup

# Initialize a list to hold testing image tensors
test_image_tensors = []

# Define the image transformation (resize and convert to tensor)
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize all images to a fixed size (e.g., 224x224)
    transforms.ToTensor()  # Convert image to PyTorch tensor
])

# Iterate over files in the folder
for image_file in os.listdir(test_image_folder_path):
    # Extract the numeric part of the file name (assuming it's the code)
    code = os.path.splitext(image_file)[0]
    if code in relevant_test_codes and image_file.endswith('.jpg'):
        # Load the image
        image_path = os.path.join(test_image_folder_path, image_file)
        img = Image.open(image_path).convert('RGB')  # Ensure 3 color channels (RGB)
        
        # Transform the image and append to the list
        tensor = transform(img)
        test_image_tensors.append(tensor)
        print(f"Loaded and transformed: {image_file}")

# Stack all tensors into a single tensor
if test_image_tensors:
    test_image_tensor_batch = torch.stack(test_image_tensors)
    print(f"Created a testing tensor of shape: {test_image_tensor_batch.shape}")
else:
    print("No testing images matched the criteria!")

# Example tensor shape: [num_test_images, channels, height, width]


      Number Classification  Label
0    1514522     Boeing 707    0.0
1    0747566     Boeing 707    0.0
2    1008575     Boeing 707    0.0
3    0717480     Boeing 707    0.0
4    0991569     Boeing 707    0.0
..       ...            ...    ...
294  0447762     Gulfstream    4.0
295  0396507     Gulfstream    4.0
296  2030079     Gulfstream    4.0
297  1063148     Gulfstream    4.0
298  0492487     Gulfstream    4.0

[299 rows x 3 columns]
Loaded and transformed: 0812092.jpg
Loaded and transformed: 1698699.jpg
Loaded and transformed: 1864991.jpg
Loaded and transformed: 0574359.jpg
Loaded and transformed: 2211444.jpg
Loaded and transformed: 1663953.jpg
Loaded and transformed: 0869743.jpg
Loaded and transformed: 1272698.jpg
Loaded and transformed: 0918179.jpg
Loaded and transformed: 1592286.jpg
Loaded and transformed: 1767102.jpg
Loaded and transformed: 0066420.jpg
Loaded and transformed: 0367786.jpg
Loaded and transformed: 1686317.jpg
Loaded and transformed: 0727357.jpg
Loaded and trans

In [33]:
####################################


#THIS 3rd SECTION WILL SETUP AND TRAIN THE NEURAL NETWORK

####################################

In [34]:
import torch.nn as nn
import torch.nn.functional as F

# Define a simple neural network architecture
class SimpleAircraftClassifier(nn.Module):
    def __init__(self, input_size=3*224*224, num_classes=5):  # Default assumes 224x224 RGB images
        super(SimpleAircraftClassifier, self).__init__()
        
# Add another hidden layer
        self.fc1 = nn.Linear(input_size, 256)  # First fully connected layer
        self.fc2 = nn.Linear(256, 128)        # Second fully connected layer
        self.fc3 = nn.Linear(128, num_classes)  # Output layer
        
    def forward(self, x):
        # Flatten the input
        x = x.view(x.size(0), -1)  # Reshape the tensor to (batch_size, input_size)
        
        # Apply fully connected layers with ReLU activation
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        
        return x

# Instantiate the model
input_size = 3 * 224 * 224  # Flattened size of 224x224 RGB images
num_classes = len(classification_mapping)  # Number of classes (5 based on your mapping)
simple_model = SimpleAircraftClassifier(input_size=input_size, num_classes=num_classes)

# Print model summary
print(simple_model)


# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(simple_model.parameters(), lr=0.001)

# Training function remains the same
def train_simple_model(model, train_loader, criterion, optimizer, num_epochs=10):
    model.train()  # Set the model to training mode
    for epoch in range(num_epochs):
        running_loss = 0.0
        for images, labels in train_loader:
            # Flatten images to fit the simple model
            images = images.view(images.size(0), -1)
            
            # Zero the parameter gradients
            optimizer.zero_grad()
            
            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)
            
            # Backward pass and optimization
            loss.backward()
            optimizer.step()
            
            # Track loss
            running_loss += loss.item()
        
        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {running_loss / len(train_loader)}")

# Example DataLoader creation
from torch.utils.data import DataLoader, TensorDataset

# Prepare dataset and loader
train_dataset = TensorDataset(image_tensor_batch, torch.tensor(df['Label'].values, dtype=torch.long))
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)


# Train the simple model
train_simple_model(simple_model, train_loader, criterion, optimizer, num_epochs=10)



SimpleAircraftClassifier(
  (fc1): Linear(in_features=150528, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=128, bias=True)
  (fc3): Linear(in_features=128, out_features=5, bias=True)
)
Epoch 1/10, Loss: 16.222561132907867
Epoch 2/10, Loss: 7.907152795791626
Epoch 3/10, Loss: 4.617014455795288
Epoch 4/10, Loss: 2.073240226507187
Epoch 5/10, Loss: 2.018816518783569
Epoch 6/10, Loss: 2.0273361206054688
Epoch 7/10, Loss: 1.533750557899475
Epoch 8/10, Loss: 1.314308762550354
Epoch 9/10, Loss: 1.224974811077118
Epoch 10/10, Loss: 1.5893061876296997


In [35]:
####################################


#THIS 4th SECTION COMPLETES AN F1 TEST ON THE TRAINING DATA

####################################

In [36]:
# Define the testing function
def evaluate_model(model, test_loader):
    model.eval()  # Set the model to evaluation mode
    all_labels = []
    all_preds = []
    
    with torch.no_grad():  # No gradient calculation needed during testing
        for images, labels in test_loader:
            # Flatten images for the simple model
            images = images.view(images.size(0), -1)  # Reshape images for simple model
            
            # Forward pass
            outputs = model(images)
            
            # Get predictions (class with the highest score)
            _, preds = torch.max(outputs, 1)
            
            # Store predictions and labels
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
    
    # Calculate Weighted F1-Score
    f1 = f1_score(all_labels, all_preds, average='weighted')
    print(f"Weighted F1-Score: {f1}")
    
    return f1

# Prepare DataLoader for testing data
test_dataset = TensorDataset(test_image_tensor_batch, torch.tensor(df_test['Label'].values, dtype=torch.long))
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Evaluate the model on testing data
f1_score_test = evaluate_model(simple_model, test_loader)


Weighted F1-Score: 0.2758334617446431
