In [9]:
# Load libraries

import os
import torch
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, ConfusionMatrixDisplay
import shutil
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
from ultralytics import YOLO
from sklearn.model_selection import train_test_split

In [10]:
# # Clears CUDA memory cache
# torch.cuda.empty_cache()
# torch.cuda.reset_peak_memory_stats()
# torch.cuda.reset_accumulated_memory_stats()

### Prepare data for YOLO model input

In [11]:
# Base directories
data_directory = 'data'
output_directory = os.path.join('model_outputs_data', 'yolo_output')

# Paths to data
train_csv_path = os.path.join(data_directory, 'train.csv')
test_csv_path = os.path.join(data_directory, 'test.csv')
images_train_path = os.path.join(data_directory, 'images_train')
images_test_path = os.path.join(data_directory, 'images_test')

# Create necessary directories
def create_directories(path_list):
    for path in path_list:
        os.makedirs(path, exist_ok=True)

# Setup directories for training and validation
train_dir = os.path.join(output_directory, 'train')
val_dir = os.path.join(output_directory, 'val')
train_0_dir = os.path.join(train_dir, '0')
train_1_dir = os.path.join(train_dir, '1')
val_0_dir = os.path.join(val_dir, '0')
val_1_dir = os.path.join(val_dir, '1')

# Create all needed folders
create_directories([train_dir, val_dir, train_0_dir, train_1_dir, val_0_dir, val_1_dir])

# Method to copy images based on a dataframe, source directory, and target base directory
def copy_images(df, src_dir, target_base_dir):
    for idx, row in df.iterrows():
        file_name = f"{row['id']}.png"
        src_file_path = os.path.join(src_dir, file_name)
        if row['ground_truth'] == 1:
            dst_file_path = os.path.join(target_base_dir, '1', file_name)
        else:
            dst_file_path = os.path.join(target_base_dir, '0', file_name)
        shutil.copy(src_file_path, dst_file_path)

# Load the data
train_df = pd.read_csv(train_csv_path)
test_df = pd.read_csv(test_csv_path)

# Split train_df further to create a validation set or use test_df as validation
from sklearn.model_selection import train_test_split
train_df, val_df = train_test_split(train_df, test_size=0.2, random_state=42)

# Copy images according to the csv files
copy_images(train_df, images_train_path, train_dir)
copy_images(val_df, images_train_path, val_dir)  # If using part of train data as val
# copy_images(test_df, images_test_path, val_dir)  # Uncomment if test_df should be used as validation

print("Data organization complete.")


Data organization complete.


### Select device
I'm using my local GPU with Torch+CUDA for this because it is way faster than using CPU

In [12]:
# Select the device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


### Load and Train the model

In [19]:
# Load and Train the model
model = YOLO('pre_trained_models/yolov8x-cls.pt')  # Yolo8
# model = YOLO(os.path.join(output_directory, 'pre_trained_models/yolov8x-cls.pt'))  # Yolo8
model.to(device)

train_results = model.train(data=output_directory, epochs=20, seed=42, device=device)

New https://pypi.org/project/ultralytics/8.3.33 available  Update with 'pip install -U ultralytics'
[34m[1mengine\trainer: [0mtask=classify, mode=train, model=pre_trained_models/yolov8x-cls.pt, data=model_outputs_data\yolo_output, epochs=20, time=None, patience=100, batch=16, imgsz=224, save=True, save_period=-1, cache=False, device=cuda, workers=8, project=None, name=train2, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=42, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=False, save_conf=False,

  self.scaler = torch.cuda.amp.GradScaler(enabled=self.amp)
[34m[1mtrain: [0mScanning C:\Projects\CS6640 - Artificial Neural Networks\Project\CS6640_Project\model_outputs_data\yolo_output\train... 2720 images, 0 corrupt: 100%|██████████| 2720/2720 [00:00<?, ?it/s]
[34m[1mval: [0mScanning C:\Projects\CS6640 - Artificial Neural Networks\Project\CS6640_Project\model_outputs_data\yolo_output\val... 681 images, 0 corrupt: 100%|██████████| 681/681 [00:00<?, ?it/s]


[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.000714, momentum=0.9) with parameter groups 50 weight(decay=0.0), 51 weight(decay=0.0005), 51 bias(decay=0.0)
[34m[1mTensorBoard: [0mmodel graph visualization added 
Image sizes 224 train, 224 val
Using 8 dataloader workers
Logging results to [1mruns\classify\train2[0m
Starting training for 20 epochs...

      Epoch    GPU_mem       loss  Instances       Size


       1/20      4.39G     0.4909         16        224: 100%|██████████| 170/170 [00:19<00:00,  8.80it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 22/22 [00:01<00:00, 19.94it/s]

                   all      0.875          1






      Epoch    GPU_mem       loss  Instances       Size


       2/20      3.38G     0.4759         16        224: 100%|██████████| 170/170 [00:17<00:00,  9.88it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 22/22 [00:01<00:00, 19.66it/s]

                   all      0.686          1






      Epoch    GPU_mem       loss  Instances       Size


       3/20       3.4G     0.5066         16        224: 100%|██████████| 170/170 [00:16<00:00, 10.61it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 22/22 [00:01<00:00, 19.24it/s]

                   all       0.84          1






      Epoch    GPU_mem       loss  Instances       Size


       4/20      3.41G     0.5257         16        224: 100%|██████████| 170/170 [00:15<00:00, 10.73it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 22/22 [00:01<00:00, 19.84it/s]

                   all      0.824          1






      Epoch    GPU_mem       loss  Instances       Size


       5/20      3.41G     0.4859         16        224: 100%|██████████| 170/170 [00:15<00:00, 10.78it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 22/22 [00:01<00:00, 19.79it/s]

                   all      0.838          1






      Epoch    GPU_mem       loss  Instances       Size


       6/20      3.41G     0.4218         16        224: 100%|██████████| 170/170 [00:15<00:00, 10.75it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 22/22 [00:01<00:00, 19.80it/s]

                   all      0.843          1






      Epoch    GPU_mem       loss  Instances       Size


       7/20      3.41G     0.4065         16        224: 100%|██████████| 170/170 [00:15<00:00, 10.73it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 22/22 [00:01<00:00, 19.63it/s]

                   all      0.878          1






      Epoch    GPU_mem       loss  Instances       Size


       8/20      3.41G     0.3706         16        224: 100%|██████████| 170/170 [00:15<00:00, 10.74it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 22/22 [00:01<00:00, 19.57it/s]

                   all      0.902          1






      Epoch    GPU_mem       loss  Instances       Size


       9/20      3.41G      0.338         16        224: 100%|██████████| 170/170 [00:15<00:00, 10.72it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 22/22 [00:01<00:00, 19.78it/s]

                   all      0.899          1






      Epoch    GPU_mem       loss  Instances       Size


      10/20       3.4G     0.3304         16        224: 100%|██████████| 170/170 [00:15<00:00, 10.71it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 22/22 [00:01<00:00, 19.65it/s]

                   all      0.921          1






      Epoch    GPU_mem       loss  Instances       Size


      11/20      3.41G     0.3146         16        224: 100%|██████████| 170/170 [00:16<00:00, 10.53it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 22/22 [00:01<00:00, 19.88it/s]

                   all      0.887          1






      Epoch    GPU_mem       loss  Instances       Size


      12/20       3.4G     0.2804         16        224: 100%|██████████| 170/170 [00:15<00:00, 10.77it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 22/22 [00:01<00:00, 19.87it/s]

                   all      0.928          1






      Epoch    GPU_mem       loss  Instances       Size


      13/20      3.41G     0.3062         16        224: 100%|██████████| 170/170 [00:15<00:00, 10.79it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 22/22 [00:01<00:00, 19.85it/s]

                   all      0.934          1






      Epoch    GPU_mem       loss  Instances       Size


      14/20       3.4G     0.2704         16        224: 100%|██████████| 170/170 [00:15<00:00, 10.66it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 22/22 [00:01<00:00, 19.82it/s]

                   all      0.912          1






      Epoch    GPU_mem       loss  Instances       Size


      15/20      3.41G     0.2484         16        224: 100%|██████████| 170/170 [00:15<00:00, 10.74it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 22/22 [00:01<00:00, 19.81it/s]


                   all      0.924          1

      Epoch    GPU_mem       loss  Instances       Size


      16/20       3.4G      0.243         16        224: 100%|██████████| 170/170 [00:15<00:00, 10.71it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 22/22 [00:01<00:00, 19.77it/s]

                   all      0.922          1






      Epoch    GPU_mem       loss  Instances       Size


      17/20      3.41G     0.2168         16        224: 100%|██████████| 170/170 [00:15<00:00, 10.72it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 22/22 [00:01<00:00, 18.85it/s]

                   all      0.927          1






      Epoch    GPU_mem       loss  Instances       Size


      18/20       3.4G     0.1786         16        224: 100%|██████████| 170/170 [00:16<00:00, 10.55it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 22/22 [00:01<00:00, 19.86it/s]

                   all      0.944          1






      Epoch    GPU_mem       loss  Instances       Size


      19/20      3.41G     0.1842         16        224: 100%|██████████| 170/170 [00:15<00:00, 10.73it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 22/22 [00:01<00:00, 19.85it/s]

                   all       0.94          1






      Epoch    GPU_mem       loss  Instances       Size


      20/20       3.4G     0.1743         16        224: 100%|██████████| 170/170 [00:15<00:00, 10.70it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 22/22 [00:01<00:00, 19.87it/s]

                   all      0.941          1






20 epochs completed in 0.133 hours.
Optimizer stripped from runs\classify\train2\weights\last.pt, 112.5MB
Optimizer stripped from runs\classify\train2\weights\best.pt, 112.5MB

Validating runs\classify\train2\weights\best.pt...
Ultralytics YOLOv8.2.82  Python-3.11.6 torch-2.5.1+cu124 CUDA:0 (NVIDIA GeForce RTX 4060 Laptop GPU, 8188MiB)
YOLOv8x-cls summary (fused): 133 layers, 56,125,762 parameters, 0 gradients, 153.8 GFLOPs
[34m[1mtrain:[0m C:\Projects\CS6640 - Artificial Neural Networks\Project\CS6640_Project\model_outputs_data\yolo_output\train... found 2720 images in 2 classes  
[34m[1mval:[0m C:\Projects\CS6640 - Artificial Neural Networks\Project\CS6640_Project\model_outputs_data\yolo_output\val... found 681 images in 2 classes  
[34m[1mtest:[0m None...


               classes   top1_acc   top5_acc: 100%|██████████| 22/22 [00:01<00:00, 19.87it/s]


                   all      0.944          1
Speed: 0.1ms preprocess, 1.5ms inference, 0.0ms loss, 0.0ms postprocess per image
Results saved to [1mruns\classify\train2[0m
Results saved to [1mruns\classify\train2[0m


### Prediction and evaluation

In [20]:
# Import best model
best_model = YOLO('runs/classify/train/weights/best.pt')

In [21]:
# Predict
test_results = best_model.predict(source=images_test_path, save=True, device=device)



errors for large sources or long-running streams and videos. See https://docs.ultralytics.com/modes/predict/ for help.

Example:
    results = model(source=..., stream=True)  # generator of Results objects
    for r in results:
        boxes = r.boxes  # Boxes object for bbox outputs
        masks = r.masks  # Masks object for segment masks outputs
        probs = r.probs  # Class probabilities for classification outputs
image 1/1459 C:\Projects\CS6640 - Artificial Neural Networks\Project\CS6640_Project\data\images_test\1.png: 224x224 1 1.00, 0 0.00, 7.0ms
image 2/1459 C:\Projects\CS6640 - Artificial Neural Networks\Project\CS6640_Project\data\images_test\100.png: 224x224 1 1.00, 0 0.00, 6.0ms
image 3/1459 C:\Projects\CS6640 - Artificial Neural Networks\Project\CS6640_Project\data\images_test\1004.png: 224x224 0 0.96, 1 0.04, 6.0ms
image 4/1459 C:\Projects\CS6640 - Artificial Neural Networks\Project\CS6640_Project\data\images_test\1006.png: 224x224 0 1.00, 1 0.00, 6.0ms
image 5/1459 

In [22]:
# Prediction data preparation

image_id = []
image_prediction = []

for result in test_results:
    image_id.append(os.path.basename(result.path)[:-4])
    image_prediction.append(result.probs.top1)
    
# Loading testing csv
test_df = pd.read_csv(test_csv_path)

# Create a DataFrame from predictions
predictions_df = pd.DataFrame({
    'id': image_id,
    'predicted': image_prediction
})

# Convert IDs in test_df to string (to validate/match predictions_df)
test_df['id'] = test_df['id'].astype(str)

# Merge the predictions with the ground truths
results_df = pd.merge(test_df, predictions_df, on='id', how='left')

# Ensure no missing predictions
# results_df['predicted'].fillna(0, inplace=True)

In [23]:
# Calculate evaluation metrics
accuracy = accuracy_score(results_df['ground_truth'], results_df['predicted'])
precision = precision_score(results_df['ground_truth'], results_df['predicted'], average='macro')
recall = recall_score(results_df['ground_truth'], results_df['predicted'], average='macro')
f1 = f1_score(results_df['ground_truth'], results_df['predicted'], average='macro')

print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1 Score: {f1:.2f}")

Accuracy: 0.94
Precision: 0.94
Recall: 0.94
F1 Score: 0.94


In [24]:
# Confusion matrix
cm = confusion_matrix(results_df['ground_truth'], results_df['predicted'])
disp = ConfusionMatrixDisplay(confusion_matrix=cm)
disp.plot(cmap=plt.cm.Blues)
plt.title('Confusion Matrix')
plt.show()

<Figure size 640x480 with 2 Axes>