In [None]:
#!git --version
#!pip install opencv-python


#!pip install backports.functools_lru_cache

# to visualise training in real time
#!pip install tensorboard


# For CUDA 11.8, install PyTorch with CUDA support
#!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118

In [None]:
# Environment check: report the installed PyTorch build and whether CUDA is usable.
import torch

for check_name, check_value in (
    ("PyTorch version", torch.__version__),
    ("CUDA available", torch.cuda.is_available()),
):
    print(f"{check_name}: {check_value}")

# Importing from the yolov5 package raises ImportError when it cannot be found,
# so reaching the final print means the package is importable from here.
# (This only checks the import; no model weights are loaded.)
from yolov5 import utils

print("YOLOv5 installation successful!")



In [None]:
# Debugging switches, set as process environment variables.
import os

os.environ.update(
    {
        # NOTE(review): presumably works around the duplicate-OpenMP-runtime
        # abort (OMP Error #15) seen with some PyTorch installs - confirm.
        "KMP_DUPLICATE_LIB_OK": "TRUE",
        # NOTE(review): presumably turns off Comet experiment logging - confirm.
        "COMET_MODE": "DISABLED",
    }
)


In [None]:
# Test run (CPU): a quick check that the training pipeline works end to end.
# Change into the yolov5 directory so that train.py can be invoked by its bare name.
# NOTE(review): `%cd yolov5` is relative to the current working directory and is
# repeated in the later training cells; running several of them in one kernel
# session will attempt to enter yolov5/yolov5 - confirm the intended workflow.
%cd yolov5
# Train with: image size 640, batch size 8, 50 epochs, starting weights yolov5s.pt,
# device "cpu", run name "hi_vis_detector_cpu".
# NOTE(review): --data is an absolute path under C:/Users/mp967, whereas the other
# runs in this notebook use C:/Users/10 - presumably a different machine; confirm.
!python train.py --img 640 --batch 8 --epochs 50 --data "C:/Users/mp967/OneDrive - University of Exeter/Exeter University/Units/Topics in Business Analytics/dataset/data.yaml" --weights yolov5s.pt --device cpu --name hi_vis_detector_cpu

# Result: kernel interrupted after the first epoch. The code works, but training on the CPU is far too slow.

In [None]:
# First run (GPU): same configuration as the CPU test run, but on the GPU.
# NOTE(review): `%cd yolov5` is relative to the current working directory, so this
# cell expects to start from the folder that contains the yolov5 checkout.
%cd yolov5
# This time --device 0 selects the GPU instead of the CPU. Other settings:
# image size 640, batch size 8, 50 epochs, starting weights yolov5s.pt, run name "first_run".
# NOTE(review): --data is a hard-coded absolute path on the author's machine.
!python train.py --img 640 --batch 8 --epochs 50 --data "C:/Users/10/OneDrive - University of Exeter/Exeter University/Units/Topics in Business Analytics/dataset/data.yaml" --weights yolov5s.pt --device 0 --name first_run

# Result: success.

In [None]:
# Second run (GPU): continue from the first run's best weights with a much longer schedule.
# NOTE(review): `%cd yolov5` is relative to the current working directory, so this
# cell expects to start from the folder that contains the yolov5 checkout.
%cd yolov5
# 10x more epochs (500 instead of 50) to try to increase performance. --weights points at
# runs/train/first_run/weights/best.pt, so training starts from the first run's best checkpoint.
# --noplots was added with the intention of reducing the volume of logs sent to the notebook.
# NOTE(review): YOLOv5 documents --noplots as "save no plot files" - confirm that it
# actually affects the amount of console output.
!python train.py --img 640 --batch 8 --epochs 500 --data "C:/Users/10/OneDrive - University of Exeter/Exeter University/Units/Topics in Business Analytics/dataset/data.yaml" --weights "C:/Users/10/OneDrive - University of Exeter/yolov5/runs/train/first_run/weights/best.pt" --device 0 --name second_run --noplots

# Result: fail. Training stopped at about 100 epochs because of YOLOv5's early-stopping
# patience (by default, the script stops automatically when performance has not improved
# for a set number of epochs - presumably the --patience default; confirm).
# Why was it not improving? Possibly overfitting.
# TODO: finish this explanation - the original note was cut off after "Overfitting, poor".

In [None]:
# Third run (GPU): switch to the larger YOLOv5 model.
# NOTE(review): `%cd yolov5` is relative to the current working directory, so this
# cell expects to start from the folder that contains the yolov5 checkout.
%cd yolov5
# Uses the YOLOv5 large weights (yolov5l.pt) for 50 epochs with the batch size reduced from 8 to 6.
# NOTE(review): the smaller batch is presumably needed to fit the larger model in GPU memory - confirm.
!python train.py --img 640 --batch 6 --epochs 50 --data "C:/Users/10/OneDrive - University of Exeter/Exeter University/Units/Topics in Business Analytics/dataset/data.yaml" --weights yolov5l.pt --device 0 --name third_run 

# Result: success. Training took approximately 6 hours; accuracy was not as high as
# expected, but it was better than in the second run.

In [None]:
# Line chart of mAP@0.5:0.95 per logged row (one row per epoch) for the second run.
import pandas as pd
import matplotlib.pyplot as plt

# Results file exported from the second training run.
SECOND_RUN_RESULTS_URL = "https://raw.githubusercontent.com/automat9/Business-Analytics/refs/heads/master/Semester%201/Topics%20in%20Business%20Analytics/runs/results/2results.csv"

# Load the file and strip surrounding whitespace from the header names in one
# step, so the metric column can be addressed by its plain name.
data = pd.read_csv(SECOND_RUN_RESULTS_URL).rename(columns=str.strip)

# Explicit figure/axes pair instead of the implicit pyplot state.
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(data["metrics/mAP_0.5:0.95"], label="mAP_0.5:0.95")
ax.set_title("mAP_0.5:0.95 for Second Run")
ax.set_xlabel("epoch")
ax.set_ylabel("mAP")
ax.grid(True, alpha=1)
ax.legend()

# Render the figure.
plt.show()



In [None]:
# Descriptive statistics of the two mAP metrics for each of the three training runs.
import pandas as pd

# Combined metrics of all runs; header names are stripped of surrounding
# whitespace while loading so the columns can be selected by their plain names.
data = pd.read_csv(
    "https://raw.githubusercontent.com/automat9/Business-Analytics/refs/heads/master/Semester%201/Topics%20in%20Business%20Analytics/runs/results/results_metadata.csv"
).rename(columns=str.strip)

# One describe() table per run, built from that run's mAP_0.5 and mAP_0.5:0.95 columns.
first_stats, second_stats, third_stats = (
    data[[f"{run}/mAP_0.5", f"{run}/mAP_0.5:0.95"]].describe()
    for run in ("first", "second", "third")
)

# Print each table under its heading, in run order.
for run_label, run_stats in (
    ("First", first_stats),
    ("Second", second_stats),
    ("Third", third_stats),
):
    print(f"Descriptive Statistics for {run_label} Run:")
    print(run_stats)

In [8]:
# Compact per-run summary of the mAP metrics (same source file as the verbose summary cell).
import pandas as pd

METADATA_URL = "https://raw.githubusercontent.com/automat9/Business-Analytics/refs/heads/master/Semester%201/Topics%20in%20Business%20Analytics/runs/results/results_metadata.csv"

# Heading label -> the two metric columns that belong to that run.
RUN_COLUMNS = {
    "First": ["first/mAP_0.5", "first/mAP_0.5:0.95"],
    "Second": ["second/mAP_0.5", "second/mAP_0.5:0.95"],
    "Third": ["third/mAP_0.5", "third/mAP_0.5:0.95"],
}

# Load the metadata, then drop surrounding whitespace from every header name.
data = pd.read_csv(METADATA_URL)
data.columns = [column.strip() for column in data.columns]

# Compute every summary table before printing anything.
summaries = {label: data[columns].describe() for label, columns in RUN_COLUMNS.items()}
first = summaries["First"]
second = summaries["Second"]
third = summaries["Third"]

# Show each table under its heading, in run order.
for label, summary in summaries.items():
    print(f"{label} Run:")
    print(summary)

First Run:
       first/mAP_0.5  first/mAP_0.5:0.95
count      50.000000           50.000000
mean        0.672929            0.368914
std         0.052689            0.049069
min         0.445690            0.207440
25%         0.679142            0.361355
50%         0.687740            0.390985
75%         0.697310            0.399720
max         0.708650            0.413040
Second Run:
       second/mAP_0.5  second/mAP_0.5:0.95
count      101.000000           101.000000
mean         0.654837             0.364987
std          0.021346             0.012086
min          0.573300             0.317960
25%          0.639870             0.358100
50%          0.652820             0.364940
75%          0.669250             0.372450
max          0.717520             0.406010
Third Run:
       third/mAP_0.5  third/mAP_0.5:0.95
count      50.000000           50.000000
mean        0.663819            0.377881
std         0.045529            0.039858
min         0.414000            0.194210
25%  