# Step 1: Get Everything Set Up

## 1.1 Install Libraries

In [None]:
# Clone YOLOv5, move into the repository, and install its Python dependencies.
# NOTE(review): run this cell once per fresh runtime - after `%cd yolov5` a second run would
# clone another copy of the repository inside the current directory.
!git clone https://github.com/ultralytics/yolov5  # clone repo
%cd yolov5
%pip install -qr requirements.txt # install dependencies


import torch
import os
from IPython.display import Image, clear_output  # to display images

# Report the installed torch version and the name of GPU 0 when CUDA is available, otherwise 'CPU'.
print(f"Setup complete. Using torch {torch.__version__} ({torch.cuda.get_device_properties(0).name if torch.cuda.is_available() else 'CPU'})")

Cloning into 'yolov5'...
remote: Enumerating objects: 16836, done.[K
remote: Counting objects: 100% (11/11), done.[K
remote: Compressing objects: 100% (11/11), done.[K
remote: Total 16836 (delta 1), reused 6 (delta 0), pack-reused 16825[K
Receiving objects: 100% (16836/16836), 15.58 MiB | 16.48 MiB/s, done.
Resolving deltas: 100% (11545/11545), done.
/content/yolov5
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.3/41.3 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m207.3/207.3 kB[0m [31m13.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.5/4.5 MB[0m [31m96.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m865.5/865.5 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.7/62.7 kB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m
[?25hSetup complete. Using tor

## 1.2 Enable more RAM
You may need more RAM to avoid crashes when you train the model. This is not always necessary, but it is hard to gauge in advance.


In [None]:
from psutil import virtual_memory

# Total memory reported for the runtime, converted from bytes to decimal gigabytes.
ram_gb = virtual_memory().total / 1e9
print('Your runtime has {:.1f} gigabytes of available RAM\n'.format(ram_gb))

# 20 GB or more counts as a high-RAM runtime; anything smaller gets the upgrade instructions.
if ram_gb >= 20:
  print('You are using a high-RAM runtime!')
else:
  for hint_line in (
      'To enable a high-RAM runtime, select the Runtime > "Change runtime type"',
      'menu, and then select High-RAM in the Runtime shape dropdown. Then, ',
      're-execute this cell.',
  ):
    print(hint_line)

Your runtime has 54.8 gigabytes of available RAM

You are using a high-RAM runtime!


## 1.3 Set Up Working Environment

In [None]:
# Link to G Drive
%cd ..
from google.colab import drive
drive.mount('/content/gdrive')

# This creates a symbolic link so that now the path /content/gdrive/My\ Drive/ is equal to /mydrive
!ln -s /content/gdrive/My\ Drive/AmeliaYoloV5/ /mydrive
!ls /mydrive

# CD back to the yolo repo
%cd yolov5

# Add a dataset folder into yolov5 directory - note: this is the yolov5 directory in Colab, not the CRU Google Drive yolov5 folder
from pathlib import Path
Path("./datasets").mkdir(parents=True, exist_ok=True)

# Copy the data from your gdrive to you virtual machine
!cp -r /mydrive/Jan24/Phylum ./datasets


/content
Mounted at /content/gdrive
AmeliaYoloV5  Distractor_run		Jan24		      YoloV5_making_inferences.ipynb
Confused_run  Evaluation_Metrics.ipynb	Polygon_vs_rectangle  YoloV5_training_model.ipynb
/content/yolov5


# Step 2 - Make Evaluation

## 2.1 Load libraries and set file pathways

In [None]:
# Standard-library import used by the evaluation steps
import os

# Inputs handed to val.py; both paths are relative to the yolov5 working directory.
weights_path = 'datasets/Phylum/best.pt'  # trained model weights - replace with your own weights file
data_yaml_path = 'datasets/Phylum/dataset_phylum.yaml'  # dataset definition - replace with your own data.yaml

## 2.2 Add the section of code below to val.py

Add the code below to `val.py` at line 414, then save `val.py`. Keep the leading four-space indentation so the snippet lines up with the surrounding code.

In [None]:
    # Export the per-class validation metrics as a tab-separated text file inside save_dir.
    # NOTE(review): zip() pairs its inputs by position and stops at the shortest one, so this
    # assumes names, nt, tp, fp, p, r, f1 and ap all hold one entry per class in the same
    # order - TODO confirm against the surrounding val.py code.
    column_names = ["Class", "Instances", "TP", "FP", "Precision", "Recall", "F1", "AP"]
    # First line is the header; one further line is appended per zipped row.
    table_lines = ["\t".join(column_names)]
    for row in zip(names, nt, tp, fp, p, r, f1, ap):
        table_lines.append("\t".join(map(str, row)))
    # Every line, the header included, is terminated by a newline.
    table_string = "".join(line + "\n" for line in table_lines)
    # Output file name (replace with your desired name)
    filename = save_dir / "metrics_table.txt"
    with open(filename, "w") as table_file:
        table_file.write(table_string)
    print(f"Metrics table saved to: {filename}")

## 2.3 Run val.py

Set `--img` to the image size you used when training your model.

Set `--batch-size` to the batch size you used for training.

Set `--iou` and `--conf-thres` to the IoU and confidence thresholds you want to evaluate with.

In [None]:
# Run the val.py script on the weights and dataset YAML configured in section 2.1.
# {weights_path} and {data_yaml_path} are filled in from the notebook variables of the same name.
# Later cells read the results of this run from runs/val/exp.
!python val.py --weights {weights_path} --data {data_yaml_path} --img 960 --iou 0.5 --save-txt --save-conf --conf-thres 0.1 --batch-size 16

[34m[1mval: [0mdata=datasets/Phylum/dataset_phylum.yaml, weights=['datasets/Phylum/best.pt'], batch_size=16, imgsz=960, conf_thres=0.1, iou_thres=0.5, max_det=300, task=val, device=, workers=8, single_cls=False, augment=False, verbose=False, save_txt=True, save_hybrid=False, save_conf=True, save_json=False, project=runs/val, name=exp, exist_ok=False, half=False, dnn=False
YOLOv5 🚀 v7.0-350-g6096750f Python-3.10.12 torch-2.3.1+cu121 CUDA:0 (Tesla T4, 15102MiB)

Fusing layers... 
Model summary: 212 layers, 20889303 parameters, 0 gradients, 48.0 GFLOPs
[34m[1mval: [0mScanning /content/yolov5/datasets/Phylum/labels/val.cache... 1125 images, 0 backgrounds, 0 corrupt: 100% 1125/1125 [00:00<?, ?it/s]
Traceback (most recent call last):
  File "/content/yolov5/val.py", line 623, in <module>
    main(opt)
  File "/content/yolov5/val.py", line 594, in main
    run(**vars(opt))
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/_contextlib.py", line 115, in decorate_context
    retu

## 2.4 Add additional evaluation metrics

Calculate False Negatives

In [None]:
import pandas as pd
from IPython.display import display

# Load the tab-separated metrics table that the val.py snippet wrote.
# CHANGE FILE PATHWAY to the folder of the validation run you want to analyse.
metrics = pd.read_table("runs/val/exp/metrics_table.txt")
display(metrics)

FileNotFoundError: [Errno 2] No such file or directory: 'runs/val/exp/metrics_table.txt'

In [None]:
# False negatives per class: the instances that are not counted as true positives.
metrics['FN'] = metrics['Instances'].sub(metrics['TP'])
display(metrics)

Unnamed: 0,Class,Instances,TP,FP,Precision,Recall,F1,AP,FN
0,0,1739,211.0,391.0,0.350717,0.121334,0.180294,0.046187,1528.0
1,1,2,0.0,0.0,0.0,0.0,0.0,0.0,2.0
2,2,28,16.0,16.0,0.495515,0.571429,0.530771,0.171688,12.0
3,3,644,455.0,199.0,0.696083,0.706522,0.701264,0.34298,189.0
4,4,2972,1393.0,1433.0,0.492914,0.468708,0.480506,0.161766,1579.0
5,5,1139,425.0,506.0,0.456588,0.373134,0.410664,0.121708,714.0
6,6,1237,786.0,360.0,0.685661,0.635408,0.659579,0.288708,451.0
7,7,1859,1194.0,525.0,0.694576,0.64224,0.667384,0.253354,665.0
8,8,80,3.0,11.0,0.220955,0.0375,0.064118,0.019727,77.0
9,9,1696,799.0,848.0,0.485139,0.471136,0.478035,0.158333,897.0


In [None]:
# Import
import statistics

# Pool the per-class counts to describe the model as a whole.
tInstances = metrics['Instances'].sum()
tTP = metrics['TP'].sum()
tFP = metrics['FP'].sum()
tFN = metrics['FN'].sum()
# Overall AP is the unweighted mean of the per-class AP values.
tAP = statistics.mean(metrics['AP'])

# Precision, Recall and F1 from the pooled counts.
# A zero denominator (no detections, no instances, or precision + recall == 0) would otherwise
# produce NaN, so report 0.0 instead - the same value the per-class rows show when TP and FP are 0.
tPrecision = tTP / (tTP + tFP) if (tTP + tFP) > 0 else 0.0
tRecall = tTP / (tTP + tFN) if (tTP + tFN) > 0 else 0.0
tF1 = 2 * ((tPrecision * tRecall) / (tPrecision + tRecall)) if (tPrecision + tRecall) > 0 else 0.0

# Summary row; the keys must match the column names of `metrics`.
new_row_data = {
  'Class': 'Full',
  'Instances': tInstances,
  'TP': tTP,
  'FP': tFP,
  'Precision': tPrecision,
  'Recall': tRecall,
  'F1': tF1,
  'AP': tAP,
  'FN': tFN,
}

# Create a one-row DataFrame from the summary data
new_df = pd.DataFrame([new_row_data])

# Append the summary row below the per-class rows; ignore_index renumbers the rows so the
# summary row does not reuse index label 0 of the first class.
new_metrics = pd.concat([metrics, new_df], ignore_index=True)
display(new_metrics)

Unnamed: 0,Class,Instances,TP,FP,Precision,Recall,F1,AP,FN
0,0,1739,211.0,391.0,0.350717,0.121334,0.180294,0.046187,1528.0
1,1,2,0.0,0.0,0.0,0.0,0.0,0.0,2.0
2,2,28,16.0,16.0,0.495515,0.571429,0.530771,0.171688,12.0
3,3,644,455.0,199.0,0.696083,0.706522,0.701264,0.34298,189.0
4,4,2972,1393.0,1433.0,0.492914,0.468708,0.480506,0.161766,1579.0
5,5,1139,425.0,506.0,0.456588,0.373134,0.410664,0.121708,714.0
6,6,1237,786.0,360.0,0.685661,0.635408,0.659579,0.288708,451.0
7,7,1859,1194.0,525.0,0.694576,0.64224,0.667384,0.253354,665.0
8,8,80,3.0,11.0,0.220955,0.0375,0.064118,0.019727,77.0
9,9,1696,799.0,848.0,0.485139,0.471136,0.478035,0.158333,897.0


In [None]:
# If the new table looks good, save it.
# It goes into a separate comma-separated file: overwriting metrics_table.txt would replace the
# tab-separated table that the loading cell reads with sep="\t", so that cell could not be re-run.
# CHANGE FILE PATHWAY
new_metrics.to_csv("runs/val/exp/metrics_table_full.csv", sep=",", index=False)

# Step 3 - Save Results


In [None]:
from google.colab import drive
import shutil
import os
# zip full results folder
!zip -r ./runs/val/Eval_Phylum.zip ./runs/val/exp
# source folder
source_folder_path = '/content/yolov5/runs/val/Eval_Phylum.zip'
# destination folder
destination_folder_path = '/content/gdrive/My Drive/AmeliaYoloV5/Jan24/Phylum/model_evaluation/'
# Copy the zipped folder to Google Drive
shutil.copy(source_folder_path, destination_folder_path)

print("Zipped folder copied to Google Drive successfully!")

  adding: runs/val/exp/ (stored 0%)
  adding: runs/val/exp/val_batch1_labels.jpg (deflated 7%)
  adding: runs/val/exp/val_batch0_labels.jpg (deflated 9%)
  adding: runs/val/exp/val_batch2_pred.jpg (deflated 6%)
  adding: runs/val/exp/val_batch0_pred.jpg (deflated 10%)
  adding: runs/val/exp/confusion_matrix.png (deflated 16%)
  adding: runs/val/exp/P_curve.png (deflated 9%)
  adding: runs/val/exp/F1_curve.png (deflated 11%)
  adding: runs/val/exp/val_batch2_labels.jpg (deflated 7%)
  adding: runs/val/exp/metrics_table.txt (deflated 52%)
  adding: runs/val/exp/labels/ (stored 0%)
  adding: runs/val/exp/labels/11000__Isis-DIVE292_SCORPIO_2_20160613-122936.txt (deflated 59%)
  adding: runs/val/exp/labels/112500__Isis-DIVE291_SCORPIO_7_20160612-023119.txt (deflated 49%)
  adding: runs/val/exp/labels/DIVE271_ADS_1200m_T1-20-55-07.txt (deflated 56%)
  adding: runs/val/exp/labels/DIVE270_ADS_800m_T1-23-20-41.txt (deflated 58%)
  adding: runs/val/exp/labels/DIVE287_RB_1200m_T2-15-40-00.txt (de

FileNotFoundError: Directory does not exist: /content/gdrive/My Drive/AmeliaYoloV5/Jan24/Full/model_evaluation/