Web-scraping Colab: https://colab.research.google.com/drive/1Wxa-1MiGifR9xv9iGfLwxdO0kw9sQqqf?usp=sharing

YOLOv8 model: https://drive.google.com/file/d/1QFiVzSv72qzOBci9797Usmsx4abvcwBl/view?usp=sharing

Previous project code: https://colab.research.google.com/drive/19szRE3shfWG6JX5L3BIDbJYWeUzHi8ex?usp=sharing



In [None]:
# Pinned to the release this notebook was last run with so re-runs install the
# same code; %pip installs into the environment of the running kernel.
%pip install -q ultralytics==8.1.25

Collecting ultralytics
  Downloading ultralytics-8.1.25-py3-none-any.whl (720 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m720.1/720.1 kB[0m [31m6.0 MB/s[0m eta [36m0:00:00[0m
Collecting thop>=0.1.1 (from ultralytics)
  Downloading thop-0.1.1.post2209072238-py3-none-any.whl (15 kB)
Installing collected packages: thop, ultralytics
Successfully installed thop-0.1.1.post2209072238 ultralytics-8.1.25


In [None]:
from ultralytics import YOLO
import pandas as pd
from matplotlib import pyplot as plt
import numpy as np
from PIL import Image
import os
import shutil


In [None]:
# Count the entries currently stored in the validation labels folder.
val_labels_dir = "drive/MyDrive/Colab Notebooks/hodl_final_project_data/labels/val"
len(os.listdir(val_labels_dir))

22241

# Data Setup



```
# This is formatted as code
```

## Loading in data

In [None]:
# The URL must be quoted: unquoted, the shell treats `&` as "run in background",
# drops the `dl` parameter, and wget stores the archive under the literal name
# `pedestrian.zip?rlkey=...`. `-O` pins the local file name so a re-run
# overwrites it instead of adding `.1` copies; `dl=1` requests the file itself.
!wget -q -O pedestrian.zip "https://www.dropbox.com/scl/fi/txd5rvetipwgmwxbju7l3/pedestrian.zip?rlkey=bxatdudux7m3mhj4s8b6knmor&dl=1"
# `-o` overwrites existing files without asking, so the cell never blocks on
# unzip's interactive "replace ...?" prompt when it is run again.
!unzip -o -qq pedestrian.zip

replace images/1478019952686311006.jpg? [y]es, [n]o, [A]ll, [N]one, [r]ename:  NULL
(EOF or read error, treating as "[N]one" ...)



In [None]:
# NOTE(review): the counts printed by this cell satisfy 132406 + 32699 == 165105,
# so labels_trainval.csv looks like the union of labels_train.csv and
# labels_val.csv. If so, the "validation" frame contains every training row and
# validation metrics will be optimistic -- confirm before relying on them.

def _load_labels(csv_name):
    """Read one label CSV and add an `image_path` column ("images/" + frame)."""
    labels = pd.read_csv(csv_name)
    labels["image_path"] = labels.frame.map(lambda frame_name: "images/" + frame_name)
    return labels


train = _load_labels("labels_train.csv")
print("length of training data:", len(train))

val = _load_labels("labels_trainval.csv")
print("length of validation data:", len(val))

test = _load_labels("labels_val.csv")
print("length of test data:", len(test))

length of training data: 132406
length of validation data: 165105
length of test data: 32699


## Converting from absolute (x1, y1, x2, y2) to YOLO (X, Y, W, H)

In [None]:
# Default image size used to normalise box coordinates
# (assumes every image is 480x300 pixels -- TODO confirm against the dataset).
image_width = 480
image_height = 300

def bbox_to_yolo(row, img_w=None, img_h=None):
    """Convert one absolute corner box into a normalised YOLO box.

    Args:
        row: Record indexable by 'xmin', 'xmax', 'ymin' and 'ymax' (for example a
            DataFrame row or a dict) holding absolute coordinates.
        img_w: Width to normalise by; defaults to the module-level ``image_width``.
        img_h: Height to normalise by; defaults to the module-level ``image_height``.

    Returns:
        pd.Series indexed by 'center_x', 'center_y', 'width' and 'height', each
        expressed as a fraction of the image size.

    Raises:
        ValueError: If the width or height used for normalisation is not positive.
    """
    # Resolve the defaults at call time so later changes to the globals are honoured.
    if img_w is None:
        img_w = image_width
    if img_h is None:
        img_h = image_height
    if img_w <= 0 or img_h <= 0:
        raise ValueError(f"image size must be positive, got {img_w}x{img_h}")

    # converting XYXY to XYWH, but still in absolute terms for now
    center_x = (row['xmin'] + row['xmax']) / 2
    center_y = (row['ymin'] + row['ymax']) / 2
    width = row['xmax'] - row['xmin']
    height = row['ymax'] - row['ymin']

    # converting from absolute terms to relative terms
    return pd.Series(
        [center_x / img_w, center_y / img_h, width / img_w, height / img_h],
        index=['center_x', 'center_y', 'width', 'height'],
    )

def _add_yolo_columns(df):
    """Add 'center_x', 'center_y', 'width' and 'height' columns to ``df`` in place.

    Uses whole-column arithmetic with the same formulas as ``bbox_to_yolo``,
    which avoids calling a Python function and building a Series for every row.
    """
    df['center_x'] = (df['xmin'] + df['xmax']) / 2 / image_width
    df['center_y'] = (df['ymin'] + df['ymax']) / 2 / image_height
    df['width'] = (df['xmax'] - df['xmin']) / image_width
    df['height'] = (df['ymax'] - df['ymin']) / image_height


for _split_df in (train, val, test):
    _add_yolo_columns(_split_df)


In [None]:
# Preview the first rows of `train`, including the normalised box columns.
train.head()

Unnamed: 0,frame,xmin,xmax,ymin,ymax,class_id,image_path,center_x,center_y,width,height
0,1478019952686311006.jpg,237,251,143,155,1,images/1478019952686311006.jpg,0.508333,0.496667,0.029167,0.04
1,1478019952686311006.jpg,437,454,120,186,3,images/1478019952686311006.jpg,0.928125,0.51,0.035417,0.22
2,1478019953180167674.jpg,218,231,146,158,1,images/1478019953180167674.jpg,0.467708,0.506667,0.027083,0.04
3,1478019953689774621.jpg,171,182,141,154,2,images/1478019953689774621.jpg,0.367708,0.491667,0.022917,0.043333
4,1478019953689774621.jpg,179,191,144,155,1,images/1478019953689774621.jpg,0.385417,0.498333,0.025,0.036667


## Setting up the Data Structure

In [None]:
def create_directories(base_path='/content/drive/My Drive/Colab Notebooks/hodl_final_project_data', sub_dirs=['train', 'test', 'val']):
    """Create the ``images/`` and ``labels/`` folder trees under ``base_path``.

    Args:
        base_path: Directory that will contain the ``images`` and ``labels`` folders.
        sub_dirs: Names of the sub-folders created inside both of them.

    Folders that already exist are left as they are.
    """
    for kind in ('images', 'labels'):
        kind_root = os.path.join(base_path, kind)
        os.makedirs(kind_root, exist_ok=True)
        for split_name in sub_dirs:
            os.makedirs(os.path.join(kind_root, split_name), exist_ok=True)

def move_and_create_labels(df, df_name, imgs_source_dir, base_path='/content/drive/My Drive/Colab Notebooks/hodl_final_project_data'):
    """Copy a split's images into the dataset tree and write one label file per image.

    Despite the name, images are copied, not moved. Images listed in ``df`` that
    are not present in ``imgs_source_dir`` are skipped and get no label file.

    Args:
        df: DataFrame with 'image_path', 'class_id', 'center_x', 'center_y',
            'width' and 'height' columns, one row per bounding box.
        df_name: Split folder name, e.g. 'train', used under both ``images/``
            and ``labels/``.
        imgs_source_dir: Directory holding the source image files.
        base_path: Root directory of the dataset tree.
    """
    imgs_target_dir = os.path.join(base_path, 'images', df_name)
    labels_target_dir = os.path.join(base_path, 'labels', df_name)
    # Make the function usable on its own, without a prior create_directories() call.
    os.makedirs(imgs_target_dir, exist_ok=True)
    os.makedirs(labels_target_dir, exist_ok=True)

    label_columns = ['class_id', 'center_x', 'center_y', 'width', 'height']

    # A single groupby pass replaces a boolean filter over the whole frame per
    # image; sort=False keeps images in order of first appearance.
    for image_path, image_rows in df.groupby('image_path', sort=False):
        image_name = os.path.basename(image_path)
        source_img_path = os.path.join(imgs_source_dir, image_name)
        if not os.path.exists(source_img_path):
            continue

        shutil.copy(source_img_path, os.path.join(imgs_target_dir, image_name))

        # splitext swaps only the real extension, so names such as 'x.JPG' also
        # map to 'x.txt' instead of keeping the image file name.
        label_name = os.path.splitext(image_name)[0] + '.txt'
        with open(os.path.join(labels_target_dir, label_name), 'w') as label_file:
            boxes = image_rows[label_columns].itertuples(index=False, name=None)
            for class_id, center_x, center_y, width, height in boxes:
                label_file.write(f"{class_id} {center_x} {center_y} {width} {height}\n")


# Root of the dataset tree and the local folder the source images are read from.
base_path = '/content/drive/My Drive/Colab Notebooks/hodl_final_project_data'
imgs_source_dir = './images'

create_directories(base_path)

# Fill each split folder from its DataFrame, in the order train, val, test.
for _split_name, _split_df in (('train', train), ('val', val), ('test', test)):
    move_and_create_labels(_split_df, _split_name, imgs_source_dir, base_path)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Modeling

In [None]:
import zipfile
import os

def unzip_file(zip_file, extract_to):
    """Extract every member of the archive ``zip_file`` into ``extract_to``."""
    with zipfile.ZipFile(zip_file) as archive:
        archive.extractall(path=extract_to)

# Example usage:
zip_file = 'hodl_final_project_data.zip'
extract_to = 'extracted_data'  # specify the directory where you want to extract the files

# exist_ok=True replaces the separate existence check, so there is no gap
# between testing for the directory and creating it.
os.makedirs(extract_to, exist_ok=True)

unzip_file(zip_file, extract_to)
print("Extraction complete.")

BadZipFile: File is not a zip file

In [None]:
# -o overwrites existing files without the interactive "replace ...?" prompt;
# -q keeps the per-file listing out of the cell output.
!unzip -o -q hodl_final_project_data.zip


Archive:  hodl_final_project_data.zip
  End-of-central-directory signature not found.  Either this file is not
  a zipfile, or it constitutes one disk of a multi-part archive.  In the
  latter case the central directory and zipfile comment will be found on
  the last disk(s) of this archive.
unzip:  cannot find zipfile directory in one of hodl_final_project_data.zip or
        hodl_final_project_data.zip.zip, and cannot find hodl_final_project_data.zip.ZIP, period.


In [None]:
dataset_root_dir = os.getcwd()

# Dataset description passed to training: dataset root, the image folders for
# the train and val splits, and the mapping from class id to class name.
yaml_content = """
path: /content/drive/My Drive/Colab Notebooks/hodl_final_project_data
train: images/train
val: images/val

# Classes
names:
  0: None
  1: car
  2: truck
  3: pedestrian
  4: cyclist
  5: light
"""

filename = "hodl.yaml"

# The surrounding blank lines of the literal are stripped before writing.
with open(filename, "w") as config_file:
    config_file.write(yaml_content.strip())

filename

'hodl.yaml'

In [None]:
import os
import zipfile

def zipdir(path, ziph):
    """Add every file found under ``path`` to the open archive ``ziph``.

    Args:
        path: Directory to walk recursively.
        ziph: Writable ``zipfile.ZipFile`` handle that receives the entries.

    Entries are stored under their location relative to ``path``, so ``path``
    becomes the root of the archive. Only files are written; a directory that
    contains no files gets no entry.
    """
    for current_dir, _subdirs, file_names in os.walk(path):
        # The relative prefix is the same for every file in this directory.
        archive_dir = os.path.relpath(current_dir, path)
        for file_name in file_names:
            ziph.write(
                os.path.join(current_dir, file_name),
                os.path.join(archive_dir, file_name),
            )

# Desired directory to be the root in the ZIP file
path_to_dir = '/content/drive/My Drive/Colab Notebooks/hodl_final_project_data'

# File path for the output ZIP file
zip_file_path = 'hodl_final_project_data.zip'

# Build the archive under a temporary name and rename it only once it is
# complete. Interrupting or failing part-way then never leaves an incomplete
# archive under the final name for later cells to extract.
partial_zip_path = zip_file_path + '.part'
try:
    with zipfile.ZipFile(partial_zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
        zipdir(path_to_dir, zipf)
except BaseException:
    # BaseException so that a manual interrupt (KeyboardInterrupt) is cleaned up too.
    if os.path.exists(partial_zip_path):
        os.remove(partial_zip_path)
    raise
else:
    os.replace(partial_zip_path, zip_file_path)

KeyboardInterrupt: 

In [None]:
# Show the top-level entries of the dataset folder in the working directory.
dataset_dir_name = 'hodl_final_project_data'
os.listdir(dataset_dir_name)

In [None]:
# -o overwrites existing files instead of stopping at the interactive
# "replace ...?" prompt; -q keeps the per-file listing out of the cell output.
!unzip -o -q hodl_final_project_data.zip

Archive:  hodl_final_project_data.zip
replace content/drive/My Drive/Colab Notebooks/hodl_final_project_data/images/train/1478019952686311006.jpg? [y]es, [n]o, [A]ll, [N]one, [r]ename: 

In [None]:
# Pair every entry of the working directory with the size reported for it, in bytes.
files_in_directory = os.listdir('.')
file_info = list(zip(files_in_directory, map(os.path.getsize, files_in_directory)))

file_info

[('.config', 4096),
 ('yolov8n.pt', 6534387),
 ('pedestrian.zip?rlkey=bxatdudux7m3mhj4s8b6knmor', 935253372),
 ('images', 1028096),
 ('hodl_final_project_data', 4096),
 ('content', 4096),
 ('labels_val.csv', 1390825),
 ('drive', 4096),
 ('pedestrian.zip?rlkey=bxatdudux7m3mhj4s8b6knmor.1', 935253372),
 ('labels_train.csv', 5630516),
 ('runs', 4096),
 ('.ipynb_checkpoints', 4096),
 ('hodl.yaml', 195),
 ('labels_trainval.csv', 7021305),
 ('hodl_final_project_data.zip', 1891031208),
 ('sample_data', 4096)]

## YOLOv8 - New Model from Scratch

###

In [None]:
# Build a new model from the architecture definition (no trained weights loaded).
model = YOLO("yolov8n.yaml")
model.to('cuda')

# Train for 3 epochs on the dataset described by /content/hodl.yaml.
data_config_path = os.path.join("/content", "hodl.yaml")
results = model.train(data=data_config_path, epochs=3)

[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolov8n.yaml, data=/content/hodl.yaml, epochs=3, time=None, patience=100, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=cuda:0, workers=8, project=None, name=train, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show_conf=True, show_boxes=True, line_width=None, format=torchscript, keras=False, optimize=False, i

100%|██████████| 755k/755k [00:00<00:00, 21.5MB/s]


Overriding model.yaml nc=80 with nc=6

                   from  n    params  module                                       arguments                     
  0                  -1  1       464  ultralytics.nn.modules.conv.Conv             [3, 16, 3, 2]                 
  1                  -1  1      4672  ultralytics.nn.modules.conv.Conv             [16, 32, 3, 2]                
  2                  -1  1      7360  ultralytics.nn.modules.block.C2f             [32, 32, 1, True]             
  3                  -1  1     18560  ultralytics.nn.modules.conv.Conv             [32, 64, 3, 2]                
  4                  -1  2     49664  ultralytics.nn.modules.block.C2f             [64, 64, 2, True]             
  5                  -1  1     73984  ultralytics.nn.modules.conv.Conv             [64, 128, 3, 2]               
  6                  -1  2    197632  ultralytics.nn.modules.block.C2f             [128, 128, 2, True]           
  7                  -1  1    295424  ultralytics

100%|██████████| 6.23M/6.23M [00:00<00:00, 93.4MB/s]


[34m[1mAMP: [0mchecks passed ✅


[34m[1mtrain: [0mScanning /content/drive/My Drive/Colab Notebooks/hodl_final_project_data/labels/train... 466 images, 0 backgrounds, 0 corrupt:   3%|▎         | 466/18000 [03:36<2:16:04,  2.15it/s] 


KeyboardInterrupt: 

In [None]:
# Run the validation pass of the current `model`; whatever it returns is kept in `results`.
results = model.val()

Ultralytics YOLOv8.1.25 🚀 Python-3.10.12 torch-2.1.0+cu121 CUDA:0 (Tesla T4, 15102MiB)
YOLOv8n summary (fused): 168 layers, 3006818 parameters, 0 gradients, 8.1 GFLOPs


[34m[1mval: [0mScanning /content/labels/val.cache... 22241 images, 0 backgrounds, 0 corrupt: 100%|██████████| 22241/22241 [00:00<?, ?it/s]




                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1391/1391 [02:35<00:00,  8.93it/s]


                   all      22241     165082      0.525      0.152      0.156     0.0683
                   car      22241     123303      0.562      0.491      0.519      0.248
                 truck      22241       7322      0.293      0.124      0.112     0.0475
            pedestrian      22241      15538      0.419   9.75e-05     0.0185    0.00401
               cyclist      22241       1676          1          0    0.00761    0.00238
                 light      22241      17243       0.35      0.143      0.123     0.0395
Speed: 0.2ms preprocess, 2.3ms inference, 0.0ms loss, 0.8ms postprocess per image
Results saved to [1mruns/detect/train112[0m


## YOLOv8 Model Pretrained

['1478900557768677983.jpg',
 '1478901433556259674.jpg',
 '1478901333009094649.jpg',
 '1478900317826881043.jpg',
 '1478901134200328117.jpg',
 '1478899607143127459.jpg',
 '1478900107593305610.jpg',
 '1478899766532604666.jpg',
 '1478900551485493504.jpg',
 '1478899693407510177.jpg']


