<a href="https://colab.research.google.com/github/andrewli4938/TrafficLightDetection/blob/main/Group1_Traffic_Light_Detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install ultralytics



In [40]:
import torch
import torch.nn as nn
import pandas as pd
from sklearn.model_selection import train_test_split
import os
import glob
import shutil
from PIL import Image
from google.colab import drive

from ultralytics import YOLO

In [None]:
# import our dataset
import kagglehub
source_path = kagglehub.dataset_download("mbornoe/lisa-traffic-light-dataset")

In [None]:
%pwd
%rm -r data
%mkdir data  # root folder for data

%cd data
%mkdir images
%mkdir labels

%cd images
%mkdir train
%mkdir val
%mkdir test

%cd ../labels
%mkdir train
%mkdir val
%mkdir test

%cd /content

In [None]:
# these two only have total 1000 images, and have duplicate filenames so we can deal with them later
%mv {source_path}/sample-dayClip6 {source_path}/..
%mv {source_path}/sample-nightClip1 {source_path}/..

In [None]:
# move all of the filenames into list
image_paths = glob.glob(f"{source_path}/**/*.jpg", recursive=True)

# process the annotations
target = "frameAnnotationsBOX.csv"
annotations = pd.DataFrame()
annotation_paths = glob.glob(f"{source_path}/**/{target}", recursive=True)
for p in annotation_paths:
  # combine the csv files into one
  new_frame = pd.read_csv(p, sep=";")
  new_frame = new_frame.drop(['Origin file', 'Origin frame number', 'Origin track', 'Origin track frame number'], axis=1)
  annotations = pd.concat([annotations, new_frame])
filenames = annotations[annotations.columns[0]].str.split("/").str[-1]
annotations["Filename"] = filenames
annotations.set_index("Filename", inplace=True)
annotations

In [None]:
def move_files(paths_list, destination_dir):
  for file in paths_list:
    shutil.copy(file, destination_dir)

In [None]:
# TODO: split the data into train, val, test
image_train, image_val_test = train_test_split(image_paths, train_size=0.7, test_size=0.3, random_state=42, shuffle=True)
image_val, image_test = train_test_split(image_val_test, train_size=0.5, random_state=42, shuffle=True)

print(len(image_train))
print(len(image_val))
print(len(image_test))

image_train_dir = "/content/data/images/train"
image_val_dir = "/content/data/images/val"
image_test_dir = "/content/data/images/test"
move_files(image_train, image_train_dir)
move_files(image_val, image_val_dir)
move_files(image_test, image_test_dir)

print(len(os.listdir(image_train_dir)))
print(len(os.listdir(image_val_dir)))
print(len(os.listdir(image_test_dir)))

In [None]:
# import yaml file: defines image locations and encodes classes into numbers for the model
!wget https://raw.githubusercontent.com/andrewli4938/TrafficLightDetection/refs/heads/main/data.yaml

In [None]:
def convert_to_xywh(box_coordinates, image_dimensions):
  image_width, image_height = image_dimensions
  x1, y1, x2, y2 = box_coordinates
  width, height = x2-x1, y2-y1
  x_center = (x1+x2)/2
  y_center = (y1+y2)/2

  x_center = x_center/image_width
  y_center = y_center/image_height
  width = width/image_width
  height = height/image_height

  return (x_center, y_center, width, height)

In [None]:
"""
The *.txt file should be formatted with one row per object in
class x_center y_center width height format. Box coordinates
must be in normalized xywh format (from 0 to 1). If your boxes
are in pixels, you should divide x_center and width by image width,
and y_center and height by image height. Class numbers should be
zero-indexed (start with 0).
"""
image_dims = (1280, 960)

def create_labels(source_dir, destination_dir, annotations_df, encoding):
  for file in os.listdir(source_dir):
    write_path = os.path.join(destination_dir, file.replace(".jpg", ".txt"))
    if file in annotations_df.index:
      rows = annotations_df.loc[file]
      if isinstance(rows, pd.Series):
        rows = pd.DataFrame([rows])
    else:
      continue
    with open(write_path, "w") as image_label:
      for i in range(len(rows)):  # iterate through each label for current image
        class_number = encoding[rows.iloc[i, 0]]
        bounding_box = rows.iloc[i, 1:]
        # print(bounding_box)
        # print(f"image is: {file} at index {i}")
        # print(type(rows))
        xywh = convert_to_xywh(bounding_box, image_dims)  # calculate bounding box coordinates here
        image_label.write(f"{class_number} {xywh[0]} {xywh[1]} {xywh[2]} {xywh[3]}\n")


In [39]:
class_encoding = {"stop": 0, "stopLeft": 1, "warning": 2, "warningLeft": 3,
                  "warningLeft": 4, "go": 5, "goForward": 6, "goLeft": 6}

label_train_dir = "/content/data/labels/train"
label_val_dir = "/content/data/labels/val"
label_test_dir = "/content/data/labels/test"

create_labels(image_train_dir, label_train_dir, annotations, class_encoding)
create_labels(image_val_dir, label_val_dir, annotations, class_encoding)
create_labels(image_test_dir, label_test_dir, annotations, class_encoding)

print(len(os.listdir(label_train_dir)))
print(len(os.listdir(label_val_dir)))
print(len(os.listdir(label_test_dir)))

25396
5444
5425


In [None]:
torch.cuda.is_available()

True

In [None]:
torch.cuda.empty_cache()

In [None]:
!nvidia-smi

Tue Nov  5 03:36:51 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA L4                      Off | 00000000:00:03.0 Off |                    0 |
| N/A   50C    P8              13W /  72W |      1MiB / 23034MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [None]:
!rm -r /content/runs

In [50]:
# model training
model = YOLO("yolo11n.pt")
results = model.train(data="/content/data.yaml", epochs=20, imgsz=896, batch=128)

Ultralytics 8.3.39 🚀 Python-3.10.12 torch-2.5.1+cu121 CUDA:0 (NVIDIA A100-SXM4-40GB, 40514MiB)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolo11n.pt, data=/content/data.yaml, epochs=20, time=None, patience=100, batch=128, imgsz=896, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=train2, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show_c

[34m[1mtrain: [0mScanning /content/data/labels/train... 25396 images, 4715 backgrounds, 0 corrupt: 100%|██████████| 30111/30111 [00:22<00:00, 1319.86it/s]






[34m[1mtrain: [0mNew cache created: /content/data/labels/train.cache
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))


  check_for_updates()
[34m[1mval: [0mScanning /content/data/labels/val.cache... 5444 images, 1008 backgrounds, 0 corrupt: 100%|██████████| 6452/6452 [00:00<?, ?it/s]






Plotting labels to runs/detect/train2/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.000909, momentum=0.9) with parameter groups 81 weight(decay=0.0), 88 weight(decay=0.001), 87 bias(decay=0.0)
[34m[1mTensorBoard: [0mmodel graph visualization added ✅
Image sizes 896 train, 896 val
Using 8 dataloader workers
Logging results to [1mruns/detect/train2[0m
Starting training for 20 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/20      37.4G       1.79      3.525      1.079        118        896: 100%|██████████| 236/236 [03:04<00:00,  1.28it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:31<00:00,  1.20s/it]


                   all       6452      16263      0.797      0.416      0.443      0.238

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       2/20        37G      1.311      1.495     0.9573        134        896: 100%|██████████| 236/236 [02:59<00:00,  1.31it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:24<00:00,  1.07it/s]

                   all       6452      16263      0.778      0.546      0.578      0.327






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       3/20      36.9G      1.224     0.9066      0.943        140        896: 100%|██████████| 236/236 [03:01<00:00,  1.30it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:23<00:00,  1.09it/s]


                   all       6452      16263      0.813      0.629      0.704      0.396

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       4/20      37.4G      1.161     0.7331     0.9276         95        896: 100%|██████████| 236/236 [02:58<00:00,  1.32it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:24<00:00,  1.06it/s]

                   all       6452      16263      0.754      0.646      0.731      0.416






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       5/20      37.3G      1.089     0.6481     0.9104        128        896: 100%|██████████| 236/236 [02:58<00:00,  1.32it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:24<00:00,  1.07it/s]

                   all       6452      16263      0.789      0.756       0.81      0.461






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       6/20        37G      1.031     0.5977     0.8997        123        896: 100%|██████████| 236/236 [02:58<00:00,  1.32it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:25<00:00,  1.04it/s]

                   all       6452      16263      0.767      0.818      0.824      0.496






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       7/20      37.4G      1.005     0.5739     0.8937        126        896: 100%|██████████| 236/236 [02:58<00:00,  1.32it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:24<00:00,  1.04it/s]

                   all       6452      16263      0.816      0.808      0.866       0.52






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       8/20      36.9G     0.9695     0.5483     0.8859        123        896: 100%|██████████| 236/236 [02:59<00:00,  1.32it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:24<00:00,  1.05it/s]

                   all       6452      16263      0.806      0.813      0.846      0.524






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       9/20      37.3G     0.9388     0.5235     0.8787        122        896: 100%|██████████| 236/236 [02:59<00:00,  1.31it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:24<00:00,  1.07it/s]

                   all       6452      16263      0.863      0.862      0.883      0.551






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      10/20      36.9G     0.9197     0.5075     0.8739        121        896: 100%|██████████| 236/236 [02:57<00:00,  1.33it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:24<00:00,  1.05it/s]

                   all       6452      16263      0.877      0.844       0.89      0.577





Closing dataloader mosaic
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      11/20      37.1G     0.9007     0.4909     0.8807         65        896: 100%|██████████| 236/236 [02:57<00:00,  1.33it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:24<00:00,  1.06it/s]

                   all       6452      16263      0.837      0.848       0.86      0.565






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      12/20      37.1G     0.8716     0.4711     0.8732         77        896: 100%|██████████| 236/236 [02:49<00:00,  1.39it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:23<00:00,  1.10it/s]

                   all       6452      16263      0.855      0.862      0.885      0.581






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      13/20      37.1G     0.8544     0.4588     0.8692         71        896: 100%|██████████| 236/236 [02:46<00:00,  1.42it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:24<00:00,  1.04it/s]

                   all       6452      16263      0.865      0.846      0.882      0.581






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      14/20        37G     0.8336     0.4458      0.866         70        896: 100%|██████████| 236/236 [02:49<00:00,  1.40it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:24<00:00,  1.08it/s]

                   all       6452      16263      0.876      0.874      0.897        0.6






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      15/20      37.1G     0.8147     0.4321     0.8608         75        896: 100%|██████████| 236/236 [02:47<00:00,  1.41it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:24<00:00,  1.06it/s]

                   all       6452      16263      0.853      0.882      0.899      0.604






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      16/20      37.1G     0.7986     0.4209     0.8588         76        896: 100%|██████████| 236/236 [02:50<00:00,  1.38it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:23<00:00,  1.09it/s]

                   all       6452      16263      0.895      0.864      0.918       0.61






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      17/20      37.1G     0.7761     0.4072     0.8535         88        896: 100%|██████████| 236/236 [02:48<00:00,  1.40it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:24<00:00,  1.08it/s]

                   all       6452      16263      0.899      0.854      0.903      0.615






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      18/20      37.1G     0.7578     0.3976     0.8496         80        896: 100%|██████████| 236/236 [02:50<00:00,  1.38it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:23<00:00,  1.11it/s]

                   all       6452      16263      0.883      0.856      0.902      0.619






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      19/20        37G     0.7441     0.3869     0.8463         85        896: 100%|██████████| 236/236 [02:48<00:00,  1.40it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:23<00:00,  1.09it/s]

                   all       6452      16263      0.893      0.857      0.915      0.625






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      20/20      37.1G     0.7292     0.3778     0.8442         65        896: 100%|██████████| 236/236 [02:47<00:00,  1.41it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:24<00:00,  1.06it/s]

                   all       6452      16263      0.891      0.869      0.911       0.63






20 epochs completed in 1.119 hours.
Optimizer stripped from runs/detect/train2/weights/last.pt, 5.5MB
Optimizer stripped from runs/detect/train2/weights/best.pt, 5.5MB

Validating runs/detect/train2/weights/best.pt...
Ultralytics 8.3.39 🚀 Python-3.10.12 torch-2.5.1+cu121 CUDA:0 (NVIDIA A100-SXM4-40GB, 40514MiB)
YOLO11n summary (fused): 238 layers, 2,583,517 parameters, 0 gradients, 6.3 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:29<00:00,  1.13s/it]


                   all       6452      16263      0.893       0.87      0.912      0.631
                  stop       2792       6561      0.957      0.934      0.975      0.763
              stopLeft       1565       1939      0.968      0.908       0.97      0.832
                    go         25         42       0.68       0.69      0.674      0.328
             goForward       2788       6954       0.94       0.87      0.955      0.695
                goLeft        324        436      0.883      0.916      0.947      0.591
Speed: 0.2ms preprocess, 0.6ms inference, 0.0ms loss, 0.7ms postprocess per image
Results saved to [1mruns/detect/train2[0m


In [52]:
results = model.train(data="/content/data.yaml", epochs=40, imgsz=896, batch=128, augment=True, resume=True)

Ultralytics 8.3.39 🚀 Python-3.10.12 torch-2.5.1+cu121 CUDA:0 (NVIDIA A100-SXM4-40GB, 40514MiB)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolo11n.pt, data=/content/data.yaml, epochs=600, time=None, patience=100, batch=128, imgsz=896, save=True, save_period=-1, cache=disk, device=None, workers=8, project=exp10-new, name=yolov8n-c3k2-62, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=yolo11n.pt, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=False, save_conf=False, save_crop=False, show_

[34m[1mtrain: [0mScanning /content/data/labels/train.cache... 25396 images, 4715 backgrounds, 0 corrupt: 100%|██████████| 30111/30111 [00:00<?, ?it/s]






[34m[1mtrain: [0m155.1GB disk space required, with 50% safety margin but only 67.5/235.7GB free, not caching images to disk ⚠️
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))


[34m[1mval: [0mScanning /content/data/labels/val.cache... 5444 images, 1008 backgrounds, 0 corrupt: 100%|██████████| 6452/6452 [00:00<?, ?it/s]




[34m[1mval: [0mCaching images (22.2GB Disk): 100%|██████████| 6452/6452 [00:00<00:00, 27293.56it/s]


Plotting labels to exp10-new/yolov8n-c3k2-62/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m SGD(lr=0.01, momentum=0.9) with parameter groups 81 weight(decay=0.0), 88 weight(decay=0.001), 87 bias(decay=0.0)


AssertionError: yolo11n.pt training to 600 epochs is finished, nothing to resume.
Start a new training without resuming, i.e. 'yolo train model=yolo11n.pt'

In [48]:
val_model = YOLO('/content/best.pt')
result = val_model.val(data='/content/data.yaml')

FileNotFoundError: [Errno 2] No such file or directory: '/content/best.pt'

In [47]:
result.results_dict

{'metrics/precision(B)': 0.00019060556314722812,
 'metrics/recall(B)': 0.00041174619443809925,
 'metrics/mAP50(B)': 5.116077642251972e-05,
 'metrics/mAP50-95(B)': 1.1208608752320572e-05,
 'fitness': 1.5203825519340488e-05}

In [None]:
#TODO: the label files are producing duplicate files, fix the create_labels() function

In [None]:
"""
OVERVIEW
Basically our LISA dataset contains around 44k images and a bunch of labels

Labels are:
  - light status: {go, slow, stop}
  - bounding box of where the stoplight is xyxy coordinates (top left, bottom right)
  - each image may have many labels since there are many stoplights in a single image

DaySequence
"""

In [None]:
"""
END HERE
"""

In [None]:
# # centralize both annotations and images (not scattered in different folders)
# annotations = pd.DataFrame()

# target = "frameAnnotationsBOX.csv"
# annotation_paths = glob.glob(f"{path}/**/{target}", recursive=True)

# # I am going to omit the approximately 1000 pictures and labels in sample-dayClip6
# # and sample-nightClip1 because it is really annoying to work with right now and and
# # we have 43k other values we can work with first
# for p in annotation_paths:
#   tokens = p.split("/")
#   if tokens[-2] == "sample-nightClip1" or tokens[-2] == "sample-dayClip6":  # omit here
#     continue

#   # combine the csv files into one
#   new_frame = pd.read_csv(p, sep=";")
#   new_frame = new_frame.drop(['Origin file', 'Origin frame number', 'Origin track', 'Origin track frame number'], axis=1)
#   annotations = pd.concat([annotations, new_frame])

#   # move all of the nested image files into /content/images_dir
#   if len(tokens)==14:
#     subpath = path+"/"+tokens[-3]+"/"+tokens[-3]+"/"+tokens[-2]+"/frames"
#   else:
#     subpath = path+"/"+tokens[-2]+"/"+tokens[-2]+"/frames"
#   !cp -r {subpath}/* /content/images_dir

# filenames = annotations[annotations.columns[0]].str.split("/").str[-1]
# annotations["Filename"] = filenames
# annotations  # NOTE: our labels are in this df here

In [None]:
# #TODO: define custom dataset class
# class TrafficLightDataset(Dataset):
#   def __init__(self, annotations_df, img_dir, transform=None, target_transform=None):
#     self.img_labels = annotations_df
#     self.img_dir = img_dir
#     self.transform = transform
#     self.target_transform = target_transform

#   def __len__(self):
#     return len(self.img_labels)

#   def __getitem__(self, idx):
#     img_path = os.path.join(self.img_dir, self.img_labels.iloc[idx, 0])
#     image = read_image(img_path)
#     label = tuple(self.img_labels.iloc[idx, 1:])
#     if self.transform:
#       image = self.transform(image)
#     if self.target_transform:
#       pass  # not sure how this will work on a tuple (worry about it later)
#     return image, label


In [None]:
# #TODO: split our data into train and test dataloaders

# img_dir = "content/images_dir"
# dataset = TrafficLightDataset(annotations, img_dir, transform=None, target_transform=None)

# # shuffle our indices before splitting (need day and night in both sets)
# indices = torch.randperm(len(dataset))
# train_size = int(0.8*len(dataset))
# train_indices = indices[:train_size]
# test_indices = indices[train_size:]

# # split into train and test
# train_dataset = torch.utils.data.Subset(dataset, train_indices)
# test_dataset = torch.utils.data.Subset(dataset, test_indices)

# # we have our dataloaders here
# train_dataloader = DataLoader(train_dataset, batch_size=64, shuffle=True)
# test_dataloader = DataLoader(test_dataset, batch_size=1000, shuffle=True)