**Step 1:** Download the YOLOv7 repository and install the requirements.

In [None]:
# Download YOLOv7 repository and install requirements
!git clone https://github.com/WongKinYiu/yolov7
%cd yolov7
!pip install -r requirements.txt

**Step 2:** Download the datasets directly from Roboflow. The archives are saved as roboflow_dataset/dataset0.zip (PKLot) and roboflow_dataset/dataset1.zip (CarPK).

In [None]:
# Grab the dataset
import requests
import os
# Direct link to the dataset(s)
# 0 is PKLot, 1 is CarPK
# NOTE(review): these links embed access keys (?key=...); consider reading
# them from an environment variable before sharing the notebook.
dataset_url = ["https://public.roboflow.com/ds/BLqIb30AQq?key=SIb62cQiK1", "https://universe.roboflow.com/ds/TBXdF68sRI?key=XgQtOaoTU1"]

# Directory to save the downloaded dataset
save_dir = "roboflow_dataset"

# Create the directory if it doesn't exist
os.makedirs(save_dir, exist_ok=True)

# Download every dataset in the list (no hard-coded count). Each archive is
# streamed to disk in chunks so a large zip is never held fully in memory.
for i, url in enumerate(dataset_url):
  zip_path = os.path.join(save_dir, "dataset" + str(i) + ".zip")
  partial_path = zip_path + ".part"
  # timeout=(connect, read) keeps a stalled connection from hanging the cell
  with requests.get(url, stream=True, timeout=(10, 120)) as response:
    if response.status_code == 200:
      with open(partial_path, "wb") as f:
        for chunk in response.iter_content(chunk_size=1024 * 1024):
          f.write(chunk)
      # Expose the final file name only once the whole archive is on disk
      os.replace(partial_path, zip_path)
      print("Dataset downloaded successfully.")
    else:
      print("Failed to download dataset.")

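**Step 3:** Extract the datasets. There are a few options. To extract both datasets, run the loop at the bottom of the cell (lines 25-38), which is currently disabled by being wrapped in a triple-quoted string. Otherwise, leave exactly one of the `extracted_dir_path` assignments (lines 9-12) uncommented. It does not really matter which one you pick, but using a separate folder per variant makes it possible to come back later and re-extract the original dataset.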

In [None]:
# Extract one downloaded archive into a variant-specific working folder
import zipfile
import os

# Source archive for the best_v1 runs: dataset0.zip (index 0 is PKLot per the download cell's comment)
zip_file_path = "roboflow_dataset/dataset" + str(0) + ".zip"

# Directory to extract into -- leave exactly one assignment active (og/baw/aug/empty match the variant names listed in the training cell)
#extracted_dir_path = "roboflow_dataset/v1_og"
#extracted_dir_path = "roboflow_dataset/v1_baw"
#extracted_dir_path = "roboflow_dataset/v1_aug"
extracted_dir_path = "roboflow_dataset/v1_empty"

# Create the directory if it doesn't exist
os.makedirs(extracted_dir_path, exist_ok=True)

# Unpack every member of the archive into that directory
with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
    zip_ref.extractall(extracted_dir_path)

print("Dataset extracted successfully.")


# Disabled alternative: extract both archives into extracted0/ and extracted1/. It is wrapped in a string literal, so it does not run.
'''for i in range(2):
  zip_file_path = "roboflow_dataset/dataset" + str(i) + ".zip"

  # Directory to extract the dataset
  extracted_dir_path = "roboflow_dataset/extracted" + str(i)

  # Create the directory if it doesn't exist
  os.makedirs(extracted_dir_path, exist_ok=True)

  # Extract the zip file
  with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
      zip_ref.extractall(extracted_dir_path)

  print("Dataset extracted successfully.")'''

**Step 4:** Process the data as needed. The first block converts all images to grayscale. The second augments the dataset with 90 degree counter-clockwise rotations (note: this doubles the size of the dataset). The third block removes the 'space-occupied' labels, turning the task into a single-class detection problem. The fourth block combines the datasets if two have been downloaded (note: this does not combine PKLot and CarPK correctly, because CarPK has no 'space-empty' labels; this combination was not used later on in the project).

In [9]:
# Convert images to black and white
import os

from PIL import Image

# Directory to be converted
train_dir_path = "roboflow_dataset/v1_baw/train/images"
valid_dir_path = "roboflow_dataset/v1_baw/valid/images"

test_dir_path = "roboflow_dataset/v1_baw/test/images"


def convert_dir_to_grayscale(dir_path):
  """Overwrite every image in dir_path with its 8-bit grayscale ('L') version.

  Args:
    dir_path: folder whose entries are all image files that PIL can open.
  """
  for name in os.listdir(dir_path):
    image_path = os.path.join(dir_path, name)
    # The context manager closes the source file handle; convert() returns a
    # new in-memory image, so it can be saved over the same path afterwards.
    with Image.open(image_path) as image_file:
      gray_image = image_file.convert('L')
    gray_image.save(image_path)


# Training, valid and test directories: convert all three splits.
# Images that are already grayscale simply stay grayscale on a re-run.
for split_dir in (train_dir_path, valid_dir_path, test_dir_path):
  convert_dir_to_grayscale(split_dir)


In [None]:
# Augment the train and valid images with 90 degree counterclockwise rotations
# This doubles the size of the dataset increasing training time to 6hrs for 20 epochs
from PIL import Image

def Rotate90(bbox, img_width=640):
  """Rotate one normalized YOLO box 90 degrees counter-clockwise.

  Matches a 90 degree counter-clockwise rotation of a square image about its
  centre: a point (x, y) in normalized image coordinates (origin at the
  top-left corner) moves to (y, 1 - x).

  Args:
    bbox: [label, x_center, y_center, width, height] in YOLO label-file
      order, with the four box values normalized to [0, 1].
    img_width: unused; kept so existing calls keep working.

  Returns:
    [label, x_center, y_center, width, height] of the rotated box, with each
    box value clamped to [0, 1]. The label is passed through unchanged.
  """
  lbl, x_center, y_center, width, height = bbox

  def clamp(value):
    # Keep a coordinate inside the normalized image range
    return min(max(value, 0), 1)

  # The centre follows the point mapping; width and height swap places.
  return [lbl, clamp(y_center), clamp(1 - x_center), clamp(height), clamp(width)]

import os

# Directory to be converted
train_dir_img = "roboflow_dataset/v1_aug/train/images"
train_dir_lbl = "roboflow_dataset/v1_aug/train/labels"
valid_dir_img = "roboflow_dataset/v1_aug/valid/images"
valid_dir_lbl = "roboflow_dataset/v1_aug/valid/labels"

# Test split paths are defined here but the test split is not augmented
test_dir_img = "roboflow_dataset/v1_aug/test/images"
test_dir_lbl = "roboflow_dataset/v1_aug/test/labels"


def _augment_split_with_rotation(img_dir, lbl_dir):
  """Add a 90-degree-rotated copy of every image in img_dir, with labels.

  For the c-th entry `name` of img_dir, writes the rotated image as
  str(c) + name and the rotated label file under the matching stem in lbl_dir.
  Re-running this on a folder that already holds rotated copies rotates those
  copies again, so run it once per freshly extracted dataset.

  Args:
    img_dir: folder of images; each needs a same-stem .txt file in lbl_dir.
    lbl_dir: folder of label files, one box per line, space-separated.
  """
  # os.listdir returns a snapshot, so the copies written into the same folder
  # during the loop are not visited in this pass.
  for c, name in enumerate(os.listdir(img_dir)):
    # splitext copes with any extension length (.jpg, .jpeg, .png, ...)
    stem = os.path.splitext(name)[0]

    # Rotate bounding boxes in the labels file
    rotated_lines = []
    with open(os.path.join(lbl_dir, stem + '.txt'), 'r', encoding='utf-8') as file:
      for line in file:
        # split() discards the trailing newline without eating any digits
        fields = line.split()
        if not fields:
          continue  # tolerate blank lines
        line_new = Rotate90([float(i) for i in fields])
        line_new[0] = int(line_new[0])
        rotated_lines.append(" ".join(str(item) for item in line_new) + '\n')

    # Labels parsed cleanly: now write the rotated image and its label file
    with Image.open(os.path.join(img_dir, name)) as image_file:
      image_file.rotate(90).save(os.path.join(img_dir, str(c) + name))

    with open(os.path.join(lbl_dir, str(c) + stem + '.txt'), "w", encoding='utf-8') as f:
      f.write(''.join(rotated_lines))


# Training directory
_augment_split_with_rotation(train_dir_img, train_dir_lbl)

# Valid directory
_augment_split_with_rotation(valid_dir_img, valid_dir_lbl)


In [13]:
# Get rid of the space occupied labels - 1
import os

# Directory to be converted
train_dir_lbl = "roboflow_dataset/v1_empty/train/labels"
valid_dir_lbl = "roboflow_dataset/v1_empty/valid/labels"
test_dir_lbl = "roboflow_dataset/v1_empty/test/labels"


def _drop_class_from_labels(lbl_dir, class_id='1'):
  """Rewrite every label file in lbl_dir without the rows of class_id.

  Args:
    lbl_dir: folder containing only label text files (one box per line,
      class id as the first whitespace-separated field).
    class_id: class id, as text, whose rows are removed.
  """
  for name in os.listdir(lbl_dir):
    label_file = os.path.join(lbl_dir, name)
    with open(label_file, 'r', encoding='utf-8') as file:
      # Compare the whole first field rather than the first character, so
      # ids such as 10-19 are not dropped by mistake. Blank lines are kept.
      data = [line for line in file if line.split(None, 1)[:1] != [class_id]]

    # Save the file
    with open(label_file, "w", encoding='utf-8') as f:
      f.write(''.join(data))


# Training, valid and test directories
for lbl_dir in (train_dir_lbl, valid_dir_lbl, test_dir_lbl):
  _drop_class_from_labels(lbl_dir)


In [None]:
# Combine datasets
# This was for CarPK and PKLot
import os
import shutil

# Files are copied from extracted1 into the matching folders of extracted0.
# NOTE(review): these are absolute Colab paths; confirm they match where the
# archives were actually extracted before running.
source_root = "/content/roboflow_dataset/extracted1"
dest_root = "/content/roboflow_dataset/extracted0"

# Build the six source/destination folder pairs (train, test, valid; each
# with images and labels) instead of spelling out twelve variables.
sources = []
dests = []
for split in ("train", "test", "valid"):
  for kind in ("images", "labels"):
    sources.append(source_root + "/" + split + "/" + kind)
    dests.append(dest_root + "/" + split + "/" + kind)

for src_dir, dest_dir in zip(sources, dests):
  for root, dirs, files in os.walk(src_dir):
      if files:
          # Without an existing destination folder, copy2 would write one
          # file named after the folder instead of copying into it.
          os.makedirs(dest_dir, exist_ok=True)
      for file in files:
          mysrc_file = os.path.join(root, file)
          # A file with the same name already in dest_dir is overwritten
          shutil.copy2(mysrc_file, dest_dir)

**Step 5:** For training only, download the starting weights for yolov7-tiny.

In [None]:
# Download tiny model weights
# Fetches the yolov7-tiny.pt checkpoint from the v0.1 release of the YOLOv7
# repository; the training cell passes this file name to --weights.
!wget https://github.com/WongKinYiu/yolov7/releases/download/v0.1/yolov7-tiny.pt

**Step 6:** Configure the cfg/training/yolov7-tiny.yaml file to handle 2 or 1 classes. Also take the data.yaml file out of the extracted directory and fix the paths to point to the train/valid/test images folders (note: have to add a new line for test). Also fix the classes to 1 and delete 'space-occupied' label if needed. This data.yaml file needs to be relocated to yolov7/data directory.

**Step 7:** Train the model. The command is already set up according to the tutorial we followed. The only thing to change is the number of epochs; we used 5, 10, 20, and 30 epochs. The tutorial recommends 100, but I could not babysit the runs for that long. Make sure to download the weights file out of runs/train/exp#.

In [None]:
# Train the model
# Dataset variants used in this project:
# v1_og - Unaltered dataset
# v1_baw - black and white
# v1_aug - rotated as well
# v1_empty - just empty labels
# Runs train.py for 20 epochs at batch size 32 on device 0, starting from the
# yolov7-tiny.pt weights with the yolov7-tiny model config and the tiny
# hyperparameter file. The relative paths (data/, cfg/) are resolved from the
# current working directory -- presumably the yolov7 folder entered in
# Step 1; confirm before running. Change --epochs to pick the run length.
!python /content/yolov7/train.py --epochs 20 --workers 4 --device 0 --batch-size 32 \
--data data/data.yaml --img 640 640 --cfg cfg/training/yolov7-tiny.yaml \
--weights 'yolov7-tiny.pt' --name yolov7_tiny_spaces_fixed_res --hyp data/hyp.scratch.tiny.yaml

**Step 8:** Test the model. Make sure to take the weights file out of runs/train/exp# and move into the outermost directory /content. Change the name of the weights argument to match this file. Make sure to download all of the resulting diagrams out of runs/test/exp#.

In [None]:
# Test the model
# Runs test.py with --task test on the dataset described by data/data.yaml
# (presumably its test split -- confirm in data.yaml). Change --weights to
# the name of the weights file to evaluate.
!python /content/yolov7/test.py --weights best_v1_aug.pt --task test --data data/data.yaml

**Optional:** While making sure everything was working, I sometimes wanted to look at individual images overlaid with their predictions. That can be done with the code below.

In [None]:
# make an inference and view it on the image
# Runs detect.py on a single image (--source) with the chosen weights file.
# The commented-out lines below are alternative invocations kept for
# reference: one on a dataset test image and one on what appears to be a
# blurred copy of the same image.
!python /content/yolov7/detect.py --source "umbc_stad_out.png" --weights best_v2.pt
#!python /content/yolov7/detect.py --source /content/roboflow_dataset/extracted0/test/images/2012-09-12_10_05_57_jpg.rf.5f9542ab6498fd436eef35d5ac8f5c04.jpg --weights best.pt
#!python /content/yolov7/detect.py --source /content/2012-09-12_10_05_57_jpg.rf.5f9542ab6498fd436eef35d5ac8f5c04-blurred.jpg --weights best.pt