# Preprocessing

In [1]:
import os
# from google.colab import drive
import pandas as pd
import shutil

In [2]:
# Mount Google Drive so the dataset under /content/drive is reachable (Colab only).
# `drive` is imported here because the import in the first cell is commented out;
# without it this cell fails with a NameError on a fresh kernel.
from google.colab import drive
drive.mount('/content/drive',force_remount=True)

Mounted at /content/drive


In [2]:
# Work from the Colab Notebooks folder on the mounted Drive.
%cd /content/drive/MyDrive/Colab Notebooks

/content/drive/MyDrive/Colab Notebooks


In [None]:
# Create the project folder. GTSRB.zip has to be uploaded into this folder
# before the unzip cell is run, since that cell reads Traffic_Sign/GTSRB.zip.
%mkdir Traffic_Sign

#### Unzip the dataset

In [4]:
!unzip '/content/drive/MyDrive/Colab Notebooks/Traffic_Sign/GTSRB.zip' -d '/content/drive/MyDrive/Colab Notebooks/Traffic_Sign/datasets'

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: /content/drive/MyDrive/Colab Notebooks/Traffic_Sign/datasets/train/5/00005_00053_00011.png  
  inflating: /content/drive/MyDrive/Colab Notebooks/Traffic_Sign/datasets/train/5/00005_00053_00012.png  
  inflating: /content/drive/MyDrive/Colab Notebooks/Traffic_Sign/datasets/train/5/00005_00053_00013.png  
  inflating: /content/drive/MyDrive/Colab Notebooks/Traffic_Sign/datasets/train/5/00005_00053_00014.png  
  inflating: /content/drive/MyDrive/Colab Notebooks/Traffic_Sign/datasets/train/5/00005_00053_00015.png  
  inflating: /content/drive/MyDrive/Colab Notebooks/Traffic_Sign/datasets/train/5/00005_00053_00016.png  
  inflating: /content/drive/MyDrive/Colab Notebooks/Traffic_Sign/datasets/train/5/00005_00053_00017.png  
  inflating: /content/drive/MyDrive/Colab Notebooks/Traffic_Sign/datasets/train/5/00005_00053_00018.png  
  inflating: /content/drive/MyDrive/Colab Notebooks/Traffic_Sign/datasets/train/5/00005

In [3]:
# Enter the project folder; the relative mkdir/cd commands below start from here.
%cd Traffic_Sign/

/content/drive/MyDrive/Colab Notebooks/Traffic_Sign


In [5]:
# List the project folder to check what is present before building the dataset tree.
%ls

GTSRB.zip           GTSRB_Yolov5.ipynb  [34marchive[m[m/


#### Making directories as per the YOLOv5 custom dataset input format

In [6]:
# Root folder of the YOLOv5-style dataset; images/ and labels/ are created inside it next.
%mkdir datasets

In [7]:
# Creating Directory structure for yolov5:
#   datasets/images/{train,val,test}  - image files for each split
#   datasets/labels/{train,val,test}  - one .txt label file per image, same split names
%mkdir datasets/images
%mkdir datasets/labels

# Split folders for the images
%mkdir datasets/images/train
%mkdir datasets/images/val
%mkdir datasets/images/test

# Matching split folders for the label files
%mkdir datasets/labels/train
%mkdir datasets/labels/val
%mkdir datasets/labels/test

In [52]:
# Step into datasets/images to inspect the split folders created above.
# This changes the working directory for every cell that follows.
%cd datasets/images/

/content/drive/MyDrive/Colab Notebooks/Traffic_Sign/datasets/images


In [55]:
# Show only the sub-directories of the current folder (the train/val/test split folders).
%ls -d */

[0m[01;34mtest/[0m/  [01;34mtrain/[0m/  [01;34mval/[0m/


In [8]:
# Enter archive/: Train.csv and Test.csv are read from here by bare filename, and
# the relative image paths in their Path column are copied relative to this folder.
# NOTE(review): the two cells above (In [52], In [55]) leave the working directory
# in datasets/images, so on a top-to-bottom run this relative %cd will not find
# archive/ - confirm the intended execution order / starting directory.
%cd archive/

/Users/bharatsharma/Documents/DL2/Project/archive


#### Reading Train.csv with file paths, image Height/Width and ROI coordinates

In [9]:
# Load the training annotations. The path is relative, so the working directory
# must be the folder that contains Train.csv.
img_df = pd.read_csv('Train.csv')
# Preview the first rows: image size, ROI corners, class id and image path.
img_df.head()

Unnamed: 0,Width,Height,Roi.X1,Roi.Y1,Roi.X2,Roi.Y2,ClassId,Path
0,27,26,5,5,22,20,20,Train/20/00020_00000_00000.png
1,28,27,5,6,23,22,20,Train/20/00020_00000_00001.png
2,29,26,6,5,24,21,20,Train/20/00020_00000_00002.png
3,28,27,5,6,23,22,20,Train/20/00020_00000_00003.png
4,28,26,5,5,23,21,20,Train/20/00020_00000_00004.png


#### Converting ROI corner coordinates to normalized box center, width and height, as required by the YOLOv5 label format

In [10]:
# Functions to convert Image coordinates to get the yolov5 input format
def compare(v1, v2):
  """Return the two values as a ``(larger, smaller)`` pair.

  The pair is ``(v1, v2)`` only when ``v1 > v2`` holds; in every other case
  (including ties) it is ``(v2, v1)``.
  """
  return (v1, v2) if v1 > v2 else (v2, v1)
def convert_labels(z):
  """Turn one annotation row into a normalized YOLO-style bounding box.

  Args:
    z: Mapping-like row providing 'Roi.X1', 'Roi.Y1', 'Roi.X2', 'Roi.Y2',
      'Height' and 'Width'.

  Returns:
    Tuple ``(x, y, w, h)``: the box center and the box size, where x and w
    are divided by the image width and y and h by the image height.
  """
  x_a, y_a, x_b, y_b = (z[key] for key in ('Roi.X1', 'Roi.Y1', 'Roi.X2', 'Roi.Y2'))
  img_h, img_w = z['Height'], z['Width']

  # The two corners may be given in either order, so sort each axis first.
  xmax, xmin = compare(x_a, x_b)
  ymax, ymin = compare(y_a, y_b)

  # Reciprocals of the image dimensions, used as normalization factors.
  inv_w = 1./img_w
  inv_h = 1./img_h

  center_x = ((xmin + xmax)/2.0)*inv_w
  box_w = (xmax - xmin)*inv_w
  center_y = ((ymin + ymax)/2.0)*inv_h
  box_h = (ymax - ymin)*inv_h
  return center_x, center_y, box_w, box_h

In [11]:
# Check the working directory: the cells below copy images using relative paths.
%pwd

'/Users/bharatsharma/Documents/DL2/Project/archive'

#### Train / Validation Split

In [12]:
# Splitting the labelled Train.csv rows into train & validation sets (75% / 25%).
# The split is stratified on ClassId and uses a fixed random_state so that it is
# repeatable. The test set is not produced here; it is read from Test.csv later.
from sklearn.model_selection import train_test_split
train_df, valid_df = train_test_split(
    img_df,
    test_size = 0.25,
    random_state = 43,
    shuffle = True,
    stratify = img_df['ClassId']
)

In [13]:
# Export the training split: one YOLO label file plus a copy of the image per row.
# NOTE: these absolute paths are machine-specific; adjust them when running elsewhere.
train_label_path = '/Users/bharatsharma/Documents/DL2/Project/datasets/labels/train'
train_image_path = '/Users/bharatsharma/Documents/DL2/Project/datasets/images/train'

# Create the output folders if they are missing so the cell works on a fresh run.
os.makedirs(train_label_path, exist_ok=True)
os.makedirs(train_image_path, exist_ok=True)

for index, row in train_df.iterrows():
  # row['Path'] is relative (e.g. Train/20/<name>.png), so the copy below
  # resolves it against the current working directory.
  path = row['Path']
  img_fname = os.path.basename(path)
  # Swap the image extension for .txt without assuming its length.
  txt_fname = os.path.splitext(img_fname)[0] + '.txt'

  # Creating Yolo label format & Normalizing
  label = convert_labels(row)
  class_id = row['ClassId']

  # Creating label file for each image: "<class> <x> <y> <w> <h>"
  with open(os.path.join(train_label_path, txt_fname), 'w') as txt_file:
    txt_file.write(f'{class_id} {label[0]} {label[1]} {label[2]} {label[3]}')

  shutil.copy(path, os.path.join(train_image_path, img_fname))

In [14]:
# Export the validation split: one YOLO label file plus a copy of the image per row.
# NOTE: these absolute paths are machine-specific; adjust them when running elsewhere.
val_label_path = '/Users/bharatsharma/Documents/DL2/Project/datasets/labels/val'
val_image_path = '/Users/bharatsharma/Documents/DL2/Project/datasets/images/val'

# Create the output folders if they are missing so the cell works on a fresh run.
os.makedirs(val_label_path, exist_ok=True)
os.makedirs(val_image_path, exist_ok=True)

for index, row in valid_df.iterrows():
  # row['Path'] is relative (e.g. Train/20/<name>.png), so the copy below
  # resolves it against the current working directory.
  path = row['Path']
  img_fname = os.path.basename(path)
  # Swap the image extension for .txt without assuming its length.
  txt_fname = os.path.splitext(img_fname)[0] + '.txt'

  # Creating Yolo label format & Normalizing
  label = convert_labels(row)
  class_id = row['ClassId']

  # Creating label file for each image: "<class> <x> <y> <w> <h>"
  with open(os.path.join(val_label_path, txt_fname), 'w') as txt_file:
    txt_file.write(f'{class_id} {label[0]} {label[1]} {label[2]} {label[3]}')

  shutil.copy(path, os.path.join(val_image_path, img_fname))

In [15]:
# Check the working directory again: Test.csv is read below by bare filename.
%pwd

'/Users/bharatsharma/Documents/DL2/Project/archive'

In [16]:
# Load the test annotations; the path is relative to the current working directory.
test_df = pd.read_csv('Test.csv')
# Preview the first rows to confirm the columns match those of Train.csv.
test_df.head()

Unnamed: 0,Width,Height,Roi.X1,Roi.Y1,Roi.X2,Roi.Y2,ClassId,Path
0,53,54,6,5,48,49,16,Test/00000.png
1,42,45,5,5,36,40,1,Test/00001.png
2,48,52,6,6,43,47,38,Test/00002.png
3,27,29,5,5,22,24,33,Test/00003.png
4,60,57,5,5,55,52,11,Test/00004.png
