# Full-gt.csv preprocessing

In [1]:
# Preprocess full-gt.csv: load the sign-class -> numeric-label mapping and the
# ground-truth annotation table that the following cells transform.

# load label_map.json
import json
import pandas as pd

# Read the JSON as UTF-8 explicitly; without `encoding`, open() falls back to
# the platform's locale encoding, which is not UTF-8 on every system.
with open('../files/label_map.json', encoding='utf-8') as json_file:
    label_map = json.load(json_file)
print(label_map)
print(len(label_map))

# load full-gt.csv
full_gt = pd.read_csv('../files/full-gt.csv')
print(full_gt.head())

{'2_1': 1, '1_23': 2, '1_17': 3, '3_24': 4, '8_2_1': 5, '5_20': 6, '5_19_1': 7, '5_16': 8, '3_25': 9, '6_16': 10, '7_15': 11, '2_2': 12, '2_4': 13, '8_13_1': 14, '4_2_1': 15, '1_20_3': 16, '1_25': 17, '3_4': 18, '8_3_2': 19, '3_4_1': 20, '4_1_6': 21, '4_2_3': 22, '4_1_1': 23, '1_33': 24, '5_15_5': 25, '3_27': 26, '1_15': 27, '4_1_2_1': 28, '6_3_1': 29, '8_1_1': 30, '6_7': 31, '5_15_3': 32, '7_3': 33, '1_19': 34, '6_4': 35, '8_1_4': 36, '8_8': 37, '1_16': 38, '1_11_1': 39, '6_6': 40, '5_15_1': 41, '7_2': 42, '5_15_2': 43, '7_12': 44, '3_18': 45, '5_6': 46, '5_5': 47, '7_4': 48, '4_1_2': 49, '8_2_2': 50, '7_11': 51, '1_22': 52, '1_27': 53, '2_3_2': 54, '5_15_2_2': 55, '1_8': 56, '3_13': 57, '2_3': 58, '8_3_3': 59, '2_3_3': 60, '7_7': 61, '1_11': 62, '8_13': 63, '1_12_2': 64, '1_20': 65, '1_12': 66, '3_32': 67, '2_5': 68, '3_1': 69, '4_8_2': 70, '3_20': 71, '3_2': 72, '2_3_6': 73, '5_22': 74, '5_18': 75, '2_3_5': 76, '7_5': 77, '8_4_1': 78, '3_14': 79, '1_2': 80, '1_20_2': 81, '4_1_4': 82

In [2]:
# Translate every sign_class string into its numeric id from label_map;
# classes missing from the mapping come out as NaN.
full_gt['label'] = full_gt['sign_class'].map(label_map)

# Turn the box extents into far-corner coordinates: 'width' becomes
# x_from + width and 'height' becomes y_from + height (column names are kept).
# NOTE: the columns are overwritten in place, so running this cell a second
# time would add x_from / y_from again.
for origin_col, extent_col in (('x_from', 'width'), ('y_from', 'height')):
    full_gt[extent_col] = full_gt[origin_col] + full_gt[extent_col]

print(full_gt.head())

                          filename  x_from  y_from  width  height sign_class  \
0  autosave01_02_2012_09_13_33.jpg     649     376    667     394        2_1   
1  autosave01_02_2012_09_13_34.jpg     671     356    691     377        2_1   
2  autosave01_02_2012_09_13_35.jpg     711     332    738     358        2_1   
3  autosave01_02_2012_09_13_36.jpg     764     290    801     326        2_1   
4  autosave01_02_2012_09_13_36.jpg     684     384    701     401       1_23   

   sign_id  label  
0        0    1.0  
1        0    1.0  
2        0    1.0  
3        0    1.0  
4        1    2.0  


In [3]:
# Load the train / validation annotation files. Each one is expected to hold an
# 'images' list whose entries carry a 'file_name' key.
# UTF-8 is requested explicitly so parsing does not depend on the platform's
# locale encoding.
with open('../files/train_anno_reduced.json', encoding='utf-8') as json_file:
    train_anno = json.load(json_file)
with open('../files/val_anno.json', encoding='utf-8') as json_file:
    val_anno = json.load(json_file)

# Collect the image file paths of each split, in annotation order.
train_image_paths = [image['file_name'] for image in train_anno['images']]
val_image_paths = [image['file_name'] for image in val_anno['images']]

# Sanity check: split sizes and the first path of each split.
print(len(train_image_paths))
print(len(val_image_paths))
print(train_image_paths[0])
print(val_image_paths[0])

1889
5000
rtsd-frames/autosave01_02_2012_09_16_49.jpg
rtsd-frames/autosave10_10_2012_13_50_36_1.jpg


In [4]:
# Keep only the file name of every image path, dropping the directory part
# (e.g. the leading "rtsd-frames/").
import os

train_image_names = list(map(os.path.basename, train_image_paths))
print(len(train_image_names))
print(train_image_names[0])

val_image_names = list(map(os.path.basename, val_image_paths))
print(len(val_image_names))
print(val_image_names[0])

1889
autosave01_02_2012_09_16_49.jpg
5000
autosave10_10_2012_13_50_36_1.jpg


In [5]:
# Tag every annotation row with the split its image belongs to:
#   '0' -> file name is listed in train_image_names
#   '1' -> file name is listed in val_image_names
#   '2' -> file name is in neither list
# The validation assignment is applied last, so a file name present in both
# lists ends up tagged '1'.
in_train = full_gt['filename'].isin(train_image_names)
in_val = full_gt['filename'].isin(val_image_names)
full_gt['image_type'] = '2'
full_gt.loc[in_train, 'image_type'] = '0'
full_gt.loc[in_val, 'image_type'] = '1'

# Rows with a missing label get the placeholder value 156.0.
missing_label_fill = 156.0
full_gt['label'] = full_gt['label'].fillna(missing_label_fill)

print(full_gt.head())

                          filename  x_from  y_from  width  height sign_class  \
0  autosave01_02_2012_09_13_33.jpg     649     376    667     394        2_1   
1  autosave01_02_2012_09_13_34.jpg     671     356    691     377        2_1   
2  autosave01_02_2012_09_13_35.jpg     711     332    738     358        2_1   
3  autosave01_02_2012_09_13_36.jpg     764     290    801     326        2_1   
4  autosave01_02_2012_09_13_36.jpg     684     384    701     401       1_23   

   sign_id  label image_type  
0        0    1.0          2  
1        0    1.0          2  
2        0    1.0          2  
3        0    1.0          2  
4        1    2.0          2  


In [6]:
# Write the processed frame to full-gt2-small.csv; index=False keeps the
# pandas row index out of the file.
output_csv = '../files/full-gt2-small.csv'
full_gt.to_csv(output_csv, index=False)

In [7]:
# Show how many annotation rows carry each image_type value.
image_type_counts = full_gt['image_type'].value_counts()
print(image_type_counts)


2    90900
1     8866
0     4592
Name: image_type, dtype: int64


# Move images to validation and train directories

In [5]:
# Move every image listed in train_image_paths from the staging folder into the
# train_images_small folder. This cell relies on train_image_paths built by the
# annotation-loading cell, so that cell must have been run first.
import shutil
import os

# NOTE(review): machine-specific absolute paths; consider moving them to a
# single configurable base directory.
source_path = 'A:/Профиль/Rab Table/Учёба/3/анализ изображений/курс/files/train_images_for_small/'
path = 'A:/Профиль/Rab Table/Учёба/3/анализ изображений/курс/files/train_images_small/'

for image in train_image_paths:
    src = os.path.join(source_path, image)
    dst = os.path.join(path, image)

    # Already moved by an earlier run (or listed twice): nothing left to do.
    # Skipping here makes the cell safe to re-run.
    if not os.path.exists(src) and os.path.exists(dst):
        continue

    # Entries may carry a sub-folder (e.g. "rtsd-frames/..."); make sure the
    # matching destination folder exists before moving.
    os.makedirs(os.path.dirname(dst), exist_ok=True)
    shutil.move(src, dst)