In [1]:
import os
from pathlib import Path
import glob
import pandas as pd
import numpy as np

from breed_helpers import *

In [2]:
image_root = 'data/Images'
annot_root = 'data/Annotation'

dog_paths = np.array(glob.glob(image_root + '/*/*'))
annotations = np.array(glob.glob(annot_root + '/*/*'))
breed_list = [x.split('-', 1)[-1] for x in os.listdir(image_root)]

if(get_image_path(annotations[0]) != dog_paths[0]):
    dog_paths = [get_image_path(x) for x in annotations]
    assert get_image_path(annotations[0]) == dog_paths[0]

for i in range(len(annotations)):
        dog_paths[i] = dog_paths[i].replace('\\','/')
        annotations[i] = annotations[i].replace('\\','/')

In [3]:
df = pd.DataFrame(
        {'Breed': [get_dog_breed(x) for x in annotations], 
        'Folder_Dir': [x.split('/')[2].split('-')[0] for x in annotations], 
        'Image_Dir': [x.split('/')[-1] for x in annotations],
        'Bbox': [get_bbox(x) for x in annotations],
        'Num_Dogs': [len(get_bbox(x)) for x in annotations],
        'Image_Path': dog_paths})
df.head(11)

Unnamed: 0,Breed,Folder_Dir,Image_Dir,Bbox,Num_Dogs,Image_Path
0,Chihuahua,n02085620,n02085620_10074,"[(25, 10, 276, 498)]",1,data/Images/n02085620-Chihuahua/n02085620_1007...
1,Chihuahua,n02085620,n02085620_10131,"[(49, 9, 393, 493)]",1,data/Images/n02085620-Chihuahua/n02085620_1013...
2,Chihuahua,n02085620,n02085620_10621,"[(142, 43, 335, 250)]",1,data/Images/n02085620-Chihuahua/n02085620_1062...
3,Chihuahua,n02085620,n02085620_1073,"[(0, 27, 312, 498)]",1,data/Images/n02085620-Chihuahua/n02085620_1073...
4,Chihuahua,n02085620,n02085620_10976,"[(90, 104, 242, 452)]",1,data/Images/n02085620-Chihuahua/n02085620_1097...
5,Chihuahua,n02085620,n02085620_11140,"[(17, 38, 451, 253)]",1,data/Images/n02085620-Chihuahua/n02085620_1114...
6,Chihuahua,n02085620,n02085620_11238,"[(39, 191, 145, 383)]",1,data/Images/n02085620-Chihuahua/n02085620_1123...
7,Chihuahua,n02085620,n02085620_11258,"[(76, 30, 363, 375)]",1,data/Images/n02085620-Chihuahua/n02085620_1125...
8,Chihuahua,n02085620,n02085620_11337,"[(59, 37, 229, 179)]",1,data/Images/n02085620-Chihuahua/n02085620_1133...
9,Chihuahua,n02085620,n02085620_11477,"[(317, 95, 558, 459)]",1,data/Images/n02085620-Chihuahua/n02085620_1147...


In [4]:
chop_df = df.copy(deep=True)
df_list = list()
rows = 0

for i in range(len(df)):
    if df.at[i, 'Num_Dogs'] != 1: #iterate through the original df until a row is found in which there is a list of bboxes
        df_list.append(chop_df.head(rows))
        chop_df = chop_df.iloc[rows: , :]

        #transform row with multiple dogs
        multi_dog_list = list()
        new_df = chop_df.head(1).reset_index(drop=True).copy() #A df containing only the row in which Num_Dogs > 1
        #print(new_df)
        bbox_list = new_df.at[0, 'Bbox'] #the list of bbox's of the dogs

        for bbox in bbox_list:
            multi_dog_df = new_df.copy() #a copy of the one row df created above
            multi_dog_df.at[0, 'Bbox'] = bbox #change the bbox from a list to the correct 1 item
            multi_dog_list.append(multi_dog_df) #append one row df to a list
        
        new_df = pd.concat(multi_dog_list, ignore_index=True) #concat the df's
        #print(f'new_df: {new_df}')
        df_list.append(new_df)
        chop_df = chop_df.iloc[1:]
        rows = 0
    else: #if there is only one dog in the picture, remove the list a replace it with the tuple inside
        rows += 1
        bbox_list = chop_df.at[i, 'Bbox']
        chop_df.at[i, 'Bbox'] = bbox_list[0]
        
df_list.append(chop_df)
print(f'Number of images w/ Num_Dogs > 1 : {len(df_list)}')
df2 = pd.concat(df_list, ignore_index=True)
df2.drop(labels=['Num_Dogs'], axis='columns', inplace=True)
print(f'Number of Total Dogs:              {len(df2)}')
df2.head(50000)

Number of images w/ Num_Dogs > 1 : 2717
Number of Total Dogs:              22126


Unnamed: 0,Breed,Folder_Dir,Image_Dir,Bbox,Image_Path
0,Chihuahua,n02085620,n02085620_10074,"(25, 10, 276, 498)",data/Images/n02085620-Chihuahua/n02085620_1007...
1,Chihuahua,n02085620,n02085620_10131,"(49, 9, 393, 493)",data/Images/n02085620-Chihuahua/n02085620_1013...
2,Chihuahua,n02085620,n02085620_10621,"(142, 43, 335, 250)",data/Images/n02085620-Chihuahua/n02085620_1062...
3,Chihuahua,n02085620,n02085620_1073,"(0, 27, 312, 498)",data/Images/n02085620-Chihuahua/n02085620_1073...
4,Chihuahua,n02085620,n02085620_10976,"(90, 104, 242, 452)",data/Images/n02085620-Chihuahua/n02085620_1097...
...,...,...,...,...,...
22121,African_hunting_dog,n02116738,n02116738_9818,"(48, 0, 499, 332)",data/Images/n02116738-African_hunting_dog/n021...
22122,African_hunting_dog,n02116738,n02116738_9829,"(0, 39, 170, 299)",data/Images/n02116738-African_hunting_dog/n021...
22123,African_hunting_dog,n02116738,n02116738_9844,"(199, 136, 473, 348)",data/Images/n02116738-African_hunting_dog/n021...
22124,African_hunting_dog,n02116738,n02116738_9844,"(235, 140, 539, 334)",data/Images/n02116738-African_hunting_dog/n021...


In [5]:
bbox_col = df2.Bbox

bbox_df = pd.DataFrame(
        {'x1': [x[0] for x in bbox_col], 
        'y1': [x[1] for x in bbox_col], 
        'x2': [x[2] for x in bbox_col],
        'y2': [x[3] for x in bbox_col]})

In [6]:
images_df = df2.Image_Path

df3 = df2.drop(labels=['Image_Path'], axis='columns')
df_final = pd.concat([df3, bbox_df, images_df], axis=1)

df_final.head(50000)

Unnamed: 0,Breed,Folder_Dir,Image_Dir,Bbox,x1,y1,x2,y2,Image_Path
0,Chihuahua,n02085620,n02085620_10074,"(25, 10, 276, 498)",25,10,276,498,data/Images/n02085620-Chihuahua/n02085620_1007...
1,Chihuahua,n02085620,n02085620_10131,"(49, 9, 393, 493)",49,9,393,493,data/Images/n02085620-Chihuahua/n02085620_1013...
2,Chihuahua,n02085620,n02085620_10621,"(142, 43, 335, 250)",142,43,335,250,data/Images/n02085620-Chihuahua/n02085620_1062...
3,Chihuahua,n02085620,n02085620_1073,"(0, 27, 312, 498)",0,27,312,498,data/Images/n02085620-Chihuahua/n02085620_1073...
4,Chihuahua,n02085620,n02085620_10976,"(90, 104, 242, 452)",90,104,242,452,data/Images/n02085620-Chihuahua/n02085620_1097...
...,...,...,...,...,...,...,...,...,...
22121,African_hunting_dog,n02116738,n02116738_9818,"(48, 0, 499, 332)",48,0,499,332,data/Images/n02116738-African_hunting_dog/n021...
22122,African_hunting_dog,n02116738,n02116738_9829,"(0, 39, 170, 299)",0,39,170,299,data/Images/n02116738-African_hunting_dog/n021...
22123,African_hunting_dog,n02116738,n02116738_9844,"(199, 136, 473, 348)",199,136,473,348,data/Images/n02116738-African_hunting_dog/n021...
22124,African_hunting_dog,n02116738,n02116738_9844,"(235, 140, 539, 334)",235,140,539,334,data/Images/n02116738-African_hunting_dog/n021...


In [9]:
df_final.to_csv('data/breeds.csv', index=False)