In [85]:
import numpy as np
import pandas as pd

families = pd.read_csv('families.txt', names = ['PlaneFam'])
families.head()

Unnamed: 0,PlaneFam
0,A300
1,A310
2,A320
3,A330
4,A340


In [86]:
fam_dict = families.to_dict()['PlaneFam']

In [87]:
fam_dict

{0: 'A300',
 1: 'A310',
 2: 'A320',
 3: 'A330',
 4: 'A340',
 5: 'A380',
 6: 'ATR-42',
 7: 'ATR-72',
 8: 'An-12',
 9: 'BAE 146',
 10: 'BAE-125',
 11: 'Beechcraft 1900',
 12: 'Boeing 707',
 13: 'Boeing 717',
 14: 'Boeing 727',
 15: 'Boeing 737',
 16: 'Boeing 747',
 17: 'Boeing 757',
 18: 'Boeing 767',
 19: 'Boeing 777',
 20: 'C-130',
 21: 'C-47',
 22: 'CRJ-200',
 23: 'CRJ-700',
 24: 'Cessna 172',
 25: 'Cessna 208',
 26: 'Cessna Citation',
 27: 'Challenger 600',
 28: 'DC-10',
 29: 'DC-3',
 30: 'DC-6',
 31: 'DC-8',
 32: 'DC-9',
 33: 'DH-82',
 34: 'DHC-1',
 35: 'DHC-6',
 36: 'DR-400',
 37: 'Dash 8',
 38: 'Dornier 328',
 39: 'EMB-120',
 40: 'Embraer E-Jet',
 41: 'Embraer ERJ 145',
 42: 'Embraer Legacy 600',
 43: 'Eurofighter Typhoon',
 44: 'F-16',
 45: 'F/A-18',
 46: 'Falcon 2000',
 47: 'Falcon 900',
 48: 'Fokker 100',
 49: 'Fokker 50',
 50: 'Fokker 70',
 51: 'Global Express',
 52: 'Gulfstream',
 53: 'Hawk T1',
 54: 'Il-76',
 55: 'King Air',
 56: 'L-1011',
 57: 'MD-11',
 58: 'MD-80',
 59: 'M

In [88]:
images_box = pd.read_csv('images_box.txt', names = ['image', 'xmin', 'ymin', 'xmax', 'ymax'], delim_whitespace=True)
images_box['image']=images_box['image'].astype(str).str.zfill(7)
print(images_box.dtypes)
images_box.head()

image    object
xmin      int64
ymin      int64
xmax      int64
ymax      int64
dtype: object


Unnamed: 0,image,xmin,ymin,xmax,ymax
0,1025794,3,144,998,431
1,481847,73,220,1198,508
2,1514522,7,217,1196,551
3,1340192,83,155,964,462
4,810608,19,146,986,443


In [89]:
from PIL import Image

width_list, height_list = [], []

for index, row in images_box.iterrows():
    with Image.open('/home/barrett/aircraft/fgvc-aircraft-2013b/data/images/'+str(row['image']).zfill(7)+'.jpg') as img:
        width, height = img.size
        width_list.append(width)
        height_list.append(height)

images_box['totwidth'] = width_list
images_box['totheight'] = height_list
images_box.head()

Unnamed: 0,image,xmin,ymin,xmax,ymax,totwidth,totheight
0,1025794,3,144,998,431,1024,695
1,481847,73,220,1198,508,1200,802
2,1514522,7,217,1196,551,1200,882
3,1340192,83,155,964,462,1024,699
4,810608,19,146,986,443,1024,745


In [90]:
# bbox coords need to be converted to floats
# Format is classnum (families index) float float float float
# x_center y_center width height truncated to 6 dec places

images_box['x_center'] = ((images_box['xmax']-images_box['xmin'])/2)/images_box['totwidth']
images_box['y_center'] = ((images_box['ymax']-images_box['ymin'])/2)/images_box['totheight']

images_box['width'] = (images_box['xmax']-images_box['xmin'])/images_box['totwidth']
images_box['height'] = (images_box['ymax']-images_box['ymin'])/images_box['totheight']

In [91]:
images_box.drop(['xmin', 'ymin', 'xmax', 'ymax', 'totwidth', 'totheight'], axis=1, inplace=True)

In [92]:
images_box.head()

Unnamed: 0,image,x_center,y_center,width,height
0,1025794,0.48584,0.206475,0.97168,0.41295
1,481847,0.46875,0.179551,0.9375,0.359102
2,1514522,0.495417,0.189342,0.990833,0.378685
3,1340192,0.430176,0.219599,0.860352,0.439199
4,810608,0.472168,0.199329,0.944336,0.398658


In [93]:
# Not needed since we'll use trainval as train
# images_family_train = pd.read_csv('images_family_train.txt', names = ['Plane Family'])
# images_family_train.describe()

## Train data processing (2/3 of data):

In [94]:
images_family_trainval = pd.read_csv('images_family_trainval.txt', names = ['Ident'])
images_family_trainval['image']=images_family_trainval.Ident.str.split().str.get(0)
images_family_trainval['label']=images_family_trainval.Ident.str.split(' ', 1).str.get(1)
images_family_trainval.drop('Ident', axis = 1, inplace=True)
images_family_trainval.head()

Unnamed: 0,image,label
0,1025794,Boeing 707
1,1340192,Boeing 707
2,56978,Boeing 707
3,698580,Boeing 707
4,450014,Boeing 707


In [95]:
def get_key(val): 
    for key, value in fam_dict.items(): 
         if val == value: 
             return key 
  
    return "key doesn't exist"

images_family_trainval['class_num'] = images_family_trainval.label.apply(get_key)
images_family_trainval.head()

Unnamed: 0,image,label,class_num
0,1025794,Boeing 707,12
1,1340192,Boeing 707,12
2,56978,Boeing 707,12
3,698580,Boeing 707,12
4,450014,Boeing 707,12


In [133]:
images_family_trainval = images_family_trainval.merge(images_box, how='left', on='image')

In [134]:
for x in images_family_trainval.drop('label', axis = 1).iterrows():
    pd.DataFrame([x[1][1:6]]).to_csv("labels/train/"+str(x[1][0]).zfill(7)+".txt", sep=' ', header=False, index=False)

In [97]:
# Not needed since we'll use trainval as train
# images_family_val = pd.read_csv('images_family_val.txt', names = ['Plane Family'])
# images_family_val.describe()

## Test data processing (1/3 of data):

In [98]:
images_family_test = pd.read_csv('images_family_test.txt', names = ['Ident'])
images_family_test['image']=images_family_test.Ident.str.split().str.get(0)
images_family_test['label']=images_family_test.Ident.str.split(' ', 1).str.get(1)
images_family_test.drop('Ident', axis = 1, inplace=True)
images_family_test.head()

Unnamed: 0,image,label
0,1514522,Boeing 707
1,747566,Boeing 707
2,1008575,Boeing 707
3,717480,Boeing 707
4,991569,Boeing 707


In [99]:
images_family_test['class_num'] = images_family_test.label.apply(get_key)
images_family_test.head()

Unnamed: 0,image,label,class_num
0,1514522,Boeing 707,12
1,747566,Boeing 707,12
2,1008575,Boeing 707,12
3,717480,Boeing 707,12
4,991569,Boeing 707,12


In [110]:
# images_family_test = images_family_test.merge(images_box, how='left', on='image')
images_family_test.head()

Unnamed: 0,image,label,class_num,x_center,y_center,width,height
0,1514522,Boeing 707,12,0.495417,0.189342,0.990833,0.378685
1,747566,Boeing 707,12,0.463867,0.234848,0.927734,0.469697
2,1008575,Boeing 707,12,0.496094,0.241727,0.992188,0.483453
3,717480,Boeing 707,12,0.491667,0.303922,0.983333,0.607843
4,991569,Boeing 707,12,0.430664,0.213267,0.861328,0.426534


In [132]:
images_family_test.dtypes

image         object
label         object
class_num      int64
x_center     float64
y_center     float64
width        float64
height       float64
dtype: object

In [129]:
for x in images_family_test.drop('label', axis = 1).iterrows():
    pd.DataFrame([x[1][1:6]]).to_csv("labels/val/"+str(x[1][0]).zfill(7)+".txt", sep=' ', header=False, index=False)

[class_num          12
x_center     0.495417
y_center     0.189342
width        0.990833
height       0.378685
Name: 0, dtype: object]
[class_num          12
x_center     0.463867
y_center     0.234848
width        0.927734
height       0.469697
Name: 1, dtype: object]
[class_num          12
x_center     0.496094
y_center     0.241727
width        0.992188
height       0.483453
Name: 2, dtype: object]
[class_num          12
x_center     0.491667
y_center     0.303922
width        0.983333
height       0.607843
Name: 3, dtype: object]
[class_num          12
x_center     0.430664
y_center     0.213267
width        0.861328
height       0.426534
Name: 4, dtype: object]
[class_num          12
x_center       0.4925
y_center     0.203202
width           0.985
height       0.406404
Name: 5, dtype: object]
[class_num          12
x_center     0.476074
y_center     0.230986
width        0.952148
height       0.461972
Name: 6, dtype: object]
[class_num          12
x_center      0.48375
y_center  

## Move trainval and test images into their respective directories

In [13]:
import os
import shutil

def move(src, dest):
    shutil.move(src, dest)

source = '/home/barrett/aircraft/fgvc-aircraft-2013b/data/images/'
train_dest = '/home/barrett/aircraft/fgvc-aircraft-2013b/data/images/train/'

images_family_trainval.apply(lambda row: move(source+str(row.image)+'.jpg',
                                              train_dest), axis=1)

0       None
1       None
2       None
3       None
4       None
        ... 
6662    None
6663    None
6664    None
6665    None
6666    None
Length: 6667, dtype: object