In [12]:
import os
import re
import pandas as pd
import numpy as np


In [13]:
root_path = './cs-t0828-2020-hw1'
train_src = 'training_data/training_data'
test_src = 'testing_data/testing_data'


In [14]:
print('\n==> generate train file list: {}'.format('images.csv'))
print('train file directory: {}'.format(os.path.join(root_path, train_src)))

img_fn_list = os.listdir(os.path.join(root_path, train_src))
print('number of files: {}'.format(len(img_fn_list)))
#print(img_fn_list)

images = [[re.split(".jpg", img_fn)[0], img_fn] for img_fn in img_fn_list]
images.sort()
#print(images)

dfObj = pd.DataFrame(images, columns=['Image ID', 'Image File Name'])
print(dfObj)
dfObj.to_csv(os.path.join(root_path, 'images.csv'), index = False)



==> generate train file list: images.csv
train file directory: ./cs-t0828-2020-hw1\training_data/training_data
number of files: 11185
      Image ID Image File Name
0       000001      000001.jpg
1       000002      000002.jpg
2       000003      000003.jpg
3       000007      000007.jpg
4       000009      000009.jpg
...        ...             ...
11180   016179      016179.jpg
11181   016182      016182.jpg
11182   016183      016183.jpg
11183   016184      016184.jpg
11184   016185      016185.jpg

[11185 rows x 2 columns]


In [15]:
print('\n==> generate test file list: {}'.format('test_images.csv'))
print('test file directory: {}'.format(os.path.join(root_path, test_src)))

img_fn_list = os.listdir(os.path.join(root_path, test_src))
print('number of files: {}'.format(len(img_fn_list)))
#print(img_fn_list)

images = [[re.split(".jpg", img_fn)[0], img_fn] for img_fn in img_fn_list]
images.sort()
#print(images)

dfObj = pd.DataFrame(images, columns=['Image ID', 'Image File Name'])
print(dfObj)
dfObj.to_csv(os.path.join(root_path, 'test_images.csv'), index = False)



==> generate test file list: test_images.csv
test file directory: ./cs-t0828-2020-hw1\testing_data/testing_data
number of files: 5000
     Image ID Image File Name
0      000004      000004.jpg
1      000005      000005.jpg
2      000006      000006.jpg
3      000008      000008.jpg
4      000019      000019.jpg
...       ...             ...
4995   016172      016172.jpg
4996   016175      016175.jpg
4997   016176      016176.jpg
4998   016180      016180.jpg
4999   016181      016181.jpg

[5000 rows x 2 columns]


In [16]:
training_labels_csv_filename = 'training_labels.csv'
training_labels_pd = pd.read_csv(os.path.join(root_path, training_labels_csv_filename))
print(training_labels_pd)
mycar = training_labels_pd.values.tolist()
mycar.sort()
#print(mycar)

          id                                label
0       9350          Ford F-150 Regular Cab 2007
1       2645                      BMW X6 SUV 2012
2       2267              BMW 1 Series Coupe 2012
3       8553              Fisker Karma Sedan 2012
4       6990  Dodge Ram Pickup 3500 Crew Cab 2010
...      ...                                  ...
11180    184                  Acura TL Sedan 2012
11181   5863          Chevrolet Malibu Sedan 2007
11182   2482        BMW 6 Series Convertible 2007
11183  14926            Suzuki Kizashi Sedan 2012
11184   2927              BMW M6 Convertible 2010

[11185 rows x 2 columns]


In [17]:
brands = list(set([img_brand for img_idx, img_brand in mycar]))
brands.sort()
#print(len(brands))
#print(brands)

class_label = [[idx, brand] for idx, brand in enumerate(brands)]
#print(len(class_label))
#print(class_label)


In [18]:
print('\n==> generate class file, containing class id and class name: {}'.format('class.csv'))
dfObj = pd.DataFrame(class_label, columns=['Class ID', 'Car Brand'])
print(dfObj)
dfObj.to_csv(os.path.join(root_path, 'class.csv'), index = False)



==> generate class file, containing class id and class name: class.csv
     Class ID                       Car Brand
0           0      AM General Hummer SUV 2000
1           1       Acura Integra Type R 2001
2           2             Acura RL Sedan 2012
3           3             Acura TL Sedan 2012
4           4            Acura TL Type-S 2008
..        ...                             ...
191       191  Volkswagen Golf Hatchback 2012
192       192            Volvo 240 Sedan 1993
193       193        Volvo C30 Hatchback 2012
194       194             Volvo XC90 SUV 2007
195       195   smart fortwo Convertible 2012

[196 rows x 2 columns]


In [19]:
from collections import OrderedDict 

name_to_id = {} 
for idx, brand in class_label:
    name_to_id[brand] = idx
#print(name_to_id)

id_to_name = {} 
for idx, brand in class_label:
    id_to_name[idx] = brand
#print(id_to_name)


In [20]:
print('\n==> generate train label file and transfer train label name to label id: {}'.format('image_class_labels.csv'))
image_class_labels = [[img_idx, name_to_id[img_brand]] for img_idx, img_brand in mycar]
#print(image_class_labels)

dfObj = pd.DataFrame(image_class_labels, columns=['Image ID', 'Class ID'])
print(dfObj)
dfObj.to_csv(os.path.join(root_path, 'image_class_labels.csv'), index = False)



==> generate train label file and transfer train label name to label id: image_class_labels.csv
       Image ID  Class ID
0             1         0
1             2         0
2             3         0
3             7         0
4             9         0
...         ...       ...
11180     16179       195
11181     16182       195
11182     16183       195
11183     16184       195
11184     16185       195

[11185 rows x 2 columns]


In [21]:
print('\n==> generate train / train phase test split with ratio train/tets = 7/1: {}'.format('train_test_split.csv'))
train_test_split = []
split_ratio = 7+1
split = 1
for idx, class_label in image_class_labels:
    train_test_split.append([idx, 1 if (split%split_ratio) else 0])
    split += 1
#print(train_test_split)

dfObj = pd.DataFrame(train_test_split, columns=['Image ID', 'Train Test Split'])
print(dfObj)
dfObj.to_csv(os.path.join(root_path, 'train_test_split.csv'), index = False)


==> generate train / train phase test split with ratio train/tets = 7/1: train_test_split.csv
       Image ID  Train Test Split
0             1                 1
1             2                 1
2             3                 1
3             7                 1
4             9                 1
...         ...               ...
11180     16179                 1
11181     16182                 1
11182     16183                 1
11183     16184                 0
11184     16185                 1

[11185 rows x 2 columns]


In [23]:
def accuracy_log(f_name):

    f = open(f_name)
    lines = f.readlines()
    print(type(lines))

    epoch= []
    train_acc= []
    test_acc= []
    for line in lines:
        #print(line)
        matchObj = re.match(r'epoch:(\d+) - train loss: (\d+.\d+) and train acc: (\d+.\d+) total sample: (\d+)', line)
        if matchObj:
            #print('{}\n{}\n{}\n{}\n{}\n'.format(matchObj.group(0), matchObj.group(1), matchObj.group(2), matchObj.group(3), matchObj.group(4)))
            epoch.append(matchObj.group(1))
            train_acc.append(matchObj.group(3))
        matchObj = re.match(r'epoch:(\d+) - test loss: (\d+.\d+) and test acc: (\d+.\d+) total sample: (\d+)', line)
        if matchObj:
            #print('{}\n{}\n{}\n{}\n{}\n'.format(matchObj.group(0), matchObj.group(1), matchObj.group(2), matchObj.group(3), matchObj.group(4)))
            test_acc.append(matchObj.group(3))
    for idx in range(len(epoch)):
        print(epoch[idx], train_acc[idx], test_acc[idx])
    
    f.close()

    print(np.argmax(train_acc))
    test_max_idx = np.argmax(test_acc)
    print(epoch[test_max_idx], test_acc[test_max_idx])

    sorted_idx = np.argsort(test_acc)[::-1]
    print([test_acc[i] for i in sorted_idx])
    print([epoch[i] for i in sorted_idx])
    
    return

accuracy_log("./models/20201101_142159/train_test.log")

FileNotFoundError: [Errno 2] No such file or directory: './models/20201101_142159/train_test.log'