# Use Pickle format for later training
For convenient training in later recommend you to use pickle format which is popular dataformat in machine-learning.<br/>
In this notebook, you will learn how to use annotation informations from clip image to dump as pickle.

In [1]:
from __future__ import print_function
%matplotlib inline
import glob
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = (0.8, 0.8)
import pandas as pd
import numpy as np
from scipy.misc import imresize
import os
import pickle

The following path is default if you download dataset via `dowload_dataset.sh`.

In [2]:
train_dir = 'data/train/GTSRB/Final_Training/Images/*'
test_dir = 'data/test/GTSRB/Final_Test/Images/'
# default image size for later training
img_shape = (32, 32)
n_cls = 43

### Clip image which is according to annotation data.
The annotation information including 'Filename', 'Roi.X1', 'Roi.Y1', 'Roi.X2', 'Roi.Y2', 'ClassId' will be applied to this.

In [3]:
def _clip_image_with_annotation_info(dir, isTrain=True):
    annotation_path = glob.glob(os.path.join(dir, '*.csv'))[0]
    print(annotation_path + '...')
    annotation = pd.read_csv(annotation_path, delimiter=';')
    imgs = []
    classids = []
    for _, row in annotation.iterrows():
        img = mpimg.imread(os.path.join(dir, row['Filename']))
        x1 = int(row['Roi.X1'])
        y1 = int(row['Roi.Y1'])
        x2 = int(row['Roi.X2'])
        y2 = int(row['Roi.Y2'])
        img = imresize(img[x1:x2, y1:y2], size=img_shape, mode='RGB')
        imgs.append(img)
        if isTrain:
            clsid = np.zeros(shape=n_cls)
            clsid[int(row['ClassId'])]=1
            classids.append(clsid)
    return imgs, classids

### Data loaded
step through the annotation files from train-set to test-set.

In [4]:
def load_data_with_annotations(dir, isTrain=True):
    features = []
    labels = []
    if isTrain:
        for subdir in glob.glob(dir):
            img, classid = _clip_image_with_annotation_info(subdir, isTrain=isTrain)
            features.extend(img)
            labels.extend(classid)
    else:
        img, _ = _clip_image_with_annotation_info(dir, isTrain=isTrain)
        features.extend(img)
    print('complete..')
    return {
        'features': np.array(features),
        'labels': np.array(labels)
    }

### Data dumps to pickle

In [5]:
train_data = load_data_with_annotations(train_dir)
test_data = load_data_with_annotations(test_dir, isTrain=False)

data/train/GTSRB/Final_Training/Images/00029/GT-00029.csv...
data/train/GTSRB/Final_Training/Images/00008/GT-00008.csv...
data/train/GTSRB/Final_Training/Images/00006/GT-00006.csv...
data/train/GTSRB/Final_Training/Images/00003/GT-00003.csv...
data/train/GTSRB/Final_Training/Images/00018/GT-00018.csv...
data/train/GTSRB/Final_Training/Images/00037/GT-00037.csv...
data/train/GTSRB/Final_Training/Images/00024/GT-00024.csv...
data/train/GTSRB/Final_Training/Images/00002/GT-00002.csv...
data/train/GTSRB/Final_Training/Images/00027/GT-00027.csv...
data/train/GTSRB/Final_Training/Images/00004/GT-00004.csv...
data/train/GTSRB/Final_Training/Images/00034/GT-00034.csv...
data/train/GTSRB/Final_Training/Images/00009/GT-00009.csv...
data/train/GTSRB/Final_Training/Images/00001/GT-00001.csv...
data/train/GTSRB/Final_Training/Images/00011/GT-00011.csv...
data/train/GTSRB/Final_Training/Images/00021/GT-00021.csv...
data/train/GTSRB/Final_Training/Images/00041/GT-00041.csv...
data/train/GTSRB/Final_T

In [6]:
pickle.dump(train_data, open('data/train.pkl', 'wb'))
pickle.dump(test_data['features'], open('data/test.pkl', 'wb'))