In [32]:
import numpy as np
import os
import pickle
import pandas as pd
import time
import warnings
import cv2
from numpy.random import permutation
from keras.utils import np_utils



In [25]:
# Silence every Python warning for the rest of the session.
warnings.filterwarnings("ignore")
# Seed NumPy's global RNG so numpy.random.permutation() is repeatable.
np.random.seed(2016)

# Folder that holds the CSV index files and the image directory.
dataset_path = r'../Distracted-Driver-Detection/Dataset/'
# 1 -> reuse a pickled cache when it exists; 0 -> always re-read the images.
use_cache = 1

In [2]:
def get_im_cv2(path):
    """Read an image with OpenCV and resize it to 224x224.

    Args:
        path: Filesystem path of the image to load.

    Returns:
        The image returned by ``cv2.imread`` resized with
        ``dsize=(224, 224)`` and ``cv2.INTER_LINEAR`` interpolation.

    Raises:
        OSError: If ``cv2.imread`` could not read ``path``.
    """
    img = cv2.imread(path)
    if img is None:
        # cv2.imread reports a missing/unreadable file by returning None
        # instead of raising; fail here with the offending path rather than
        # letting cv2.resize die on an opaque internal assertion.
        raise OSError('Could not read image: {}'.format(path))
    return cv2.resize(src=img, dsize=(224, 224), interpolation=cv2.INTER_LINEAR)

In [20]:
def load_train():
    """Load every training image listed in the train CSV.

    The first CSV column holds image paths and the second holds labels.
    Only the last two '/'-separated components of each listed path are kept
    and re-rooted under ``dataset_path + 'v1_cam1_no_split/'``.

    Returns:
        (images, labels): two parallel lists, one resized image from
        ``get_im_cv2`` and one label per CSV row.
    """
    table = pd.read_csv(dataset_path + 'auc.distracted.driver.train.csv')
    listed_paths, labels = table.iloc[:, 0], table.iloc[:, 1]
    images, targets = [], []
    print('Read train images')
    for row in range(len(listed_paths)):
        # Keep "<folder>/<file>" from the CSV entry and rebuild the local path.
        tail = listed_paths[row].split('/')[-2:]
        fl = dataset_path + r'v1_cam1_no_split/' + r'/'.join(tail)
        print(fl)
        images.append(get_im_cv2(fl))
        targets.append(labels[row])
    return images, targets

In [27]:
def load_valid():
    """Load every image listed in the test CSV.

    The first CSV column holds image paths and the second holds labels.
    Only the last two '/'-separated components of each listed path are kept
    and re-rooted under ``dataset_path + 'v1_cam1_no_split/'``.

    Returns:
        (images, labels): two parallel lists, one resized image from
        ``get_im_cv2`` and one label per CSV row.
    """
    table = pd.read_csv(dataset_path + 'auc.distracted.driver.test.csv')
    listed_paths, labels = table.iloc[:, 0], table.iloc[:, 1]
    images, targets = [], []
    print('Read test images')
    for row in range(len(listed_paths)):
        # Keep "<folder>/<file>" from the CSV entry and rebuild the local path.
        tail = listed_paths[row].split('/')[-2:]
        fl = dataset_path + r'v1_cam1_no_split/' + r'/'.join(tail)
        print(fl)
        images.append(get_im_cv2(fl))
        targets.append(labels[row])
    return images, targets

In [29]:
def cache_data(data, path):
    """Pickle ``data`` to ``path`` when the destination directory exists.

    Args:
        data: Any picklable object.
        path: Destination file path. A bare filename is written to the
            current working directory.

    Returns:
        None. If the destination directory does not exist, a message is
        printed and nothing is written.
    """
    # os.path.dirname('file.dat') is '', and os.path.isdir('') is False, so
    # map a bare filename to the current directory instead of rejecting it.
    target_dir = os.path.dirname(path) or os.curdir
    if not os.path.isdir(target_dir):
        print('Directory doesnt exists')
        return
    # The context manager closes the handle even if pickle.dump raises.
    with open(path, 'wb') as handle:
        pickle.dump(data, handle)

In [30]:
def restore_data(path):
    """Load a pickled object from ``path``.

    Args:
        path: File previously written with ``pickle.dump``.

    Returns:
        The unpickled object, or an empty dict when ``path`` is not an
        existing file.
    """
    if not os.path.isfile(path):
        return dict()
    # NOTE(security): pickle.load can execute arbitrary code; only point this
    # at cache files produced by this notebook, never at untrusted input.
    # The context manager ensures the handle is closed after loading.
    with open(path, 'rb') as handle:
        return pickle.load(handle)

In [None]:
def read_and_normalize_train_data(cache_dir=None):
    """Load, mean-centre, one-hot encode and shuffle the training set.

    Images and labels come from ``load_train()`` unless a cache file exists
    and ``use_cache`` is non-zero, in which case they are restored from it.

    Args:
        cache_dir: Directory containing (or receiving) the pickled cache
            file. ``None`` keeps the original location
            ``/home/gpu3/Desktop/mobileVGG/cache`` so existing calls behave
            as before; pass another directory to run on a different machine.

    Returns:
        (train_data, train_target): a float16 array of images with a fixed
        per-channel mean subtracted along the last axis, and the labels
        one-hot encoded into 10 classes. Both are reordered by the same
        random permutation.
    """
    if cache_dir is None:
        cache_dir = os.path.join('/home/gpu3/Desktop/mobileVGG', 'cache')
    cache_path = os.path.join(cache_dir, 'train_r_224_c_224_t_3.dat')

    if not os.path.isfile(cache_path) or use_cache == 0:
        train_data, train_target = load_train()
        cache_data((train_data, train_target), cache_path)
    else:
        print('Restore train from cache!')
        train_data, train_target = restore_data(cache_path)

    print('Convert to numpy...')
    train_data = np.array(train_data, dtype=np.uint8)
    train_target = np.array(train_target, dtype=np.uint8)

    # The former transpose((0, 1, 2, 3)) was an identity permutation of the
    # axes, so it (and its 'Reshape...' message) has been dropped.

    # Normalise the train data
    print('Convert to float...')
    train_data = train_data.astype('float16')
    mean_pixel = [80.857, 81.106, 82.928]

    # Subtract each channel's mean in place, one channel of the last axis at
    # a time, exactly as the unrolled statements did.
    for channel, channel_mean in enumerate(mean_pixel):
        print('Substract {}...'.format(channel))
        train_data[:, :, :, channel] -= channel_mean

    train_target = np_utils.to_categorical(train_target, 10)

    # Shuffle images and labels with one shared permutation so pairs stay
    # aligned.
    perm = permutation(len(train_target))
    train_data = train_data[perm]
    train_target = train_target[perm]

    print('Train shape:', train_data.shape)
    print(train_data.shape[0], 'train samples')
    return train_data, train_target

In [None]:
def read_and_normalize_test_data(cache_dir=None):
    """Load, mean-centre and one-hot encode the test set.

    Images and labels come from ``load_valid()`` unless a cache file exists
    and ``use_cache`` is non-zero, in which case they are restored from it.

    Args:
        cache_dir: Directory containing (or receiving) the pickled cache
            file. ``None`` keeps the original location
            ``/home/gpu3/Desktop/mobileVGG/cache`` so existing calls behave
            as before; pass another directory to run on a different machine.

    Returns:
        (test_data, test_target): a float16 array of images with a fixed
        per-channel mean subtracted along the last axis, and the labels
        one-hot encoded into 10 classes.
    """
    start_time = time.time()
    if cache_dir is None:
        cache_dir = os.path.join('/home/gpu3/Desktop/mobileVGG', 'cache')
    cache_path = os.path.join(cache_dir, 'test_r_224_c_224_t_3.dat')

    if not os.path.isfile(cache_path) or use_cache == 0:
        test_data, test_target = load_valid()
        cache_data((test_data, test_target), cache_path)
    else:
        # The original message contained an unfilled '[{}]' placeholder that
        # was printed literally; it now matches the train-side message.
        print('Restore test from cache!')
        test_data, test_target = restore_data(cache_path)

    # The former transpose((0, 1, 2, 3)) was an identity permutation of the
    # axes, so it has been dropped.
    test_data = np.array(test_data, dtype=np.uint8)

    # Normalise the test data
    test_data = test_data.astype('float16')
    mean_pixel = [80.857, 81.106, 82.928]

    # Subtract each channel's mean in place, one channel of the last axis at
    # a time, exactly as the unrolled statements did.
    for channel, channel_mean in enumerate(mean_pixel):
        test_data[:, :, :, channel] -= channel_mean

    test_target = np_utils.to_categorical(test_target, 10)
    print('Test shape:', test_data.shape)
    print(test_data.shape[0], 'test samples')
    print('Read and process test data time: {} seconds'.format(round(time.time() - start_time, 2)))
    return test_data, test_target