In [None]:
import os
import glob
import cv2
import time
import math
import random

# Dataset locations and batch size.
# NOTE(review): none of these three names is referenced by the functions in
# this cell; the loaders below build their own '../input/...' paths.
TRAIN_PATH="../capdata/train"
TEST_PATH="../capdata/test"
# NOTE(review): stored as the string "10", not the integer 10 -- confirm that
# any consumer converts it before using it as a number.
BATCH_SIZE="10"

def get_im_cv2(path, img_rows, img_cols, color_type=1):
    """Load an image from disk and resize it to ``img_rows`` x ``img_cols``.

    Args:
        path: Path of the image file to read.
        img_rows: Target height in pixels.
        img_cols: Target width in pixels.
        color_type: 1 to load the image as grayscale, 3 to load it in colour.

    Returns:
        The resized image, as returned by ``cv2.resize``.

    Raises:
        ValueError: If ``color_type`` is neither 1 nor 3.
        OSError: If OpenCV could not read an image from ``path``.
    """
    if color_type == 1:
        img = cv2.imread(path, 0)     # flag 0 -> grayscale
    elif color_type == 3:
        img = cv2.imread(path)        # default flag -> colour
    else:
        # Previously this fell through with ``img`` unbound.
        raise ValueError(
            'Unsupported color_type {!r}; expected 1 or 3'.format(color_type))

    # cv2.imread reports failure by returning None rather than raising.
    if img is None:
        raise OSError('Could not read image: {}'.format(path))

    # cv2.resize expects the target size as (width, height).
    return cv2.resize(img, (img_cols, img_rows))

def get_im_cv2_mod(path, img_rows, img_cols, color_type=1, max_angle=10):
    """Load an image, rotate it by a random angle, and resize it.

    The image is rotated about its centre by an angle drawn uniformly from
    ``[-max_angle, max_angle]`` degrees (scale 1, canvas size unchanged) and
    then resized to ``img_rows`` x ``img_cols`` with linear interpolation.

    Args:
        path: Path of the image file to read.
        img_rows: Target height in pixels.
        img_cols: Target width in pixels.
        color_type: 1 to load as grayscale; any other value loads in colour.
        max_angle: Largest rotation magnitude in degrees.

    Returns:
        The rotated and resized image, as returned by ``cv2.resize``.

    Raises:
        OSError: If OpenCV could not read an image from ``path``.
    """
    if color_type == 1:
        img = cv2.imread(path, 0)     # flag 0 -> grayscale
    else:
        img = cv2.imread(path)        # default flag -> colour

    # cv2.imread reports failure by returning None rather than raising.
    if img is None:
        raise OSError('Could not read image: {}'.format(path))

    height, width = img.shape[0], img.shape[1]

    # Random rotation about the image centre.
    angle = random.uniform(-max_angle, max_angle)
    M = cv2.getRotationMatrix2D((width / 2, height / 2), angle, 1)
    img = cv2.warpAffine(img, M, (width, height))

    # The third positional argument of cv2.resize is ``dst``, so the
    # interpolation flag has to be passed by keyword to take effect.
    return cv2.resize(img, (img_cols, img_rows),
                      interpolation=cv2.INTER_LINEAR)

def get_driver_img_list():
    """Build a mapping from image file name to driver id.

    Reads ``../input/driver_imgs_list.csv``. The first line is skipped as a
    header; every other non-blank line is split on commas, and its first
    field is stored under its third field as key.

    Returns:
        dict mapping the third CSV column (image file name) to the first
        CSV column (driver id).

    Raises:
        OSError: If the CSV file cannot be opened.
        IndexError: If a non-blank line has fewer than three fields.
    """
    dr = dict()
    path = os.path.join('..', 'input', 'driver_imgs_list.csv')
    print("Read drivers' list... ...")
    # ``with`` guarantees the file is closed even if parsing raises.
    with open(path, 'r') as f:
        next(f, None)                 # skip the header line
        for line in f:
            line = line.strip()
            if not line:
                # A blank line (e.g. trailing newline) carries no record.
                continue
            arr = line.split(',')
            dr[arr[2]] = arr[0]
    return dr

def load_train(img_rows, img_cols, color_type=1):
    """Load all training images from ``../input/train/c0`` .. ``c9``.

    Each ``*.jpg`` in a class folder is loaded through ``get_im_cv2_mod``
    and labelled with the folder's class index; its driver id is looked up
    by file name in the mapping returned by ``get_driver_img_list``.

    Args:
        img_rows: Target image height passed to ``get_im_cv2_mod``.
        img_cols: Target image width passed to ``get_im_cv2_mod``.
        color_type: Colour mode passed to ``get_im_cv2_mod``.

    Returns:
        Tuple ``(images, labels, driver_ids, unique_drivers)`` where the
        first three are parallel lists and the last is the sorted list of
        distinct driver ids.
    """
    began = time.time()
    images, labels, drivers = [], [], []
    # Maps image file name -> driver id.
    img_to_driver = get_driver_img_list()

    print('Read train images... ... ')
    for class_idx in range(10):
        print('Load folder c{}... ...'.format(class_idx))
        pattern = os.path.join(
            '..', 'input', 'train', 'c' + str(class_idx), '*.jpg')
        for jpg_path in glob.glob(pattern):
            images.append(
                get_im_cv2_mod(jpg_path, img_rows, img_cols, color_type))
            labels.append(class_idx)
            # The CSV is keyed by bare file name, so drop the directory part.
            drivers.append(img_to_driver[os.path.basename(jpg_path)])

    elapsed = round(time.time() - began, 2)
    print('Read train data time: {} seconds'.format(elapsed))
    distinct = sorted(set(drivers))
    print('Unique drivers: {}'.format(len(distinct)))
    print(distinct)
    return images, labels, drivers, distinct


def load_test(img_rows, img_cols, color_type=1):
    """Load every ``*.jpg`` under ``../input/test``.

    Args:
        img_rows: Target image height passed to ``get_im_cv2_mod``.
        img_cols: Target image width passed to ``get_im_cv2_mod``.
        color_type: Colour mode passed to ``get_im_cv2_mod``.

    Returns:
        Tuple ``(X_test, X_test_id)``: the loaded images and, in the same
        order, their file names without the directory part.
    """
    print('Read test images... ...')
    start_time = time.time()
    files = glob.glob(os.path.join('..', 'input', 'test', '*.jpg'))
    X_test = []
    X_test_id = []
    # Print progress roughly every tenth of the files. Clamp to 1 so that
    # fewer than 10 files cannot make the modulo below divide by zero.
    thr = max(1, len(files) // 10)
    for total, imgfile in enumerate(files, start=1):
        # NOTE(review): get_im_cv2_mod applies a random rotation, so the test
        # images are randomly rotated as well -- confirm this is intended.
        X_test.append(get_im_cv2_mod(imgfile, img_rows, img_cols, color_type))
        X_test_id.append(os.path.basename(imgfile))
        if total % thr == 0:
            print('Read {} images from {}'.format(total, len(files)))

    print('Read test data time: {} seconds'.format(round(time.time() - start_time, 2)))
    return X_test, X_test_id

In [27]:
# Test 

import os
import glob
import cv2
import time
import math
import random

# Dataset locations and batch size.
# NOTE(review): none of these three names is referenced by the functions in
# this cell; load_test below builds its own '../input/test' path.
TRAIN_PATH="../capdata/train"
TEST_PATH="../capdata/test"
# NOTE(review): stored as the string "10", not the integer 10 -- confirm that
# any consumer converts it before using it as a number.
BATCH_SIZE="10"

def get_im_cv2(path, img_rows, img_cols, color_type=1):
    """Load an image from disk and resize it to ``img_rows`` x ``img_cols``.

    Args:
        path: Path of the image file to read.
        img_rows: Target height in pixels.
        img_cols: Target width in pixels.
        color_type: 1 to load the image as grayscale, 3 to load it in colour.

    Returns:
        The resized image, as returned by ``cv2.resize``.

    Raises:
        ValueError: If ``color_type`` is neither 1 nor 3.
        OSError: If OpenCV could not read an image from ``path``.
    """
    if color_type == 1:
        img = cv2.imread(path, 0)     # flag 0 -> grayscale
    elif color_type == 3:
        img = cv2.imread(path)        # default flag -> colour
    else:
        # Previously this fell through with ``img`` unbound.
        raise ValueError(
            'Unsupported color_type {!r}; expected 1 or 3'.format(color_type))

    # cv2.imread reports failure by returning None rather than raising.
    if img is None:
        raise OSError('Could not read image: {}'.format(path))

    # cv2.resize expects the target size as (width, height).
    return cv2.resize(img, (img_cols, img_rows))

def get_im_cv2_mod(path, img_rows, img_cols, color_type=1, max_angle=100):
    """Load an image, rotate it by a random angle, and resize it.

    The image is rotated about its centre by an angle drawn uniformly from
    ``[-max_angle, max_angle]`` degrees (scale 1, canvas size unchanged) and
    then resized to ``img_rows`` x ``img_cols`` with linear interpolation.

    Args:
        path: Path of the image file to read.
        img_rows: Target height in pixels.
        img_cols: Target width in pixels.
        color_type: 1 to load as grayscale; any other value loads in colour.
        max_angle: Largest rotation magnitude in degrees.

    Returns:
        The rotated and resized image, as returned by ``cv2.resize``.

    Raises:
        OSError: If OpenCV could not read an image from ``path``.
    """
    if color_type == 1:
        img = cv2.imread(path, 0)     # flag 0 -> grayscale
    else:
        img = cv2.imread(path)        # default flag -> colour

    # cv2.imread reports failure by returning None rather than raising.
    if img is None:
        raise OSError('Could not read image: {}'.format(path))

    height, width = img.shape[0], img.shape[1]

    # Random rotation about the image centre.
    angle = random.uniform(-max_angle, max_angle)
    M = cv2.getRotationMatrix2D((width / 2, height / 2), angle, 1)
    img = cv2.warpAffine(img, M, (width, height))

    # The third positional argument of cv2.resize is ``dst``, so the
    # interpolation flag has to be passed by keyword to take effect.
    return cv2.resize(img, (img_cols, img_rows),
                      interpolation=cv2.INTER_LINEAR)


#testpath="../input/train/c1/img_6.jpg"
#img=get_im_cv2_mod(testpath, 480, 640, color_type=1)
#print(img)
#print('img.shape=',img.shape)
#cv2.imshow("img",img)
#cv2.waitKey(0)  


def load_test(img_rows, img_cols, color_type=1):
    """Load every ``*.jpg`` under ``../input/test``.

    Args:
        img_rows: Target image height passed to ``get_im_cv2_mod``.
        img_cols: Target image width passed to ``get_im_cv2_mod``.
        color_type: Colour mode passed to ``get_im_cv2_mod``.

    Returns:
        Tuple ``(X_test, X_test_id)``: the loaded images and, in the same
        order, their file names without the directory part.
    """
    print('Read test images... ...')
    start_time = time.time()
    files = glob.glob(os.path.join('..', 'input', 'test', '*.jpg'))
    X_test = []
    X_test_id = []
    # Print progress roughly every tenth of the files. Clamp to 1 so that
    # fewer than 10 files cannot make the modulo below divide by zero.
    thr = max(1, len(files) // 10)
    for total, imgfile in enumerate(files, start=1):
        # NOTE(review): get_im_cv2_mod applies a random rotation, so the test
        # images are randomly rotated as well -- confirm this is intended.
        X_test.append(get_im_cv2_mod(imgfile, img_rows, img_cols, color_type))
        X_test_id.append(os.path.basename(imgfile))
        if total % thr == 0:
            print('Read {} images from {}'.format(total, len(files)))

    print('Read test data time: {} seconds'.format(round(time.time() - start_time, 2)))
    return X_test, X_test_id

# Load every test image at 480 rows x 640 columns, using load_test's default
# color_type of 1 (grayscale). X_test holds the images, X_test_id their names.
X_test,X_test_id=load_test(480,640)



Read test images... ...
Read 7972 images from 79726
Read 15944 images from 79726
Read 23916 images from 79726
Read 31888 images from 79726
Read 39860 images from 79726
Read 47832 images from 79726
Read 55804 images from 79726
Read 63776 images from 79726
Read 71748 images from 79726
Read 79720 images from 79726
Read test data time: 816.8 seconds
