In [1]:
import os
import numpy as np
import cv2
import keras.utils 
%matplotlib inline
import matplotlib.pyplot as plt
import pickle
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
import math
import pandas as pd

Using TensorFlow backend.


In [2]:
def normalize(image):
    """
    #Argument: 
        img: shape(width, heigh, channel)
    #return:
        img normalize
    """
    width = X.shape[0]
    heigh = X.shape[1]
    channel = X.shape[2]
    for i in range(0,width):
        image[i,:,:] = image[i,:,:]/np.max(image[i,:,:], axis = 0) 
    return image
def normalize_standard(X):
    """
    #Argument: 
        X: shape(num, width, heigh, channel)
    #return:
        X normalize
    """
#     caculate mean
    num_img = X.shape[0]
    width = X.shape[1]
    heigh = X.shape[2]
    channel = X.shape[3]
    total_value_pixel = np.sum(X, axis = 3) #shape (num,width, heigh)
    total_value_pixel = np.sum(X, axis = 0) #shape (width, heigh)
    mean = total_value_pixel/(channel * num_img)
    
#     caculate standard deviation
    if channel == 3:
        channel1 = np.square(X[:,:,:,0] - mean)
        channel2 = np.square(X[:,:,:,1] - mean)
        channel3 = np.square(X[:,:,:,2] - mean)
        total = channel1 + channel2 + channel3
    elif channel == 1:
        total = X[:,:,:,0] - mean
    
    std = math.sqrt(total/(channel*num_img))
    
    x0 = (X[:,:,:,0] - mean)/std
    x1 = (X[:,:,:,1] - mean)/std
    x2 = (X[:,:,:,2] - mean)/std
    
    return np.concatenate((x0,x1,x2),axis = 0)

def read_data_info_from_csv(csv_path,is_crop):
    """
    #Argument:
        csv_path: path of file csv that container information of data per class 
        (ex: file name, size, roi.x, roi.y, class id)
    #Return:
        images, labels, number image of class
        type: list
    """
    print(csv_path)
    images =[]
    labels=[]
    data_info_csv = pd.read_csv(csv_path, sep=';')
    num_img_of_class = len(data_info_csv["Filename"])
    parent_dir_name =os.path.dirname(csv_path)
    for i in range(num_img_of_class):
        #read information of csv file
        img_name = data_info_csv["Filename"][i]
        roi_x1=data_info_csv['Roi.X1'][i]
        roi_y1 = data_info_csv['Roi.Y1'][i]
        roi_x2 =data_info_csv['Roi.X2'][i]
        roi_y2 =data_info_csv['Roi.Y2'][i]
        label = data_info_csv['ClassId'][i]
        img_path = os.path.join(parent_dir_name,img_name)
        image = cv2.imread(img_path)
        image = normalize(image)
        if is_crop:
             image = image[roi_y1:roi_y2,roi_x1:roi_x2,:]
        image = cv2.resize(image,(32,32))
        images.append(image)
        labels.append(label)
    
    images = np.asarray(images)
    images = normalize_standard(images)
    return  images,labels,num_img_of_class

def read_data(data_dir,is_crop):
    """
    #argument:
        data_dir: path of directory
    #return:
        images, labels, number of sample
    """
     # Get all subdirectories of data_dir. Each represents a label.
    directories = [d for d in os.listdir(data_dir) 
                   if os.path.isdir(os.path.join(data_dir, d))]
     # Loop through the label directories and collect the data in
    # two lists, labels and images.
    print(f"directories: {directories}")
    labels = []
    images = []
    num_sample =0;
    num_classes =0;
    total_num_img =0
    for index,d in enumerate(directories):
        label_dir = os.path.join(data_dir, d)
        #find path data information in csv file   
        path_info_data_csv = [os.path.join(label_dir, f)  for f in os.listdir(label_dir) if f.endswith(".csv")]
        
        for fn_csv in path_info_data_csv:      
            imgs, lbs,num_img_of_class = read_data_info_from_csv(fn_csv,is_crop)
            total_num_img = total_num_img+num_img_of_class;
            
            images.extend(imgs)
            labels.extend(lbs)
            
            print(f"index_name: {index}, directory : {d}, total number of image: {num_img_of_class}")
        num_classes=num_classes+1; 
    print(f"total_num_img: {total_num_img}")
    
    #read label's name
 

    images= np.asarray(images)
    labels = np.asarray(labels)
    
    return images, labels,num_classes
##test read data train and test
# images,labels,num_classes = read_data(r'..\data\train\Images')
# images,labels,num_classes = read_data_info_from_csv(r'..\data\test\Images\GT-online_test.csv')
# print(np.asarray(images).shape)

def get_labels_name():
    """
    get labels' name
    """
    labels_name =[]
    sign_nanme_csv = pd.read_csv(r"..\sign_name.csv",delimiter=';')
    labels_name = sign_nanme_csv['SignName']
    labels_name=np.asarray(labels_name)
    return labels_name

def load_data(pickle_fn,is_crop=False):
    """
    #Argument
        pickle_fn: name of data file
    #return:
        data{
            'ims':images, 
            'lbs':labels,
            'num_classes':num_classes,
            'lbs_name':labels_name
        }
    
    check if pickle_fn is exsist => read file 
    else: read data from direction "..\data\train\GTSRB"
    
    """
    if not os.path.isfile(pickle_fn):
        print("create pickle file")
        data_train_dir=r"..\data\train\GTSRB"
        images, labels,num_classes = read_data(data_train_dir,is_crop)
        labels_name = get_labels_name()
        mydata = {'ims':images, 'lbs':labels, 'num_class':num_classes,'lbs_name':labels_name}
        pickle.dump(mydata, open(pickle_fn, 'wb'))
    else:
        print("load pickle.....")
        mydata= pickle.load(open(pickle_fn,'rb'))
        images = mydata['ims']
        labels = mydata['lbs']
        num_classes = mydata['num_class']
        labels_name = mydata['lbs_name']
    return images, labels, num_classes,labels_name
def print_data_info():
    print(f"len image: {len(images)}")
    print(f'images.shape: {images.shape}')
    print(f"labels: {labels}")
    print(f"labels shape: {labels.shape}")

    
# pickle_fn=r"..\data_compressed\data_no_crop_30_09.p"
# images,labels,num_classes,labes_name=load_data(pickle_fn,is_crop=False)
# print_data_info()
# print(np.max(images))


In [3]:
def convert_onehot(labels,num_classes):
    labels_onehot= keras.utils.to_categorical(labels,num_classes)
    return labels_onehot

def convert_onehot_numpy(labels,num_classes):
    labels_onehot = np.eye(num_classes)[labels]
    return labels_onehot

def split_data(X,y, valid_size=0.2,shuffle=True):
    x_train, x_valid,y_train,y_valid = train_test_split(x,y,test_size =valid_size,shuffle =shuffle)
    return  x_train, x_valid,y_train,y_valid

# transform image
import warnings 
from skimage.transform import rotate,ProjectiveTransform, warp
from skimage import exposure

def _convert_shape_to_4_dementions(image):
    """
    change shape of image to 4 dementions to caculate transform and plot (num_img,img_size,img_size,channel)
    """
    shape_img = image.shape
    len_shape_img = len(shape_img)
    # shape image is (img_size,img_size,channel=3 or1)  or gray image(num_img,img_size,img_size)
    if len_shape_img==3:
        if image.shape[2] == 3 or image.shape[2] == 1:
            image = image.reshape(-1,image.shape[0],image.shape[1],image.shape[2])
        else:
            image=image.reshape(image.shape +(1,))
            
    return image
    
def transform_rotate(X, intensity=0.75):
    """
    #Arguments
        X: images
        intensity: cuong do, the value must be (0,1]
    #return:
        image is roated with intensity
    """
    X=_convert_shape_to_4_dementions(X)
    
    X_rotate =[]
    for i in range(X.shape[0]):
        delta = 25. * intensity # scale using augmentation intensity
        x = rotate(X[i], np.random.uniform(-delta, delta), mode = 'edge')
        X_rotate.append(x)
    
    X_rotate= np.asarray(X_rotate)
    return X_rotate

def transform_projection(X, intensity = 0.75):
    """
    convert image to image distortion
    X: image data
    intensity: cuong do bop meo image, value must be (0,1]
    
    return: array image is distorted with same shape
    
    for example: http://scikit-image.org/docs/dev/auto_examples/xx_applications/plot_geometric.html
    """
    # trục tọa độ: gốc (0,0) là góc trên bên trái
    
    X=_convert_shape_to_4_dementions(X)
    image_size = X.shape[1]
    d = image_size *0.25*intensity
    x_pre = []
    
    for i in range(X.shape[0]):
        tl_top = np.random.uniform(-d, d)     # Top left corner, top margin
        tl_left = np.random.uniform(-d, d)    # Top left corner, left margin
        bl_bottom = np.random.uniform(-d, d)  # Bottom left corner, bottom margin
        bl_left = np.random.uniform(-d, d)    # Bottom left corner, left margin
        tr_top = np.random.uniform(-d, d)     # Top right corner, top margin
        tr_right = np.random.uniform(-d, d)   # Top right corner, right margin
        br_bottom = np.random.uniform(-d, d)  # Bottom right corner, bottom margin
        br_right = np.random.uniform(-d, d)   # Bottom right corner, right margin

        src = np.array([[0,0],[0,image_size],[image_size,image_size],[image_size,0]])
        
        dst = np.array([[tl_left,tl_top], 
                       [bl_left,image_size-bl_bottom],
                       [image_size-br_right,image_size-br_bottom],
                       [image_size-tr_right,tr_top]])
        
        transform = ProjectiveTransform()
        transform.estimate(src=src,dst=dst)
        
        #using mode='edge' to remove the black space in image after use transform  
        img_preprocess = warp(X[i], transform, output_shape=(image_size, image_size), order = 1, mode = 'edge')
        x_pre.append(img_preprocess)
    
    x_pre = np.asarray(x_pre)
    return x_pre


def transform_brightness(images):
    images = _convert_shape_to_4_dementions(images)
   
    new_images=[]
    for img in images:
        ratio = np.random.uniform(low=0.4,high=4.,size=1)
        img_1 = exposure.adjust_gamma(img,ratio)
        new_images.append(img_1)
        
    new_images = np.asarray(new_images)
    return new_images

def preprocess_dataset(X, y = None,is_shuffle=False):
    """
    param X: images input
    param y: labels, the dimention is 1
    
    return:
        images with channel is 1 and the dimetion is (num_img,img_size,img_size,1)
        the value of pixel is normalize
    
    image will be processed before use as input of model
    
    """
    #Convert to grayscale, e.g. single Y channel
    # cong thuc : Y =0.299R+0.587G+0.114B, U = -0.147R-0.289G+0.436B, V=0.615R-0.515G-0.100B
    # YUV channle
    X = 0.299 * X[:, :, :, 0] + 0.587 * X[:, :, :, 1] + 0.114 * X[:, :, :, 2]

    #Scale features to be in [0, 1]
   
      
    # Apply localized histogram localization  
    for i in range(X.shape[0]):
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            if np.max(X[i])>1:
                X[i] = (X[i] / 255.).astype(np.float32)
            X[i] = exposure.equalize_adapthist(X[i])
        
    if is_shuffle:  
        # Shuffle the data
        X, y = shuffle(X, y)

    # Add a single grayscale channel
    X = X.reshape(X.shape + (1,)) 
    return X, y

def preprocess_input_gray_norm(images):
    img_intput_model = []
    
    num_img = images.shape[0]
    for i in range(num_img):
        img = images[i]
        red_channel =img[:,:,0]
        green_channel = img[:,:,1]
        blue_channel = img[:,:,2]
        
        #convert image to gray by red, greeen, blue channel
        # can't use cv2.cvtColor because the type's pixcel's value are not int8 (maybe [0,1],float32)
        gray_img = 0.2989 * red_channel + 0.5870 * green_channel + 0.1140 * blue_channel
        
        if (np.max(gray_img)>1):
            gray_img = gray_img/255.
        img_intput_model.append(gray_img)
        
    img_intput_model = np.asarray(img_intput_model)
    img_intput_model = normalize_standard(img_intput_model)
    img_intput_model = img_intput_model.astype(np.float32)
    #convert input to 4 dimention
    img_intput_model = img_intput_model.reshape(img_intput_model.shape +(1,))
    return img_intput_model

def crop_image(images):
    images = _convert_shape_to_4_dementions(images)
   
    new_images=[]
    crop_size = np.random.randint(5,25)
    for img in images:
        img = cv2.resize(img,(32,32))
#         print(f"crop_image: shape img: {img.shape}")
        red_channel =img[:,:,0]
        green_channel = img[:,:,1]
        blue_channel = img[:,:,2]
        
        #convert image to gray by red, greeen, blue channel
        # can't use cv2.cvtColor because the type's pixcel's value are not int8 (maybe [0,1],float32)
        img = 0.2989 * red_channel + 0.5870 * green_channel + 0.1140 * blue_channel
        
        img = np.reshape(img, (32,32,1))
        if crop_size > 15:
            img1 = img[0:crop_size,0:crop_size,:]
        else:
            img1 = img[crop_size:32,crop_size:32,:]
        img1 = cv2.resize(img1,(32,32))
        new_images.append(img1)

    new_images = np.asarray(new_images)
    return new_images

"""plot image"""

def plot_histogram_contrast(img):
    """
    the different between low and high  contrast (tuong phan)
    show the chart of y, u channel
    y's chanel value is spreaded => high
    """
    img = cv2.cvtColor(img,cv2.COLOR_RGB2YUV)
    y,u,v = [img[:,:,i] for i in range(3)]
    u=u.reshape(-1)
    y=y.reshape(-1)
    fig,axes = plt.subplots(1,2)
    for ax in axes:
        ax.axis('off')
    axes[0].hist(u,256 ,range=(0, 256), fc='k', ec='k')
    axes[0].set_title("low contrast")
    
    axes[1].hist(y,256 ,range=(0, 256), fc='k', ec='k')
    axes[1].set_title("high contrast")
    
    plt.show()

def plot_image(images,rows,cols):
    images=_convert_shape_to_4_dementions(images)
    
    num_img = images.shape[0]
#   
#     print("plto iamge: images shape: ", images.shape)
    if num_img < (rows*cols):
        rows= math.ceil(math.sqrt(num_img))
        cols =math.ceil(math.sqrt(num_img))
    
    #check if image in images is gray image, must reshape image to (num_img,img_size,img_size) to plot img 
    if (images.shape[-1]==1):
        img_height = images.shape[1]
        img_width = images.shape[2]
        images=images.reshape(num_img,img_height,img_width)
    
    fig,axes = plt.subplots(nrows=rows,ncols=cols,figsize=(20,20),clear=True)
    plt.subplots_adjust(hspace=0.5,wspace =0.5)
    
    axes = np.array(axes)
#     print(f"fig: {fig}, ax: {axes.shape}")
    for i,ax in enumerate(axes.reshape(-1)):
        #convert bgr to rgb
#         index = np.random.randint(0,num_img)
        index =i
#         print("num_img: ",num_img)
#         print("index; ",index)
        if index >= num_img:
#             print("out of num_img")
            break;
        #check if img is gray iamge
        if images.shape[-1]!=3:
#             print(f"images shape: {images.shape} , index: {index}")
            image = images[index]
#             print("img shape: ", image.shape)
            ax.imshow(image, cmap='gray')
        else:
#             print("plt image: images shape: ",images.shape)
#             print('index: ',index)
            image = images[index]
            image = image[:,:,::-1] #convert bgr to rgb to show on matplot
            ax.imshow(image) #,aspect='auto'
        ax.set_xticklabels([])
        ax.set_yticklabels([])
        ax.axis('off')
    plt.show()
    
def plot_compare_transform_data(X):
    import math
    X= _convert_shape_to_4_dementions(X)
    num_img = X.shape[0]
    if (num_img >10):
        num_img=10
   
    #compare between preprocess_img, original image, gray image
    print("origin image")
    plot_image(X,1,num_img)
   
    #gray
    print("gray image ")
    x_gray=[]
    for ind,img in enumerate(X):
        x = cv2.cvtColor(img,cv2.COLOR_RGB2GRAY)
        x_gray.append(x)
    x_gray = np.asarray(x_gray)
    
    print(f"x_gray shape:  nmax: {np.max(x_gray[0])}, nmin: {np.min(x_gray[0])}")
    plot_image(x_gray,1,num_img)
    
    print("pre process iamge")
    X_preprocess,y= preprocess_dataset(X)
    plot_image(X_preprocess,1,num_img)
    
    #crop_image
    X_crop = crop_image(X)
    print("plot crop image")
    plot_image(X_crop,1,num_img)
    
    #rotated
    X_rotated = transform_rotate(X)
    print(f"rotated  nmax: {np.max(X_rotated[0])}, nmin: {np.min(X_rotated[0])}")
    plot_image(X_rotated,1,num_img)
    print("rotated ->preprocess")
    X_rotated_pp,y= preprocess_dataset(X_rotated)
    plot_image(X_rotated_pp,1,num_img)
    
    # image distortion
    X_distortion=transform_projection(X,intensity=0.75)
    print(f'distorted nmax: {np.max(X_distortion[0])}, nmin: {np.min(X_distortion[0])}')
    plot_image(X_distortion,1,num_img)
    print("distorted -> preprocess")
    X_dst_pp,y= preprocess_dataset(X_distortion)
    plot_image(X_dst_pp,1,num_img)
    
    #brightness
    X_brightness = transform_brightness(X)
    print(f"brightness nmax: {np.max(X_brightness[0])}, nmin: {np.min(X_brightness[0])}")

    plot_image(X_brightness,1,num_img)
    print("brightness=>pre process")
    X_brightness_pp,y= preprocess_dataset(X_brightness)
    plot_image(X_brightness_pp,1,num_img)
    
#     #test input of transform has (num_img,img_size,img_size,1) dimention
#     print("test input is gray image")
#     X_rotated1 = transform_rotate(X_dst_pp)
#     plot_image(X_rotated1,1,num_img)

def plot_compare_transform_data_for_khoaluan(X):
    import math
    X= _convert_shape_to_4_dementions(X)
    num_img = X.shape[0]*6
    if (num_img >10):
        num_img=10
   
    #compare between preprocess_img, original image, gray image
    print("origin image")
    plt.imshow(X[0,:,:,::-1])
#     x_ori = plot_image(X,1,num_img)
        
    #gray
#     print("gray image ")
#     x_gray=[]
#     for ind,img in enumerate(X):
#         x = cv2.cvtColor(img,cv2.COLOR_RGB2GRAY)
#         x_gray.append(x)
#     x_gray = np.asarray(x_gray)
    
#     print(f"x_gray shape:  nmax: {np.max(x_gray[0])}, nmin: {np.min(x_gray[0])}")
#     plot_image(x_gray,1,num_img)
    
    #rotated
    X_rotated = []
    for i in range(0,6):
        x_rotated = transform_rotate(X)
        X_rotated.append(x_rotated)
    X_rotated = np.asarray(X_rotated)
    print(X_rotated.shape)
    
    X_rotated = X_rotated.reshape(num_img,32,32,3)
    print(X_rotated.shape)
    print(f"rotated  nmax: {np.max(X_rotated[0])}, nmin: {np.min(X_rotated[0])}")
    plot_image(X_rotated,1,num_img)
    
    # image distortion
    X_distortion=[]
    for i in range(0,6):
        x_dis = transform_projection(X,intensity=0.75)
        X_distortion.append(x_dis)
    print(f'distorted nmax: {np.max(X_distortion[0])}, nmin: {np.min(X_distortion[0])}')
    X_distortion = np.asarray(X_distortion)
    X_distortion = X_distortion.reshape(num_img,32,32,3)
    plot_image(X_distortion,1,num_img)
    
    #brightness
    X_brightness = []
    for i in range(0,6):
        x_bri = transform_brightness(X)
        X_brightness.append(x_bri)
    print(f"brightness nmax: {np.max(X_brightness[0])}, nmin: {np.min(X_brightness[0])}")
    X_brightness = np.asarray(X_brightness)
    X_brightness = X_brightness.reshape(num_img,32,32,3)

    plot_image(X_brightness,1,num_img)
    
        
    #crop_image
    X_crop = []
    for i in range(0,6):
        x_crop = crop_image(X)
        X_crop.append(x_crop)
    print("plot crop image")
    X_crop = np.asarray(X_crop)
    X_crop = X_crop.reshape(num_img,32,32,3)

    plot_image(X_crop,1,num_img)
    
# #Test
# index = np.random.randint(0,39000,size=10,dtype=int)
# print("indexx: ",index)
# X = images[index]
# print("X.shape: ",X.shape)
# plot_image(X,1,9)
# plot_compare_transform_data(X)
# imgs = preprocess_input_gray_norm(X)
# print(imgs)

In [4]:
# test plot image
#readdata
def read_data(pickle_fn):
    if os.path.isfile(pickle_fn):
        data= pickle.load(open(pickle_fn,'rb'))
        print(f'done read_dataa from {pickle_fn}')
        return data
    else:
        raise Exception("the file data enhancement is not found")

In [5]:
# data = read_data(r"..\data_enhancement\data_input_gray_no_crop_full_15_12_2018.p");
# print(data.keys())


In [6]:
# X = data['ims'] 
# X.shape
# index = np.random.randint(0, 20000,size = 20)
# plot_image(X[index],3,5)

In [7]:
# d2 = read_data(r"D:\Downloads\mydata_no_crop.p")
# X = d2['ims']
# index = np.random.randint(0, 20000,size = 1)
# print('index:',index)
# # index: 7346, 5343, 11024
# for i in range (0,2):
#     plot_compare_transform_data_for_khoaluan(X[5343])

In [8]:
def choose_transform_function(func_key):
    """
    #arguments
        func_key: the key represent transform function
    #return
        one of transform function or None 
    """
    trans_func={
        0:transform_rotate,
        1:transform_projection,
        2:transform_brightness,
        3: crop_image
    }
    func = trans_func.get(func_key,None)
    return func

def generate_data(images,labels,ratio=1.3):
    """
    #arguments
        param images: data will be increased
        param labels: labels of data
        param ratio: the rate of data increased, if the highest sample is 100 and ratio = 5,
        the data will be around 5*100*num_clases
    #return:
        images, labels is generated
    #how it work:
        
    """
    #Count number of occurrences, labels must be 1 demention
    inputs_per_class  = np.bincount(labels)
    max_inputs = np.max(inputs_per_class) *ratio
    
    num_classes = len(inputs_per_class)
    
    new_images =[]
    new_labels =[]
    
    count_fun_key =0;
    
    #one labels
    for index_label in range(num_classes):
        random_size_different = np.random.randint(0,100,size = 1)
        input_ratio = math.ceil((max_inputs - inputs_per_class[index_label])/inputs_per_class[index_label])
        input_different = max_inputs - inputs_per_class[index_label] + random_size_different
        input_different = input_different[0].astype(int)
        print(f"generating class: {index_label}, ratio: {input_ratio}, \
        current: {inputs_per_class[index_label]}, max samples: {max_inputs}\
        input_different: {input_different}")
        
        if input_ratio < 1:
            continue
        if input_different < 50: 
            continue
            
        index_input = np.where(labels==index_label)[0]
        len_index_input = np.size(index_input)
        
        index_of_index_input = np.random.randint(0,len_index_input,size = input_different)
        index_input_generate = index_input[index_of_index_input]
        
        index_function_trasform = np.random.randint(0,4, size=input_different)
        print(type(input_different))
        for i in range(0,input_different):
            if i%100 == 0:
                print('generate i: ',i)
            img = images[index_input_generate]
            func_key = index_function_trasform[i]
            trans_func = choose_transform_function(func_key)
            img_trans = trans_func(img)
            new_images.extend(img_trans)
            new_labels.append(index_label)

        
        #one img
#         for img in images[index_input]:
#             #the key func: [0:3)
#             trans_func_keys = np.random.randint(low=0,high=4,size=input_ratio)
# #             print("trans_func_keys: ",trans_func_keys)
#             for func_key in trans_func_keys:
#                 trans_func = choose_transform_function(func_key)
# #                 print(f"count funtion key: {count_fun_key}; trans_func: {trans_func.__name__}")
#                 count_fun_key +=1
#                 if trans_func is None:
#                     raise Exception("key of transform function is error")
#                 img_trans = trans_func(img)
#                 new_images.extend(img_trans)
#                 new_labels.append(index_label)
    
    new_images = np.asarray(new_images)
    new_labels = np.asarray(new_labels)
    print(f"generate data: new_images shape {new_images.shape}, new_labels shape {new_labels.shape}")
    return new_images,new_labels

def save_data_enhancement(ims,lbs, pickle_fn):
    """
    #argument:
        ims: images
        lbs: labels
        file_name: file name  of data is saved
    """
    if not os.path.exists(os.path.dirname(pickle_fn)):
        try:
            os.makedirs(os.path.dirname(pickle_fn))
        except OSError as exc: # Guard against race condition
            if exc.errno != errno.EEXIST:
                raise
    
    ims = np.asarray(ims)
    lbs = np.asarray(lbs)
    data ={'ims':ims,'lbs':lbs}
    with open(pickle_fn,'wb') as f:
        check =pickle.dump(data,f)
    print(f'done save_data into {pickle_fn}')
        
def read_data_enhancement(pickle_fn):
    """
    #argument: 
        file_name: file's name is loaded
    #return: 
        images, labels
    """
    if os.path.isfile(pickle_fn):
        data= pickle.load(open(pickle_fn,'rb'))
        print(f'done read_dataa from {pickle_fn}')
        return data
    else:
        raise Exception("the file data enhancement is not found")
def concatenate_data(data_raw, data_generate):
    """
    #argument: 
        data_raw: data raw
        data_generate: data is generated
    #return
        imgs_raw, data_generate are concated\
        
    uses for label and imgs
    """
#     if not data_raw[].shape[1:4]==data_generate[].shape[1:4]:
#         raise Exception("the shape of data_raw,data_generate are not same")
     
    #shape ims: (num_img,img_size,img_size,channel) => concatenate with axis =0
    data = np.append(data_raw,data_generate,axis=0)
    
    return data
    

In [9]:
data = read_data(r"..\data_pickle\mydata_no_crop.p");
print(data.keys())
print(data['ims'].shape)
print(data['lbs'].shape)

done read_dataa from ..\data_pickle\mydata_no_crop.p
dict_keys(['ims', 'lbs', 'num_class', 'lbs_name'])
(39209, 32, 32, 3)
(39209,)


In [10]:
#generate and save data
file_name_data = r"../data_pickle/new_data_gen_2012.p"
# file_name_data = "crop\data_crop_color.p"

new_images,new_labels = generate_data(data['ims'],data['lbs'],ratio=1.2)
print(f'new_images {new_images}, new_labels {new_labels}')

generating class: 0, ratio: 12,         current: 210, max samples: 2700.0        input_different: 2492
<class 'numpy.int32'>
generate i:  0
generate i:  100
generate i:  200
generate i:  300
generate i:  400
generate i:  500
generate i:  600
generate i:  700
generate i:  800
generate i:  900
generate i:  1000
generate i:  1100
generate i:  1200
generate i:  1300


MemoryError: 

In [None]:
save_data_enhancement(new_images,new_labels,file_name=file_name_data)

In [None]:
#load_data_generate 
data_generate = read_data_enhancement(file_name=file_name_data)

In [None]:
print(data_generate['ims'].shape)
print(data_generate['lbs'].shape)
# print(images.shape)
#concate data
x_generate =data_generate['ims']
y_generate =data_generate['lbs']
print("max pixel: ",np.max(x_generate))
X = concatenate_data(images,x_generate)
y=concatenate_data(labels,y_generate)
print(X.shape)
print(y.shape)

file_name_full = "full\data_3009.p"
save_data_enhancement(ims=X,lbs=y,file_name=file_name_full)

y=convert_onehot_numpy(y,num_classes=43)
print(y.shape)
# X,y =preprocess_dataset(X,y,is_shuffle=True)
#shuffle 

X,y = shuffle(X,y)
print("done shuffle")
X = preprocess_input_gray_norm(X)
file_name_full = "full\data_2012_processed_gray_norm.p"
save_data_enhancement(ims=X,lbs=y,file_name=file_name_full)

print(X.shape)
print("done preprocess")

In [None]:
print(np.max(X))
print(X.shape)
print(y.shape)

In [22]:
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers.convolutional import Conv2D
from keras.layers.pooling import MaxPooling2D
from keras.optimizers import SGD
from keras import backend as K
IMG_SIZE=32
NUM_CLASSES=43
def cnn_model():
    model = Sequential()

    model.add(Conv2D(32, (3, 3), padding='same',
                     input_shape=( IMG_SIZE, IMG_SIZE,1),
                     activation='relu'))
    model.add(Conv2D(32, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.2))

    model.add(Conv2D(64, (3, 3), padding='same',
                     activation='relu'))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.2))

    model.add(Conv2D(128, (3, 3), padding='same',
                     activation='relu'))
    model.add(Conv2D(128, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.2))

    model.add(Flatten())
    model.add(Dense(512, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(NUM_CLASSES, activation='softmax'))
    return model

In [23]:
from keras import optimizers
from keras.callbacks import LearningRateScheduler, ModelCheckpoint

model = cnn_model()
lr=0.001
adam = keras.optimizers.Adam(lr=lr, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False)
model.compile(loss='categorical_crossentropy',
              optimizer=adam,
              metrics=['accuracy'])

# def lr_schedule(epoch):
#     return lr * (0.1 ** int(epoch / 10))

batch_size = 32
epochs = 30

model.fit(X, y,
          batch_size=batch_size,
          epochs=epochs,validation_split=0.2,
          callbacks=[ModelCheckpoint('..\model\model_gray_no_crop.h5', save_best_only=True)]
          )

model.save("my_model_gray_no_crop.h5")

Train on 95612 samples, validate on 23904 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [25]:
import pandas as pd
import os
import cv2
import numpy as np
test = pd.read_csv(r"..\data\test\GTSRB\GT-final_test.csv", sep=';')

# Load test dataset
X_test = []
y_test = []
i = 0
print(list(test))
# print(list(test['Filename']))
# print(list(test['ClassId']))

for file_name, class_id in zip(list(test['Filename']), list(test['ClassId'])):
    img_path = os.path.join(r'..\data\test\GTSRB', file_name)
#     print(img_path)
    img = cv2.imread(img_path)
    img = cv2.resize(img,(32,32))
    red_channel =img[:,:,0]
    green_channel = img[:,:,1]
    blue_channel = img[:,:,2]

    #convert image to gray by red, greeen, blue channel
    gray_img = 0.2989 * red_channel + 0.5870 * green_channel + 0.1140 * blue_channel

    if (np.max(gray_img)>1):
        gray_img = gray_img/255.
  
    X_test.append(gray_img)
    y_test.append(class_id)

X_test = np.array(X_test)
X_test=X_test.reshape(X_test.shape +(1,))
y_test = np.array(y_test)
print(y_test.shape)
print(X_test.shape)


['Filename', 'Width', 'Height', 'Roi.X1', 'Roi.Y1', 'Roi.X2', 'Roi.Y2', 'ClassId']
(12630,)
(12630, 32, 32, 1)


In [26]:

#predict
y_pred = model.predict_classes(X_test)
print(y_pred)
print(np.max(y_pred))
acc = np.sum(y_pred == y_test) / np.size(y_pred)
print("Test accuracy = {}".format(acc))

[16  1 38 ...  6  7 10]
42
Test accuracy = 0.9770387965162312


In [39]:
# from keras.models import load_model
# model = load_model("..\my_model.h5")
# print(model.get_weights())
# print(model.summary())
# predict and evaluate
# y_pred = model.predict_classes(X_test[11743].reshape(-1,32,32,3))
# # print(y_pred.shape)
# print(y_pred)
# print(np.max(y_pred))
# acc = np.sum(y_pred == y_test) / np.size(y_pred)
# print("Test accuracy = {}".format(acc))

# print(y_pred == y_test)

[40]


In [12]:
32*32*3


3072

In [34]:
if 4%2 == 0:
    print('1')

1
