In [6]:
import pandas as pd
import numpy as np
import os
import pickle

import matplotlib.pyplot as plt

import cv2

from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from sklearn.ensemble import RandomForestRegressor
from skimage.feature import hog

new_shape = (64, 64)

# Creating the model
print("\nCreating and compiling the model...")
m_depth = 10
model = RandomForestRegressor(n_estimators=5000, n_jobs = -1, max_depth = m_depth)


## function extract the box of the plate and read the image
def process(input_file):
    with open(input_file, 'r') as f:
        a = f.readlines()[0]
    a = a.split("\t")
    name = a[0]
    bbox = list(map(int, a[1:5]))
    img = cv2.imread(input_file[:-4] + '.jpg')
    return img, bbox

## resize the image box
def resize_im_box(image, box, new_shape):
    resized_img = cv2.resize(image, new_shape)
    
    scale_y = new_shape[0] / image.shape[0]
    scale_x = new_shape[1] / image.shape[1]
    
    resized_box = box.copy()
    resized_box[0] = int(box[0]*scale_x)
    resized_box[2] = int(box[2]*scale_x)
    resized_box[1] = int(box[1]*scale_y)
    resized_box[3] = int(box[3]*scale_y)
    
    return resized_img, resized_box

### extract HOG features to improve the model
def hog_feature(img_array, resize=(16,16)):
    """extract hog feature from an image.
    Args:
        img_array: an image array.
        resize: size of the image for extracture.  
    Return:
    features:  a ndarray vector.      
    """
    img = cv2.cvtColor(img_array, cv2.COLOR_BGR2GRAY)
    bins = 9
    cell_size = (8, 8)
    cpb = (2, 2)
    norm = "L2"
    features = hog(img, orientations=bins, pixels_per_cell=cell_size,
                        cells_per_block=cpb, block_norm=norm, transform_sqrt=True)
    return features

def make_feauture(img):
    hog_f = hog_feature(img)
    f = np.hstack([hog_f, np.array([img.shape[0], img.shape[1]])])
    return f
   

#### Aply defined functions on the different datasets, we have
print("\nLoading EU images...")
eu_img_buffer = []
eu_box_buffer = []

for input_file in os.listdir("endtoend/eu/"):
    if input_file.endswith('.txt'):
        img, box = process("endtoend/eu/" + input_file)
        img, box = resize_im_box(img, box, new_shape = new_shape)
        eu_img_buffer.append(img)
        eu_box_buffer.append(box)
        
us_img_buffer = []
us_box_buffer = []

print("\nLoading US images...")

for input_file in os.listdir("endtoend/us/"):
    if input_file.endswith('.txt'):
        img, box = process("endtoend/us/" + input_file)
        img, box = resize_im_box(img, box, new_shape = new_shape)
        us_img_buffer.append(img)
        us_box_buffer.append(box)

print("\nLoading BR images...")

br_img_buffer = []
br_box_buffer = []

for input_file in os.listdir("endtoend/br/"):
    if input_file.endswith('.txt'):
        img, box = process("endtoend/br/" + input_file)
        img, box = resize_im_box(img, box, new_shape = new_shape)
        br_img_buffer.append(img)
        br_box_buffer.append(box)


print("\n Loading data done")
remove_for_vis = 5
img_data_to_consider = eu_img_buffer[:-remove_for_vis] + us_img_buffer[:-remove_for_vis] + br_img_buffer[:-remove_for_vis]
box_data_to_consider = eu_box_buffer[:-remove_for_vis] + us_box_buffer[:-remove_for_vis] + br_box_buffer[:-remove_for_vis]

def process_line(line):
    splits = line.split('\t')
    name = splits[0]
    box = list(map(int, splits[1:-1]))
    img = cv2.imread("augmented/" + name)
    return img, box
    
print("\nLoading augmented images...\n") 

with open("augmented/augmented.txt", "r") as my_file:
    lines = my_file.readlines()
    

for i in range(len(lines)):
    line = lines[i]
    img, box = process_line(line)
    img, box = resize_im_box(img, box, new_shape = new_shape)
    img_data_to_consider.append(img)
    box_data_to_consider.append(box)

#img_viz = eu_img_buffer[-remove_for_vis:] + us_img_buffer[-remove_for_vis:] + br_img_buffer[-remove_for_vis:]
#box_viz = eu_box_buffer[-remove_for_vis:] + us_box_buffer[-remove_for_vis:] + br_box_buffer[-remove_for_vis:]

img_data_to_consider = list(map(make_feauture, img_data_to_consider))

print("Train-test split")

##### we have more than 13000 images (original and augmented), for a speed sake we gonna train and test only on 2300 images
a= 11000 ### to remove from our set
img_data_to_consider = img_data_to_consider[:-a]
box_data_to_consider = box_data_to_consider[:-a]

#### split to train and test set
X_train, X_test, y_train, y_test = train_test_split(np.array(img_data_to_consider), np.array(box_data_to_consider), 
                                                    test_size = 0.2, random_state = 42)

print("\nX_train shape : ", X_train.shape)
print("y_train shape : ", y_train.shape)
print("X_test shape : ", X_test.shape)
print("y_test shape : ", y_test.shape)

print('\n')

print("Training...\n")


bsize = 256
model.fit(X_train, y_train)




Creating and compiling the model...

Loading EU images...

Loading US images...

Loading BR images...

 Loading data done

Loading augmented images...

Train-test split

X_train shape :  (1839, 1766)
y_train shape :  (1839, 4)
X_test shape :  (460, 1766)
y_test shape :  (460, 4)


Training...



RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=10,
           max_features='auto', max_leaf_nodes=None,
           min_impurity_decrease=0.0, min_impurity_split=None,
           min_samples_leaf=1, min_samples_split=2,
           min_weight_fraction_leaf=0.0, n_estimators=5000, n_jobs=-1,
           oob_score=False, random_state=None, verbose=0, warm_start=False)

###### create a function to make the report of the performances and export the model

In [9]:
y_pred_train = model.predict(X_train)
y_pred_test = model.predict(X_test)

def make_report(y_train, y_pred_train, y_test, y_pred_test):
    r2_train = r2_score(y_true = y_train, y_pred = y_pred_train)
    r2_test = r2_score(y_true = y_test, y_pred = y_pred_test)
    
    mse_train = mean_squared_error(y_true = y_train, y_pred = y_pred_train)
    mse_test = mean_squared_error(y_true = y_test, y_pred = y_pred_test)
    
    mae_train = mean_absolute_error(y_true = y_train, y_pred = y_pred_train)
    mae_test = mean_absolute_error(y_true = y_test, y_pred = y_pred_test)
    
    return pd.DataFrame(
        data = {
            "R2" : [r2_train, r2_test],
            "MSE" : [mse_train, mse_test],
            "MAE" : [mae_train, mae_test]
        },
        index = ["train", "test"]
    )
    

print("Saving the model...")

try:
    model.save("rfr_m_{}".format(m_depth))
except:
    try:
        with open("rfr_file_m_{}.pickle", "rb") as rfr_file:
            pickle.dump(
                {
                    "rfr":model,
                    "report":make_report(y_train, y_pred_train, y_test, y_pred_test)
                }, 
                rfr_file
            )
    except:
        pass
    



Saving the model...


In [10]:
make_report(y_train, y_pred_train, y_test, y_pred_test)

Unnamed: 0,R2,MSE,MAE
train,0.917673,9.285874,2.297585
test,0.687397,37.811013,4.30236
