A script for training a model to keep a car in its lane. The best-performing models were random forest and gradient boosting. Since the random forest runs faster, it was chosen.

In [None]:
import cv2
import numpy as np
from os import listdir
from matplotlib import pyplot as plt
import pandas as pd

In [None]:
# Folder whose entries make_x_y() lists and loads as images.
images_dir = 'nimages'
# Target size handed to cv2.resize for every loaded image.
img_size = (128, 128)

In [None]:
# Load the control log (presumably joystick readings, judging by the file name).
# The code in this file relies on its 'axis', 'time' and 'val' columns.
df = pd.read_csv('joy.csv')
# NOTE(review): names and filters look crossed -- dfz keeps the axis 'x' rows
# and dfy keeps the axis 'z' rows. Confirm which axis each frame should hold.
dfz = df.loc[(df['axis'] == 'x')]
dfy = df.loc[(df['axis'] == 'z')]

In [None]:
def shuffle_in_unison(a, b):
    """Return shuffled copies of ``a`` and ``b`` that share one permutation.

    Both inputs must be NumPy arrays of equal length along the first axis.
    A single random permutation is drawn from NumPy's global RNG and applied
    to both, so rows that were paired before the shuffle stay paired after.
    The inputs themselves are not modified.
    """
    # courtesy http://stackoverflow.com/users/190280/josh-bleecher-snyder
    assert len(a) == len(b)
    out_a = np.empty(a.shape, dtype=a.dtype)
    out_b = np.empty(b.shape, dtype=b.dtype)
    destination = np.random.permutation(len(a))
    # Scatter: the row at position i lands at position destination[i].
    out_a[destination] = a
    out_b[destination] = b
    return out_a, out_b

In [None]:
def create_Xt_Yt(X, y, percentage=0.8):
    """Split ``X`` and ``y`` into leading train and trailing test portions.

    The first ``int(len(X) * percentage)`` items go to the training part and
    the remainder to the test part. No shuffling happens here.

    Returns:
        Tuple ``(X_train, X_test, Y_train, Y_test)``.
    """
    cut = int(len(X) * percentage)
    head = slice(0, cut)
    tail = slice(cut, None)
    return X[head], X[tail], y[head], y[tail]

In [None]:
def make_x_y():
    """Build shuffled train/test arrays from the image folder and control log.

    Every entry of ``images_dir`` is read with OpenCV, resized to ``img_size``
    and flattened into one feature row. The file name without its last four
    characters is parsed as a timestamp; the label is the most recent ``val``
    in ``dfz`` whose ``time`` is not later than that timestamp, or ``0.0``
    when no such row exists.

    A file that cannot be processed (unreadable image, non-numeric name, ...)
    is reported with ``print`` and skipped entirely, so features and labels
    always stay aligned.

    Returns:
        ``((x_train, y_train), (x_test, y_test))`` as NumPy arrays, using the
        default split of ``create_Xt_Yt``.
    """
    x, y = [], []
    for i, imgn in enumerate(listdir(images_dir)):  # iteration over images in a folder
        try:
            img = cv2.imread(images_dir + '/' + imgn, cv2.IMREAD_ANYCOLOR)
            # cv2.resize raises when imread returned None (unreadable file).
            features = cv2.resize(img, img_size).ravel()

            timestamp = np.float64(imgn[:-4])  # get time from image's name
            earlier_vals = np.array(dfz.loc[dfz['time'] <= timestamp]['val'])
            # No reading at or before this frame: fall back to a neutral 0.0.
            label = earlier_vals[-1] if len(earlier_vals) else 0.0
        except Exception as e:
            print(e)
            print(i)
            continue

        # Append both only after both were computed, so a failure above can
        # never leave x and y with different lengths.
        x.append(features)
        y.append([label])

    x = np.array(x)
    y = np.array(y)

    x, y = shuffle_in_unison(x, y)  # shuffle dataset
    x_train, x_test, y_train, y_test = create_Xt_Yt(x, y)

    return (np.array(x_train), np.array(y_train)), (np.array(x_test), np.array(y_test))

In [None]:
# Build the shuffled train/test arrays from the image folder and the control log.
(x_train, y_train), (x_test, y_test) = make_x_y()
# Sanity check: (number of training samples, flattened values per image).
print(x_train.shape)

In [None]:
import seaborn as sns
# Plot the distribution of the training targets (column 0 of y_train).
# NOTE(review): distplot is deprecated in newer seaborn releases; histplot /
# displot are the documented replacements -- check the installed version
# before switching.
sns.distplot(y_train[:,0])

In [None]:
from sklearn.model_selection import learning_curve
from sklearn.metrics import r2_score, make_scorer
from sklearn.tree import DecisionTreeRegressor

def plot_with_err(x, data, **kwargs):
    """Plot the row-wise mean of ``data`` against ``x`` with a +/- 1 std band.

    ``data`` is reduced along axis 1. Extra keyword arguments are forwarded
    to ``plt.plot``; the shaded band reuses the colour of the plotted line.
    """
    center = data.mean(1)
    spread = data.std(1)
    curve = plt.plot(x, center, '-', **kwargs)[0]
    plt.fill_between(
        x,
        center - spread,
        center + spread,
        edgecolor='none',
        facecolor=curve.get_color(),
        alpha=0.2,
    )
    
def plot_learning_curve(degree=2, alpha=0.01):
    """Plot 5-fold learning curves of a decision-tree regressor, scored by R^2.

    The regressor is evaluated on the module-level ``x_train`` / ``y_train``
    at 20 training-set fractions from 5% to 100%. Training and validation
    scores are drawn as mean lines with a +/- 1 std band.

    Args:
        degree: Unused; kept so existing calls keep working.
        alpha: Unused; kept so existing calls keep working.
    """
    r2_scorer = make_scorer(r2_score)
    train_sizes = np.linspace(0.05, 1, 20)
    reg = DecisionTreeRegressor(max_depth=30, max_features=100, random_state=10, min_samples_leaf=10)
    N_train, val_train, val_test = learning_curve(
        reg, x_train, y_train,
        train_sizes=train_sizes, cv=5, scoring=r2_scorer,
    )
    plot_with_err(N_train, val_train, label='training scores')
    plot_with_err(N_train, val_test, label='validation scores')
    plt.xlabel('Training Set Size')
    # The scorer is r2_score, so label the axis with that metric (was 'AUC').
    plt.ylabel('R^2 score')
    plt.legend()

# Draw the learning curve. NOTE(review): plot_learning_curve accepts degree and
# alpha but never reads them, so the values passed here have no effect.
plot_learning_curve(degree=2, alpha=10)

In [None]:
from sklearn.ensemble import RandomForestRegressor
# Final model: a 100-tree random forest with a fixed seed, trained on 4 workers.
reg = RandomForestRegressor(max_features=450, random_state=100, n_estimators=100, n_jobs=4, min_samples_leaf=1)
# NOTE(review): y_train is built as an (n, 1) column; scikit-learn may warn
# that a 1-D target was expected -- confirm whether to pass y_train.ravel().
reg.fit(x_train, y_train)
# Print the model's score on the training split, then on the held-out split.
print(reg.score(x_train, y_train), reg.score(x_test, y_test))

In [None]:
import pickle
# Persist the fitted forest to 'tree.pickle' in the working directory.
# NOTE(review): only unpickle this file from a trusted source -- loading a
# pickle executes arbitrary code.
with open('tree.pickle', 'wb') as f:
    pickle.dump(reg, f)