# 1. Common import

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

%matplotlib inline

# Apply the 'ggplot' style sheet, then set the default figure size to (15, 7)
# (matplotlib interprets figsize as width, height in inches).
plt.style.use('ggplot')
plt.rcParams['figure.figsize'] = (15, 7)

# 2. Frame size

In [None]:
# Frame dimensions: 1920 x 1080.
# NOTE(review): presumably the pixel size of the images under src/Frame -- confirm.
FRAME_WIDTH = 1920
FRAME_HEIGHT = 1080

# 3. Filename from index

In [None]:
# Minimum number of characters in the numeric part of a file name;
# shorter indices are left-padded with zeros.
ZERO_PADDING = 6


def get_filename(index):
    """Return the path of the text file for `index`, e.g. 42 -> "src/Csv/000042.txt"."""
    padded_index = str(index).zfill(ZERO_PADDING)
    return "src/Csv/{}.txt".format(padded_index)


def get_framename(index):
    """Return the path of the frame image for `index`, e.g. 42 -> "src/Frame/000042.jpg"."""
    padded_index = str(index).zfill(ZERO_PADDING)
    return "src/Frame/{}.jpg".format(padded_index)

# 4. Pedestrian info

In [None]:
# Number of pedestrians; valid pedestrian indices are 0 .. NUMBER_OF_PEDESTRIANS - 1.
NUMBER_OF_PEDESTRIANS = 12273


def download_pedestrian(index):
    """Read the file of one pedestrian into a DataFrame.

    The path comes from get_filename(index); the first column of the file
    becomes the DataFrame index.

    Raises AssertionError when `index` is outside [0, NUMBER_OF_PEDESTRIANS)
    (the check is an `assert`, so it is skipped under `python -O`).
    """
    assert 0 <= index < NUMBER_OF_PEDESTRIANS, (
        "pedestrian number should be between 0 and {max}; given number: {id}"
        .format(max=NUMBER_OF_PEDESTRIANS - 1, id=index)
    )
    return pd.read_csv(get_filename(index), index_col=0)

# 5. Create and save plot.bar

In [None]:
# Grouped bar chart: for each of the 25 labels a red "Test" bar directly
# followed by a blue "Train" bar, saved as a PNG.
# NOTE(review): test_by_labels_normalized / train_by_labels_normalized are
# defined outside this section; each is expected to hold 25 values.
ind = np.arange(25)
width = 0.4
# align='edge' makes x the LEFT edge of each bar, so the two bars of a label
# span [ind - width/2, ind + width/2] and [ind + width/2, ind + 3*width/2] and
# the tick at ind + width/2 sits exactly between them. This was the implicit
# default before matplotlib 2.0; since 2.0 the default is 'center', which
# would put the tick in the middle of the blue bar instead.
test_bar = plt.bar(ind - width/2, test_by_labels_normalized, width, color='r', align='edge')
train_bar = plt.bar(ind + width/2, train_by_labels_normalized, width, color='b', align='edge')

plt.ylabel('Percent')
plt.xlabel('Label number')
plt.title('Pedestrian percent by group and label')
# Labels are numbered 1..25 on the axis.
plt.xticks(ind + width/2., range(1, 26))
plt.legend((test_bar[0], train_bar[0]), ('Test', 'Train'))
plt.savefig('src/Plots/161002_pedestrian_separation_percent_by_label.png')

# 6. Distances between arrays

In [None]:
# array1: [sX1; sY1; sX2; sY2; ...]
# array2: [eX1; eY1; eX2; eY2; ...]
# output: [dist((sX1, sY1), (eX1, eY1)), dist((sX1, sY1), (eX1, eY1)),
#          dist((sX2, sY2), (eX2, eY2)), dist((sX2, sY2), (eX2, eY2)), ...]

def distance_for_each_point(array1, array2):
    """Return the Euclidean distance between matching points, once per coordinate.

    Both inputs hold interleaved coordinates along their last axis
    ([X1, Y1, X2, Y2, ...]). The result has the same layout: the distance
    between point k of `array1` and point k of `array2` is written to both
    the X and the Y slot of that point.

    Inputs may be 1-D (one row of points) or N-D (the computation is applied
    independently along the last axis). Array-likes such as lists or
    DataFrames are accepted.

    Raises:
        AssertionError: if the inputs differ in length or shape.
        ValueError: if the last axis has an odd number of values.
    """
    array_length = len(array1)
    len2 = len(array2)
    assert (array_length == len2), "Arrays' sizes have to be equal (array1: {}, array2: {})".format(array_length, len2)

    first = np.asarray(array1)
    second = np.asarray(array2)
    assert first.shape == second.shape, "Arrays' shapes have to be equal (array1: {}, array2: {})".format(
        first.shape, second.shape)

    # View the last axis as (point, coordinate) pairs; the explicit pair count
    # (instead of -1) keeps the reshape well defined for empty inputs.
    pair_shape = first.shape[:-1] + (first.shape[-1] // 2, 2)
    point_distance = np.linalg.norm((first - second).reshape(pair_shape), axis=-1)

    # Duplicate every distance so the output lines up with the X/Y layout.
    return np.repeat(point_distance, 2, axis=-1)

# 7. Sample number in sets

In [2]:
# Number of samples in each set (test / train)

TEST_SAMPLE_NUMBER = 51731
TRAIN_SAMPLE_NUMBER = 257187

# 8. Current time

In [None]:
from time import localtime, strftime
# Print the current local time formatted as "YYYY-MM-DD HH:MM:SS".
print(strftime("%Y-%m-%d %H:%M:%S", localtime()))

# 9. Transform data

In [None]:
# Data may be laid out as [n_features * n_samples] or as [n_samples * n_features];
# to_model and from_model convert between these two layouts.
def to_model(df):
    """Fold a table with paired columns into one 10-value row per column pair.

    `df` is transposed, then every two consecutive columns of `df` are
    concatenated into one row: the values of column 2k followed by the values
    of column 2k + 1. The row width is hard-coded to 10, so `df` must have
    exactly 5 rows; otherwise the reshape raises ValueError.

    Returns an array of shape (df.shape[1] / 2, 10).
    """
    pair_count = int(df.shape[1] / 2)
    column_major = np.transpose(np.array(df))
    return column_major.reshape((pair_count, 10))
def from_model(npa):
    """Undo to_model: split every 10-value row into two 5-value columns.

    `npa` must be an ndarray whose total size is len(npa) * 10. Each row is
    cut into two halves of 5 values, and the halves become consecutive
    columns of the result.

    Returns an array of shape (5, 2 * len(npa)).
    """
    column_count = len(npa) * 2
    as_columns = npa.reshape((column_count, 5))
    return as_columns.T

# 10. Metric

In [None]:
def distance(test_results, predicted_results):
    """Return the summed point-to-point distance averaged over the test samples.

    distance_for_each_point reports every point distance twice (once per
    coordinate), hence the final division by 2. The total is divided by
    TEST_SAMPLE_NUMBER, so it is a per-sample value only when the inputs
    cover the whole test set.
    """
    expected = np.array(test_results)
    per_coordinate = distance_for_each_point(expected, predicted_results)
    total = per_coordinate.sum()
    return total / TEST_SAMPLE_NUMBER / 2

# 11. Find pedestrian and first frame by path

In [1]:
def find_by_path(path, search_in_test=True):
    """Find the first pedestrian whose data contains `path` as a contiguous run.

    Args:
        path: coordinates to look for; flattened before matching, so both a
            flat sequence and a 2-D array of points are accepted.
        search_in_test: if True, only pedestrians whose 'category' in
            'src/pedestrian_labels_and_test_and_train_separation.csv' equals
            'test' are searched (by their row position in that file);
            otherwise all NUMBER_OF_PEDESTRIANS pedestrians are searched.

    Returns:
        A tuple (pedestrian index, index label of the row where the match
        starts), or None when no pedestrian contains the path.

    NOTE(review): only windows starting at an even flat offset are compared,
    and the window number is used as a row number. Both assume every
    pedestrian file has exactly two data columns -- confirm.
    """
    if search_in_test:
        pedestrian_separation = pd.read_csv('src/pedestrian_labels_and_test_and_train_separation.csv', index_col=0)
        indices = np.where(np.array(pedestrian_separation['category']) == 'test')[0]
    else:
        indices = np.arange(NUMBER_OF_PEDESTRIANS)

    path = np.asarray(path).flatten()
    path_length = len(path)

    for i in indices:
        df = download_pedestrian(i)
        data = np.array(df).flatten()
        window_count = len(data) - path_length + 1
        if window_count <= 0:
            # This track is shorter than the path, so it cannot contain it.
            # Without this guard as_strided receives a negative dimension and
            # raises ValueError, aborting the whole search.
            continue

        # Every length-`path_length` window over the flat data, built without
        # copying; [::2] keeps the windows that start at an even offset.
        windows = np.lib.stride_tricks.as_strided(data, shape=(window_count, path_length),
                                                  strides=(data.dtype.itemsize,) * 2)[::2, :]

        matches = np.where(np.all(windows == path, axis=1))[0]
        if len(matches) > 0:
            return (i, df.index.values[matches[0]])

    return None

# 12. Make view

In [None]:
def make_view(data, length=4, only_odd_rows=True):
    """Return all sliding windows of `length` consecutive values over flattened `data`.

    Args:
        data: array-like; it is copied and flattened first.
        length: number of values per window.
        only_odd_rows: if True keep only every second window, starting with
            the first one (windows 0, 2, 4, ... -- the 1st, 3rd, 5th, ...);
            if False keep every window.

    Returns:
        A 2-D array with one window per row. When `data` holds fewer than
        `length` values an empty array of shape (0, length) is returned
        instead of raising.
    """
    flat = np.array(data).flatten()
    window_count = len(flat) - length + 1
    if window_count <= 0:
        # Not enough values for a single window; as_strided would be handed a
        # negative dimension and raise ValueError.
        return np.empty((0, length), dtype=flat.dtype)

    # Both strides equal one item: row r starts at flat[r] and reads `length`
    # consecutive values, so the rows overlap without copying.
    windows = np.lib.stride_tricks.as_strided(flat, shape=(window_count, length),
                                              strides=(flat.dtype.itemsize,) * 2)
    return windows[::2 if only_odd_rows else 1, :]

# 13. Draw lines in frame

In [None]:
def draw_lines(frame_number, paths, colors, image_name, draw_line=True):
    """Draw paths on a frame image and save the result under src/Images.

    Args:
        frame_number: index of the frame; the image is opened from
            get_framename(frame_number).
        paths: one path per entry, paired with `colors` by position.
        colors: fill colour for each path.
        image_name: name (without extension) of the saved JPG.
        draw_line: if True, each path is drawn as line segments of width 4,
            one per window of 4 consecutive values from make_view(path, 4).
            If False, every point gets an ellipse whose bounding box runs
            from the point to the point + 10.

    NOTE(review): Image and ImageDraw are not imported in this section;
    presumably they come from PIL and are imported elsewhere.
    """
    frame = Image.open(get_framename(frame_number))
    canvas = ImageDraw.Draw(frame)

    for track, track_color in zip(paths, colors):
        if not draw_line:
            for corner in track:
                bounding_box = tuple(np.append(corner, corner + 10))
                canvas.ellipse(bounding_box, fill=track_color)
            continue

        for segment in make_view(track, 4):
            canvas.line(tuple(segment), width=4, fill=track_color)

    frame.save("src/Images/{name}.jpg".format(name=image_name))

# 14. Baseline

In [None]:
# A first, simple algorithm to get a starting point.
# More about it can be found in 'baseline_distance_between_real_points_and_predicted.ipynb'.
def baseline(dataframe, start_point_index=0, number_of_points_to_return=5):
    """Extrapolate future points along a straight line.

    The direction is taken from the row at `start_point_index` to the last
    row of `dataframe`; the step length is the distance between the last two
    rows. Points whose start and end coincide get a zero motion vector, so
    they stay where they are.

    Args:
        dataframe: table whose rows are interleaved coordinates
            ([X1, Y1, X2, Y2, ...]), as expected by distance_for_each_point.
        start_point_index: row used as the start of the direction vector;
            must be less than the index of the last row.
        number_of_points_to_return: how many extrapolated rows to produce.

    Returns:
        An array with one row per extrapolated step (end point + 1, 2, ...
        times the motion vector).

    Raises:
        AssertionError: if `start_point_index` is not before the last row.
    """
    error_template = "Start point index should be less than last point. Start point index: {st}, last point index: {end}"
    assert (start_point_index < len(dataframe) - 1), error_template.format(st=start_point_index, end=len(dataframe) - 1)

    # Convert once instead of once per extracted row.
    points = np.array(dataframe)
    start_point = points[start_point_index]
    last_but_one_point = points[-2]
    end_point = points[-1]

    overall_distance = distance_for_each_point(end_point, start_point)
    # Stationary points produce 0/0 here; the result is overwritten with 0 on
    # the next line, so the RuntimeWarning would only be noise.
    with np.errstate(divide='ignore', invalid='ignore'):
        normalized_motion_vector = (end_point - start_point) / overall_distance
    normalized_motion_vector[overall_distance == 0] = 0
    last_vector_length = distance_for_each_point(end_point, last_but_one_point)

    motion_vector = normalized_motion_vector * last_vector_length
    return np.array([end_point + step * motion_vector
                     for step in range(1, number_of_points_to_return + 1)])

# Run the baseline with start_point_index taken from range(4).
# NOTE(review): compare_results, test_results and test_data are defined
# outside this section; presumably compare_results calls `function` once per
# value in list_of_values -- confirm.
compare_results(
    function=baseline,
    test_results=test_results,
    parameter_name="start_point_index",
    list_of_values=range(4),
    dataframe=test_data,
)

# 15. kNN

In [None]:
from sklearn.neighbors import KNeighborsRegressor

# n_neighbors by default is 5;         to compare: range(5, 55, 5)
# weights     by default is 'uniform'; to compare: ['uniform', 'distance']
# algorithm   by default is 'auto';    to compare: ['auto', 'ball_tree', 'kd_tree', 'brute']
# note: 'brute' causes a memory error

def kneighbors_regressor(train_data, train_results, test_data, **kwargs):
    """Fit a KNeighborsRegressor on the training set and predict for `test_data`.

    All three tables are converted with to_model before they reach the
    estimator, and the prediction is converted back with from_model.
    Any `kwargs` are passed to the KNeighborsRegressor constructor.
    """
    model = KNeighborsRegressor(**kwargs)
    features = to_model(train_data)
    targets = to_model(train_results)
    model.fit(features, targets)

    predictions = model.predict(to_model(test_data))
    return from_model(predictions)

# Run the kNN regressor with n_neighbors taken from range(5, 50, 5).
# NOTE(review): compare_results and the train/test tables are defined outside
# this section.
compare_results(
    function=kneighbors_regressor,
    test_results=test_results,
    parameter_name='n_neighbors',
    list_of_values=range(5, 50, 5),
    train_data=train_data,
    train_results=train_results,
    test_data=test_data,
)

# 16. Random forest regression

In [None]:
from sklearn.ensemble import RandomForestRegressor

def random_forest_regressor(n_estimators, train_data, train_results, test_data, **kwargs):
    """Fit a RandomForestRegressor on the training set and predict for `test_data`.

    All three tables are converted with to_model before they reach the
    estimator, and the prediction is converted back with from_model.

    Args:
        n_estimators: number of trees in the forest.
        train_data, train_results, test_data: tables accepted by to_model.
        **kwargs: extra RandomForestRegressor constructor arguments, as in
            kneighbors_regressor (e.g. random_state for reproducible runs,
            max_depth). With none given, behaviour is unchanged.
    """
    est = RandomForestRegressor(n_estimators=n_estimators, **kwargs)
    est.fit(to_model(train_data), to_model(train_results))
    return from_model(est.predict(to_model(test_data)))

# Run the random forest with n_estimators taken from range(10, 50, 10).
# NOTE(review): compare_results and the train/test tables are defined outside
# this section.
compare_results(
    function=random_forest_regressor,
    test_results=test_results,
    parameter_name="n_estimators",
    list_of_values=range(10, 50, 10),
    train_data=train_data,
    train_results=train_results,
    test_data=test_data,
)