# 1. Common imports

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

%matplotlib inline

# Use the 'ggplot' style sheet for every figure created afterwards.
plt.style.use('ggplot')
# Default figure size as (width, height); matplotlib interprets figsize in inches.
plt.rcParams['figure.figsize'] = (15, 7)

# 2. Frame size

In [None]:
# Frame dimensions (presumably in pixels -- TODO confirm against the data source).
FRAME_WIDTH = 1920
FRAME_HEIGHT = 1080

# 3. Filename from index

In [None]:
ZERO_PADDING = 6  # minimum number of digits in a file's base name


def get_filename(index):
    """Return the path of the text file that belongs to `index`.

    The index is converted to a string and left-padded with zeros up to
    ZERO_PADDING characters, e.g. ``get_filename(5)`` gives
    ``"src/Csv/000005.txt"``.
    """
    padded_index = str(index).zfill(ZERO_PADDING)
    path_template = "src/Csv/{}.txt"
    return path_template.format(padded_index)

# 4. Pedestrian info

In [None]:
NUMBER_OF_PEDESTRIANS = 12273  # valid pedestrian indices are 0 .. NUMBER_OF_PEDESTRIANS - 1


def download_pedestrian(index):
    """Load the data file of a single pedestrian.

    Parameters
    ----------
    index : int
        Pedestrian number; must satisfy 0 <= index < NUMBER_OF_PEDESTRIANS.

    Returns
    -------
    pandas.DataFrame
        Contents of the file returned by ``get_filename(index)``, read with
        ``pd.read_csv`` using the first column as the index.

    Raises
    ------
    AssertionError
        If `index` is outside the valid range.
    """
    # The message used to start with a notebook file name fused into the word
    # "Pedestrian" (a search/replace accident); restored to readable text.
    error_message_template = "Pedestrian number should be between 0 and {max}; given number: {id}"
    assert (0 <= index < NUMBER_OF_PEDESTRIANS), error_message_template.format(
        max=NUMBER_OF_PEDESTRIANS - 1, id=index)
    return pd.read_csv(get_filename(index), index_col=0)

# 5. Create and save a bar plot

In [None]:
# Grouped bar chart: for each of 25 labels, one red bar (test) and one blue bar (train).
# NOTE(review): test_by_labels_normalized / train_by_labels_normalized are not defined in
# this snippet; presumably each holds 25 per-label percentages -- confirm in the defining cell.
ind = np.arange(25)
width = 0.4
# The two bars of a group are shifted half a bar width to either side of `ind`.
test_bar = plt.bar(ind - width/2, test_by_labels_normalized, width, color='r')
train_bar = plt.bar(ind + width/2, train_by_labels_normalized, width, color='b')

plt.ylabel('Percent')
plt.xlabel('Label number')
plt.title('Pedestrian percent by group and label')
# Tick labels run from 1 to 25.
# NOTE(review): ticks sit at ind + width/2; whether that is the centre of each group depends
# on matplotlib's default bar alignment (edge-aligned before 2.0, centred since) -- verify.
plt.xticks(ind + width/2., range(1, 26))
# The first bar of each series serves as the legend handle.
plt.legend((test_bar[0], train_bar[0]), ('Test', 'Train'))
plt.savefig('src/Plots/161002_pedestrian_separation_percent_by_label.png')

# 6. Distances between arrays

In [None]:
# array1: [sX1; sY1; sX2; sY2; ...]
# array2: [eX1; eY1; eX2; eY2; ...]
# output: [dist((sX1, sY1), (eX1, eY1)), dist((sX1, sY1), (eX1, eY1)),
#          dist((sX2, sY2), (eX2, eY2)), dist((sX2, sY2), (eX2, eY2)), ...]
# (every distance is written twice, so the output has the same shape as the inputs)

def distance_for_each_point(array1, array2):
    """Euclidean distance between corresponding (x, y) points of two arrays.

    The last axis of both inputs holds interleaved coordinates
    ``[x1, y1, x2, y2, ...]``; any leading axes (e.g. one row per sample)
    are processed independently.

    Parameters
    ----------
    array1, array2 : array-like of numbers
        Equally shaped inputs (ndarray, nested list, DataFrame, ...). Both are
        converted with ``np.asarray`` so that they are indexed the same way;
        previously a list failed on ``.ndim`` and a DataFrame passed as
        `array2` was indexed by column instead of by row.

    Returns
    -------
    numpy.ndarray
        Array of the same shape as the inputs in which each point's distance
        is repeated twice (once per coordinate).

    Raises
    ------
    AssertionError
        If the inputs differ in length or shape, or if the last axis does not
        contain an even number of values.
    """
    error_template = "Array lengths should be equal. len(array1): {len1}, len(array2): {len2}"
    assert (len(array1) == len(array2)), error_template.format(len1=len(array1), len2=len(array2))

    points1 = np.asarray(array1, dtype=float)
    points2 = np.asarray(array2, dtype=float)
    assert points1.shape == points2.shape, (
        "Array shapes should be equal. array1: {shape1}, array2: {shape2}".format(
            shape1=points1.shape, shape2=points2.shape))
    assert points1.shape[-1] % 2 == 0, (
        "Arrays should hold (x, y) pairs, got {n} values".format(n=points1.shape[-1]))

    # Split the last axis into (number of points, 2) so the norm can be taken per point.
    # The pair count is spelled out (no -1) because -1 is ambiguous for empty arrays.
    pair_shape = points1.shape[:-1] + (points1.shape[-1] // 2, 2)
    deltas = (points1 - points2).reshape(pair_shape)
    per_point = np.linalg.norm(deltas, axis=-1)

    # Duplicate each distance so that it lines up with both the x and the y slot.
    return np.repeat(per_point, 2, axis=-1)

# 7. Sample number in sets

In [None]:
# Number of samples in each set (test / train).
# NOTE(review): hard-coded; presumably must match the data set actually loaded -- confirm.

TEST_SAMPLE_NUMBER = 57092
TRAIN_SAMPLE_NUMBER = 285998

# 8. Current time

In [None]:
from time import localtime, strftime
# Print the current local time formatted as "YYYY-MM-DD HH:MM:SS".
print(strftime("%Y-%m-%d %H:%M:%S", localtime()))

# 9. Transform data

In [None]:
# The data may be laid out as [n_features x n_samples] or [n_samples x n_features];
# this helper switches from one layout to the other.
def to_model(df):
    """Return the contents of `df` as a transposed NumPy array."""
    as_array = np.array(df)
    return np.transpose(as_array)

# 10. Metric

In [None]:
def distance(test_results, predicted_results):
    """Metric: summed point-to-point distance divided by the number of test samples.

    Parameters
    ----------
    test_results : array-like
        True coordinates.
    predicted_results : array-like
        Predicted coordinates, laid out like `test_results`.

    Returns
    -------
    float
        Sum of everything returned by ``distance_for_each_point`` divided by
        TEST_SAMPLE_NUMBER and by 2.
    """
    # Convert BOTH inputs: previously only test_results was converted, so a
    # prediction passed as a list or DataFrame was indexed differently from
    # the ground truth inside distance_for_each_point.
    actual = np.asarray(test_results)
    predicted = np.asarray(predicted_results)
    total = distance_for_each_point(actual, predicted).sum()
    # distance_for_each_point reports every distance twice, hence the division by 2.
    return total / TEST_SAMPLE_NUMBER / 2

# 11. FRAMEWORK FULL

In [None]:
# Number of samples in each set (test / train).
# NOTE(review): hard-coded; presumably must match the data set actually loaded -- confirm.

TEST_SAMPLE_NUMBER = 57092
TRAIN_SAMPLE_NUMBER = 285998

# array1: [sX1; sY1; sX2; sY2; ...]
# array2: [eX1; eY1; eX2; eY2; ...]
# output: [dist((sX1, sY1), (eX1, eY1)), dist((sX1, sY1), (eX1, eY1)),
#          dist((sX2, sY2), (eX2, eY2)), dist((sX2, sY2), (eX2, eY2)), ...]
# (every distance is written twice, so the output has the same shape as the inputs)

def distance_for_each_point(array1, array2):
    """Euclidean distance between corresponding (x, y) points of two arrays.

    The last axis of both inputs holds interleaved coordinates
    ``[x1, y1, x2, y2, ...]``; any leading axes (e.g. one row per sample)
    are processed independently.

    Parameters
    ----------
    array1, array2 : array-like of numbers
        Equally shaped inputs (ndarray, nested list, DataFrame, ...). Both are
        converted with ``np.asarray`` so that they are indexed the same way;
        previously a list failed on ``.ndim`` and a DataFrame passed as
        `array2` was indexed by column instead of by row.

    Returns
    -------
    numpy.ndarray
        Array of the same shape as the inputs in which each point's distance
        is repeated twice (once per coordinate).

    Raises
    ------
    AssertionError
        If the inputs differ in length or shape, or if the last axis does not
        contain an even number of values.
    """
    error_template = "Array lengths should be equal. len(array1): {len1}, len(array2): {len2}"
    assert (len(array1) == len(array2)), error_template.format(len1=len(array1), len2=len(array2))

    points1 = np.asarray(array1, dtype=float)
    points2 = np.asarray(array2, dtype=float)
    assert points1.shape == points2.shape, (
        "Array shapes should be equal. array1: {shape1}, array2: {shape2}".format(
            shape1=points1.shape, shape2=points2.shape))
    assert points1.shape[-1] % 2 == 0, (
        "Arrays should hold (x, y) pairs, got {n} values".format(n=points1.shape[-1]))

    # Split the last axis into (number of points, 2) so the norm can be taken per point.
    # The pair count is spelled out (no -1) because -1 is ambiguous for empty arrays.
    pair_shape = points1.shape[:-1] + (points1.shape[-1] // 2, 2)
    deltas = (points1 - points2).reshape(pair_shape)
    per_point = np.linalg.norm(deltas, axis=-1)

    # Duplicate each distance so that it lines up with both the x and the y slot.
    return np.repeat(per_point, 2, axis=-1)

# The data may be laid out as [n_features x n_samples] or [n_samples x n_features];
# this helper switches from one layout to the other.
def to_model(df):
    """Return the contents of `df` as a transposed NumPy array."""
    as_array = np.array(df)
    return np.transpose(as_array)

def distance(test_results, predicted_results):
    """Metric: summed point-to-point distance divided by the number of test samples.

    Parameters
    ----------
    test_results : array-like
        True coordinates.
    predicted_results : array-like
        Predicted coordinates, laid out like `test_results`.

    Returns
    -------
    float
        Sum of everything returned by ``distance_for_each_point`` divided by
        TEST_SAMPLE_NUMBER and by 2.
    """
    # Convert BOTH inputs: previously only test_results was converted, so a
    # prediction passed as a list or DataFrame was indexed differently from
    # the ground truth inside distance_for_each_point.
    actual = np.asarray(test_results)
    predicted = np.asarray(predicted_results)
    total = distance_for_each_point(actual, predicted).sum()
    # distance_for_each_point reports every distance twice, hence the division by 2.
    return total / TEST_SAMPLE_NUMBER / 2

def framework(function, test_results, parameter_name, list_of_values, **other_parameters):
    """Sweep one parameter of `function`, log every prediction and plot the metric.

    For each value in `list_of_values`, ``other_parameters[parameter_name]``
    is set to that value and ``function(**other_parameters)`` is called. The
    prediction is stored with ``np.save`` under ``src/Logs/`` and scored with
    ``distance(test_results, prediction)``. Finally a bar chart of the scores
    is saved under ``src/Plots/``.

    Parameters
    ----------
    function : callable
        Called with keyword arguments only; its ``__name__`` is used in the
        output file names and in the plot title.
    test_results : array-like
        Ground truth handed to ``distance``.
    parameter_name : str
        Name of the keyword argument that is varied.
    list_of_values : sequence of numbers
        Values to try; also used as bar positions and tick labels.
    **other_parameters
        Remaining keyword arguments forwarded unchanged to `function`.

    Returns
    -------
    None
    """
    function_name = function.__name__
    # Take the date stamp once so both file names agree even across midnight.
    date = strftime("%Y%m%d", localtime())
    log_file_name = "src/Logs/{date}_predicted_coordinates_{function_name}_{parameter_name}_".format(
        date=date, function_name=function_name, parameter_name=parameter_name)
    plot_file_name = "src/Plots/{date}_{function_name}_difference_by_{parameter_name}.png".format(
        date=date, function_name=function_name, parameter_name=parameter_name)

    # we will keep results for each configuration here
    result = []

    for i, value in enumerate(list_of_values):
        # progress line: "<current>/<total>: <timestamp>"
        print("{cur}/{num}: {time}".format(cur=i+1, num=len(list_of_values), time=strftime("%Y-%m-%d %H:%M:%S", localtime())))
        other_parameters[parameter_name] = value
        predicted_results = function(**other_parameters)
        np.save(log_file_name + str(value), predicted_results)
        result.append(distance(test_results, predicted_results))

    ind = list_of_values
    if len(list_of_values) > 1:
        # bar width: half the spacing between the first two values
        width = (list_of_values[1] - list_of_values[0]) / 2
    else:
        # With fewer than two values there is no spacing to derive a width from
        # (this used to raise IndexError after all the work was already done).
        width = 0.4
    plt.bar(ind, result, width, color='g')

    plt.ylabel('Average difference')
    plt.xlabel(parameter_name)
    plt.title("Difference between real points and predicted by {parameter_name} in {function_name}".format(
        function_name=function_name, parameter_name=parameter_name))
    plt.xticks(np.array(ind) + width/2, ind)
    plt.savefig(plot_file_name)