# Note:
* First of all, **run the libraries** cell
* Set the path to the ***CSV file***
* Then **load the file (*CSV*)** by following the instructions provided below, after the libraries have been run
* Continue your work ...

# Libraries

In [None]:
import filterpy.kalman as kf 
import filterpy.stats as stats 

import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 

from urllib.request import urlopen 
import xml.etree.ElementTree as et 
import math 
from datetime import datetime 
from decimal import Decimal 

# Download the CSV file from below link

##### https://jejuunivackr-my.sharepoint.com/:x:/g/personal/asif_office_jejunu_ac_kr/EfXmF0CYUolHrpEWuaFQFiYBE-Z3JKsYUBeqJXQN39uqOg
If the above link does not work, contact me via malikasifmahmoodawan@gmail.com

# Setting the path from where we'll load the CSV-file

In [None]:
# Location of the trace CSV that is later loaded with pd.read_csv.
# The two adjacent string literals are concatenated into a single path
# (directory part as a raw string, followed by the file name).
# Adjust this to wherever the downloaded file lives on the local machine.
path_to_csv_file = ( 
    r"/media/ncl-akraino/Volume-HDD-1/akraino/ws-akraino-datasets/OpenStreetMap 2 Traces/" 
    "SelfOpenStreetMapTraceForASparseTraffic.csv" 
) 

# Initializing variables

In [None]:
# Scratch counter; it is not referenced by any other code visible in this notebook.
temp_counter = 0 

In [None]:
# Time step read as a module-level global by get_prediction, where it scales
# the speed into a per-step displacement (weight * dt * speed).
dt = 1 

In [None]:
# Display limits for pandas output. Only display_max_cols is passed to
# pd.set_option in the visible code; display_max_rows is defined but not applied.
display_max_rows = 60 
display_max_cols = 10 

In [None]:
# Base marker size; the plotting functions multiply it by a per-series factor.
MARKER_SZ = 3

In [None]:
# Base figure size as [width, height]; the plotting functions scale both entries.
FIG_SZ = [10, 10] 

In [None]:
# Cap the number of columns pandas prints when displaying a DataFrame.
pd.set_option("display.max_columns", display_max_cols) 

# Functions

## Common functions

In [None]:
# function to convert to subscript
def get_sub(x):
    """Return ``x`` with mapped characters swapped for subscript look-alikes.

    ASCII letters, digits and ``+-=()`` are translated position by position
    using the two tables below; a few letters (e.g. ``C``, ``Q``) map to
    themselves. Any other character is left untouched.
    """
    plain_chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+-=()"
    subscript_chars = "ₐ₈CDₑբGₕᵢⱼₖₗₘₙₒₚQᵣₛₜᵤᵥwₓᵧZₐ♭꜀ᑯₑբ₉ₕᵢⱼₖₗₘₙₒₚ૧ᵣₛₜᵤᵥwₓᵧ₂₀₁₂₃₄₅₆₇₈₉₊₋₌₍₎"
    translation_table = str.maketrans(plain_chars, subscript_chars)
    return x.translate(translation_table)

In [None]:
# function to convert to superscript
def get_super(x):
    """Return ``x`` with mapped characters swapped for superscript look-alikes.

    ASCII letters, digits and ``+-=()`` are translated position by position
    using the two tables below; any other character is left untouched.
    """
    plain_chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+-=()"
    superscript_chars = "ᴬᴮᶜᴰᴱᶠᴳᴴᴵᴶᴷᴸᴹᴺᴼᴾQᴿˢᵀᵁⱽᵂˣʸᶻᵃᵇᶜᵈᵉᶠᵍʰᶦʲᵏˡᵐⁿᵒᵖ۹ʳˢᵗᵘᵛʷˣʸᶻ⁰¹²³⁴⁵⁶⁷⁸⁹⁺⁻⁼⁽⁾"
    translation_table = str.maketrans(plain_chars, superscript_chars)
    return x.translate(translation_table)

## Specific functions

In [None]:
# converts columns to floats
def convert_df_cols_to_float(df, cols):
    """Cast every column named in ``cols`` to float, in place, and return ``df``.

    The passed DataFrame itself is modified; the return value is the same
    object, handed back for convenient re-assignment.
    """
    for name in cols:
        as_float = df[name].astype(float)
        df[name] = as_float
    return df

In [None]:
# finding the minimum/maximum coordinates of the dataset
def find_min_max_coordinates(df, cols=("x", "y")):
    """Return the lower and upper corner of the data in the first two ``cols``.

    Parameters:
        df: DataFrame holding the coordinate columns.
        cols: sequence whose first two entries name the columns to inspect.

    Returns:
        A pair ``([min_0, min_1], [max_0, max_1])``.

    The pandas reductions are used instead of the builtin ``min``/``max``:
    they skip NaN values (the builtins give an order-dependent result when a
    NaN is present) and avoid a Python-level loop over every row.
    """
    first_col, second_col = cols[0], cols[1]
    lower_corner = [df[first_col].min(), df[second_col].min()]
    upper_corner = [df[first_col].max(), df[second_col].max()]
    return lower_corner, upper_corner

In [None]:
# get the unique values in columns of the dataset
def get_dataset_unique_stats(df, cols):
    """Collect the distinct values of each requested column.

    Returns a list with one single-entry dict per name in ``cols`` (in the
    same order), mapping the column name to ``df[name].unique()``.
    """
    return [{name: df[name].unique()} for name in cols]

In [None]:
# displays the unique values in columns of the dataset
def display_dataset_unique_values(arr, cols):
    """Print one ``Unique <column>(s): <values>`` line per column name.

    ``arr`` is expected to be position-aligned with ``cols``: the i-th entry
    is a mapping that contains the i-th column name as a key.
    """
    line_template = "Unique {:}(s): {:}"
    for position, name in enumerate(cols):
        distinct = arr[position][name]
        print(line_template.format(name, distinct))

In [None]:
# get column index in the array
def get_column_index(arr, search_col, search_cols):
    """Return the value stored for ``search_col`` in a position-aligned list.

    The position of ``search_col`` inside ``search_cols`` selects the entry
    of ``arr``; that entry is then indexed by ``search_col`` itself.
    """
    entry = arr[search_cols.index(search_col)]
    return entry[search_col]

## .

In [None]:
# detects the directions using the provided angle
def detect_direction(angle=0.00):
    """Map a heading angle in degrees to an ``(x_dir, y_dir)`` pair.

    0/360 is due North and angles grow clockwise (90 = East, 180 = South,
    270 = West). ``x_dir`` is "East", "West" or "None"; ``y_dir`` is
    "North", "South" or "None"; the string "None" marks an axis with no
    component.

    Angles outside [0, 360] are wrapped modulo 360 first. Previously they
    matched no branch and the function returned ``None``, which broke the
    tuple unpacking done by callers. A NaN angle still yields ``None``.
    """
    if not (0.00 <= angle <= 360.00):
        # Python's % returns a value in [0, 360] for a positive modulus.
        angle = angle % 360.00

    if angle == 0.00 or angle == 360.00:
        return "None", "North"
    if 0.00 < angle < 90.00:
        return "East", "North"
    if angle == 90.00:
        return "East", "None"
    if 90.00 < angle < 180.00:
        return "East", "South"
    if angle == 180.00:
        return "None", "South"
    if 180.00 < angle < 270.00:
        return "West", "South"
    if angle == 270.00:
        return "West", "None"
    if 270.00 < angle < 360.00:
        return "West", "North"
    # Only reachable for NaN, which compares false against every bound.
    return None

In [None]:
# detects the angle-range using the provided directions
def detect_angle_range(x_dir, y_dir):
    """Return the ``(min, max)`` angle bounds of the quadrant for a direction pair.

    The four diagonal quadrants give 90-degree wide ranges; every other
    combination (including the axis-aligned "None" cases) gives ``(0.0, 0.0)``.
    """
    quadrants = (
        ("East", "North", (0, 90)),
        ("East", "South", (90, 180)),
        ("West", "South", (180, 270)),
        ("West", "North", (270, 360)),
    )
    for horizontal, vertical, bounds in quadrants:
        if x_dir == horizontal and y_dir == vertical:
            return bounds
    return 0.0, 0.0

In [None]:
def get_prediction(x, y, a, s):
    """Predict the position one time step ahead from position, heading and speed.

    Parameters:
        x, y: current coordinates.
        a: heading angle in degrees (0/360 = North, clockwise).
        s: speed; multiplied by the module-level ``dt`` to get the step length.

    Returns:
        ``(x_pred, y_pred)``, each rounded to two decimals.

    The step ``dt * s`` is split between the two axes with weights that are
    linear in the angle inside its 90-degree quadrant.
    NOTE(review): linear weights (rather than sin/cos) mean the resulting
    step length is not exactly ``dt * s`` off-axis — confirm this is intended.

    Fix: for a heading of exactly 90 or 270 degrees the original multiplied
    the step by the y-weight (which is 0 in that branch) instead of the
    x-weight, so due-East/West vehicles were predicted not to move at all.
    Angles outside [0, 360] are wrapped so the quadrant weights stay in [0, 1].
    """
    if not (0.0 <= a <= 360.0):
        a = a % 360.0

    x_dir, y_dir = detect_direction(a)

    if x_dir == "None" or y_dir == "None":
        # Axis-aligned heading: the whole step goes to the one active axis.
        x_weight = 0 if x_dir == "None" else 1
        y_weight = 0 if y_dir == "None" else 1
    else:
        min_angle, max_angle = detect_angle_range(x_dir, y_dir)
        span = max_angle - min_angle
        rising = (a - min_angle) / span    # 0 at quadrant start, 1 at its end
        falling = (max_angle - a) / span   # 1 at quadrant start, 0 at its end
        if (x_dir == "East") == (y_dir == "North"):
            # North-East and South-West: the x share grows with the angle.
            x_weight, y_weight = rising, falling
        else:
            # South-East and North-West: the y share grows with the angle.
            x_weight, y_weight = falling, rising

    x_sign = -1 if x_dir == "West" else 1
    y_sign = -1 if y_dir == "South" else 1
    x_pred = x + x_sign * x_weight * dt * s
    y_pred = y + y_sign * y_weight * dt * s
    return round(x_pred, 2), round(y_pred, 2)

In [None]:
def get_predictions(df):
    """Build the one-step-ahead prediction for every row of ``df``.

    Each row must provide "time", "x", "y", "angle" and "speed". The result
    is a new DataFrame with columns "time", "x", "y", where "time" is the
    row's time advanced by the module-level ``dt`` and ("x", "y") come from
    ``get_prediction``.

    The timestamp now advances by ``dt`` instead of a hard-coded 1, so it
    stays consistent with the displacement, which get_prediction already
    scales by ``dt``.
    """
    arr_pred_cols = ["time", "x", "y"]
    arr_pred_rows = []
    for _, item in df.iterrows():
        x_pred, y_pred = get_prediction(
            item["x"], item["y"], item["angle"], item["speed"]
        )
        arr_pred_rows.append({
            "time": item["time"] + dt,
            "x": x_pred,
            "y": y_pred,
        })
    return pd.DataFrame(arr_pred_rows, columns=arr_pred_cols)

## .

In [None]:
# get start/end coordinates for the dataframe
def get_start_end_coordinates(df, col="time"):
    """Return the ``[x, y]`` of the rows with the smallest and largest ``col``.

    Parameters:
        df: DataFrame with "x", "y" and ``col`` columns.
        col: column used to order the rows (default "time").

    Returns:
        ``([x_start, y_start], [x_end, y_end])``; when several rows share the
        extreme value the first of them is used.

    Raises:
        ValueError: if ``df`` has no rows.

    The pandas reductions replace the builtin ``min``/``max``: with a leading
    NaN in ``col`` the builtins returned NaN, the equality filter then matched
    nothing and ``values[0]`` raised IndexError.
    """
    if df.empty:
        raise ValueError("cannot determine start/end coordinates of an empty dataframe")
    ordering = df[col]
    first_rows = df[ordering == ordering.min()]
    last_rows = df[ordering == ordering.max()]
    start = [first_rows["x"].values[0], first_rows["y"].values[0]]
    end = [last_rows["x"].values[0], last_rows["y"].values[0]]
    return start, end

## .

In [None]:
# filtering the items in dataframe having anglediff > 180
def get_items_having_greater_anglediff(df, angle_threshold):
    """Select rows whose heading change exceeds ``angle_threshold[1]``.

    Of those rows, only the ones where ``angle_threshold[2] - anglediff`` is
    still at least ``angle_threshold[0]`` are kept. If no row exceeds
    ``angle_threshold[1]`` the (empty) filtered frame is returned unchanged;
    otherwise a new frame is built from the "x", "y", "time", "speed" and
    "angle" values, with its columns taken from the module-level
    ``cols_to_convert``.
    """
    candidates = df[df["anglediff"] > angle_threshold[1]]
    if candidates.empty:
        return candidates
    kept_rows = [
        {
            "x": row["x"],
            "y": row["y"],
            "time": row["time"],
            "speed": row["speed"],
            "angle": row["angle"],
        }
        for _, row in candidates.iterrows()
        if angle_threshold[2] - row["anglediff"] >= angle_threshold[0]
    ]
    return pd.DataFrame(kept_rows, columns=cols_to_convert)

In [None]:
# filtering the items in dataframe having anglediff <= 180 based on angle-threshold≈30
def get_items_having_smaller_anglediff(df, angle_threshold):
    """Select rows whose heading change lies in ``[threshold[0], threshold[1])``.

    If no row falls in that range the (empty) filtered frame is returned
    unchanged; otherwise a new frame is built from the "x", "y", "time",
    "speed" and "angle" values, with its columns taken from the module-level
    ``cols_to_convert``.
    """
    in_range = (df["anglediff"] >= angle_threshold[0]) & (df["anglediff"] < angle_threshold[1])
    candidates = df.loc[in_range]
    if candidates.empty:
        return candidates
    # Every remaining row already satisfies anglediff >= angle_threshold[0],
    # so no second per-row check is needed.
    kept_rows = [
        {
            "x": row["x"],
            "y": row["y"],
            "time": row["time"],
            "speed": row["speed"],
            "angle": row["angle"],
        }
        for _, row in candidates.iterrows()
    ]
    return pd.DataFrame(kept_rows, columns=cols_to_convert)

In [None]:
# merge two dataframes
def merge_dataframes(df1, df2):
    """Stack ``df2`` underneath ``df1`` and return the combined frame.

    The original index labels of both inputs are kept as they are.
    """
    return pd.concat([df1, df2])

In [None]:
# get sharp turns
def get_sharp_turns(df, angle_threshold):
    """Return the rows of ``df`` flagged as sharp turns.

    The absolute difference between each row's "angle" and the previous
    row's is stored as "anglediff"; the two threshold filters are then
    applied to that frame and their results are concatenated.
    """
    heading_change = df["angle"].diff(periods=1).abs()
    annotated = df.assign(anglediff=heading_change)
    above_upper = get_items_having_greater_anglediff(annotated, angle_threshold)
    within_range = get_items_having_smaller_anglediff(annotated, angle_threshold)
    return merge_dataframes(above_upper, within_range)

## .

In [None]:
def setup_actual_and_predicted_dataframes(df1, df2):
    """Drop the first-labelled row of ``df1`` and the last-labelled row of ``df2``.

    Both inputs are left untouched; the trimmed copies are returned as a
    ``(df1, df2)`` pair.
    """
    leading_label = df1.index[0]
    trailing_label = df2.index[-1]
    return df1.drop(index=leading_label), df2.drop(index=trailing_label)

In [None]:
def get_distance_between_two_points(point1, point2):
    """Return the straight-line distance between two ``[x, y]`` points."""
    delta_y = point2[1] - point1[1]
    delta_x = point2[0] - point1[0]
    return math.sqrt(delta_y ** 2 + delta_x ** 2)

In [None]:
def get_distance_between_arrays(
    arr1, arr2, cols
):
    """Compute row-by-row distances between two coordinate frames.

    ``arr1`` is re-indexed to 0..n-1 so that each of its rows is paired with
    the row at the same position in ``arr2``. ``cols[0]`` and ``cols[1]`` name
    the two coordinate columns. The result is a new DataFrame with a single
    "distance" column holding one value per row of ``arr1``.
    """
    positional = arr1.reset_index()
    distance_rows = [
        {
            "distance": get_distance_between_two_points(
                [row[cols[0]], row[cols[1]]],
                [arr2[cols[0]].values[position], arr2[cols[1]].values[position]],
            )
        }
        for position, row in positional.iterrows()
    ]
    return pd.DataFrame(distance_rows, columns=["distance"])

In [None]:
def get_error_data(df1, df2, cols=("x", "y")):
    """Return the per-row distance between ``df1`` and ``df2``.

    Parameters:
        df1, df2: position-aligned frames holding the coordinate columns.
        cols: names of the two coordinate columns.

    Returns:
        The single-column "distance" DataFrame produced by
        ``get_distance_between_arrays``.

    The unused locals of the original were removed and the mutable list
    default was replaced by an equivalent tuple.
    """
    return get_distance_between_arrays(df1, df2, cols)

In [None]:
def root_mean_square_error(error_data):
    """Return the root-mean-square of the "distance" column of ``error_data``.

    Computes ``sqrt(sum(distance ** 2) / n)``. The original took the square
    root of the plain mean of the distances (no squaring), which is not an
    RMSE.

    Returns NaN for an empty frame instead of dividing by zero, so a vehicle
    with too few samples does not abort a batch computation.
    """
    number_of_entries = error_data.shape[0]
    if number_of_entries == 0:
        return float("nan")
    squared_errors = error_data["distance"].astype(float) ** 2
    return math.sqrt(squared_errors.sum() / number_of_entries)

## Not-used functions

In [None]:
def difference_between_columns(col1, col2, unit="distance"):
    """Return ``col2 - col1``; the ``unit`` argument is accepted but not used."""
    delta = col2 - col1
    return delta

# Displaying (wink) Copyrights information

In [None]:
# Copyrights by Asif Mehmood 
# Prints the author tag followed by the current four-digit year.
print ("Asif©{:}".format(datetime.now().strftime("%Y"))) 

# Creating/preprocessing/preparing pandas dataframe from CSV-file

In [None]:
# Load the raw trace into a DataFrame; later cells read the columns
# "id", "time", "x", "y", "angle", "speed", "lane" and "pos" from it.
df_org = pd.read_csv(path_to_csv_file) 

## Converting strings to float-values in the dataset

In [None]:
# Columns cast to float. This list is also read as a global by the two
# anglediff filter functions, which use it as the column order of their output.
cols_to_convert = [ 
    "time", "x", "y", "angle", "speed" 
] 
df_org = convert_df_cols_to_float(df_org, cols_to_convert) 

## Dropping unnecessary columns in the dataset

In [None]:
# Remove the "lane" and "pos" columns, which are not used by any later cell.
df_org = df_org.drop( 
    columns=["lane", "pos"] 
) 

## Sorting dataset with vehicle-id, time

In [None]:
# Order rows by vehicle id and then by time, so that each vehicle's samples
# are contiguous and chronological.
df_org = df_org.sort_values( 
    ["id", "time"], ascending=True 
) 

## Finding maximum, minimum coordinates for graphs in plots

In [None]:
# Coordinate columns; also read as a global by plot_each_vehicle and
# error_all_vehicles when computing prediction errors.
cols_to_search = [ 
    "x", "y" 
] 
# start_coordinate holds the per-column minima, end_coordinate the maxima.
start_coordinate, end_coordinate = find_min_max_coordinates(df_org, cols_to_search) 
print ("start [long, lat]: {:}".format(start_coordinate)) 
print ("end.. [long, lat]: {:}".format(end_coordinate)) 

## Setting maximum, minimum limits for graphs in plots

In [None]:
# Create the sized figure first so the axis limits below are applied to it.
# Calling plt.xlim/plt.ylim before plt.figure attaches the limits to a
# separate, implicitly created figure and leaves the sized one empty.
plt.figure(figsize=(FIG_SZ[0], FIG_SZ[1]))
# Limits are the dataset bounds widened outwards to whole numbers.
plt.xlim([math.floor(start_coordinate[0]), math.ceil(end_coordinate[0])])
plt.ylim([math.floor(start_coordinate[1]), math.ceil(end_coordinate[1])])

## Getting unique values for different columns

In [None]:
# Columns whose distinct values are collected and printed; unique_values is
# position-aligned with this list (one single-entry dict per column).
cols_to_unique_values = [ 
    "id", "speed", "angle", "time" 
] 
unique_values = get_dataset_unique_stats(df_org, cols_to_unique_values) 
display_dataset_unique_values(unique_values, cols_to_unique_values) 

# Plotting vehicle graphs

## Setting variables for vehicles to be plotted

In [None]:
# Settings passed to plot_each_vehicle / error_all_vehicles below.
col_to_group_by_id = "id" 
col_to_sort_by_id = "time" 
# Position (within the unique vehicle ids) of the first vehicle to plot.
start_index_of_vehicle_id = 127 
number_of_vehicles_to_plot = 4 # this value should be: [an even number, more than 3] 
# [lower bound, upper bound, full circle] in degrees, as consumed by the
# anglediff filter functions via indices 0, 1 and 2.
angle_threshold = [30, 180, 360] 

## Getting all vehicles

In [None]:
# Array of distinct vehicle ids, taken from the "id" entry of unique_values.
all_vehicles = get_column_index(unique_values, col_to_group_by_id, cols_to_unique_values) 

## Plotting vehicles

In [None]:
def plot_each_vehicle( 
    df, vehicle_ids, search_col, search_cols, sort_by_col = "time", 
    base_index_of_vehicle_to_plot=0, vehicles_to_plot = 4, angle_threshold = [30, 180, 360] 
): 
    """Draw one subplot per vehicle with its actual and predicted trajectory.

    Parameters:
        df: trace DataFrame with "x", "y", "angle", "speed", "time" and the
            ``search_col`` column.
        vehicle_ids: sliceable sequence of vehicle identifiers.
        search_col: column of ``df`` that holds the vehicle identifier.
        search_cols: accepted but not used inside this function.
        sort_by_col: secondary sort column (after ``search_col``).
        base_index_of_vehicle_to_plot: position in ``vehicle_ids`` of the
            first vehicle to draw.
        vehicles_to_plot: requested number of vehicles; rounded up to an
            even number and raised to at least 4.
        angle_threshold: thresholds forwarded to ``get_sharp_turns``.

    Each subplot shows the actual path, the one-step predictions, the start
    and end points, the detected sharp turns, and a legend that also reports
    the prediction error. Reads the module-level ``FIG_SZ``, ``MARKER_SZ``
    and ``cols_to_search``. Returns nothing.
    """
    # Force an even count of at least 4 so the grid has two full columns and
    # at least two rows (axes_plot is then indexed as a 2-D array below).
    if(vehicles_to_plot%2==1): vehicles_to_plot+=1 
    if(vehicles_to_plot<4): vehicles_to_plot = 4 
    vehicle_ids = vehicle_ids[ 
        base_index_of_vehicle_to_plot:(base_index_of_vehicle_to_plot+vehicles_to_plot) 
    ] 
    df = df.sort_values([search_col, sort_by_col], ascending=True) 
    vehicle_plot, axes_plot = plt.subplots( 
        int(vehicles_to_plot/2), 2, 
#         sharex=True, sharey=True, 
        figsize=(FIG_SZ[0]*3, FIG_SZ[1]*1.5*(vehicles_to_plot/2)), 
    ) 
    vehicle_plot.supxlabel("longitude") 
    vehicle_plot.supylabel("latitude") 
    # Grid cursor: subplots are filled left to right, then top to bottom.
    rowcounter = 0 
    colcounter = 0 
    for vehicle_index, vehicle_id in enumerate(vehicle_ids): 
        # Plotting actual path followed by the vehicle 
        vehicle_actual = df[df[search_col] == vehicle_id] 
        vehicle_actual_plot, = axes_plot[rowcounter, colcounter].plot( 
            vehicle_actual["x"], 
            vehicle_actual["y"], 
            marker=".", 
            markersize=MARKER_SZ*1, 
        ) 
        
        # Plotting predicted path followed by the vehicle 
        vehicle_prediction = get_predictions(vehicle_actual) 
        vehicle_prediction_plot, = axes_plot[rowcounter, colcounter].plot( 
            vehicle_prediction["x"], 
            vehicle_prediction["y"], 
            "ro", 
            markersize=MARKER_SZ*2, 
            mfc="None", 
            mew=2, 
            alpha=0.7, 
            linewidth=1.0, 
        ) 
        
        # Plotting start/end coordinate of the vehicle 
        vehicle_start_coordinate, vehicle_end_coordinate = get_start_end_coordinates(vehicle_actual, "time") 
        vehicle_start_plot, = axes_plot[rowcounter, colcounter].plot( 
            vehicle_start_coordinate[0], 
            vehicle_start_coordinate[1], 
            "go", 
            markersize=MARKER_SZ*2.5, 
            mfc="None", 
            mew=2, 
            alpha=0.7, 
            linewidth=1.0, 
        ) 
        vehicle_end_plot, = axes_plot[rowcounter, colcounter].plot( 
            vehicle_end_coordinate[0], 
            vehicle_end_coordinate[1], 
            "ko", 
            markersize=MARKER_SZ*2.5, 
            mfc="None", 
            mew=2, 
            alpha=0.7, 
            linewidth=1.0, 
        ) 
        
        # Plotting sharp turns of the vehicle 
        vehicle_sharp_turns = get_sharp_turns(vehicle_actual, angle_threshold)
        vehicle_sharp_turns_plot, = axes_plot[rowcounter, colcounter].plot( 
            vehicle_sharp_turns["x"], 
            vehicle_sharp_turns["y"], 
            "k>", 
            markersize=MARKER_SZ*3, 
        ) 
        
        # Calculating errors of the predictions 
        # The first actual row and the last predicted row are dropped so the
        # remaining rows of both frames pair up position by position.
        vehicle_actual, vehicle_prediction = setup_actual_and_predicted_dataframes( 
            vehicle_actual, vehicle_prediction 
        ) 
        vehicle_error_data = get_error_data(vehicle_actual, vehicle_prediction, cols_to_search) 
        vehicle_error_mean_square = root_mean_square_error(vehicle_error_data) 
        # An empty, fully transparent series is plotted only to obtain a
        # handle that the legend can attach the error text to.
        error_rows = [] 
        vehicle_error = pd.DataFrame(error_rows,columns=cols_to_search) 
        vehicle_erro_plot, = axes_plot[rowcounter, colcounter].plot( 
            vehicle_error["x"], 
            vehicle_error["y"], 
            alpha=0.0, 
        ) 
        
        # Configuring the legends of above plots 
        vehicle_plot_legend = axes_plot[rowcounter, colcounter].legend( 
            [
                vehicle_actual_plot, 
                vehicle_prediction_plot, 
                vehicle_start_plot, 
                vehicle_end_plot, 
                vehicle_sharp_turns_plot, 
                vehicle_erro_plot 
            ], 
            [ 
                # These two labels contain no placeholder, so .format() leaves them unchanged.
                "Actual path".format(vehicle_id), 
                "Predicted path".format(vehicle_id), 
                "Start: [{:}, {:}]".format(int(vehicle_start_coordinate[0]), int(vehicle_start_coordinate[1])), 
                "End: [{:}, {:}]".format(int(vehicle_end_coordinate[0]), int(vehicle_end_coordinate[1])), 
                "Sharp turns", 
                "RMSE: {:.2f}".format(vehicle_error_mean_square), 
            ], 
            loc="best" 
        ) 
        axes_plot[rowcounter, colcounter].add_artist(vehicle_plot_legend) 
        axes_plot[rowcounter, colcounter].set_title("{:} trajectory".format(vehicle_id)) 
        # Advance the grid cursor: toggle the column, and move down one row
        # each time the column wraps back to 0.
        colcounter=(colcounter+1)%2 
        if(colcounter%2 == 0): rowcounter+=1 

In [None]:
# Draw the trajectories of number_of_vehicles_to_plot vehicles, starting at
# position start_index_of_vehicle_id within all_vehicles.
plot_each_vehicle( 
    df_org, all_vehicles, col_to_group_by_id, cols_to_unique_values, col_to_sort_by_id, 
    start_index_of_vehicle_id, number_of_vehicles_to_plot, angle_threshold 
) 

# Calculate RMSE for all vehicles

In [None]:
def error_all_vehicles( 
    df, vehicle_ids, search_col, search_cols, sort_by_col = "time", 
    vehicles_to_plot = 4, angle_threshold = [30, 180, 360], cols_error = ["id", "rmse"] 
): 
    """Compute the prediction error of every vehicle in ``vehicle_ids``.

    ``vehicle_ids`` must expose ``.shape`` (all of its entries are processed;
    the ``vehicles_to_plot`` argument is overridden by that length).
    ``search_cols`` and ``angle_threshold`` are accepted but not used here.
    The coordinate columns come from the module-level ``cols_to_search``.

    Returns a DataFrame with the columns ``cols_error`` holding one row per
    vehicle: its id and the value from ``root_mean_square_error``.
    """
    selected_ids = vehicle_ids[0:vehicle_ids.shape[0]]
    ordered = df.sort_values([search_col, sort_by_col], ascending=True)

    def _vehicle_error(vehicle_id):
        # Error between one vehicle's actual samples and its one-step predictions.
        actual = ordered[ordered[search_col] == vehicle_id]
        predicted = get_predictions(actual)
        actual, predicted = setup_actual_and_predicted_dataframes(actual, predicted)
        distances = get_error_data(actual, predicted, cols_to_search)
        return root_mean_square_error(distances)

    error_rows = [
        {"id": vehicle_id, "rmse": _vehicle_error(vehicle_id)}
        for vehicle_id in selected_ids
    ]
    return pd.DataFrame(error_rows, columns=cols_error)

In [None]:
# Per-vehicle prediction error for every id in all_vehicles
# (the vehicle-count argument is overridden inside error_all_vehicles).
df_error = error_all_vehicles( 
    df_org, all_vehicles, col_to_group_by_id, cols_to_unique_values, col_to_sort_by_id, 
    number_of_vehicles_to_plot, angle_threshold, cols_error = ["id", "rmse"] 
) 

In [None]:
# Order vehicles from the smallest to the largest error; the original index
# labels are kept (the index is not reset).
df_error = df_error.sort_values( 
    ["rmse"], ascending=True 
) 

In [None]:
# Show the per-vehicle error table.
print ("Error DataFrame: ") 
print (df_error) 

In [None]:
# The figure printed here is the arithmetic mean of the per-vehicle values
# in the "rmse" column (sum divided by the number of rows).
number_of_error_items = df_error["id"].shape[0] 
error_sum = df_error["rmse"].sum() 
print ("RMSE for all vehicles: {:.3f}".format(error_sum/number_of_error_items)) 

# Plotting RMSE for all vehicles

In [None]:
def plot_each_error(df):
    """Plot the per-vehicle prediction errors in ascending order.

    Parameters:
        df: DataFrame with an "rmse" column (one row per vehicle).

    Reads the module-level ``FIG_SZ`` and ``MARKER_SZ``. Returns nothing.

    Fixes over the original:
      * the sorted values are passed as a plain array, because matplotlib
        plots a pandas Series against its index, which made the preceding
        sort invisible in the figure;
      * the figure and the plotted line use separate names instead of
        rebinding one variable;
      * the "vehciles" typo in the legend and title is corrected.
    """
    ordered_errors = df.sort_values(["rmse"], ascending=True)["rmse"].to_numpy()
    error_figure, error_axes = plt.subplots(
        1,
        figsize=(FIG_SZ[0]*2.5, FIG_SZ[1]*2.5),
    )
    error_figure.supxlabel("")
    error_figure.supylabel("error")
    error_line, = error_axes.plot(
        ordered_errors,
        "ro",
        markersize=MARKER_SZ*2,
        mfc="None",
        mew=2,
        alpha=0.7,
        linewidth=1.0,
    )

    # Configuring the legend of above plot
    error_legend = error_axes.legend(
        [error_line], ["Prediction error of all vehicles"], loc="best"
    )
    error_axes.add_artist(error_legend)
    error_axes.set_title("Prediction error of all vehicles")

In [None]:
# Draw the error of every vehicle from the table computed above.
plot_each_error(df_error)