In [45]:
# import s2sphere as s2
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from math import radians, cos, sin, asin, sqrt

def cleanData(df):
    """Return a copy of ``df`` without the columns this analysis ignores.

    The columns 'bearing', 'speed', 'country_id', 'service_type',
    'location' and 'hub_id' are removed in a single ``drop`` call, so only
    one new frame is built instead of one intermediate copy per column.
    The input frame itself is not modified.

    Raises:
        KeyError: if any of the listed columns is absent from ``df``
            (raised by ``DataFrame.drop``).
    """
    unused_columns = [
        'bearing',
        'speed',
        'country_id',
        'service_type',
        'location',
        'hub_id',
    ]
    return df.drop(unused_columns, axis=1)

def splitByFulfilment(df):
    """Split ``df`` into one frame per distinct 'fulfillment_number'.

    Each returned frame holds the rows of a single fulfillment number,
    ordered by ascending 'timestamp'. The frames appear in the order in
    which ``DataFrame.groupby`` yields its groups.

    Returns:
        list of DataFrame, empty when ``df`` has no rows.
    """
    return [
        group.sort_values('timestamp', ascending=True)
        for _, group in df.groupby('fulfillment_number')
    ]

def haversine(lat1, lon1, lat2, lon2, radius_km=6367):
    """
    Calculate the great circle distance between two points
    on a sphere (coordinates specified in decimal degrees).

    Parameters:
        lat1, lon1: latitude and longitude of the first point, in degrees.
        lat2, lon2: latitude and longitude of the second point, in degrees.
        radius_km: radius of the sphere; the result is expressed in the
            same unit. Defaults to 6367, the value this function has
            always used.

    Returns:
        The distance along the sphere's surface, as a float.
    """
    # convert decimal degrees to radians
    lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])
    # haversine formula
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2
    # Floating-point rounding can push sqrt(a) marginally above 1 for
    # (near-)antipodal points, which would make asin raise ValueError.
    c = 2 * asin(min(1.0, sqrt(a)))
    return radius_km * c

# Annotates a route, in place, with the distance of every hop between
# consecutive lat-long rows and the running distance from the first row
# (uni-directional, in row order). Prints the two new columns.
def generateAllSubRoutes(df):
    """Add 'prev_dis' and 'dis_from_start' columns to ``df`` in place.

    'prev_dis' is the haversine distance between a row's
    ('latitude', 'longitude') and that of the row before it;
    'dis_from_start' is the running sum of 'prev_dis'. Both are 0 for the
    first row. Rows are processed in their current order, so the caller is
    expected to have sorted them already.

    The columns are stored as floats (not as an object column seeded with
    None), so they can be used directly in numeric operations.

    Returns:
        None. ``df`` is modified in place and the two new columns are
        printed.
    """
    # Pull the coordinates out once; per-cell .iloc access inside the loop
    # is far slower than iterating plain lists.
    latitudes = df['latitude'].tolist()
    longitudes = df['longitude'].tolist()

    hop_distances = []
    running_totals = []
    travelled = 0.0
    for i, (lat, lon) in enumerate(zip(latitudes, longitudes)):
        if i == 0:
            hop = 0.0
        else:
            hop = haversine(latitudes[i - 1], longitudes[i - 1], lat, lon)
        travelled += hop
        hop_distances.append(hop)
        running_totals.append(travelled)

    # 'prev_dis' is assigned first so that it precedes 'dis_from_start',
    # which the label slice below relies on.
    df['prev_dis'] = np.array(hop_distances, dtype=float)
    df['dis_from_start'] = np.array(running_totals, dtype=float)

    print(df.loc[:, 'prev_dis':'dis_from_start'])
    

# Load the raw path data and strip the columns the analysis does not use.
df = cleanData(pd.read_csv("~/Downloads/PathDataExperimental.csv"))

# One frame per fulfillment number, each ordered by timestamp.
df_split_by_fulfilment = splitByFulfilment(df)

# Work on the first fulfilment only: report its size, then annotate it
# with hop and cumulative distances.
f1 = df_split_by_fulfilment[0]
print("Columns : %s" % f1.shape[1])
print("Rows : %s" % f1.shape[0])
generateAllSubRoutes(f1)


                

Columns : 9
Rows : 43
       prev_dis dis_from_start
128           0              0
210   0.0827047      0.0827047
106   0.0333762       0.116081
228  0.00739248       0.123473
131   0.0122396       0.135713
223   0.0020565       0.137769
150  0.00173197       0.139501
113  0.00156332       0.141065
258  0.00302617       0.144091
112   0.0199534       0.164044
181  0.00408973       0.168134
243  0.00261522       0.170749
57   0.00542284       0.176172
149  0.00273593       0.178908
246  0.00201862       0.180927
21   0.00177567       0.182702
265   0.0109318       0.193634
241    0.319706        0.51334
8     0.0628287       0.576169
107    0.152793       0.728962
230    0.430477        1.15944
302     0.25472        1.41416
262    0.237664        1.65182
205   0.0743989        1.72622
245    0.367687        2.09391
180    0.063444        2.15735
117    0.137093        2.29444
56     0.422294        2.71674
148    0.335412        3.05215
242    0.120344        3.17249
64     0.216338  