
# Field Relationship Manager - Distance Travelled

In [35]:
import pandas as pd
import numpy as np

## Load and prepare Data

In [36]:
datafile = './frm_locations.csv'
frm_data = pd.read_csv(datafile)
frm_data.rename(columns={'date': 'time_stamp', 'latitude': 'start_lat',
                         'longitude': 'start_long'}, inplace=True)
frm_data['date'] = frm_data['time_stamp'].apply(lambda x: x[:10])
frm_data['time'] = frm_data['time_stamp'].apply(lambda x: x[11:19])
frm_data = frm_data.groupby(['frm_id', 'date'])

## Calculate Distance

In [37]:
def calculate_distance(dataframe):
    '''
    Calculate the distance.
    '''
    # sort the values by time
    dataframe.sort_values('time', inplace=True)

    # find coordinates of each chord in the path
    dataframe['end_lat'] = dataframe['start_lat'].shift(periods=-1)
    dataframe['end_long'] = dataframe['start_long'].shift(periods=-1)
    dataframe['end_lat'].iat[-1] = dataframe['start_lat'].iloc[-1]
    dataframe['end_long'].iat[-1] = dataframe['start_long'].iloc[-1]
    dataframe['diff_lat'] = (dataframe['start_lat'] - dataframe['end_lat'])/2
    dataframe['diff_long'] = (dataframe['start_long'] - dataframe['end_long'])/2
    
    # calculate the distance for each chord in the path
    earth_radius = 6371  # in kilometers
    dataframe['chord_distance'] = ((dataframe['diff_lat'].apply(np.sin))**2 +
                                           dataframe['start_lat'].apply(np.cos) *
                                           dataframe['end_lat'].apply(np.cos) *
                                           (dataframe['diff_long'].apply(np.sin))**2)
    dataframe['chord_distance'] = 2*earth_radius*(dataframe['chord_distance'].apply(np.sqrt)).apply(np.arcsin)
    return dataframe
    
frm_data = frm_data.apply(calculate_distance)  # returns dataframe
frm_data.reset_index(drop=True, inplace=True)
distance_dataframe = frm_data.loc[:, ['frm_id', 'date', 'chord_distance']]
distance_dataframe.set_index(['frm_id', 'date'], inplace=True)

# clean outliers
upper_threshold = 10  # max movement is 10 kilometers
lower_threshold = 0.001  # min movement is 1 meter
distance_dataframe = distance_dataframe[distance_dataframe <= upper_threshold]
distance_dataframe = distance_dataframe[distance_dataframe >= lower_threshold]

# calculate total distance travelled
distance_dataframe = distance_dataframe.groupby(by=['frm_id', 'date'])
distance_dataframe = distance_dataframe.sum()  # returns dataframe
distance_dataframe = distance_dataframe.unstack()['chord_distance']
distance_dataframe.columns.name = None  # remove the column header name
distance_dataframe.reset_index(inplace=True)

## Visualization

In [38]:
display(distance_dataframe)

Unnamed: 0,frm_id,2020-07-20,2020-07-21,2020-07-22,2020-07-23,2020-07-24
0,753aa4603d9f9f250ffe7f63e77bac1c,46.7205,50.206429,30.853907,22.248766,25.973244
1,84a9427621fe0669fbacbe6d7dc25b3d,38.236306,57.809056,48.030754,25.498006,17.527965
2,ab472ecc61608f512e4b1d4c2b49e8f8,33.694254,26.712897,55.47727,7.992916,81.527247
3,b96dc9bb896edad075da09f0c4e9098c,90.746647,80.082138,145.98504,231.779679,56.24548
4,e89712a6621c47b9485e0b06841f809f,234.818667,183.467136,185.942358,141.97385,133.342832
5,f9a1994416db0f2255ef187f87c38638,2.388283,13.487773,23.130174,26.467072,27.93583
