In [8]:
import pandas as pd
import numpy as np
from math import sin, cos, sqrt, atan2, radians
from scipy.spatial.distance import pdist, squareform

# Small sample frame: one (DISTANCE, REGION) record per row.
# NOTE(review): no later visible cell reads `df` — confirm it is still needed.
df = pd.DataFrame(
    [
        (21, 'SJDR1'),
        (12, 'LD1'),
        (13, 'LD1'),
        (31, 'LD'),
        (14, 'TR'),
        (25, 'SVM'),
    ],
    columns=['DISTANCE', 'REGION'],
)

df

Unnamed: 0,DISTANCE,REGION
0,21,SJDR1
1,12,LD1
2,13,LD1
3,31,LD
4,14,TR
5,25,SVM


In [10]:
# Call records: which antenna (ANTENNA_ID) each USER was seen on, together with
# the antenna presumed to be that user's residence.
# No visible cell defines `merged`, so on a fresh kernel this cell (and the
# later merge) raised NameError. The frame is rebuilt here from the output this
# cell displayed.
# NOTE(review): if `merged` is really produced by an upstream step that is not
# part of this notebook, load it from there instead of using this literal.
merged = pd.DataFrame({
    'USER': [1, 1, 1, 2, 2, 3],
    'ANTENNA_ID': ['SJDR1', 'LD', 'TR', 'LD1', 'SVM', 'LD1'],
    'PRESUMED_RESIDENCE': ['SJDR1', 'SJDR1', 'SJDR1', 'LD1', 'LD1', 'LD1'],
})

merged

Unnamed: 0,USER,ANTENNA_ID,PRESUMED_RESIDENCE
0,1,SJDR1,SJDR1
1,1,LD,SJDR1
2,1,TR,SJDR1
3,2,LD1,LD1
4,2,SVM,LD1
5,3,LD1,LD1


In [3]:
# Antenna coordinates: one (ANTENNA_ID, LAT, LONG) record per antenna.
# LAT/LONG are passed through math.radians() by `dist`, i.e. they are degrees.
antennas_loc = pd.DataFrame(
    [
        ('SJDR1', -22.98, -43.19),
        ('LD1', -22.97, -43.39),
        ('LD2', -22.92, -43.24),
        ('TR', -22.87, -43.28),
        ('SVM', -22.89, -43.67),
    ],
    columns=['ANTENNA_ID', 'LAT', 'LONG'],
)
antennas_loc

Unnamed: 0,ANTENNA_ID,LAT,LONG
0,SJDR1,-22.98,-43.19
1,LD1,-22.97,-43.39
2,LD2,-22.92,-43.24
3,TR,-22.87,-43.28
4,SVM,-22.89,-43.67


In [4]:
def dist(x, y, radius_km=6373.0):
    """Great-circle distance between two points using the haversine formula.

    Parameters
    ----------
    x, y : sequence of two numbers
        ``(latitude, longitude)`` of each point in degrees; each component is
        converted with ``math.radians``.
    radius_km : float, optional
        Radius of the sphere the central angle is scaled by. The default,
        6373.0, is the value this function previously hard-coded
        (approximately the Earth's radius in kilometres).

    Returns
    -------
    float
        ``radius_km`` times the central angle, rounded to 4 decimal places.
    """
    lat1, lon1 = radians(x[0]), radians(x[1])
    lat2, lon2 = radians(y[0]), radians(y[1])

    dlat = lat2 - lat1
    dlon = lon2 - lon1

    a = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2
    # Floating-point rounding can leave `a` a hair above 1 for (near-)antipodal
    # points, which would make sqrt(1 - a) raise "math domain error".
    # Clamp it into the mathematically valid range [0, 1].
    a = min(1.0, max(0.0, a))
    c = 2 * atan2(sqrt(a), sqrt(1 - a))

    return round(radius_km * c, 4)

# Antenna ids label both axes of the distance matrix.
points = antennas_loc['ANTENNA_ID'].values

# pdist yields the condensed vector of pairwise distances, calling `dist`
# on every pair of (LAT, LONG) rows.
distances = pdist(antennas_loc.loc[:, ['LAT', 'LONG']].values, metric=dist)

# Expand the condensed vector into a square matrix indexed by antenna id.
result = pd.DataFrame(data=squareform(distances), index=points, columns=points)

result

Unnamed: 0,SJDR1,LD1,LD2,TR,SVM
SJDR1,0.0,20.5115,8.4123,15.3203,50.1784
LD1,20.5115,0.0,16.34,15.8341,30.0319
LD2,8.4123,16.34,0.0,6.9086,44.1838
TR,15.3203,15.8341,6.9086,0.0,40.0284
SVM,50.1784,30.0319,44.1838,40.0284,0.0


In [5]:
# `distances`, `points` and `result` are already computed by the cell directly
# above (the one that defines `dist`) with exactly these statements; the
# recomputation here was a verbatim copy, so this cell only displays the
# antenna-to-antenna distance matrix.
result

Unnamed: 0,SJDR1,LD1,LD2,TR,SVM
SJDR1,0.0,20.5115,8.4123,15.3203,50.1784
LD1,20.5115,0.0,16.34,15.8341,30.0319
LD2,8.4123,16.34,0.0,6.9086,44.1838
TR,15.3203,15.8341,6.9086,0.0,40.0284
SVM,50.1784,30.0319,44.1838,40.0284,0.0


In [7]:
# Reshape the square distance matrix into long format: one row per
# (ANTENNA_ID, PRESUMED_RESIDENCE) pair with the DISTANCE between them.
# The column names are set explicitly instead of relying on pandas' default
# 'index' / 'variable' / 'value' labels and an in-place rename afterwards.
melt_df = (
    result
    .rename_axis('ANTENNA_ID')   # name the row labels so reset_index() emits ANTENNA_ID
    .reset_index()
    .melt(id_vars='ANTENNA_ID', var_name='PRESUMED_RESIDENCE', value_name='DISTANCE')
)

melt_df

Unnamed: 0,ANTENNA_ID,PRESUMED_RESIDENCE,DISTANCE
0,SJDR1,SJDR1,0.0
1,LD1,SJDR1,20.5115
2,LD2,SJDR1,8.4123
3,TR,SJDR1,15.3203
4,SVM,SJDR1,50.1784
5,SJDR1,LD1,20.5115
6,LD1,LD1,0.0
7,LD2,LD1,16.34
8,TR,LD1,15.8341
9,SVM,LD1,30.0319


In [9]:
# Attach to every call record the distance between the antenna used and the
# user's presumed residence antenna.
# NOTE(review): this is an inner join (the pd.merge default), so a call whose
# ANTENNA_ID has no row in melt_df is dropped silently. In the displayed data,
# 'LD' appears in `merged` but only 'LD1'/'LD2' exist in `antennas_loc`, which
# is why user 1's 'LD' call is missing from the result below. Confirm the
# intended id, or merge with how='left' and handle the missing distances.
df_main = pd.merge(merged, melt_df, on=['ANTENNA_ID', 'PRESUMED_RESIDENCE'])

# Per user and residence: list the antennas called from and total the distances.
# The string 'sum' replaces the builtin `sum`, which newer pandas deprecates as
# an agg argument; the result is the same groupby sum.
df_final = (
    df_main
    .groupby(['USER', 'PRESUMED_RESIDENCE'])
    .agg({'ANTENNA_ID': list, 'DISTANCE': 'sum'})
    .reset_index()
    .rename(columns={'ANTENNA_ID': 'CALL_LOC', 'DISTANCE': 'TRAVELLED_DISTANCE'})
)

df_final


Unnamed: 0,USER,PRESUMED_RESIDENCE,CALL_LOC,TRAVELLED_DISTANCE
0,1,SJDR1,"[SJDR1, TR]",15.3203
1,2,LD1,"[LD1, SVM]",30.0319
2,3,LD1,[LD1],0.0


In [12]:
# Display the long-format distance table built by the melt cell: one row per
# (ANTENNA_ID, PRESUMED_RESIDENCE) pair with its DISTANCE.
melt_df

Unnamed: 0,ANTENNA_ID,PRESUMED_RESIDENCE,DISTANCE
0,SJDR1,SJDR1,0.0
1,LD1,SJDR1,20.5115
2,LD2,SJDR1,8.4123
3,TR,SJDR1,15.3203
4,SVM,SJDR1,50.1784
5,SJDR1,LD1,20.5115
6,LD1,LD1,0.0
7,LD2,LD1,16.34
8,TR,LD1,15.8341
9,SVM,LD1,30.0319
