### Computing distances
This script is used to develop the functions to compute relative distances between observations to set  
the stage to connecting nodes together in the Directed Acyclic Graph  
Based on: https://datavalet.atlassian.net/wiki/spaces/PM/pages/1626308609/Counting+with+position

In [1]:
import sys  
sys.path.insert(0, './Functions/')

import Window

In [17]:
import itertools
import numpy as np
import math
import pandas as pd
import copy

In [3]:
channel_df = pd.read_csv(f'channel.csv')

In [133]:
def distance_function(fspl, frequency):
    '''
    Returns a distance in meter from the parameters:
    fspl: basically the signal strength observed in absolute value
    frequency: the frequency in ghz
    '''
    return 10**((abs(fspl)-(20*math.log(frequency, 10))-32.45)/20)

def return_channel_frequency(channel_df, channel_num):
    return channel_df[channel_df['CHANNEL NUMBER']==channel_num]['FREQUENCY']/1000

def cycle_and_apply(x):
    signal_strength_list = []
    ap_name_list = []
    for ap_observation in x:
        signal_strength_list.append(distance_function(ap_observation[1], ap_observation[2]))
        ap_name_list.append(ap_observation[0])
        
    return signal_strength_list, ap_name_list, len(x)

def transform_dict_entry(x):
    x['distance_by_ap'] = x['signal_strength'].apply(lambda x: cycle_and_apply(x))
    return x

def explode_outer(x, column, new_columns):
    x[new_columns] = pd.DataFrame(x[column].tolist())
    return x

In [143]:
%%bigquery df_onroute_position_window
Select *
from data-prod-270222.datascience.position_raw

Query complete after 0.00s: 100%|██████████| 1/1 [00:00<00:00, 807.37query/s] 
Downloading: 100%|██████████| 82427/82427 [00:00<00:00, 156205.38rows/s]


In [144]:
df_onroute_position_window = df_onroute_position_window.sort_values(['Timestamp','device_mac','ap_mac']).reset_index(drop=True)
df_onroute_position_window['signal_strength'] = df_onroute_position_window[['ap_mac','signal_strength', 'channel']].values.tolist()

kwargs = {
 'min_window_len':5,
 'min_value_len':1   
}
window_index = Window.generate_window_dict(df_onroute_position_window, 'Timestamp', window_size=1800, window_frequency=900, **kwargs)
sub_window_index = dict(itertools.islice(window_index.items(), 100))
df_subset_list = {}

for key, value in sub_window_index.items():
    df_subset_list[key] = df_onroute_position_window[value[0]:value[1]].groupby(['device_mac'])['signal_strength'].apply(lambda x: list(np.unique(x))).apply(list).reset_index()

copy_dict = copy.deepcopy(df_subset_list)
#computing the distance by observation and by ap
copy_dict = dict((k, transform_dict_entry(v)) for k,v in copy_dict.items())    
#exploding the array produced into a distance column and coresponding ap_name column and the numbers of ap encountered
copy_dict = dict((k, explode_outer(v, 'distance_by_ap', ['distance','ap_name','number_ap'])) for k,v in copy_dict.items())  
#cleaning the resulting array
copy_dict = dict((k, v.drop(['signal_strength', 'distance_by_ap'], axis=1)) for k,v in copy_dict.items())

In [145]:
copy_dict[next(iter(copy_dict))]

Unnamed: 0,device_mac,distance,ap_name,number_ap
0,36BF4B4DBF3C,[30.62720143104461],[CCD083CF729E],1
1,462E2AC8A109,"[6.856576325887005, 8.631918174565437]","[CCD083CF7016, CCD083CF729E]",2
2,56D97A9C14EB,[0.12060684710619668],[CCD083CF729E],1
3,62BC21B94DEC,[0.42792924186525283],[CCD083CF729E],1
4,7685E3A9E9E7,[7.693205170975173],[CCD083CF7016],1
5,861C90C9EF00,[2.5792691065487903],[CCD083CF7016],1
6,AAFEA44A6DF7,"[0.5446372169194125, 24.32805084726665]","[CCD083CF7016, CCD083CF729E]",2
7,C4AC596D5B43,[68.56576325887012],[CCD083CF729E],1
8,CCD0837729E0,"[4.854084305219787, 1.9324457683791878]","[CCD083CF7016, CCD083CF729E]",2
9,D807B6C4D06E,[75.60142746036222],[CCD083CF7016],1
