### Computing distances
This script is used to develop the functions to compute relative distances between observations to set  
the stage to connecting nodes together in the Directed Acyclic Graph  
Based on: https://datavalet.atlassian.net/wiki/spaces/PM/pages/1626308609/Counting+with+position

In [396]:
import sys  
sys.path.insert(0, './Functions/')

import Window

In [397]:
import itertools
from itertools import chain
import numpy as np
import math
import pandas as pd
import copy
import uuid
from numpy import linalg as LA

In [398]:
channel_df = pd.read_csv(f'channel.csv')

In [399]:
def distance_function(fspl, frequency):
    '''
    Returns a distance in meter from the parameters:
    fspl: basically the signal strength observed in absolute value
    frequency: the frequency in ghz
    '''
    return 10**((abs(fspl)-(20*math.log(frequency, 10))-32.45)/20)

def return_channel_frequency(channel_df, channel_num):
    return channel_df[channel_df['CHANNEL NUMBER']==channel_num]['FREQUENCY']/1000

def cycle_and_apply(x):
    signal_strength_list = []
    ap_name_list = []
    for ap_observation in x:
        signal_strength_list.append(distance_function(ap_observation[1], ap_observation[2]))
        ap_name_list.append(ap_observation[0])
        
    return signal_strength_list, ap_name_list, len(x)

def generate_uuid(x):
    return uuid.uuid4()

def transform_dict_entry(x, column, new_colum, fct):
    x[new_colum] = x[column].apply(lambda x: fct(x))
    return x

def explode_outer(x, column, new_columns):
    x[new_columns] = pd.DataFrame(x[column].tolist())
    return x

def fetch_one_item(master_dict, key, item_position):
    return master_dict[key].loc[item_position]

def compare_set_column(x, list_to_compare):
    return set(x) == set(list_to_compare)

def subs_list(x, float_to_subs):
    return [LA.norm(np.subtract(np.array(x),np.array(float_to_subs)))]

def compare_list_distance(x, boundary_to_compare):
    return x[0] < boundary_to_compare

def compute_distance_window(master_dict, item, key, discriminant_walking, time_window_length):
    working_df = master_dict[key][master_dict[key]['number_ap'] == item['number_ap']]
    working_df['ap_name_bool'] = working_df['ap_name'].apply(lambda x: compare_set_column(x,item['ap_name']))
    working_df = working_df[working_df['ap_name_bool']]
    
    boundary = discriminant_walking*time_window_length*2
    
    working_df['distance'] = working_df['distance'].apply(lambda x: subs_list(x,item['distance']))
    working_df['distance_bool'] = working_df['distance'].apply(lambda x: compare_list_distance(x,boundary))
    working_df = working_df[working_df['distance_bool']]
    #adding the item name as a column
    working_df['source_device_mac'] = item['device_mac']
    return working_df[['source_device_mac','device_mac','distance','ap_name','number_ap']].to_dict('records')

def compute_n_windows(keys, **kwargs):
    list_of_dicts = []
    i = 1.0
    for key in keys:
        kwargs['discriminant_walking'] = kwargs['discriminant_walking'] * i
        list_of_dicts.append(compute_distance_window(key = key, **kwargs))
        i = i*1.10
    return list_of_dicts
    

In [400]:
%%bigquery df_onroute_position_window
Select *
from data-prod-270222.datascience.position_raw

Query complete after 0.00s: 100%|██████████| 1/1 [00:00<00:00, 688.83query/s] 
Downloading: 100%|██████████| 82228/82228 [00:00<00:00, 137839.19rows/s]


In [401]:
df_onroute_position_window = df_onroute_position_window.sort_values(['Timestamp','device_mac','ap_mac']).reset_index(drop=True)
df_onroute_position_window['signal_strength'] = df_onroute_position_window[['ap_mac','signal_strength', 'channel']].values.tolist()

kwargs = {
 'min_window_len':2,
 'min_value_len':1   
}
window_index = Window.generate_window_dict(df_onroute_position_window, 'Timestamp', window_size=1800, window_frequency=900, **kwargs)
sub_window_index = dict(itertools.islice(window_index.items(), 100))
df_subset_list = {}

for key, value in sub_window_index.items():
    df_subset_list[key] = df_onroute_position_window[value[0]:value[1]].groupby(['device_mac'])['signal_strength'].apply(lambda x: list(np.unique(x))).apply(list).reset_index()

copy_dict = copy.deepcopy(df_subset_list)
#computing the distance by observation and by ap
copy_dict = dict((k, transform_dict_entry(v, 'signal_strength', 'distance_by_ap', cycle_and_apply)) for k,v in copy_dict.items())  
#exploding the array produced into a distance column and coresponding ap_name column and the numbers of ap encountered
copy_dict = dict((k, explode_outer(v, 'distance_by_ap', ['distance','ap_name','number_ap'])) for k,v in copy_dict.items())
#cleaning the resulting array
copy_dict = dict((k, v.drop(['signal_strength', 'distance_by_ap'], axis=1)) for k,v in copy_dict.items())
#replacing the device_mac by a true random uuid in order to avoid breaking the next algorithms
copy_dict = dict((k, transform_dict_entry(v, 'device_mac', 'device_mac', generate_uuid)) for k,v in copy_dict.items()) 

In [407]:
#storing the keys only
dict_keys = list(copy_dict.keys())
first_item = fetch_one_item(copy_dict, dict_keys[0], 8)
kwargs = {
    'discriminant_walking' : 1.42,
    'time_window_length' : dict_keys[0][1]-dict_keys[0][0],
    'master_dict' : copy_dict,
    'item' : first_item
}
dicts = compute_n_windows(dict_keys[1:8], **kwargs)
tuple_dataframe = pd.DataFrame.from_dict(list(chain.from_iterable(dicts)))
tuple_dataframe

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Unnamed: 0,source_device_mac,device_mac,distance,ap_name,number_ap
0,41c6a99f-fdb4-4218-93ce-0837cfbec729,b33a8df9-3117-4f92-9bbd-26c456c53cda,[1.281151273182968],[CCD083CF7016],1
1,41c6a99f-fdb4-4218-93ce-0837cfbec729,77d60f76-3bac-4d5d-a9a4-e73f588d4897,[1.281151273182968],[CCD083CF7016],1
2,41c6a99f-fdb4-4218-93ce-0837cfbec729,4a92469b-7e02-4776-82e1-8dacd25fec60,[1.4057265610679004],[CCD083CF7016],1
3,41c6a99f-fdb4-4218-93ce-0837cfbec729,ec2ec9bd-1584-447b-8373-188d1bd11453,[3.5140447758566054],[CCD083CF7016],1
4,41c6a99f-fdb4-4218-93ce-0837cfbec729,300586a7-5207-4ea3-99d9-4d0e25b4e694,[1.4057265610679004],[CCD083CF7016],1
5,41c6a99f-fdb4-4218-93ce-0837cfbec729,90e119f5-2cb7-496e-b613-2bdb2849d7e0,[1.5930244291313902],[CCD083CF7016],1
6,41c6a99f-fdb4-4218-93ce-0837cfbec729,8764d18a-e809-40a9-92d6-be082dd7797a,[5.981660408003277],[CCD083CF7016],1
7,41c6a99f-fdb4-4218-93ce-0837cfbec729,3af1b605-9cc9-414f-a1d6-c319b497d212,[1.3211599325700267],[CCD083CF7016],1
8,41c6a99f-fdb4-4218-93ce-0837cfbec729,86164998-eb41-4849-b710-fc2d0f5dada0,[7.140098350205681],[CCD083CF7016],1
9,41c6a99f-fdb4-4218-93ce-0837cfbec729,7946ede2-6b92-4d80-9c6e-54459e3d9107,[9.022322459571175],[CCD083CF7016],1
