# Case Study 1 (Working file)

## Imports

In [508]:
# imports
import pandas as pd
import matplotlib.pyplot as plt
from math import floor
from random import sample

### Code/Functions

In [589]:
def round_angle(angle):
    """Snap an orientation in degrees to the nearest value from ``angle_buckets()``.

    The angle is first reduced modulo 360, so negative angles and angles of
    360 or more are accepted. When the nearest bucket is 360 it is reported
    as 0, because both labels describe the same heading; with the default
    buckets the result is therefore one of 0, 45, ..., 315. When two buckets
    are equally close, the one yielded first by ``angle_buckets()`` wins.

    Parameters:
        angle: orientation in degrees (int or float).

    Returns:
        The selected bucket value, reduced modulo 360.
    """
    buckets = list(angle_buckets())
    angle = angle % 360
    # min() keeps the first minimal element, matching "first bucket wins" on ties
    nearest = min(buckets, key=lambda bucket: abs(bucket - angle))
    # 360 and 0 are the same direction; always report the canonical label
    return nearest % 360
    

def angle_buckets(start=0, end=360, step=45):
    """Yield evenly spaced angle values beginning at ``start``.

    The number of values is ``round((end - start) / step) + 1``, so with the
    defaults this yields 0, 45, ..., 360 (both ends included). If the span
    is not a whole multiple of ``step`` the last value can fall short of, or
    pass, ``end`` by less than one step.

    Parameters:
        start: first value yielded.
        end: value the sequence runs up to.
        step: spacing between consecutive values; a step of 0 raises
            ZeroDivisionError on first iteration.

    Yields:
        ``start + i * step`` for each i in the computed range.
    """
    # Count steps over the actual span; using end alone overshoots when start != 0
    count = int(round((end - start) / step)) + 1
    for i in range(count):
        yield start + i * step


def get_train_data_by_angle(ref_angle=225, data=None, angles=3):
    """Build a per-location signal table from rows near a reference orientation.

    ``ref_angle`` is snapped to a bucket with ``round_angle``; the ``angles``
    buckets centred on it (start offset ``floor(angles / 2)`` below it) are
    selected, wrapping around the circle so that the neighbours of 0 are 315
    and 45. Rows of ``data`` whose 'mapped_orientation' falls in those buckets
    are kept, the 'signal' column is averaged per ('mac', 'xy-loc') pair, and
    the result is pivoted.

    Parameters:
        ref_angle: reference orientation in degrees.
        data: DataFrame with 'mapped_orientation', 'mac', 'xy-loc' and
            'signal' columns. When omitted, the module-level ``df`` is looked
            up at call time.
        angles: how many adjacent buckets to keep; must be at most 7.

    Returns:
        DataFrame indexed by 'xy-loc' with one column per 'mac', holding the
        mean signal.

    Raises:
        AssertionError: if ``angles`` is greater than 7.
    """
    if data is None:
        # Resolve the default when called rather than when the function is
        # defined, so the current contents of df are used.
        data = df
    assert angles <= 7, "angles must be at most 7"

    # 360 is the same heading as 0, so work with one label per direction
    ref_angle = round_angle(ref_angle) % 360
    buckets = sorted({bucket % 360 for bucket in angle_buckets()})

    start_index = buckets.index(ref_angle) - int(floor(angles / 2))
    # Modular indexing wraps in both directions (below 0 and past the last bucket)
    keep_angles = [
        buckets[i % len(buckets)]
        for i in range(start_index, start_index + angles)
    ]

    # Normalise the stored labels too, so rows labelled 360 are treated as 0
    in_window = (data['mapped_orientation'] % 360).isin(keep_angles)
    data = data[in_window].copy()

    grouped_df = data.groupby(['mac', 'xy-loc'])['signal'].agg(['mean']).reset_index()
    grouped_df = pd.pivot_table(grouped_df, values='mean', columns='mac', index=['xy-loc'])
    return grouped_df

def calc_distance(sample_data, test_row):
    """Rank the rows of ``sample_data`` by their distance to ``test_row``.

    For each row, the distance is the square root of the summed squared
    differences over the columns named in the module-level ``macs_to_keep``.
    The distances are stored in a 'distance' column on ``sample_data`` itself
    (the frame passed in is modified), and a frame sorted ascending by that
    column is returned.
    """
    def _distance_to_test(candidate):
        squared_gaps = [(candidate[mac] - test_row[mac]) ** 2 for mac in macs_to_keep]
        return sum(squared_gaps) ** 0.5

    sample_data['distance'] = [
        _distance_to_test(candidate) for _, candidate in sample_data.iterrows()
    ]
    return sample_data.sort_values(['distance'])


def predict(sample_data, k=3, weight=False):
    """Estimate an (x, y) position from the first ``k`` rows of ``sample_data``.

    Only ``sample_data.head(k)`` is used and no sorting is done here, so the
    frame should already be ordered nearest-first. Each 'xy-loc' value is a
    '<x>-<y>' string; either coordinate may carry a leading minus sign.

    Parameters:
        sample_data: DataFrame with an 'xy-loc' column, plus a 'distance'
            column when ``weight`` is true.
        k: number of leading rows to combine.
        weight: if false (default), return the plain mean of the k
            locations. If true, weight each location by 1/distance; rows at
            distance 0 are exact matches and are averaged on their own.

    Returns:
        dict with keys 'pred_x' and 'pred_y'.

    Raises:
        ValueError: if an 'xy-loc' value has no '-' after its first character.
        KeyError: if ``weight`` is true and there is no 'distance' column.
    """
    def _split_loc(loc):
        # Search from index 1 so a leading minus on x is not taken as the
        # separator; whatever follows the separator (sign included) is y.
        sep = loc.find('-', 1)
        if sep == -1:
            raise ValueError(f"cannot parse xy-loc value {loc!r}")
        return loc[:sep], loc[sep + 1:]

    nearest = sample_data.head(k).copy()
    nearest['x'] = pd.to_numeric(nearest['xy-loc'].apply(lambda loc: _split_loc(loc)[0]))
    nearest['y'] = pd.to_numeric(nearest['xy-loc'].apply(lambda loc: _split_loc(loc)[1]))

    if weight:
        distances = pd.to_numeric(nearest['distance'])
        exact = distances == 0
        if exact.any():
            # 1/0 is undefined; an exact signal match decides the position alone
            nearest = nearest[exact]
        else:
            weights = 1.0 / distances
            total = weights.sum()
            return {
                'pred_x': (nearest['x'] * weights).sum() / total,
                'pred_y': (nearest['y'] * weights).sum() / total,
            }

    return {
        'pred_x': nearest['x'].mean(),
        'pred_y': nearest['y'].mean(),
    }

### Parameters

In [510]:
# MAC addresses retained when the trace frames are filtered with isin(macs_to_keep)
macs_to_keep = [
    '00:0f:a3:39:e1:c0',
    '00:0f:a3:39:dd:cd',
    '00:14:bf:b1:97:8a',
    '00:14:bf:3b:c7:c6',
    '00:14:bf:b1:97:90',
    '00:14:bf:b1:97:8d',
    '00:14:bf:b1:97:81',
    '02:00:42:55:31:00',
]

# Every value produced by angle_buckets() with its default arguments
angles = list(angle_buckets())

## Create DataFrame

In [511]:
# Column order of one parsed reading: the shared fields of a trace line
# followed by the per-MAC fields.
column_names = ['time', 'scanMac', 'posX', 'posY', 'posZ', 'orientation', 'mac', 'signal', 'channel', 'type']

# The with-statement closes the file on exit, so no explicit close() is needed
with open('../Data/offline.final.trace.txt', 'r') as file:
    lines = file.readlines()

instances = [] # will hold final data for dataframe

for line in lines:

    # strip "\n" from line
    line = line.rstrip('\n')

    # skip blank lines (nothing to parse; indexing line[0] would fail) and comments
    if not line.strip() or line.startswith('#'):
        continue

    base = [] # [t, id, x, y, z, degree]
    rows = [] # base + [mac, signal, channel, type]

    for keyvalue in line.split(';'):
        key, value = keyvalue.split('=')

        if key in ['t', 'id', 'degree']:
            base.append(value)
        elif key == 'pos':
            # pos (x, y, z)
            base += value.split(',')
        else:
            # mac addresses and metrics (signal, channel, type);
            # each MAC entry becomes its own row prefixed with the fields read so far
            row = base.copy()
            row.append(key)
            row += value.split(',')
            rows.append(row)

    instances.extend(rows)

# All values are still strings at this point; numeric conversion happens in later cells
df = pd.DataFrame(instances, columns = column_names)

print(df.info())
df.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1181628 entries, 0 to 1181627
Data columns (total 10 columns):
time           1181628 non-null object
scanMac        1181628 non-null object
posX           1181628 non-null object
posY           1181628 non-null object
posZ           1181628 non-null object
orientation    1181628 non-null object
mac            1181628 non-null object
signal         1181628 non-null object
channel        1181628 non-null object
type           1181628 non-null object
dtypes: object(10)
memory usage: 90.2+ MB
None


Unnamed: 0,time,scanMac,posX,posY,posZ,orientation,mac,signal,channel,type
0,1139643118358,00:02:2D:21:0F:33,0.0,0.0,0.0,0.0,00:14:bf:b1:97:8a,-38,2437000000,3
1,1139643118358,00:02:2D:21:0F:33,0.0,0.0,0.0,0.0,00:14:bf:b1:97:90,-56,2427000000,3
2,1139643118358,00:02:2D:21:0F:33,0.0,0.0,0.0,0.0,00:0f:a3:39:e1:c0,-53,2462000000,3
3,1139643118358,00:02:2D:21:0F:33,0.0,0.0,0.0,0.0,00:14:bf:b1:97:8d,-65,2442000000,3
4,1139643118358,00:02:2D:21:0F:33,0.0,0.0,0.0,0.0,00:14:bf:b1:97:81,-65,2422000000,3


In [512]:
# Column order of one parsed reading: the shared fields of a trace line
# followed by the per-MAC fields.
column_names = ['time', 'scanMac', 'posX', 'posY', 'posZ', 'orientation', 'mac', 'signal', 'channel', 'type']

# The with-statement closes the file on exit, so no explicit close() is needed
with open('../Data/online.final.trace.txt', 'r') as file:
    lines = file.readlines()

instances = [] # will hold final data for dataframe

for line in lines:

    # strip "\n" from line
    line = line.rstrip('\n')

    # skip blank lines (nothing to parse; indexing line[0] would fail) and comments
    if not line.strip() or line.startswith('#'):
        continue

    base = [] # [t, id, x, y, z, degree]
    rows = [] # base + [mac, signal, channel, type]

    for keyvalue in line.split(';'):
        key, value = keyvalue.split('=')

        if key in ['t', 'id', 'degree']:
            base.append(value)
        elif key == 'pos':
            # pos (x, y, z)
            base += value.split(',')
        else:
            # mac addresses and metrics (signal, channel, type);
            # each MAC entry becomes its own row prefixed with the fields read so far
            row = base.copy()
            row.append(key)
            row += value.split(',')
            rows.append(row)

    instances.extend(rows)

df_online = pd.DataFrame(instances, columns = column_names)

df_online.orientation = pd.to_numeric(df_online['orientation'])
# NOTE: this column is not carried through the groupby/pivot below
df_online['mapped_orientation'] = df_online['orientation'].apply(round_angle)

# location key in '<posX>-<posY>' form (posX/posY are still strings here)
df_online['xy-loc'] = df_online['posX'] + '-' + df_online['posY']


df_online['signal'] = pd.to_numeric(df_online['signal'])
df_online = df_online[df_online.mac.isin(macs_to_keep)].copy()


# mean signal per (mac, location), reshaped to one row per location and one column per mac
df_online = df_online.groupby(['mac', 'xy-loc'])['signal'].agg(['mean']).reset_index()
df_online = pd.pivot_table(df_online, values='mean', columns='mac', index=['xy-loc'])

## add dummy angle
# one randomly drawn value from angles per row (unseeded, so it differs between runs)
df_online['dummy_angle'] = df_online.apply(lambda x: sample(angles, 1)[0], axis=1)
df_online.head()

mac,00:0f:a3:39:dd:cd,00:0f:a3:39:e1:c0,00:14:bf:3b:c7:c6,00:14:bf:b1:97:81,00:14:bf:b1:97:8a,00:14:bf:b1:97:8d,00:14:bf:b1:97:90,02:00:42:55:31:00,dummy_angle
xy-loc,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0.0-0.05,-63.207207,-52.227273,-62.94898,-61.813953,-40.068966,-63.043011,-55.233333,-86.369863,315
0.15-9.42,-66.117117,-55.275229,-73.961905,-72.701031,-47.813084,-69.454545,-46.88,-88.712766,45
0.31-11.09,-67.054054,-51.709091,-70.082474,-70.098901,-54.088235,-69.131579,-53.886598,-86.985507,135
0.47-8.2,-74.153153,-49.5,-64.258065,-72.597701,-45.652893,-60.797468,-49.58,-85.341463,0
0.78-10.94,-71.40367,-53.263636,-66.96,-66.809524,-48.413793,-65.0,-54.846939,-88.032258,270


## Exploratory Data Analysis (EDA)

In [513]:
# Number of readings recorded for each MAC address
df.mac.value_counts()

00:0f:a3:39:e1:c0    145862
00:0f:a3:39:dd:cd    145619
00:14:bf:b1:97:8a    132962
00:14:bf:3b:c7:c6    126529
00:14:bf:b1:97:90    122315
00:14:bf:b1:97:8d    121325
00:14:bf:b1:97:81    120339
02:00:42:55:31:00    103887
02:64:fb:68:52:e6     50852
00:0f:a3:39:e0:4b     43508
02:2e:58:22:f1:ac     25112
00:0f:a3:39:e2:10     19162
02:37:fd:3b:54:b5      8732
02:b7:00:bb:a9:35      7602
02:5c:e0:50:49:de      6997
00:04:0e:5c:23:fc       418
00:30:bd:f8:7f:c5       301
00:e0:63:82:8b:a9       103
02:0a:3d:06:94:88         1
02:4f:99:43:30:cd         1
02:42:1c:4e:b5:c0         1
Name: mac, dtype: int64

In [514]:
# Summary of every column; all columns are still object dtype here, so
# describe() reports count / unique / top / freq rather than numeric statistics
df.describe()

Unnamed: 0,time,scanMac,posX,posY,posZ,orientation,mac,signal,channel,type
count,1181628,1181628,1181628.0,1181628.0,1181628.0,1181628.0,1181628,1181628,1181628,1181628
unique,146074,1,34.0,14.0,1.0,203.0,21,75,10,2
top,1139648662194,00:02:2D:21:0F:33,1.0,3.0,0.0,90.3,00:0f:a3:39:e1:c0,-59,2462000000,3
freq,21,1181628,109816.0,241614.0,1181628.0,26847.0,145862,40878,189774,978443


In [515]:
# Keep only readings from the MAC addresses listed in macs_to_keep
df = df.loc[df['mac'].isin(macs_to_keep)].copy()

In [516]:
# Distinct raw orientation values (still strings at this point)
df['orientation'].unique()

array(['0.0', '45.2', '90.3', '135.9', '180.3', '225.6', '270.1', '315.0',
       '45.1', '90.7', '135.2', '180.1', '225.7', '269.7', '315.3', '0.3',
       '45.5', '90.5', '135.7', '180.2', '225.0', '270.9', '315.2', '0.7',
       '45.7', '89.8', '225.2', '269.9', '0.4', '90.4', '134.8', '180.4',
       '0.2', '45.6', '135.0', '179.6', '225.4', '269.5', '179.2',
       '224.5', '89.9', '134.4', '269.4', '359.9', '90.1', '135.1',
       '270.2', '314.8', '313.8', '45.8', '90.8', '134.9', '225.9',
       '270.5', '315.9', '45.4', '134.7', '180.0', '224.9', '269.8',
       '314.4', '0.1', '44.3', '225.3', '315.7', '0.5', '44.8', '89.4',
       '226.0', '315.1', '0.8', '89.2', '180.8', '224.8', '315.8', '44.4',
       '180.5', '314.9', '44.9', '179.8', '315.5', '135.6', '0.6',
       '135.4', '179.9', '225.1', '270.0', '45.3', '270.4', '45.0',
       '90.6', '46.0', '0.9', '44.6', '135.3', '90.2', '134.2', '44.5',
       '134.5', '226.1', '269.6', '270.3', '315.4', '1.5', '135.8',
       

In [517]:
# Convert orientation to numbers, then snap each one to its bucket via round_angle
df['orientation'] = pd.to_numeric(df.orientation)
df['mapped_orientation'] = df.orientation.apply(round_angle)

In [518]:
# Signal values were parsed as strings; make them numeric for averaging
df['signal'] = pd.to_numeric(df['signal'])

In [519]:
# Location key in '<posX>-<posY>' form; posX and posY are still strings here
df['xy-loc'] = df.posX + '-' + df.posY
# Sanity check on the number of distinct locations in the offline data
assert df['xy-loc'].nunique(dropna=False) == 166

In [520]:
#df = df.groupby(['mac', 'xy-loc'])['signal'].agg(['mean']).reset_index()
#df = pd.pivot_table(df, values='mean', columns='mac', index=['xy-loc'])

In [521]:
# Sanity check: the pivoted online table has one row per location, 60 in total
assert len(df_online) == 60

### Include C0/CD

### Error Calcs: Euclidean distance

### Train/test split

### Scikit Learn (conf matrix dimensions)

### Weighted kNN

In [594]:
m = 3 # determines number of angles swept in each call to get training data
k = range(1, 10) #search space for kNN algo (not used by the loop below yet; predict is called with k=3)
weighted = False #return weighted predictions or unweighted
predictions = df_online.copy()

# The training table depends only on the reference angle (df and m are fixed
# for the whole loop), so build it once per distinct angle instead of once per row.
train_by_angle = {}
# Prediction dict for each online location, keyed by its index label
predicted_locations = {}

for index, sample_row in predictions.iterrows():
    ref_angle = sample_row['dummy_angle']
    if ref_angle not in train_by_angle:
        train_by_angle[ref_angle] = get_train_data_by_angle(data=df, ref_angle=ref_angle, angles=m)

    # calc_distance writes a 'distance' column into the frame it receives;
    # hand it a copy so the cached table is not modified
    sample_data = calc_distance(train_by_angle[ref_angle].copy(), sample_row)
    sample_data = sample_data.reset_index()
    p = predict(sample_data, k=3, weight=weighted)
    predicted_locations[index] = p
    print(p)
    print(sample_row)

{'pred_x': 1.0, 'pred_y': 5.0}
mac
00:0f:a3:39:dd:cd    -63.207207
00:0f:a3:39:e1:c0    -52.227273
00:14:bf:3b:c7:c6    -62.948980
00:14:bf:b1:97:81    -61.813953
00:14:bf:b1:97:8a    -40.068966
00:14:bf:b1:97:8d    -63.043011
00:14:bf:b1:97:90    -55.233333
02:00:42:55:31:00    -86.369863
dummy_angle          315.000000
Name: 0.0-0.05, dtype: float64
{'pred_x': 1.0, 'pred_y': 12.0}
mac
00:0f:a3:39:dd:cd   -66.117117
00:0f:a3:39:e1:c0   -55.275229
00:14:bf:3b:c7:c6   -73.961905
00:14:bf:b1:97:81   -72.701031
00:14:bf:b1:97:8a   -47.813084
00:14:bf:b1:97:8d   -69.454545
00:14:bf:b1:97:90   -46.880000
02:00:42:55:31:00   -88.712766
dummy_angle          45.000000
Name: 0.15-9.42, dtype: float64
{'pred_x': 0.6666666666666666, 'pred_y': 11.333333333333334}
mac
00:0f:a3:39:dd:cd    -67.054054
00:0f:a3:39:e1:c0    -51.709091
00:14:bf:3b:c7:c6    -70.082474
00:14:bf:b1:97:81    -70.098901
00:14:bf:b1:97:8a    -54.088235
00:14:bf:b1:97:8d    -69.131579
00:14:bf:b1:97:90    -53.886598
02:00:42:5

{'pred_x': 10.0, 'pred_y': 6.666666666666667}
mac
00:0f:a3:39:dd:cd    -59.172727
00:0f:a3:39:e1:c0    -41.669725
00:14:bf:3b:c7:c6    -52.329412
00:14:bf:b1:97:81    -58.087912
00:14:bf:b1:97:8a    -62.240385
00:14:bf:b1:97:8d    -51.673267
00:14:bf:b1:97:90    -68.349398
02:00:42:55:31:00    -81.404255
dummy_angle          270.000000
Name: 12.55-7.38, dtype: float64
{'pred_x': 13.0, 'pred_y': 4.0}
mac
00:0f:a3:39:dd:cd    -72.747748
00:0f:a3:39:e1:c0    -50.145455
00:14:bf:3b:c7:c6    -47.929293
00:14:bf:b1:97:81    -56.781609
00:14:bf:b1:97:8a    -57.382609
00:14:bf:b1:97:8d    -58.031915
00:14:bf:b1:97:90    -69.428571
02:00:42:55:31:00    -79.775281
dummy_angle          270.000000
Name: 12.95-5.25, dtype: float64
{'pred_x': 17.0, 'pred_y': 7.666666666666667}
mac
00:0f:a3:39:dd:cd    -65.281818
00:0f:a3:39:e1:c0    -53.227273
00:14:bf:3b:c7:c6    -53.549020
00:14:bf:b1:97:81    -53.061224
00:14:bf:b1:97:8a    -66.536585
00:14:bf:b1:97:8d    -44.180851
00:14:bf:b1:97:90    -77.11111

{'pred_x': 26.333333333333332, 'pred_y': 6.333333333333333}
mac
00:0f:a3:39:dd:cd    -78.036697
00:0f:a3:39:e1:c0    -56.596330
00:14:bf:3b:c7:c6    -64.306931
00:14:bf:b1:97:81    -53.967391
00:14:bf:b1:97:8a    -68.775510
00:14:bf:b1:97:8d    -53.684211
00:14:bf:b1:97:90    -70.620000
02:00:42:55:31:00    -64.626374
dummy_angle          360.000000
Name: 28.12-7.57, dtype: float64
{'pred_x': 24.333333333333332, 'pred_y': 5.0}
mac
00:0f:a3:39:dd:cd    -84.169492
00:0f:a3:39:e1:c0    -53.790909
00:14:bf:3b:c7:c6    -69.321429
00:14:bf:b1:97:81    -48.347368
00:14:bf:b1:97:8a    -68.840000
00:14:bf:b1:97:8d    -49.957447
00:14:bf:b1:97:90    -75.666667
02:00:42:55:31:00    -68.569892
dummy_angle          225.000000
Name: 29.58-7.93, dtype: float64
{'pred_x': 5.333333333333333, 'pred_y': 7.333333333333333}
mac
00:0f:a3:39:dd:cd   -62.518519
00:0f:a3:39:e1:c0   -51.231481
00:14:bf:3b:c7:c6   -63.433333
00:14:bf:b1:97:81   -62.395349
00:14:bf:b1:97:8a   -51.428571
00:14:bf:b1:97:8d   -52.05

In [None]:
#sample = pd.DataFrame(df_online.iloc[0, :]).T
#diffs = []
#for index, row in df.iterrows():
#        diff = sum([(row[col]-sample[col]).values[0]**2 for col in macs_to_keep])**0.5
#        df.loc[index, 'distance'] = diff

In [None]:
# Bare expression: shows the offline readings frame as the cell output when run
df

In [567]:
# Bare expression: shows the predictions frame (a copy of df_online) as the cell output
predictions

mac,00:0f:a3:39:dd:cd,00:0f:a3:39:e1:c0,00:14:bf:3b:c7:c6,00:14:bf:b1:97:81,00:14:bf:b1:97:8a,00:14:bf:b1:97:8d,00:14:bf:b1:97:90,02:00:42:55:31:00,dummy_angle
xy-loc,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0.0-0.05,-63.207207,-52.227273,-62.94898,-61.813953,-40.068966,-63.043011,-55.233333,-86.369863,315
0.15-9.42,-66.117117,-55.275229,-73.961905,-72.701031,-47.813084,-69.454545,-46.88,-88.712766,45
0.31-11.09,-67.054054,-51.709091,-70.082474,-70.098901,-54.088235,-69.131579,-53.886598,-86.985507,135
0.47-8.2,-74.153153,-49.5,-64.258065,-72.597701,-45.652893,-60.797468,-49.58,-85.341463,0
0.78-10.94,-71.40367,-53.263636,-66.96,-66.809524,-48.413793,-65.0,-54.846939,-88.032258,270
0.93-11.69,-69.990741,-57.963636,-70.443396,-70.580247,-43.663462,-65.593023,-47.270833,-91.0,135
1.08-12.19,-73.4375,-54.827273,-69.201923,-67.925532,-52.008197,-71.586957,-51.666667,-86.978495,45
1.24-3.93,-71.110092,-56.472727,-69.627451,-59.761364,-38.917526,-71.666667,-53.233333,-80.539326,360
1.39-6.61,-59.756757,-51.281818,-62.23913,-64.566265,-48.92381,-60.79798,-50.490566,-77.767442,225
1.52-9.32,-65.383178,-50.366972,-63.359223,-67.48913,-50.041667,-65.103448,-49.385417,-78.655172,45
