# Case Study 1 (Working file)

## Imports

In [226]:
# imports
import pandas as pd
import matplotlib.pyplot as plt
from math import floor

### Parameters

In [248]:
# Access-point MAC addresses retained for the analysis; readings from any
# other MAC are filtered out of the offline and online frames.
macs_to_keep = [
    '00:0f:a3:39:e1:c0',
    '00:0f:a3:39:dd:cd',
    '00:14:bf:b1:97:8a',
    '00:14:bf:3b:c7:c6',
    '00:14:bf:b1:97:90',
    '00:14:bf:b1:97:8d',
    '00:14:bf:b1:97:81',
    '02:00:42:55:31:00',
]

In [227]:
def round_angle(angle):
    """Snap an angle in degrees to the nearest orientation bucket.

    The buckets come from ``angle_buckets()``. The angle is first reduced
    modulo 360 so that negative values and values of 360 or more are
    measured on the circle, and a result of 360 is reported as 0 because
    both denote the same direction (e.g. 359.9 and 0.3 both map to 0).

    Parameters
    ----------
    angle : int or float
        Angle in degrees.

    Returns
    -------
    int
        The nearest bucket, in the range [0, 360). When the angle is exactly
        halfway between two buckets, the lower bucket is returned.
    """
    buckets = list(angle_buckets())
    angle = angle % 360
    # min() keeps the first of equally-near buckets, i.e. the lower one.
    nearest = min(buckets, key=lambda bucket: abs(bucket - angle))
    # The bucket list ends at 360, which is the same direction as 0.
    return nearest % 360
    

def angle_buckets(start=0, end=360, step=45):
    """Yield evenly spaced angle buckets from ``start`` up to ``end`` inclusive.

    With the defaults this yields 0, 45, 90, ..., 315, 360.

    Parameters
    ----------
    start : int or float
        First bucket value.
    end : int or float
        Upper bound; no yielded value exceeds it.
    step : int or float
        Spacing between consecutive buckets; must be positive.

    Yields
    ------
    int or float
        ``start + i * step`` for every ``i`` such that the value is <= ``end``.

    Raises
    ------
    ValueError
        If ``step`` is not positive (raised when iteration begins).
    """
    if step <= 0:
        raise ValueError("step must be positive")
    # Count buckets over the span (end - start) so that a non-zero start
    # does not run past ``end``; floor so a partial last step is dropped.
    n_buckets = int((end - start) // step) + 1
    for i in range(n_buckets):
        yield start + i * step


def get_train_data_by_angle(ref_angle=225, data=None, angles=3):
    """Build a mean-signal table from readings taken near a reference angle.

    The reference angle is snapped to its bucket, a window of ``angles``
    neighbouring buckets centred on it is selected (wrapping around the
    0/360 boundary), and only rows whose bucketed orientation falls in that
    window are kept. The kept rows are averaged per (mac, xy-loc) and
    pivoted so that each location is a row and each MAC is a column.

    Parameters
    ----------
    ref_angle : int or float
        Reference orientation in degrees.
    data : pandas.DataFrame, optional
        Frame with ``orientation``, ``mac``, ``xy-loc`` and ``signal``
        columns. If a ``mapped_orientation`` column is present it is used as
        the bucketed orientation; otherwise it is derived from
        ``orientation``. Defaults to the notebook-level ``df``, looked up
        when the function is called rather than when it is defined.
    angles : int
        Number of neighbouring buckets to keep; at most 7.

    Returns
    -------
    pandas.DataFrame
        Pivot table indexed by ``xy-loc`` with one column per ``mac`` holding
        the mean ``signal``.

    Raises
    ------
    AssertionError
        If ``angles`` is greater than 7.
    """
    if data is None:
        # Resolve the default lazily so the function can be defined before
        # ``df`` exists and always sees the current ``df``.
        data = df
    assert angles <= 7

    # 0 and 360 are the same direction, so work with the distinct buckets.
    buckets = sorted({bucket % 360 for bucket in angle_buckets()})
    ref_angle = round_angle(ref_angle) % 360

    # Centre the window on the reference bucket and wrap around the circle.
    start_index = buckets.index(ref_angle) - angles // 2
    keep_angles = [buckets[i % len(buckets)]
                   for i in range(start_index, start_index + angles)]
    print(keep_angles)

    # Compare bucketed orientations; raw readings such as 225.6 would not
    # equal any bucket value.
    if 'mapped_orientation' in data.columns:
        snapped = data['mapped_orientation']
    else:
        snapped = pd.to_numeric(data['orientation']).apply(round_angle)
    snapped = snapped % 360

    data = data[snapped.isin(keep_angles)].copy()
    grouped_df = data.groupby(['mac', 'xy-loc'])['signal'].agg(['mean']).reset_index()
    grouped_df = pd.pivot_table(grouped_df, values='mean', columns='mac', index=['xy-loc'])
    return grouped_df

## Create DataFrame

In [228]:
column_names = ['time', 'scanMac', 'posX', 'posY', 'posZ', 'orientation', 'mac', 'signal', 'channel', 'type']

# The `with` block closes the file on exit, so no explicit close() is needed.
with open('../Data/offline.final.trace.txt', 'r') as file:
    lines = file.readlines()

instances = [] # will hold final data for dataframe

for line in lines:

    # strip "\n" from line
    line = line.rstrip('\n')

    # skip blank lines and comments (startswith is safe on an empty string)
    if not line or line.startswith('#'):
        continue

    base = [] # [t, id, x, y, z, degree]
    rows = [] # base + [mac, signal, channel, type]

    for keyvalue in line.split(';'):
        key, value = keyvalue.split('=')

        if key in ['t', 'id', 'degree']:
            base.append(value)
        elif key == 'pos':
            # pos (x, y, z)
            base += value.split(',')
        else:
            # mac addresses and metrics (signal, channel, type):
            # one output row per access point seen in this scan
            rows.append(base + [key] + value.split(','))

    instances += rows

df = pd.DataFrame(instances, columns = column_names)

# info() prints its report itself and returns None, so it is not wrapped in print().
df.info()
df.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1181628 entries, 0 to 1181627
Data columns (total 10 columns):
time           1181628 non-null object
scanMac        1181628 non-null object
posX           1181628 non-null object
posY           1181628 non-null object
posZ           1181628 non-null object
orientation    1181628 non-null object
mac            1181628 non-null object
signal         1181628 non-null object
channel        1181628 non-null object
type           1181628 non-null object
dtypes: object(10)
memory usage: 90.2+ MB
None


Unnamed: 0,time,scanMac,posX,posY,posZ,orientation,mac,signal,channel,type
0,1139643118358,00:02:2D:21:0F:33,0.0,0.0,0.0,0.0,00:14:bf:b1:97:8a,-38,2437000000,3
1,1139643118358,00:02:2D:21:0F:33,0.0,0.0,0.0,0.0,00:14:bf:b1:97:90,-56,2427000000,3
2,1139643118358,00:02:2D:21:0F:33,0.0,0.0,0.0,0.0,00:0f:a3:39:e1:c0,-53,2462000000,3
3,1139643118358,00:02:2D:21:0F:33,0.0,0.0,0.0,0.0,00:14:bf:b1:97:8d,-65,2442000000,3
4,1139643118358,00:02:2D:21:0F:33,0.0,0.0,0.0,0.0,00:14:bf:b1:97:81,-65,2422000000,3


In [250]:
# NOTE(review): the parsing loop below is the same as the one used for the
# offline trace; it is a candidate for a shared helper function.
column_names = ['time', 'scanMac', 'posX', 'posY', 'posZ', 'orientation', 'mac', 'signal', 'channel', 'type']

# The `with` block closes the file on exit, so no explicit close() is needed.
with open('../Data/online.final.trace.txt', 'r') as file:
    lines = file.readlines()

instances = [] # will hold final data for dataframe

for line in lines:

    # strip "\n" from line
    line = line.rstrip('\n')

    # skip blank lines and comments (startswith is safe on an empty string)
    if not line or line.startswith('#'):
        continue

    base = [] # [t, id, x, y, z, degree]
    rows = [] # base + [mac, signal, channel, type]

    for keyvalue in line.split(';'):
        key, value = keyvalue.split('=')

        if key in ['t', 'id', 'degree']:
            base.append(value)
        elif key == 'pos':
            # pos (x, y, z)
            base += value.split(',')
        else:
            # mac addresses and metrics (signal, channel, type):
            # one output row per access point seen in this scan
            rows.append(base + [key] + value.split(','))

    instances += rows

df_online = pd.DataFrame(instances, columns = column_names)

df_online['orientation'] = pd.to_numeric(df_online['orientation'])
df_online['mapped_orientation'] = df_online['orientation'].apply(round_angle)

df_online['xy-loc'] = df_online['posX'] + '-' + df_online['posY']
# info() prints its report itself and returns None, so it is not wrapped in print().
df_online.info()

df_online['signal'] = pd.to_numeric(df_online['signal'])
df_online = df_online[df_online.mac.isin(macs_to_keep)]

# Average the signal per (mac, location), then reshape so each location is a
# row and each MAC a column. From here on df_online is the pivoted table.
df_online = df_online.groupby(['mac', 'xy-loc'])['signal'].agg(['mean']).reset_index()
df_online = pd.pivot_table(df_online, values='mean', columns='mac', index=['xy-loc'])

df_online.head()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 53303 entries, 0 to 53302
Data columns (total 12 columns):
time                  53303 non-null object
scanMac               53303 non-null object
posX                  53303 non-null object
posY                  53303 non-null object
posZ                  53303 non-null object
orientation           53303 non-null float64
mac                   53303 non-null object
signal                53303 non-null object
channel               53303 non-null object
type                  53303 non-null object
mapped_orientation    53303 non-null int64
xy-loc                53303 non-null object
dtypes: float64(1), int64(1), object(10)
memory usage: 4.9+ MB
None


mac,00:0f:a3:39:dd:cd,00:0f:a3:39:e1:c0,00:14:bf:3b:c7:c6,00:14:bf:b1:97:81,00:14:bf:b1:97:8a,00:14:bf:b1:97:8d,00:14:bf:b1:97:90,02:00:42:55:31:00
xy-loc,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0.0-0.05,-63.207207,-52.227273,-62.94898,-61.813953,-40.068966,-63.043011,-55.233333,-86.369863
0.15-9.42,-66.117117,-55.275229,-73.961905,-72.701031,-47.813084,-69.454545,-46.88,-88.712766
0.31-11.09,-67.054054,-51.709091,-70.082474,-70.098901,-54.088235,-69.131579,-53.886598,-86.985507
0.47-8.2,-74.153153,-49.5,-64.258065,-72.597701,-45.652893,-60.797468,-49.58,-85.341463
0.78-10.94,-71.40367,-53.263636,-66.96,-66.809524,-48.413793,-65.0,-54.846939,-88.032258


## Exploratory Data Analysis (EDA)

In [230]:
# Number of readings per access-point MAC in the offline data. In the output
# below, the eight most frequent MACs are the ones listed in macs_to_keep.
df['mac'].value_counts()

00:0f:a3:39:e1:c0    145862
00:0f:a3:39:dd:cd    145619
00:14:bf:b1:97:8a    132962
00:14:bf:3b:c7:c6    126529
00:14:bf:b1:97:90    122315
00:14:bf:b1:97:8d    121325
00:14:bf:b1:97:81    120339
02:00:42:55:31:00    103887
02:64:fb:68:52:e6     50852
00:0f:a3:39:e0:4b     43508
02:2e:58:22:f1:ac     25112
00:0f:a3:39:e2:10     19162
02:37:fd:3b:54:b5      8732
02:b7:00:bb:a9:35      7602
02:5c:e0:50:49:de      6997
00:04:0e:5c:23:fc       418
00:30:bd:f8:7f:c5       301
00:e0:63:82:8b:a9       103
02:0a:3d:06:94:88         1
02:4f:99:43:30:cd         1
02:42:1c:4e:b5:c0         1
Name: mac, dtype: int64

In [231]:
# Summary of the offline frame. Every column is still object dtype (strings)
# at this point, so describe() reports count/unique/top/freq rather than
# numeric statistics.
df.describe()

Unnamed: 0,time,scanMac,posX,posY,posZ,orientation,mac,signal,channel,type
count,1181628,1181628,1181628.0,1181628.0,1181628.0,1181628.0,1181628,1181628,1181628,1181628
unique,146074,1,34.0,14.0,1.0,203.0,21,75,10,2
top,1139648662194,00:02:2D:21:0F:33,1.0,3.0,0.0,90.3,00:0f:a3:39:e1:c0,-59,2462000000,3
freq,21,1181628,109816.0,241614.0,1181628.0,26847.0,145862,40878,189774,978443


In [232]:
# Keep only the readings whose MAC is listed in macs_to_keep; copy so later
# column assignments operate on an independent frame.
df = df.loc[df['mac'].isin(macs_to_keep)].copy()

In [233]:
# Distinct x positions in the offline data (values are still strings here).
df.posX.unique()

array(['0.0', '1.0', '2.0', '3.0', '4.0', '5.0', '6.0', '7.0', '8.0',
       '9.0', '10.0', '11.0', '12.0', '13.0', '14.0', '15.0', '16.0',
       '17.0', '18.0', '19.0', '20.0', '21.0', '22.0', '23.0', '24.0',
       '25.0', '26.0', '27.0', '28.0', '29.0', '30.0', '31.0', '32.0',
       '33.0'], dtype=object)

In [234]:
# Distinct y positions in the offline data (values are still strings here).
df.posY.unique()

array(['0.0', '1.0', '2.0', '3.0', '4.0', '5.0', '6.0', '7.0', '8.0',
       '9.0', '10.0', '11.0', '12.0', '13.0'], dtype=object)

In [235]:
# Distinct raw orientation strings. The values in the output sit close to
# multiples of 45 degrees (e.g. '45.2', '90.3', '135.9') rather than exactly on them.
df.orientation.unique()

array(['0.0', '45.2', '90.3', '135.9', '180.3', '225.6', '270.1', '315.0',
       '45.1', '90.7', '135.2', '180.1', '225.7', '269.7', '315.3', '0.3',
       '45.5', '90.5', '135.7', '180.2', '225.0', '270.9', '315.2', '0.7',
       '45.7', '89.8', '225.2', '269.9', '0.4', '90.4', '134.8', '180.4',
       '0.2', '45.6', '135.0', '179.6', '225.4', '269.5', '179.2',
       '224.5', '89.9', '134.4', '269.4', '359.9', '90.1', '135.1',
       '270.2', '314.8', '313.8', '45.8', '90.8', '134.9', '225.9',
       '270.5', '315.9', '45.4', '134.7', '180.0', '224.9', '269.8',
       '314.4', '0.1', '44.3', '225.3', '315.7', '0.5', '44.8', '89.4',
       '226.0', '315.1', '0.8', '89.2', '180.8', '224.8', '315.8', '44.4',
       '180.5', '314.9', '44.9', '179.8', '315.5', '135.6', '0.6',
       '135.4', '179.9', '225.1', '270.0', '45.3', '270.4', '45.0',
       '90.6', '46.0', '0.9', '44.6', '135.3', '90.2', '134.2', '44.5',
       '134.5', '226.1', '269.6', '270.3', '315.4', '1.5', '135.8',
       

In [236]:
# Parse orientation as a number, then snap each reading to its bucket.
df['orientation'] = pd.to_numeric(df.orientation)
df['mapped_orientation'] = df['orientation'].apply(round_angle)

In [237]:
# Signal strength arrives as text; convert it so it can be averaged.
df['signal'] = pd.to_numeric(df['signal'])

In [238]:
# Location key of the form "<posX>-<posY>", followed by the number of
# distinct locations.
df['xy-loc'] = df['posX'].str.cat(df['posY'], sep='-')
df['xy-loc'].unique().size

166

In [239]:
# scanMac has a single unique value (see the describe() output), so it
# carries no information. errors='ignore' keeps this cell re-runnable: a
# second run no longer fails on the already-removed column.
df = df.drop(columns=['scanMac'], errors='ignore')

In [245]:
# Preview only the first rows instead of rendering the whole frame.
# NOTE(review): the stored output below shows raw per-reading columns, but
# the In [250] cell leaves df_online as a pivoted table; the output looks
# stale from out-of-order execution and should be regenerated.
df_online.head(10)

Unnamed: 0,time,scanMac,posX,posY,posZ,orientation,mac,signal,channel,type,mapped_orientation,xy-loc
0,1139692477303,00:02:2D:21:0F:33,0.0,0.05,0.0,130.5,00:14:bf:b1:97:8a,-43,2437000000,3,135,0.0-0.05
1,1139692477303,00:02:2D:21:0F:33,0.0,0.05,0.0,130.5,00:0f:a3:39:e1:c0,-52,2462000000,3,135,0.0-0.05
2,1139692477303,00:02:2D:21:0F:33,0.0,0.05,0.0,130.5,00:14:bf:3b:c7:c6,-62,2432000000,3,135,0.0-0.05
3,1139692477303,00:02:2D:21:0F:33,0.0,0.05,0.0,130.5,00:14:bf:b1:97:81,-58,2422000000,3,135,0.0-0.05
4,1139692477303,00:02:2D:21:0F:33,0.0,0.05,0.0,130.5,00:14:bf:b1:97:8d,-62,2442000000,3,135,0.0-0.05
5,1139692477303,00:02:2D:21:0F:33,0.0,0.05,0.0,130.5,00:14:bf:b1:97:90,-57,2427000000,3,135,0.0-0.05
6,1139692477303,00:02:2D:21:0F:33,0.0,0.05,0.0,130.5,00:0f:a3:39:e0:4b,-79,2462000000,3,135,0.0-0.05
7,1139692477303,00:02:2D:21:0F:33,0.0,0.05,0.0,130.5,00:0f:a3:39:e2:10,-88,2437000000,3,135,0.0-0.05
8,1139692477303,00:02:2D:21:0F:33,0.0,0.05,0.0,130.5,00:0f:a3:39:dd:cd,-64,2412000000,3,135,0.0-0.05
9,1139692477303,00:02:2D:21:0F:33,0.0,0.05,0.0,130.5,02:64:fb:68:52:e6,-87,2447000000,1,135,0.0-0.05
