In [11]:
%matplotlib inline
import os
import sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [12]:
# Load the access-point (AP) and client master CSVs into DataFrames.
ap_df = pd.read_csv('data_ap.csv')
client_df = pd.read_csv('data_client.csv')

# Show the column labels of both frames as a quick schema check.
# print(...) with a single argument behaves the same on Python 2 and Python 3,
# whereas the bare `print x` statement is a SyntaxError on Python 3.
print(ap_df.columns)
print(client_df.columns)

Index([u'BSSID', u'First time seen', u'Last time seen', u'channel', u'Speed',
       u'Privacy', u'Cipher', u'Authentication', u'Power', u'# beacons',
       u'# IV', u'LAN IP', u'ID-length', u'ESSID', u'Key'],
      dtype='object')
Index([u'Station MAC', u'First time seen', u'Last time seen', u'Power',
       u'# packets', u'BSSID', u'Probed ESSIDs'],
      dtype='object')


In [13]:
############################################
# Start with timestamp conversion and analysis.
# This is to figure out the number of seconds
# that have elapsed since the start of the experiment
# for all of the wifi data we have collected.

# Convert the "First time seen" and "Last time seen" columns of both the
# access-point and the client frame to Timestamps.
# pd.to_datetime is called on the whole column (vectorized) rather than once
# per row through .apply; the resulting dtype is datetime64 either way.
for frame in (ap_df, client_df):
    for column in ('First time seen', 'Last time seen'):
        frame[column] = pd.to_datetime(frame[column])


# Pool every "first time seen" value from both frames and sort once.
# The earliest of them marks the start of the experiment.
# pd.concat replaces Series.append, which was removed in pandas 2.0.
times = pd.concat([client_df['First time seen'], ap_df['First time seen']])
first_seen_sorted = times.sort_values()

# Distinct first-seen timestamps in ascending order.
sorted_times = first_seen_sorted.unique()

# Earliest first-seen timestamp, taken from .values so it stays a raw
# numpy datetime64 scalar as before.
experiment_begin = first_seen_sorted.values[0]

In [14]:
# Show the experiment start as a readable pandas Timestamp.
# Single-argument print(...) runs unchanged on Python 2 and Python 3.
print(pd.Timestamp(experiment_begin))

2016-02-27 06:10:04


In [15]:
def _seconds_since(timestamps, start):
    """Return how many seconds each entry of ``timestamps`` lies after ``start``.

    ``timestamps`` is a datetime64 Series and ``start`` a single point in time.
    The result is a float64 Series that keeps the index and name of
    ``timestamps``; missing timestamps (NaT) come back as NaN.
    """
    # .dt.total_seconds() yields plain float seconds on every pandas version.
    # astype('timedelta64[s]') only did so on old releases; pandas 2.x keeps a
    # timedelta dtype there, which would change the 'Seconds Elapsed' column.
    return (timestamps - start).dt.total_seconds()


# Make a copy of the first time seen vector,
# so we can convert it into number of seconds
# that have elapsed and insert it back into the dataframe
# (We will use this as the timestamp for each observation)
client_times = client_df['First time seen'].copy(deep=True)
client_times.name = 'Seconds Elapsed'

ap_times = ap_df['First time seen'].copy(deep=True)
ap_times.name = 'Seconds Elapsed'



print("========= Timestamp Format ==============")
print(client_times[0:5])
print(ap_times[0:5])



# Make a vector of the time deltas
# (The time delta is the number of seconds elapsed since
#  the beginning of the experiment.)
client_time_deltas = _seconds_since(client_times, experiment_begin)
ap_time_deltas = _seconds_since(ap_times, experiment_begin)



print("========= Time Delta Format ==============")
print(client_time_deltas[0:8])
print(ap_time_deltas[0:8])

0   2016-02-27 06:10:20
1   2016-02-27 06:10:04
2   2016-02-27 06:10:20
3   2016-02-27 06:10:04
4   2016-02-27 06:10:15
Name: Seconds Elapsed, dtype: datetime64[ns]
0   2016-02-27 06:10:15
1   2016-02-27 06:10:19
2   2016-02-27 06:10:15
3   2016-02-27 06:10:15
4   2016-02-27 06:10:19
Name: Seconds Elapsed, dtype: datetime64[ns]
0    16
1     0
2    16
3     0
4    11
5    15
6    16
7    11
Name: Seconds Elapsed, dtype: float64
0    11
1    15
2    11
3    11
4    15
5     0
6    11
7     0
Name: Seconds Elapsed, dtype: float64


In [16]:
# Since we ran a 2 hour experiment,
# the last time delta should be close to 7200 s = 120 minutes = 2 hours
EXPERIMENT_SECONDS = 2 * 60 * 60
# Allowed gap between the expected duration and the last observed delta;
# adjust if the capture is expected to stop earlier or later.
TOLERANCE_SECONDS = 60

# .max() ignores missing values. Sorting and taking the last element would
# return NaN as soon as a single delta is missing, because NaN sorts last.
last_client_delta = client_time_deltas.max()
print(last_client_delta)

# Only report success when the value really is near the expected duration.
if abs(last_client_delta - EXPERIMENT_SECONDS) <= TOLERANCE_SECONDS:
    print("Bingo!")
else:
    print("Last client time delta is not within %d s of the expected %d s"
          % (TOLERANCE_SECONDS, EXPERIMENT_SECONDS))

7197.0
Bingo!


In [18]:
# Each frame now has a matching vector holding the number of seconds
# elapsed since the start of the experiment.
#
# Step 1: attach that vector to its frame as the 'Seconds Elapsed' column.
for frame, elapsed in ((client_df, client_time_deltas),
                       (ap_df, ap_time_deltas)):
    frame['Seconds Elapsed'] = elapsed


# Step 2: write the frames, including the new time column, to new csv files.
for frame, out_path in ((ap_df, 'data_ap_time.csv'),
                        (client_df, 'data_client_time.csv')):
    frame.to_csv(out_path)
