# Data analysis

### Read data

In [1]:
from pandas import read_csv
# Load the capture log; `nrows` caps the read at the first 100,000 rows.
# Later cells read the 'time' and 'MAC' columns of this frame.
data = read_csv(
    '../../data/dt.consecutive.csv',
    nrows=100000,
)

### Convert time data to Python datetime objects

In [2]:
from datetime import datetime

def format(time):
    """Parse a ``'YYYY-MM-DD HH:MM:SS'`` string into a ``datetime``.

    ``datetime.strptime`` raises ``ValueError`` when *time* does not match
    that layout.

    NOTE(review): this name shadows the builtin ``format`` for the rest of
    the notebook; it is kept because the following cells call it.
    """
    layout = '%Y-%m-%d %H:%M:%S'
    return datetime.strptime(time, layout)

# Replace the raw timestamp strings with parsed datetime values.
data['time'] = [format(raw) for raw in data['time']]

# Keep only rows strictly inside the observation window
# (2019-12-17 00:00:00, 2020-01-26 00:00:00).
# Both bounds are combined into ONE boolean mask: chaining two filters
# (`data[a][b]`) applies a full-length mask to an already-filtered frame,
# which makes pandas reindex the mask and emit a UserWarning.
data = data[
    (data['time'] > format('2019-12-17 00:00:00'))
    & (data['time'] < format('2020-01-26 00:00:00'))
]

### Number of unique MAC addresses each day

In [None]:
# One row per calendar day: every timestamp is floored to midnight, then the
# distinct MAC values seen on that day are counted into the 'All day' column.
daily = (
    data['MAC']
    .groupby(data["time"].dt.floor('d'))
    .nunique()
    .reset_index(name='All day')
)

### Number of unique MAC addresses each day in working hours

In [None]:
from pandas import merge

# Rows whose hour-of-day is 8..19 inclusive (the original `> 7` and `< 20`).
# A single mask is used instead of chaining `data[a][b]`, which applies a
# full-length mask to an already-filtered frame and triggers a pandas
# reindexing UserWarning.
work = data[data['time'].dt.hour.between(8, 19)]

# Distinct MAC values per day, restricted to the working-hours rows.
working_daily = (
    work.groupby(work["time"].dt.floor('d'))['MAC']
    .nunique()
    .reset_index(name='Working hours')
)

# The outer join keeps days that have no working-hours rows; their missing
# count is then filled with 0.
daily = merge(daily, working_daily, how='outer', on='time')
daily.fillna(0, inplace=True)

# The intermediate frames are no longer needed.
del work, working_daily

### Find weekend indexes

In [None]:
# `time` holds the per-day timestamps of `daily`; `weekends` lists the index
# labels of the days that fall on Saturday or Sunday (weekday 5 or 6, with
# Monday == 0), so `time[label]` gives the corresponding day.
# The weekday test is vectorized instead of looping over range(len(time))
# with one scalar lookup per row.
time = daily['time']
weekends = time.index[time.dt.weekday > 4].tolist()

### Plot number of unique MAC addresses each day

In [None]:
import matplotlib.pyplot as plt
from matplotlib import dates

# Line plot of the per-day counts: `time` on the x axis and every column of
# `daily` other than 'time' as its own line.
fig, ax = plt.subplots()
ax.plot_date(time, daily.drop(columns='time'), fmt='-')

# Optional highlight bands, currently disabled: two fixed date ranges and one
# band per weekend day.
# ax.axvspan(*dates.datestr2num(['12/09/2019', '12/17/2019']), color='red', alpha=0.5)
# ax.axvspan(*dates.datestr2num(['12/17/2019', '01/26/2020']), color='green', alpha=0.5)
# for weekend in weekends:
#     ax.axvspan(time[weekend], time[weekend + 1], color='yellow', alpha=0.5)

# Slant the date tick labels, write the figure to disk, then display it.
fig.autofmt_xdate()
plt.savefig('daily2.jpg', dpi=1200)
plt.show()

### Number of unique MAC addresses each hour

from datetime import timedelta

# Distinct MAC values per hour: timestamps are floored to the hour before
# grouping. The lowercase 'h' alias is used because the uppercase 'H'
# spelling is deprecated in recent pandas releases.
hourly = data.groupby(data["time"].dt.floor('h'))['MAC'].nunique().reset_index()

fig, ax = plt.subplots()
ax.plot_date(hourly['time'], hourly.drop('time', axis=1), '-')

# Fixed highlight bands: 2019-12-09..2019-12-17 in red and
# 2019-12-17..2020-01-26 in green.
ax.axvspan(*dates.datestr2num(['12/09/2019', '12/17/2019']), color='red', alpha=0.5)
ax.axvspan(*dates.datestr2num(['12/17/2019', '01/26/2020']), color='green', alpha=0.5)

# Shade each weekend day from its own midnight to the next midnight.
# The end of the band is computed as start + 1 day rather than looked up as
# `time[weekend + 1]`: that lookup raises KeyError when the last day in
# `daily` is a weekend day, and would stretch the band across missing days.
one_day = timedelta(days=1)
for weekend in weekends:
    day_start = time[weekend]
    ax.axvspan(day_start, day_start + one_day, color='yellow', alpha=0.5)

# Slant the date tick labels, write the figure to disk, then display it.
fig.autofmt_xdate()
plt.savefig('hourly.jpg', dpi=1200)
plt.show()

### Weeks for each month

# For every month present in the data, draw one figure with a line per week:
# x is the day of the month, y is the number of distinct MAC values that day.
for month in data['time'].dt.month.unique():
    month_rows = data[data['time'].dt.month == month]
    stamps = month_rows['time']

    # `Series.dt.week` was removed in pandas 2.0; `dt.isocalendar().week`
    # (available since pandas 1.1) gives the same ISO week number.
    week_of_year = stamps.dt.isocalendar().week

    per_day = month_rows.groupby([week_of_year, stamps.dt.day])['MAC'].nunique()

    # Move the week level into the columns so that each week becomes a line.
    graph = per_day.unstack(level=0).add_prefix('Week ').plot(kind='line', title='Month_' + str(month))
    graph.legend(bbox_to_anchor=(1, 1))
    graph.set_xlabel('Day')

    # NOTE(review): this shades x = 5..7, i.e. days 5 to 7 of the month on
    # this axis. Confirm that is the intended range to highlight.
    graph.axvspan(5, 7, color='red', alpha=0.5)
    plt.savefig('Month_' + str(month), dpi=1200)

### Weekdays for each month

# For every month and every weekday present in it, draw one figure with a
# line per week: x is the hour of the day, y is the number of distinct MAC
# values seen in that hour.
for month in data['time'].dt.month.unique():
    month_rows = data[data['time'].dt.month == month]
    stamps = month_rows['time']

    # `Series.dt.week` was removed in pandas 2.0; `dt.isocalendar().week`
    # (available since pandas 1.1) gives the same ISO week number.
    week_of_year = stamps.dt.isocalendar().week

    # Index: (week, hour); columns: weekday number (Monday == 0).
    weekdays = (
        month_rows.groupby([week_of_year, stamps.dt.weekday, stamps.dt.hour])['MAC']
        .nunique()
        .unstack(level=1)
    )

    for weekday in weekdays:
        # The same string is used as the plot title and the output file name.
        label = 'Month_' + str(month) + '_weekday_' + str(weekday)

        # Move the week level into the columns so that each week is a line.
        graph = weekdays[weekday].unstack(level=0).plot(kind='line', title=label)
        graph.legend(bbox_to_anchor=(1, 1), title='Week')
        graph.set_xlabel('Hour')
        plt.savefig(label, dpi=1200)