In [None]:
#gets changed when notebook is autorun from main.ipynb
#Folder for the scout being analysed.  Data is loaded from it, its base name appears
#in the plot title and output file name, and the finished figure is saved into it.
SCOUT_FOLDER_PATH = r"C:\Users\kowaleskim\Workspace\measure_the_future\mtf_data\Main 3 - MFA"

In [None]:
import matplotlib.pyplot as plt
import numpy as np

import matplotlib.dates as mpdates
from datetime import datetime, timedelta, date, time, MINYEAR
import pytz #time zone info
import calendar #for day abbreviations

from collections import Counter

import load_mtf_data
import os

from constants import DATE_RANGE
#Weekdays passed to the data loader and listed in the plot title.
WEEKDAYS_TO_DISPLAY = [3] #Monday is 0, Sunday is 6
#Upper limit of the plot's y axis (the y axis shows interaction counts).
TIME_Y_AXIS_MAX = 500

In [None]:
#set matplotlib to display plots in the notebook output rather than in a new window
%matplotlib inline

In [None]:
#Load the interactions for this scout.  The loader is given DATE_RANGE and
#WEEKDAYS_TO_DISPLAY, so presumably only matching interactions are returned
#(NOTE(review): confirm against load_mtf_data).  `im` is the second item the
#loader returns; it is not used by the other cells shown in this notebook.
#
#Alternative loaders, kept for reference:
#    (interactions, im) = load_mtf_data.combined_from_all_sets(SCOUT_FOLDER_PATH)
#    (interactions, im) = load_mtf_data.from_set(SCOUT_FOLDER_PATH, 'main3_12072017')
interactions, im = load_mtf_data.from_date_range(
    SCOUT_FOLDER_PATH,
    DATE_RANGE,
    WEEKDAYS_TO_DISPLAY,
)
#interactions   (uncomment to inspect the loaded data)

Let's look at the entry times (the `EnteredAt` field) from the MtF data.  These are recorded in 15-minute bins, and we are interested in the number of interactions in each bin.

Make a Counter, a Python object that can be constructed from a list (or any other iterable) and treated as a dictionary whose keys are the distinct elements of the list and whose values are the number of times each element occurs.

In this case, the keys will be datetime objects representing the start time of a 15-minute bin, and the values will be the number of interactions in that bin.

In [None]:
#Tally how many interactions share each 'EnteredAt' value (the start of a time bin).
entered_at_values = [interaction['EnteredAt'] for interaction in interactions]
interaction_time_counter = Counter(entered_at_values)
#interaction_time_counter   (uncomment to inspect the tallies)

We are only interested in the times of day people tend to be in view of this scout, and thus our chart will combine data from several dates.  The date range and the days of the week included in the graph are set by `DATE_RANGE` (imported from `constants`) and `WEEKDAYS_TO_DISPLAY` (defined in the import cell above); for example, you might want to look at weekends in December or weekdays in 2018.

In [None]:
#Dates of the earliest and latest time bins that actually contain loaded MtF data.
#min()/max() over the Counter compare its keys, i.e. the bin start datetimes.
min_date_represented, max_date_represented = (
    pick(interaction_time_counter).date() for pick in (min, max)
)
min_date_represented, max_date_represented

In [None]:
#Build the "weekdays included" label for the plot title.
#Compare as sets so that a full week is recognised as "All" regardless of the
#order, container type (list/tuple/...) or duplicates in WEEKDAYS_TO_DISPLAY.
if set(WEEKDAYS_TO_DISPLAY) == set(range(7)):
    weekdays_label = "All"
else:
    #calendar.day_abbr maps 0..6 (Monday..Sunday) to abbreviated day names
    weekdays_label = ", ".join(calendar.day_abbr[day] for day in WEEKDAYS_TO_DISPLAY)

#assemble plot title, one piece of information per line
title = '\n'.join([
    "Number of Interactions by Time of Day",
    "scout: " + os.path.basename(SCOUT_FOLDER_PATH),
    #"%x" formats each date using the current locale's date representation
    "dates between " + min_date_represented.strftime("%x") + " and " + max_date_represented.strftime("%x"),
    "weekdays included: " + weekdays_label
])

Create a second Counter, called display_time_counter.   This counter is only interested in the time of day of each bin, not the date, so it will add together the counts for several bins on different dates but the same time.

It would be most sensible for the keys of display_time_counter to be time rather than datetime objects.  However, matplotlib is good at plotting datetime objects and very bad at plotting time objects.  So instead of converting from datetime to time, we will manually replace the date in each datetime object with an arbitrary day (1/1/2000) while leaving the time the same.  Essentially we want to act as if all interactions are happening on the same day.

In [None]:
#Collapse the per-date bins into per-time-of-day bins: move every bin onto the same
#placeholder day (1/1/2000) and sum the counts of bins that land on the same time.
display_time_counter = Counter()
for bin_start, bin_count in interaction_time_counter.items():

    #The datetimes are expected to be in US Eastern time at this point (not checked here).
    #Dropping tzinfo in the replace call keeps the wall-clock hour and minute exactly as
    #they were, so matplotlib (which is awkward about time zones) shows each interaction
    #at the time someone in that time zone would describe it.
    same_day_bin = bin_start.replace(year=2000, month=1, day=1, tzinfo=None)

    #a Counter treats a missing key as 0, so += adds to any count already stored
    #under this time of day rather than replacing it
    display_time_counter[same_day_bin] += bin_count

display_time_counter

In [None]:
#matplotlib wants x and y values as two parallel sequences, so unzip the
#(time bin, count) pairs of the counter into a tuple of times and a tuple of counts.
time_count_pairs = display_time_counter.items()
times, counts = zip(*time_count_pairs)

In [None]:
#Span, in hours, between the earliest and latest time bins being plotted.
#Used further down to choose how densely the x axis ticks are labelled.
displayed_span = max(times) - min(times)
second_range = displayed_span.total_seconds()
hour_range = second_range / 3600.0
hour_range

In [None]:
#axes object on which everything below is drawn
ax = plt.axes()

#use the same fixed y range for every plot
ax.set_ylim(0, TIME_Y_AXIS_MAX)

#Each bin is identified by its start time, so the bars are placed with the time at
#their left edge ('edge') rather than centered on it (the default).
#NOTE(review): with datetime x values the width is presumably in matplotlib date
#units (days), making .011 about 15.8 minutes, slightly wider than a 15-minute bin.
bar_options = dict(width=.011, align='edge')
ax.bar(times, counts, **bar_options)

#ax.xaxis_date()

#minor ticks: one tick mark at each hour
ax.xaxis.set_minor_locator(mpdates.HourLocator())

#major (labelled) ticks: every 2 hours when fewer than 16 hours are shown,
#otherwise every 4 hours
label_interval = 2 if hour_range < 16 else 4
ax.xaxis.set_major_locator(mpdates.HourLocator(interval=label_interval))

#label format: 12-hour clock hour on one line, AM/PM on the next
ax.xaxis.set_major_formatter(mpdates.DateFormatter('%I\n%p'))

ax.set_title(title)

#vertical gridlines make it easier to see where the bars sit in time;
#'both' draws them at major and minor ticks, axis='x' limits them to the x axis
ax.grid(which='both', axis='x')

#save the figure inside the scout folder, named after the scout and the date range
figure_name = (
    "time_" + os.path.basename(SCOUT_FOLDER_PATH)
    + "_" + str(DATE_RANGE[0])
    + "_" + str(DATE_RANGE[1])
    + ".jpg"
)
figure_path = SCOUT_FOLDER_PATH + os.sep + figure_name
plt.savefig(figure_path, bbox_inches='tight', dpi=300)