## Plot bike theft reports from January 1, 2018 to April 3, 2020

This Jupyter notebook gathers bike theft data from bikewise.org and generates multiple line plots in parallel.<br>
The two notable libraries are:

    1) concurrent.futures
    2) bikewise
    
Execute the cells in order, from top to bottom, to produce the figures.

In [1]:
"""Define functions to parallelize
gathering bike theft reports."""

import calendar
import concurrent.futures
import datetime
import os
import sys
import time

from bikewise import BikeWise
# Absolute path of the working directory at the moment this cell runs
# (os.path.abspath('') resolves the empty path against the current directory).
BASE_DIR = os.path.abspath('')


def datetime_to_unixtime(datetime_obj):
    """Convert a date or datetime to whole seconds since the Unix epoch (UTC).

    Naive ``date``/``datetime`` values are interpreted as UTC wall-clock
    time, so the result does not depend on the machine's local timezone or
    DST rules. Timezone-aware datetimes are normalised to UTC first.

    Args:
        datetime_obj: a ``datetime.date`` or ``datetime.datetime`` instance.

    Returns:
        int: seconds since 1970-01-01T00:00:00Z.
    """
    # date objects only provide timetuple(); datetimes also provide
    # utctimetuple(), which applies the UTC offset of aware values.
    to_utc_tuple = getattr(datetime_obj, 'utctimetuple', datetime_obj.timetuple)
    # calendar.timegm is the UTC counterpart of time.mktime; mktime assumes
    # local time and would shift the result by the machine's UTC offset.
    unixtime = calendar.timegm(to_utc_tuple())
    return unixtime


def total_stolen_perday(args):
    """Fetch the theft incidents for a single 24-hour window.

    Args:
        args: sequence whose first item is the zero-based day offset and
            whose second item is the start of day 0 as a Unix timestamp in
            seconds. Both values arrive packed in one argument so the
            function can be handed to ``executor.map``.

    Returns:
        dict with the keys
            'date': start of the window formatted as 'YYYY-MM-DD' in UTC,
            'locations': de-duplicated coordinate lists,
            'total_theft': number of features returned by the API
                (duplicates included).
    """
    iterable, start_date = args[0], args[1]
    one_day = 86400  # 86400 seconds in a day
    today = start_date + one_day * iterable
    tomorrow = today + one_day

    # make request to bikewise.org API
    bike = BikeWise()
    theft_incidents = bike.locations.markers(
        limit=10000,
        incident_type='theft',
        occurred_after=today,
        occurred_before=tomorrow,
    )
    features = theft_incidents['features']

    # remove duplicate theft incidents; dict keys keep first-seen order on
    # Python 3.7+, so the list is deterministic (set iteration order is not)
    unique_locations = dict.fromkeys(
        tuple(feature['geometry']['coordinates']) for feature in features
    )
    unique_list_theft = [list(location) for location in unique_locations]

    # timezone-aware conversion: datetime.utcfromtimestamp is deprecated
    # since Python 3.12 and this yields the same formatted date
    date_today = datetime.datetime.fromtimestamp(
        today, tz=datetime.timezone.utc
    ).strftime('%Y-%m-%d')

    # store results in dictionary
    summary = {
        'date': date_today,
        'locations': unique_list_theft,
        'total_theft': len(features),
    }
    sys.stdout.write("\rProcessing date: {}".format(date_today))
    sys.stdout.flush()

    return summary
    
    
def _futures_threads(transform, iterable):
    """use a thread pool to execute
    calls asynchronously"""
    with concurrent.futures.ThreadPoolExecutor() as executor:
        result = executor.map(transform, iterable)
    return result


In [2]:
"""Use functions above to gather bike
theft reports from January 1, 2018 to
April 3, 2020."""

# Reporting window. delta_days counts the whole days between the two dates,
# so the offsets 0 .. delta_days - 1 end on the day before end_date.
start_date = datetime.date(year=2018, month=1, day=1)
end_date = datetime.date(year=2020, month=4, day=3)
delta_days = (end_date - start_date).days

# Express the start date as a Unix timestamp, then lazily build the
# (day_offset, start_timestamp) pairs that total_stolen_perday unpacks.
unix_start_date = datetime_to_unixtime(start_date)
args = ((offset, unix_start_date) for offset in range(delta_days))

if __name__ == '__main__':
    # Fan the requests out over the thread pool and materialise the
    # per-day dictionaries into a list for the plotting cell.
    result = list(_futures_threads(total_stolen_perday, args))


Processing date: 2020-04-02

In [3]:
"""Use matplotlib to plot gathered information
in parallel. Save figs in directory ./figs"""

# pyplot draws the figures; mdates supplies the date tick locator/formatter.
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import warnings
# NOTE(review): with no category or module filter this silences every
# warning raised from here on, not only plotting ones — confirm that is
# intended, since it can hide real problems.
warnings.filterwarnings("ignore")


def plot(args):
    """Save a line graph of the daily theft counts up to and including one day.

    Reads the module-level ``result`` list (one dict per day with 'date' and
    'locations' keys) and writes ``fig<index>.png`` into the current working
    directory.

    Args:
        args: sequence whose first item is the index into ``result`` of the
            latest day to draw. Further items (that day's date and location
            count) are accepted for compatibility but not needed, because
            the same values are read from ``result``.
    """
    index = args[0]

    # The graph shows current *and* past counts, so the slice has to end one
    # past `index`; stopping at `index` would leave figure 0 empty and never
    # draw the final day.
    shown = result[:index + 1]
    values = [len(event['locations']) for event in shown]
    days = [datetime.datetime.strptime(event['date'], '%Y-%m-%d').date()
            for event in shown]

    fig, ax = plt.subplots(figsize=(16, 8))
    try:
        ax.set_title('Reported Bike Thefts on Bikewise', fontweight='bold', fontsize=16)
        ax.set_xlabel('Date', fontsize=14)
        ax.set_ylabel('Reported Thefts', fontsize=14)
        # one labelled tick per month along the date axis
        ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
        ax.xaxis.set_major_locator(mdates.MonthLocator())
        ax.plot(days, values, color='r')
        fig.autofmt_xdate()
        fig.savefig('fig{}.png'.format(index))
    finally:
        # pyplot keeps every figure alive until it is closed; without this
        # each worker process accumulates one open figure per call
        plt.close(fig)

    
def _futures_plot(transform, iterable):
    """use a process pool to execute
    calls asynchronously"""
    with concurrent.futures.ProcessPoolExecutor() as executor:
        for index, item in enumerate(executor.map(transform, iterable)):
            sys.stdout.write("\rProcessing figure: {}".format(index))
            sys.stdout.flush()
            
            
if __name__ == '__main__':
    # plot() saves each figure relative to the current working directory, so
    # switch into <BASE_DIR>/figs first. Create the folder when it is missing;
    # os.chdir would otherwise raise FileNotFoundError on a fresh checkout.
    figs_dir = os.path.join(BASE_DIR, "figs")
    os.makedirs(figs_dir, exist_ok=True)
    os.chdir(figs_dir)

    # One (index, date, unique-location count) tuple per gathered day.
    args = ((index,
             datetime.datetime.strptime(event['date'], '%Y-%m-%d').date(),
             len(event['locations']))
            for index, event in enumerate(result))

    _futures_plot(plot, args)


Processing figure: 822