In [None]:
from shapely.geometry import Point
import pandas as pd
import pickle
import matplotlib.pyplot as plt
from datetime import date, timedelta, datetime
import geopandas as gpd
import folium
from folium.plugins import MarkerCluster

Loading the data (pickle and CSV). Note: the cell below currently reads the July pickle.


In [None]:
%%time
# Load the base dataframe from the July pickle; %%time reports how long the
# read takes. Presumably one row per scooter location report -- confirm.
# NOTE(review): unpickling can execute arbitrary code embedded in the file,
# so only load pickles that come from a trusted source.
loc_pkl = pd.read_pickle("../data/july.pkl")

## SECTION: DICTIONARY OF TIME-SEGMENT DATAFRAMES
This section makes a dictionary of dataframes. The dataframes can be "chunked up" into different time segments. The main things you need to adjust: the start and end variables, and the delta variable, which designates how "big" a "time chunk" should be. Datetime docs here: https://docs.python.org/3/library/datetime.html

In [None]:
# Build the list of segment boundaries (datetimes) plus a parallel list of
# string labels that are used later as dictionary keys and in file names.
start = datetime(2019, 7, 4, 0, 0)
end = datetime(2019, 7, 4, 23, 0)
delta = timedelta(hours=1)

# A zero or negative step would never reach `end` and loop forever.
if delta <= timedelta(0):
    raise ValueError("delta must be a positive timedelta")

timelist = []
timenamelist = []
# Walk a separate cursor so `start` keeps its configured value; a later cell
# reads `start` to name the output CSV, and advancing `start` itself would
# leave it one step past `end` (i.e. on the following day).
current = start
while current <= end:
    timelist.append(current)
    timenamelist.append(current.strftime("%Y-%m-%d-%H-%M"))
    current += delta

# NOTE: consecutive boundaries are paired into segments downstream, so N
# boundaries give N-1 segments. With `end` at 23:00 the 23:00-24:00 hour is
# not covered; set `end` to the following midnight to include it.

In [None]:
def make_day_df(datetime_list, timenames_list, dataframe, colname):
    """Split ``dataframe`` into one sub-frame per consecutive time segment.

    Despite the name, segments can be any size (hours, days, ...): each
    adjacent pair of entries in ``datetime_list`` defines one segment.

    Parameters
    ----------
    datetime_list : list of datetime
        Ordered segment boundaries. N boundaries produce N-1 segments.
    timenames_list : list of str
        Labels used as dictionary keys. The i-th label names the segment that
        starts at ``datetime_list[i]``; labels beyond the last segment are
        ignored.
    dataframe : pandas.DataFrame
        Base dataframe to slice.
    colname : str
        Name of the datetime column in ``dataframe`` to filter on.

    Returns
    -------
    dict
        Maps each segment label to the rows whose ``colname`` value lies in
        the half-open interval ``(segment start, segment end]``. A row stamped
        exactly on the first boundary is therefore not included anywhere.

    Raises
    ------
    ValueError
        If there are fewer labels than segments.
    """
    segment_count = max(len(datetime_list) - 1, 0)
    if len(timenames_list) < segment_count:
        raise ValueError(
            "timenames_list needs at least one label per segment "
            "(%d segments, %d labels)" % (segment_count, len(timenames_list))
        )

    segments = {}
    # Pair each boundary with the next one; zip stops after the last full pair.
    for starttime, endtime, label in zip(datetime_list, datetime_list[1:], timenames_list):
        mask = (dataframe[colname] > starttime) & (dataframe[colname] <= endtime)
        # Return independent copies so callers can add columns to a segment
        # without pandas warning about writing to a slice of `dataframe`.
        segments[label] = dataframe.loc[mask].copy()
    return segments
        
# Slice the loaded dataframe into one dataframe per time segment, keyed by
# the segment labels built alongside the boundary list.
df_dict = make_day_df(
    datetime_list=timelist,
    timenames_list=timenamelist,
    dataframe=loc_pkl,
    colname="pubdatetime",
)


## Converting Dictionary of DFs to Dictionary of GeoDFs, spatial-joined with Promise Zones for numerical analysis, not geospatial analysis

This section below is experimental geospatial stuff, adapted from our geospatial notebook for the most part. If you want to mess with this bit, make sure you load the notebook in the geospatial environment.

In [None]:
# Read the Promise Zone shapefile, print the CRS it ships with, reproject it
# to EPSG:4326, then print the reprojected frame.
promise_zones_path = '../data/MDHA_Promise_Zones/Export_Output_5.shp'
promise_zones = gpd.read_file(promise_zones_path)
print(promise_zones.crs)
promise_zones = promise_zones.to_crs('EPSG:4326')
print(promise_zones)

### The cell below adds a geometry column to each dataframe in the df_dict

In [None]:
# Attach a point geometry column (built from longitude/latitude) to every
# time-segment dataframe in df_dict.
for key in df_dict:
    working_df = df_dict[key]
    # points_from_xy builds all points in one vectorised call instead of one
    # Python call per row, and it also handles a segment with no rows, where
    # a row-wise apply may not yield a usable column.
    # assign() stores a new frame under the same key rather than writing into
    # a slice of the base dataframe, so re-running this cell is safe.
    df_dict[key] = working_df.assign(
        geometry=gpd.points_from_xy(working_df['longitude'], working_df['latitude'])
    )

In [None]:
# test it
# df_dict['2019-05-10-19-00']

### Convert all dfs into geodfs, use promise zone crs, save in new dict

In [None]:
# Promote every time-segment dataframe to a GeoDataFrame tagged with the
# Promise Zone CRS, collected in a new dictionary under the same keys.
geodf_dict = {
    key: gpd.GeoDataFrame(
        frame,
        crs=promise_zones.crs,
        geometry=frame['geometry'],
    )
    for key, frame in df_dict.items()
}
    

In [None]:
# test it
# print(geodf_dict.keys())
# print(type(geodf_dict['2019-05-10-00-00']))
# print(geodf_dict['2019-05-10-00-00'])

### Joining all geodfs in dict to promise zone 

In [None]:
# Spatially join each segment's points to the Promise Zone polygons using the
# "within" relationship, replacing each GeoDataFrame with its joined result.
for key in geodf_dict:
    try:
        # `predicate` is the current name of the spatial-relationship
        # argument; the older `op` spelling is no longer accepted by recent
        # geopandas releases.
        joined = gpd.sjoin(geodf_dict[key], promise_zones, predicate="within")
    except TypeError:
        # Older geopandas releases only know the argument as `op`.
        joined = gpd.sjoin(geodf_dict[key], promise_zones, op="within")
    geodf_dict[key] = joined

In [None]:
#test it
# geodf_dict['2019-05-11-06-00']['ZONE_ID'].value_counts(normalize=True)

In [None]:
# Build a table of normalised ZONE_ID value counts: one row per zone, one
# column per time-segment label. Each column holds the share of that
# segment's joined rows that fall in each zone.
zone_shares = [
    geodf_dict[key]['ZONE_ID'].value_counts(normalize=True).rename(key)
    for key in geodf_dict
]
# Concatenate all columns in one call instead of growing the frame inside the
# loop, which re-copies the accumulated table on every iteration.
# pd.concat rejects an empty list, so fall back to an empty frame.
scoots_by_hour_by_zone = (
    pd.concat(zone_shares, axis=1) if zone_shares else pd.DataFrame()
)

In [None]:
# Display the table flipped: one row per time segment, one column per zone.
scoots_by_hour_by_zone.T

In [None]:
# Name the CSV after the date of the first segment boundary. Deriving it from
# `timelist` rather than `start` keeps the name correct even if `start` has
# been modified since the boundaries were built.
csv_date = timelist[0].strftime("%Y-%m-%d")
scoots_by_hour_by_zone.to_csv("../data/scoots_by_zones_csvs/" + csv_date + ".csv")

### Produce images for Gif!!! Make sure to change titlevar and savefig path to correct day

In [None]:
# Render one map per time segment: the Promise Zone polygons with that
# segment's joined points drawn on top, saved as a PNG frame for the GIF.
# The title assumes hourly segments starting at 0:00 on the hard-coded day;
# update `titlevar` and the output folder when the date or segment size changes.
leg_kwds = {'title': 'Promise Zone', 'loc': 'upper left',
            'bbox_to_anchor': (1, 1.03), 'ncol': 1}
for the_hour, key in enumerate(geodf_dict):
    ax = promise_zones.plot(figsize = (8, 10), cmap='Set1', edgecolor = 'black',
              legend = True, legend_kwds = leg_kwds)
    geodf_dict[key].plot(ax = ax, column='sumdid')
    titlevar = "Thursday, July 4th, " + str(the_hour) + ":00" + " to " + str(the_hour + 1) + ":00"
    ax.set_title(titlevar)
    fig = ax.get_figure()
    fig.savefig("../maps/July_4_2019/" + key + "_map.png")
    # Release the figure once it is on disk; otherwise every frame stays open
    # in memory for the rest of the session. NOTE(review): closed figures are
    # presumably no longer rendered inline -- the saved PNGs are the output.
    plt.close(fig)

# output_geodf_and_promise['ZONE_ID'].value_counts(normalize=True)
# output_geodf_and_zip['zip'].value_counts()