In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
import datetime as dt
import math
import statsmodels.api as sm

# Load the parks patrol data; `encounter_datetime` is parsed to datetime on read.
# `infer_datetime_format` is intentionally not passed: it is deprecated since
# pandas 2.0, where format inference is already the default behaviour.
df = pd.read_csv(
    'https://raw.githubusercontent.com/dareng5/DataSets/master/Social_Distancing_and_Enforcement__Parks_Patrol_Data.csv',
    parse_dates=['encounter_datetime'],
)

# Drop rows that are missing either the timestamp or the encounter type;
# every later cell groups or filters on these two columns.
df = df.dropna(subset=['encounter_datetime', 'simplified_encounter_type'])

# Break the timestamp down into the calendar parts used for grouping.
df['month'] = df['encounter_datetime'].dt.month_name()
df['hour'] = df['encounter_datetime'].dt.hour
# `Series.dt.weekofyear` was removed in pandas 2.0; `isocalendar().week` is the
# documented replacement. It returns a nullable UInt32, so cast back to a plain
# integer (safe here because rows without a timestamp were dropped above).
df['weekofyear'] = df['encounter_datetime'].dt.isocalendar().week.astype('int64')
df['dayofyear'] = df['encounter_datetime'].dt.dayofyear

# Assign an 8-hour shift from the hour of day. Bins are right-inclusive:
# hours 0-7, 8-15 and 16-23 respectively.
df['shift'] = pd.cut(
    df['hour'],
    bins=[-1, 7, 15, 23],
    labels=['0000-0800', '0800-1600', '1600-2400'],
)

# Each row is one patrol record; a constant 1 lets groupby().sum() count them.
df['count'] = 1

# Combine the two amenity columns into one by string concatenation
# (a missing value contributes an empty string), then normalise spelling variants.
df['amenity'] = df['closed_amenity'].fillna('') + df['sd_amenity'].fillna('')
# NOTE(review): 'Bench/sitting area' is mapped to 'Beach' — this looks like a
# copy-paste slip; kept as-is to preserve current results, confirm the intent.
df = df.replace({'amenity': {
    'Beach - Water': 'Beach',
    'Bench/sitting area': 'Beach',
    'Dog Run': 'Dog run',
    'Tennis court': 'Tennis courts',
}})

In [None]:
# Patrol results by borough, one bar-chart facet per month.
dfborough = (
    df.groupby(['month', 'park_borough', 'simplified_encounter_type'])['count']
    .sum()
    .reset_index()
)

g1 = sns.catplot(
    x='park_borough', y='count', hue='simplified_encounter_type',
    col='month', col_wrap=3,
    # explicit order so facets read chronologically instead of alphabetically
    col_order=['April', 'May', 'June', 'July', 'August'],
    data=dfborough, kind='bar', palette=sns.color_palette('muted'),
    height=4, aspect=1, legend=False,
)
# The built-in legend is disabled above; draw one on the current (last) axes
# and push it outside the plotting area.
plt.legend(bbox_to_anchor=(1.2, 0.3), loc='upper left', borderaxespad=0.)
# Leave headroom for the two-line figure title.
plt.subplots_adjust(top=.87)
g1.fig.suptitle('Patrols result by borough\nFor period 04/14/20 thru 08/02/20')
g1.set_xticklabels(rotation=30)
g1.set_axis_labels('Borough')
# plt.show() accepts only the keyword `block`; passing the FacetGrid positionally
# raises TypeError on non-inline backends, so call it without arguments.
plt.show()

In [None]:
# Weekly encounter counts per amenity, split by encounter type.
# Patrols that recorded no encounter are excluded.
dfanemity = df[df['simplified_encounter_type'] != 'No Encounter']
dfanemity = (
    dfanemity.groupby(['simplified_encounter_type', 'weekofyear', 'amenity'])['count']
    .sum()
    .reset_index()
)

g2 = sns.catplot(
    x='weekofyear', y='count', hue='simplified_encounter_type',
    col='amenity', col_wrap=8,
    data=dfanemity, kind='point',
    height=4, aspect=.8, legend_out=False,
)
# Leave headroom for the two-line figure title.
plt.subplots_adjust(top=.9)
g2.fig.suptitle('Encounter count by Amenity\nFor period 04/14/20 thru 08/02/20')
# Show only the amenity name as each facet title (drop the "amenity = " prefix).
g2.set_titles("{col_name}")
g2.set_xticklabels(rotation=90)
g2.set_axis_labels('Week of Year')
# Blank out the legend title (it would otherwise show the hue column name).
g2._legend.set_title('')
# plt.show() accepts only the keyword `block`; passing the FacetGrid positionally
# raises TypeError on non-inline backends, so call it without arguments.
plt.show()

In [None]:
# Weekly encounter counts per amenity, split by patrol shift.
# Patrols that recorded no encounter are excluded.
dfshift = df[df['simplified_encounter_type'] != 'No Encounter']
# `shift` is categorical; pass `observed=False` explicitly so the result keeps
# the behaviour of the historical default (unobserved categories are retained)
# and newer pandas does not emit a FutureWarning about the changing default.
dfshift = (
    dfshift.groupby(['shift', 'weekofyear', 'amenity'], observed=False)['count']
    .sum()
    .reset_index()
)

g3 = sns.catplot(
    x='weekofyear', y='count', hue='shift',
    col='amenity', col_wrap=8,
    data=dfshift, kind='point',
    height=4, aspect=.8, legend_out=False,
)
# Leave headroom for the two-line figure title.
plt.subplots_adjust(top=.9)
g3.fig.suptitle('Encounter count on shift by Amenity\nFor period 04/14/20 thru 08/02/20')
# Show only the amenity name as each facet title (drop the "amenity = " prefix).
g3.set_titles("{col_name}")
g3.set_xticklabels(rotation=90)
g3.set_axis_labels('Week of Year')
# Blank out the legend title (it would otherwise show the hue column name).
g3._legend.set_title('')
# plt.show() accepts only the keyword `block`; passing the FacetGrid positionally
# raises TypeError on non-inline backends, so call it without arguments.
plt.show()

In [None]:
# Trend of social-distancing encounters: daily counts regressed on day of year.
dftrend = df[df['simplified_encounter_type'] == 'Social Distancing']
dftrend = dftrend.groupby(['dayofyear'])['count'].sum().reset_index()

# Fit count = const + slope * dayofyear using OLS (ordinary least squares).
# add_constant supplies the intercept column, named 'const'.
model = sm.OLS(dftrend['count'], sm.add_constant(dftrend['dayofyear']))
results = model.fit()
print(results.summary())

# Scatter plot with the fitted regression line.
g4 = sns.regplot(x='dayofyear', y='count', data=dftrend, truncate=False)
# plt.show() accepts only the keyword `block`; passing the Axes positionally
# raises TypeError on non-inline backends, so call it without arguments.
plt.show()

# Estimate the date on which the fitted line reaches zero encounters.
intercept = results.params['const']
slope = results.params['dayofyear']
if slope < 0:
    # Zero crossing of the line, rounded up to a whole day of the year.
    d1 = int(math.ceil(intercept / -slope))
    # Offset from 1 January 2020 instead of parsing with '%Y%j', which fails
    # whenever the crossing falls outside day 1..366 (e.g. in the next year).
    d2 = dt.date(2020, 1, 1) + dt.timedelta(days=d1 - 1)
    print('\nThe estimated date of no social distancing encounter from patrols is ' + str(d2) + '.')
else:
    # A flat or rising trend never reaches zero going forward; dividing by the
    # slope would either raise ZeroDivisionError or yield a meaningless date.
    d1 = d2 = None
    print('\nThe fitted trend is not decreasing, so no date of zero social distancing encounters can be estimated.')