In [None]:
import pandas as pd
import numpy as np
import plotly
from plotly.graph_objs import *
# Set up plotly's offline notebook mode; this must run before the
# plotly.offline.iplot(...) calls further down are executed.
plotly.offline.init_notebook_mode()

In [None]:
# Load the Get It Done 311 requests CSV into a DataFrame.
# low_memory=False is passed through to pandas unchanged.
gid_csv_path = './data/get_it_done_311_requests_datasd.csv'
df = pd.read_csv(gid_csv_path, low_memory=False)

In [None]:
# List the column labels of the loaded dataset (shown as the cell output)
df.columns

In [None]:
# Preview the first five rows (head() defaults to n=5)
df.head()

In [None]:
# Show the number of rows in the dataset.
# print(...) with a single argument behaves the same on Python 2 and 3,
# whereas the bare `print x` statement is a SyntaxError on Python 3.
print(len(df.index))

In [None]:
# Keep only rows whose 'district' value is one of 1 through 9;
# anything else (including missing values) is dropped.
districts = tuple(range(1, 10))
in_known_district = df['district'].isin(districts)
df = df[in_known_district]

In [None]:
# Focus on graffiti reports, excluding those whose source is
# 'Crew/Self Generated'.
is_graffiti = df['service_name'] == 'Graffiti'
not_self_generated = df['source'] != 'Crew/Self Generated'
# Take an explicit copy: later cells add columns to graffiti_df, and doing
# that on a filtered slice of df triggers pandas' SettingWithCopyWarning.
graffiti_df = df[is_graffiti & not_self_generated].copy()

In [None]:
# Show the number of graffiti reports.
# print(...) with a single argument behaves the same on Python 2 and 3,
# whereas the bare `print x` statement is a SyntaxError on Python 3.
print(len(graffiti_df.index))

In [None]:
# Parse the request/close columns once each and reuse the result for both
# the timestamp and the date columns. Values that cannot be parsed become
# NaT because of errors='coerce'.
open_timestamp = pd.to_datetime(graffiti_df['requested_datetime'], errors='coerce')
closed_timestamp = pd.to_datetime(graffiti_df['closed_date'], errors='coerce')

# Full timestamps
graffiti_df['open_timestamp'] = open_timestamp
graffiti_df['closed_timestamp'] = closed_timestamp
# Date-only versions of the same timestamps (used for per-day grouping)
graffiti_df['open_dt'] = open_timestamp.dt.date
graffiti_df['closed_dt'] = closed_timestamp.dt.date

In [None]:
# Graffiti reporting evolution: number of reports per opening date
counts_day = (
    graffiti_df
    .groupby('open_dt')
    .size()
    .reset_index(name='count')
)

# Line chart of the daily report counts
layout = Layout(
    title='311 Graffiti reporting frequency',
    yaxis={'title': 'Reports (count)'},
)
trace = Scatter(
    mode='lines',
    x=counts_day['open_dt'],
    y=counts_day['count'],
)
data = [trace]
fig = Figure(layout=layout, data=data)
plotly.offline.iplot(fig, filename='graffiti-reporting-freq')

In [None]:
# Calculate duration between 'open' and 'close' for 'closed' graffiti reports

graffiti_df = graffiti_df.dropna(subset=['closed_dt'])
graffiti_df['duration'] = graffiti_df['closed_timestamp'] - graffiti_df['open_timestamp']
graffiti_df['duration'] = graffiti_df['duration'].dt.components.days

avg_duration = graffiti_df.groupby(['open_dt']).mean().reset_index()

trace = Scatter(x=avg_duration['open_dt'], y=avg_duration['duration'], mode='lines')
data = [trace]
layout = Layout(
    title="Duration to 'got it done' - Graffiti",
    yaxis=dict(title='Average duration')
)
fig = Figure(data=data, layout=layout)
plotly.offline.iplot(fig, filename='graffiti-duration-avg')