# Exploratory analysis of the incidents' temporal pattern

In [None]:
import pandas as pd
import geopandas as gpd
import numpy as np
import matplotlib.pyplot as plt
import sklearn
plt.style.use('seaborn')
%matplotlib inline
pd.set_option('display.max_columns', 50)

In [None]:
# Load the four quarterly incident extracts, each indexed by its parsed
# `callstart` timestamp so the frames can be resampled by time later on.
# `infer_datetime_format` is no longer passed: it is deprecated since pandas 2.0,
# and omitting it does not change the parsed values.
q1, q2, q3, q4 = [
    pd.read_csv('data/Incidents/incidentsQ{}.csv'.format(quarter),
                index_col='callstart', parse_dates=True)
    for quarter in (1, 2, 3, 4)
]

In [None]:
# Count the non-null `incidentid` values recorded for each `lsoa` code.
# NOTE(review): `year` is only assigned in a later cell, so this cell relies on
# the notebook having been executed out of order.
per_lsoa = year.groupby('lsoa')
incident_lsoa = per_lsoa['incidentid'].count()

In [None]:
# Persist the per-LSOA incident counts to `incident_lsoa.csv` (relative path).
incident_lsoa.to_csv('incident_lsoa.csv')

In [None]:
# Stack the four quarters into a single frame covering the whole year.
year = pd.concat([q1, q2, q3, q4], axis=0)
# Strip surrounding whitespace from every string cell; other values pass through.
# DataFrame.applymap was renamed DataFrame.map in pandas 2.1 (applymap is
# deprecated), so use whichever element-wise method this pandas provides.
_elementwise = year.map if hasattr(year, 'map') else year.applymap
year = _elementwise(lambda x: x.strip() if isinstance(x, str) else x)

In [None]:
# Strip surrounding whitespace from every string cell of the Q3 frame.
# DataFrame.applymap was renamed DataFrame.map in pandas 2.1 (applymap is
# deprecated), so use whichever element-wise method this pandas provides.
_elementwise = q3.map if hasattr(q3, 'map') else q3.applymap
q3 = _elementwise(lambda x: x.strip() if isinstance(x, str) else x)

In [None]:
# Inspect the column names of the Q3 frame.
q3.columns

In [None]:
def plot_category(datafram, category='dohcategory', frequency='W'):
    """Plot incident counts over time, one line per distinct value of *category*.

    Parameters
    ----------
    datafram : DataFrame
        Must have a datetime-like index (required by ``resample``), an
        ``incidentid`` column and the *category* column.
    category : str
        Column whose distinct values each get their own line.
    frequency : str
        Resample rule; ``'W'`` bins the incidents by week.

    Returns
    -------
    None. The chart is drawn on a newly created 20x10 figure.
    """
    # Create the figure directly: calling plt.clf() first would itself open
    # (and leave behind) an extra empty figure when none is current.
    fig, ax = plt.subplots(figsize=(20, 10))
    # NaN never compares equal to itself, so a missing category could only ever
    # produce an empty line; skip it.
    for value in datafram[category].dropna().unique():
        subset = datafram[datafram[category] == value]
        counts = subset.resample(frequency)['incidentid'].count()
        counts.plot(ax=ax, label=str(value))
    ax.legend()
        

In [None]:
# Weekly incident counts for the whole year, one line per `dohcategory`
# (the function's default category and frequency).
plot_category(year)

In [None]:
# Weekly count of non-null `incidentid` values across the whole year.
yW = year['incidentid'].resample('W').count()

In [None]:
# Weekly incident counts with the weekly average drawn as a red reference line.
# The figure is created directly; a leading plt.clf() would open an extra blank one.
fig, ax = plt.subplots(figsize=(20, 5))
yW.plot(ax=ax)
# Start the y-axis at 20000 so week-to-week variation is visible.
ax.set_ylim(bottom=20000)
# Average over the weekly bins actually present instead of a hard-coded 53.
n_weeks = len(yW)
ax.hlines(y=yW.sum() / n_weeks, xmin=yW.index.values.min(), xmax=yW.index.values.max(),
          color='r', label='average(' + str(n_weeks) + 'W)')
ax.legend()
# plt.plot() with no arguments draws nothing; show the finished figure instead.
plt.show()

In [None]:
# Count of non-null `incidentid` values in Q1, binned into 3-hour windows.
q1h3 = q1['incidentid'].resample('3H').count()

In [None]:
# Q1 incident counts per 3-hour window, with one x tick per calendar day.
# The figure is created directly; a leading plt.clf() would open an extra blank one.
fig, ax = plt.subplots(figsize=(30, 5))
q1h3.plot(ax=ax)
xticks = pd.date_range(start=q1h3.index.values.min(), end=q1h3.index.values.max(), freq='D')
# Label each day as "weekday month-day", rotated so the labels do not overlap.
plt.xticks(xticks, xticks.strftime('%a %m-%d'), rotation=90)
plt.show()

In [None]:
# Q1 incident counts grouped by the hour of day taken from the `callstart` index.
q1gb = q1['incidentid'].groupby(q1.index.hour).count()

In [None]:
# Bar chart of Q1 incident counts for each hour of the day.
fig, ax = plt.subplots(figsize=(8, 5))
q1gb.plot(kind='bar', ax=ax)
# Title typo fixed ("Totoal" -> "Total").
ax.set_title('Total Incidents Breakdown by Hours')
ax.set_xlabel('Time of a Day')
ax.set_ylabel('Frequency')

In [None]:
# Q1 incident counts for every (hour of day, dohcategory) pair.
hour_of_day = q1.index.hour
gb = q1.groupby([hour_of_day, q1['dohcategory']])['incidentid'].count()

In [None]:
# One line per dohcategory: pivot the category level of `gb` into columns so
# each column is plotted against the hour of day.
fig, ax = plt.subplots(figsize=(16, 10))
hourly_by_category = gb.unstack()
hourly_by_category.plot(ax=ax)
ax.set_title('Aggregate Hourly Trend for C1-C5')
ax.set_xlabel('Time of a Day')
ax.set_ylabel('Frequency')

# Mean response time per lsoa per category

In [None]:
# Show the last rows of the concatenated year frame.
year.tail()

In [None]:
# Mean `response_aqi` for every (lsoa, dohcategory) combination.
by_lsoa_and_category = year.groupby(by=['lsoa', 'dohcategory'])
lsogb = by_lsoa_and_category['response_aqi'].mean()

In [None]:
# Load the LSOA boundary shapefile; later cells join it to the incident data
# on its `LSOA11CD` column and plot its `geometry`.
lsoa_shapefile = 'ESRI/LSOA_2011_London_gen_MHW.shp'
lsoa = gpd.read_file(lsoa_shapefile)

In [None]:
# Inspect the column names of the concatenated year frame.
year.columns

In [None]:
# Response time divided by 60 (presumably seconds -> minutes; confirm the source unit).
q3['response_aqi_min'] = q3['response_aqi'] / 60
# Every later cell reads `response_aqi_min` from `year`, not from `q3`, so the
# column has to exist on the concatenated frame as well.
year['response_aqi_min'] = year['response_aqi'] / 60

In [None]:
def plot_mean_percentile(C='C1', mean_=7, percentile_=15):
    """Map the per-LSOA mean and 90th-percentile response time for one category.

    Reads the notebook-level ``year`` frame (columns ``dohcategory``, ``lsoa``
    and ``response_aqi_min``) and the ``lsoa`` GeoDataFrame (columns
    ``LSOA11CD`` and ``geometry``). Draws a 1x2 figure: the left map shows the
    mean, the right map the 90th percentile. LSOAs whose statistic exceeds the
    threshold are drawn in solid red underneath the translucent colour map.
    Also prints, for each statistic, the share of LSOAs strictly below the
    threshold. The printed value is a fraction between 0 and 1 even though the
    message calls it a percentage.

    Parameters
    ----------
    C : value of ``dohcategory`` to analyse.
    mean_ : threshold applied to the per-LSOA mean (minutes, per the titles).
    percentile_ : threshold applied to the per-LSOA 90th percentile.
    """
    # Per-LSOA statistics for the selected category.
    by_lsoa = year[year.dohcategory == C].groupby('lsoa')['response_aqi_min']
    combined = pd.concat(
        [by_lsoa.mean().rename('mean_response'),
         by_lsoa.quantile(0.9).rename('percentile90_response')],
        axis=1,
    ).reset_index()
    # Attach the polygons; the inner merge keeps only LSOAs present in the shapefile.
    geo_combined = combined.merge(lsoa, left_on='lsoa', right_on='LSOA11CD')[
        ['lsoa', 'mean_response', 'percentile90_response', 'geometry']]
    geo_combined = gpd.GeoDataFrame(geo_combined)

    fig, ax = plt.subplots(1, 2, figsize=(25, 10))
    panels = (
        (ax[0], 'mean_response', mean_, ' Mean Response_aqi < '),
        (ax[1], 'percentile90_response', percentile_, ' 90 Percentile Response_aqi < '),
    )
    for panel, stat, limit, label in panels:
        # `column=` is not passed here: geopandas ignores it (with a warning)
        # whenever an explicit `color` is given.
        geo_combined[geo_combined[stat] > limit].plot(color='red', ax=panel, alpha=1)
        geo_combined.plot(column=stat, cmap='summer', legend=False, ax=panel, alpha=0.5)
        panel.set_axis_off()
        panel.set_title(str(C) + label + str(limit) + ' mins per lsoa', fontsize=20)

        # Count the LSOAs under the threshold with a boolean sum rather than
        # `DataFrame.count()[0]`: integer-position lookup on a label-indexed
        # Series is deprecated in pandas 2.x.
        share = (geo_combined[stat] < limit).sum() / geo_combined[stat].count()
        print('The percentage of lsoa that ', C, label, limit, ' mins is: ', share)
    

In [None]:
# C1: highlight LSOAs with mean above 7 and 90th percentile above 15.
plot_mean_percentile('C1', 7, 15)

In [None]:
# C2: highlight LSOAs with mean above 18 and 90th percentile above 40.
plot_mean_percentile('C2', 18, 40)

In [None]:
# C3: highlight LSOAs with mean above 60 and 90th percentile above 120.
plot_mean_percentile('C3', 60, 120)

In [None]:
# C4: the same threshold (180) is used for both the mean and the 90th percentile.
plot_mean_percentile('C4', 180, 180)

In [None]:
# 90th-percentile C1 response time per LSOA, joined to the LSOA polygons.
c1_p90 = year[year.dohcategory == 'C1'].groupby('lsoa')['response_aqi_min'].quantile(0.9).reset_index()
year_c1 = c1_p90.merge(lsoa, left_on='lsoa', right_on='LSOA11CD')[['lsoa', 'response_aqi_min', 'geometry']]
year_c1 = gpd.GeoDataFrame(year_c1)
fig, ax = plt.subplots(1, 2, figsize=(25, 10))

# (axis, threshold, draw colour bar?) for the two panels.
for panel, limit, show_legend in ((ax[0], 7, True), (ax[1], 15, False)):
    # LSOAs above the threshold go underneath in solid red. `column=` is not
    # passed because geopandas ignores it (with a warning) when `color` is given.
    year_c1[year_c1.response_aqi_min > limit].plot(color='red', ax=panel, alpha=1)
    year_c1.plot(column='response_aqi_min', cmap='summer', legend=show_legend, ax=panel, alpha=0.5)
    panel.set_axis_off()
    # The statistic plotted is the 90th percentile, so the title no longer says "avg".
    panel.set_title('C1 90 Percentile Response_aqi < ' + str(limit) + ' mins per lsoa')

In [None]:
# Mean C2 response time per LSOA, joined to the LSOA polygons.
c2_mean = year[year.dohcategory == 'C2'].groupby('lsoa')['response_aqi_min'].mean().reset_index()
year_c2 = c2_mean.merge(lsoa, left_on='lsoa', right_on='LSOA11CD')[['lsoa', 'response_aqi_min', 'geometry']]
year_c2 = gpd.GeoDataFrame(year_c2)
fig, ax = plt.subplots(1, 2, figsize=(25, 10))

# (axis, threshold, draw colour bar?) for the two panels.
for panel, limit, show_legend in ((ax[0], 18, True), (ax[1], 40, False)):
    # LSOAs above the threshold go underneath in solid red. `column=` is not
    # passed because geopandas ignores it (with a warning) when `color` is given.
    year_c2[year_c2.response_aqi_min > limit].plot(color='red', ax=panel, alpha=1)
    year_c2.plot(column='response_aqi_min', cmap='summer', legend=show_legend, ax=panel, alpha=0.5)
    panel.set_axis_off()
    panel.set_title('C2 avg Response_aqi < ' + str(limit) + ' mins per lsoa')

In [None]:
# Mean C3 response time per LSOA, joined to the LSOA polygons.
c3_mean = year[year.dohcategory == 'C3'].groupby('lsoa')['response_aqi_min'].mean().reset_index()
year_c3 = c3_mean.merge(lsoa, left_on='lsoa', right_on='LSOA11CD')[['lsoa', 'response_aqi_min', 'geometry']]
year_c3 = gpd.GeoDataFrame(year_c3)
fig, ax = plt.subplots(1, 2, figsize=(25, 10))

# (axis, threshold, draw colour bar?) for the two panels.
for panel, limit, show_legend in ((ax[0], 60, True), (ax[1], 120, False)):
    # LSOAs above the threshold go underneath in solid red. `column=` is not
    # passed because geopandas ignores it (with a warning) when `color` is given.
    year_c3[year_c3.response_aqi_min > limit].plot(color='red', ax=panel, alpha=1)
    year_c3.plot(column='response_aqi_min', cmap='summer', legend=show_legend, ax=panel, alpha=0.5)
    panel.set_axis_off()
    panel.set_title('C3 avg Response_aqi < ' + str(limit) + ' mins per lsoa')

In [None]:
# Mean C4 response time per LSOA, joined to the LSOA polygons.
c4_mean = year[year.dohcategory == 'C4'].groupby('lsoa')['response_aqi_min'].mean().reset_index()
year_c4 = c4_mean.merge(lsoa, left_on='lsoa', right_on='LSOA11CD')[['lsoa', 'response_aqi_min', 'geometry']]
year_c4 = gpd.GeoDataFrame(year_c4)
fig, ax = plt.subplots(1, 1, figsize=(15, 8))
# LSOAs whose mean exceeds 180 go underneath in solid red. `column=` is not
# passed because geopandas ignores it (with a warning) when `color` is given.
year_c4[year_c4.response_aqi_min > 180].plot(color='red', ax=ax, alpha=1)
year_c4.plot(column='response_aqi_min', cmap='summer', legend=True, ax=ax, alpha=0.5)
ax.set_axis_off()
ax.set_title('C4 avg Response_aqi < 180 mins per lsoa')