In [1]:
%%capture
%run 01_transform.ipynb

In [2]:
# CHOOSE THE START AND END DATE OF THE TIME RANGE TO ANALYZE
# dates are written in 'mm-dd-yyyy' format

config = dict(
    start_date='01-01-2017',  # earliest creation date kept by the range filter
    end_date='05-13-2017',  # latest creation date kept by the range filter
    export_path='/home/jessica/Desktop/metrics_ytd.csv',  # destination of the csv export
    print_stacked=False,  # True exports the stacked layout instead of the wide table
)

In [3]:
# FILTER SCF_DF FOR ISSUES WITHIN CONFIGURABLE DATE RANGE

def makeDate(obj):
    """Parse an 'mm-dd-yyyy' string into a datetime.

    Raises ValueError when the string does not match the format.
    """
    return datetime.strptime(obj, '%m-%d-%Y')

# parse the whole column in one vectorized call rather than row by row;
# missing values become NaT instead of raising, and NaT never falls in the range
created_dates = pd.to_datetime(scf_df['created_at_date'], format='%m-%d-%Y')
range_start = makeDate(config['start_date'])
range_end = makeDate(config['end_date'])

# boolean mask for: range_start <= created_at_date <= range_end (both ends inclusive)
in_range = created_dates.between(range_start, range_end)

# keep only the issues inside the range, as a new dataframe
filtered_df = scf_df.loc[in_range]

# check that we successfully filtered rows
print(filtered_df.shape)

(16373, 52)


In [4]:
# METRICS START HERE

# headline: how many issues fall inside the configured date range
issue_total = len(filtered_df.index)
summary_line = """Stats for {} issues created from {} to {}""".format(
    issue_total,
    config['start_date'],
    config['end_date'],
)
print(summary_line)

Stats for 16373 issues created from 01-01-2017 to 05-13-2017


In [5]:
# tally issues per status (counts the non-null 'created_at' values in each group)
status_count_df = (
    filtered_df
    .groupby('status', as_index=False)['created_at']
    .count()
    .rename(columns={'status': 'request_status', 'created_at': 'number_issues'})
)
print(status_count_df)

  request_status  number_issues
0   Acknowledged           2280
1       Archived          13139
2         Closed            892
3           Open             62


In [6]:
# group once by request type; both tallies below count non-null values per group
issues_by_type = filtered_df.groupby('request_type_title', as_index=False)

# every issue of the type
type_count_df = issues_by_type['created_at'].count().rename(
    columns={'created_at': 'num_issues'})

# only the issues that have a closing timestamp
# (this is the denominator for median_days_create_to_close)
type_closed_count_df = issues_by_type['closed_at'].count().rename(
    columns={'closed_at': 'num_closed'})

# combine both tallies into a single dataframe and show it
volume_df = type_count_df.merge(type_closed_count_df, on='request_type_title', how='outer')
print(volume_df)

                      request_type_title  num_issues  num_closed
0                      Abandoned Vehicle        1403        1330
1                    Blocked Catch Basin        1517         275
2             Curbside Solid Waste Issue        1755        1573
3    DPW - Debris Removal - DPW USE ONLY        2691        2535
4              DPW - Other environmental        1950        1817
5                    Dead Animal Removal         219         215
6                     Fire Hydrant Issue         212         167
7   Illegal Dumping / Illegal Dump Sites        1705        1393
8                    Manhole Cover Issue         180         105
9               New LED Street Light Out         342         340
10                            Park Issue          65          23
11                              Potholes        1397        1303
12        Residential Snow Removal Issue          16          14
13   Running Water in a Home or Building         414         411
14                Street 

In [7]:
# calculate median days from create to close for closed issues by type
med_type_df = (
    filtered_df
    .groupby('request_type_title', as_index=False)['days_create_to_close']
    .median()
    .rename(columns={'days_create_to_close': 'median_days_create_to_close'})
)

# sla_df must still be the per-type SLA lookup table at this point; a later
# cell rebinds the same name to a metrics table, so re-running this cell out
# of order would merge the wrong frame and only fail much later, at export
if 'sla_days_to_close' not in sla_df.columns:
    raise RuntimeError(
        "sla_df has no 'sla_days_to_close' column; it was probably overwritten "
        "by a later cell. Re-run 01_transform.ipynb before running this cell."
    )

# compare to SLAs for each type, merge and display as single dataframe
days_compare_df = pd.merge(med_type_df, sla_df, on='request_type_title', how='outer')
days_compare_df

Unnamed: 0,request_type_title,median_days_create_to_close,sla_days_to_close
0,Abandoned Vehicle,3.88691,5
1,Blocked Catch Basin,13.133519,1
2,Curbside Solid Waste Issue,6.004167,7
3,DPW - Debris Removal - DPW USE ONLY,8.013576,0
4,DPW - Other environmental,2.410868,0
5,Dead Animal Removal,0.000602,3
6,Fire Hydrant Issue,7.40515,1
7,Illegal Dumping / Illegal Dump Sites,4.123796,10
8,Manhole Cover Issue,0.66838,1
9,New LED Street Light Out,1.888056,7


In [8]:
# per type, sum the within-SLA flags to get the number of issues closed within their SLA
num_sla_df = (
    filtered_df
    .groupby('request_type_title', as_index=False)['within_sla_bool']
    .sum()
    .rename(columns={'within_sla_bool': 'num_within_sla'})
)

# attach the within-SLA counts to the volume table
# NOTE: this rebinds sla_df, the name the median-days cell reads as its SLA
# table, so that cell cannot be re-run after this one without reloading it
sla_df = volume_df.merge(num_sla_df, on='request_type_title', how='outer')

# percentage of closed issues that met the SLA: num_within_sla / num_closed, scaled by 100
sla_df['perc_within_sla'] = sla_df['num_within_sla'].div(sla_df['num_closed']).mul(100)
sla_df

Unnamed: 0,request_type_title,num_issues,num_closed,num_within_sla,perc_within_sla
0,Abandoned Vehicle,1403,1330,830.0,62.406015
1,Blocked Catch Basin,1517,275,8.0,2.909091
2,Curbside Solid Waste Issue,1755,1573,1012.0,64.335664
3,DPW - Debris Removal - DPW USE ONLY,2691,2535,0.0,0.0
4,DPW - Other environmental,1950,1817,0.0,0.0
5,Dead Animal Removal,219,215,201.0,93.488372
6,Fire Hydrant Issue,212,167,13.0,7.784431
7,Illegal Dumping / Illegal Dump Sites,1705,1393,1318.0,94.615937
8,Manhole Cover Issue,180,105,56.0,53.333333
9,New LED Street Light Out,342,340,318.0,93.529412


In [9]:
# group once by request type; both tallies below count non-null values per group
grouped_by_type = filtered_df.groupby('request_type_title', as_index=False)

# issues that carry a reopened timestamp
reopened_type_df = grouped_by_type['reopened_at'].count().rename(
    columns={'reopened_at': 'num_reopened'})

# issues that carry a canonical issue id (aka top-level duplicate)
canonical_type_df = grouped_by_type['canonical_issue_id'].count().rename(
    columns={'canonical_issue_id': 'num_canonical'})

# combine both tallies into a single dataframe and show it
num_compare_df = reopened_type_df.merge(canonical_type_df, on='request_type_title', how='outer')
num_compare_df

Unnamed: 0,request_type_title,num_reopened,num_canonical
0,Abandoned Vehicle,20,12
1,Blocked Catch Basin,4,168
2,Curbside Solid Waste Issue,21,100
3,DPW - Debris Removal - DPW USE ONLY,43,5
4,DPW - Other environmental,21,25
5,Dead Animal Removal,3,0
6,Fire Hydrant Issue,1,7
7,Illegal Dumping / Illegal Dump Sites,40,179
8,Manhole Cover Issue,2,15
9,New LED Street Light Out,13,0


In [10]:
# per type, sum the beyond_jurisdiction flag to count issues closed as outside City jurisdiction
jurisdiction_df = (
    filtered_df
    .groupby('request_type_title', as_index=False)['beyond_jurisdiction']
    .sum()
    .rename(columns={'beyond_jurisdiction': 'num_out_jurisdiction'})
)

jurisdiction_df

Unnamed: 0,request_type_title,num_out_jurisdiction
0,Abandoned Vehicle,25
1,Blocked Catch Basin,0
2,Curbside Solid Waste Issue,47
3,DPW - Debris Removal - DPW USE ONLY,4
4,DPW - Other environmental,43
5,Dead Animal Removal,6
6,Fire Hydrant Issue,0
7,Illegal Dumping / Illegal Dump Sites,86
8,Manhole Cover Issue,0
9,New LED Street Light Out,15


In [11]:
# EXPORT RESULTS

# fold the per-type dataframes built above into one wide dataframe, one outer merge at a time
merge_one = sla_df.merge(num_compare_df, on='request_type_title', how='outer')
merge_two = merge_one.merge(days_compare_df, on='request_type_title', how='outer')
merge_all = merge_two.merge(jurisdiction_df, on='request_type_title', how='outer')

# keep only the columns we care about most, in report order
export_columns = [
    'request_type_title',
    'num_issues',
    'num_closed',
    'median_days_create_to_close',
    'sla_days_to_close',
    'perc_within_sla',
    'num_reopened',
    'num_canonical',
    'num_out_jurisdiction',
]
show_less = merge_all[export_columns]

# pick the layout from the config flag: stacked when requested, otherwise the
# default wide table with headers in row one; then write it to the export path
export_frame = show_less.stack() if config['print_stacked'] else show_less
export_frame.to_csv(config['export_path'])