In [1]:
%%capture
%run 01_transform.ipynb

In [16]:
# ANALYSIS SETTINGS -- EDIT THE DATE RANGE BELOW FOR YOUR OWN TIME RANGE ANALYSIS
# both dates must be written as 'mm-dd-yyyy'; the range is inclusive on both ends

config = dict(
    start_date='03-05-2017',    # earliest created_at_date to keep
    end_date='03-11-2017',      # latest created_at_date to keep
    export_path='metrics.csv',  # destination of the CSV written by the export cell
    print_stacked=False,        # True -> export the stacked layout instead of the table
)

In [9]:
# FILTER SCF_DF FOR ISSUES WITHIN CONFIGURABLE DATE RANGE
# NOTE(review): `datetime`, `pd` and `scf_df` are not defined in this notebook;
# presumably they are provided by the `%run 01_transform.ipynb` cell -- confirm.

# convert object into datetime
def makeDate(obj):
    """Parse an 'mm-dd-yyyy' string into a datetime.

    Raises ValueError when `obj` does not match the '%m-%d-%Y' format.
    """
    return datetime.strptime(obj, '%m-%d-%Y')

# store datetime values
# the column is parsed in one vectorized call rather than a per-row strptime;
# a missing date becomes NaT (and simply fails the range test) instead of raising
a = pd.to_datetime(scf_df['created_at_date'], format='%m-%d-%Y')
b = makeDate(config['start_date'])
c = makeDate(config['end_date'])

# a reversed range would otherwise silently select zero rows
if b > c:
    raise ValueError(
        "config['start_date'] ({}) is after config['end_date'] ({})".format(
            config['start_date'], config['end_date']))

# do datetime comparisons, where 'start' and 'end' are boolean Series
# filter for: start <= created_at_date <= end
start = b <= a
end = a <= c

# filter all issues where start and end are True, set as new dataframe
filtered_df = scf_df.loc[start & end]

# check that we successfully filtered rows
print(filtered_df.shape)

(1027, 49)


In [6]:
# METRICS START HERE

# headline: how many issues fell inside the configured creation-date window
num_filtered_issues = len(filtered_df.index)
stats_header = """Stats for {} issues created from {} to {}"""
print(stats_header.format(num_filtered_issues, config['start_date'], config['end_date']))

Stats for 1027 issues created from 03-05-2017 to 03-11-2017


In [10]:
# tally the filtered issues per status (counts non-null created_at values)
status_count_df = (
    filtered_df
    .groupby('status', as_index=False)['created_at']
    .count()
    .rename(columns={'status': 'request_status', 'created_at': 'number_issues'})
)
print(status_count_df)

  request_status  number_issues
0   Acknowledged            136
1       Archived            659
2         Closed            231
3           Open              1


In [11]:
# issues per request type (counts non-null created_at values)
type_count_df = (
    filtered_df
    .groupby('request_type_title', as_index=False)['created_at']
    .count()
    .rename(columns={'created_at': 'num_issues'})
)

# issues per request type that have a closed_at value
# (this is the denominator for median_days_create_to_close)
type_closed_count_df = (
    filtered_df
    .groupby('request_type_title', as_index=False)['closed_at']
    .count()
    .rename(columns={'closed_at': 'num_closed'})
)

# combine both counts into one frame keyed by request type and show it
volume_df = type_count_df.merge(type_closed_count_df, on='request_type_title', how='outer')
print(volume_df)

                      request_type_title  num_issues  num_closed
0                      Abandoned Vehicle          57          41
1                    Blocked Catch Basin          36           4
2             Curbside Solid Waste Issue          73          71
3    DPW - Debris Removal - DPW USE ONLY         103          97
4              DPW - Other environmental         103         103
5                    Dead Animal Removal          29          29
6                     Fire Hydrant Issue           5           2
7   Illegal Dumping / Illegal Dump Sites          87          70
8                    Manhole Cover Issue          10           5
9               New LED Street Light Out          33          33
10                            Park Issue           1           0
11                              Potholes          93          93
12   Running Water in a Home or Building          22          21
13                Street Light Pole Down          10          10
14                    Tra

In [12]:
# this cell needs `sla_df` to be the SLA lookup table (the source of the
# 'sla_days_to_close' column). A later cell rebinds `sla_df` to the
# percent-within-SLA summary, so re-running this cell out of order would
# silently merge the wrong columns; fail fast with a clear message instead.
assert 'sla_days_to_close' in sla_df.columns, (
    "sla_df has no 'sla_days_to_close' column -- it has probably been "
    "overwritten by a later cell; restart the kernel and run all cells in order"
)

# calculate median days from create to close for closed issues by type
med_type_df = filtered_df.groupby('request_type_title', as_index=False)['days_create_to_close'].median()
med_type_df.columns = ['request_type_title', 'median_days_create_to_close']

# compare to SLAs for each type, merge and display as single dataframe
# (outer merge keeps request types that appear in only one of the two frames)
days_compare_df = pd.merge(med_type_df, sla_df, on='request_type_title', how='outer')
print(days_compare_df)

                      request_type_title  median_days_create_to_close  sla_days_to_close
0                      Abandoned Vehicle                     2.205602                  5
1                    Blocked Catch Basin                     5.408027                  1
2             Curbside Solid Waste Issue                     2.226019                  7
3    DPW - Debris Removal - DPW USE ONLY                     6.932477                  0
4              DPW - Other environmental                     1.985845                  0
5                    Dead Animal Removal                     0.000475                  3
6                     Fire Hydrant Issue                     9.841481                  1
7   Illegal Dumping / Illegal Dump Sites                     2.295683                 10
8                    Manhole Cover Issue                     0.438067                  1
9               New LED Street Light Out                     3.712766                  7
10                   

In [13]:
# per request type, sum within_sla_bool to get the number of issues closed within SLA
# (presumably a True/False flag per issue -- TODO confirm against 01_transform)
num_sla_df = (
    filtered_df
    .groupby('request_type_title', as_index=False)['within_sla_bool']
    .sum()
    .rename(columns={'within_sla_bool': 'num_within_sla'})
)

# NOTE(review): this rebinds `sla_df`, which the median-days cell reads as its SLA
# lookup (source of 'sla_days_to_close'); re-running that cell after this one would
# merge against this summary frame instead. The export cell reads this new value.
# attach the within-SLA counts to the volume table, then add the percentage:
# num_within_sla divided by num_closed, times 100 for readability
sla_df = (
    volume_df
    .merge(num_sla_df, on='request_type_title', how='outer')
    .assign(perc_within_sla=lambda merged: (merged['num_within_sla'] / merged['num_closed']) * 100)
)
print(sla_df)

                      request_type_title  num_issues  num_closed  num_within_sla  perc_within_sla
0                      Abandoned Vehicle          57          41            29.0        70.731707
1                    Blocked Catch Basin          36           4             0.0         0.000000
2             Curbside Solid Waste Issue          73          71            68.0        95.774648
3    DPW - Debris Removal - DPW USE ONLY         103          97             0.0         0.000000
4              DPW - Other environmental         103         103             0.0         0.000000
5                    Dead Animal Removal          29          29            29.0       100.000000
6                     Fire Hydrant Issue           5           2             0.0         0.000000
7   Illegal Dumping / Illegal Dump Sites          87          70            70.0       100.000000
8                    Manhole Cover Issue          10           5             3.0        60.000000
9               New 

In [14]:
# per request type, how many issues have a reopened_at value
reopened_type_df = (
    filtered_df
    .groupby('request_type_title', as_index=False)['reopened_at']
    .count()
    .rename(columns={'reopened_at': 'num_reopened'})
)

# per request type, how many issues have a canonical_issue_id value
# (canonical = top-level duplicate)
canonical_type_df = (
    filtered_df
    .groupby('request_type_title', as_index=False)['canonical_issue_id']
    .count()
    .rename(columns={'canonical_issue_id': 'num_canonical'})
)

# put both counts side by side, keyed by request type, and show the result
num_compare_df = reopened_type_df.merge(canonical_type_df, on='request_type_title', how='outer')
print(num_compare_df)

                      request_type_title  num_reopened  num_canonical
0                      Abandoned Vehicle             1              0
1                    Blocked Catch Basin             0              2
2             Curbside Solid Waste Issue             1              2
3    DPW - Debris Removal - DPW USE ONLY             1              0
4              DPW - Other environmental             0              0
5                    Dead Animal Removal             0              0
6                     Fire Hydrant Issue             0              0
7   Illegal Dumping / Illegal Dump Sites             2             14
8                    Manhole Cover Issue             0              0
9               New LED Street Light Out             0              0
10                            Park Issue             0              0
11                              Potholes             6              0
12   Running Water in a Home or Building             0              0
13                St

In [17]:
# EXPORT RESULTS

# columns that make it into the exported file, in output order
export_columns = [
    'request_type_title',
    'num_issues',
    'num_closed',
    'median_days_create_to_close',
    'sla_days_to_close',
    'perc_within_sla',
    'num_reopened',
]

# join the per-type frames built above into one frame keyed by request type
merge_one = sla_df.merge(num_compare_df, on='request_type_title', how='outer')
merge_all = merge_one.merge(days_compare_df, on='request_type_title', how='outer')

# include just the columns we care about most
show_less = merge_all[export_columns]

# config['print_stacked'] picks the layout: the stacked form of the table when True,
# otherwise the table itself with its column headers in the first row
export_frame = show_less.stack() if config['print_stacked'] else show_less
export_frame.to_csv(config['export_path'])