In [30]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# SET PARAMETERS FOR ANALYSIS AND LOAD DATA

# analysis window and file locations; the week runs Sunday through Saturday
# and both dates are written as mm-dd-yyyy
config = dict(
    week_start='03-05-2017',
    week_end='03-11-2017',
    import_path='/Users/jessicamcinchak/Downloads/weekly_scf_issues.csv',
    export_path='metrics.csv',
    print_stacked=False,
)

# Service Level Agreements: request type -> days committed to close.
# NOTE: slaCheck compares with a strict "<", so a type whose SLA is 0 days can
# never be counted as closed within its SLA.
sla_days_to_close = dict([
    ('Abandoned Vehicle', 5),
    ('Illegal Dumping / Illegal Dump Sites', 10),
    ('Tree Issue', 14),
    ('Potholes', 5),
    ('Residential Snow Removal Issue', 1),
    ('Traffic Signal Issue', 14),
    ('Traffic Sign Issue', 7),
    ('Street Light Pole Down', 2),
    ('New LED Street Light Out', 7),
    ('Dead Animal Removal', 3),
    ('Curbside Solid Waste Issue', 7),
    ('Running Water in a Home or Building', 1),
    ('Water Main Break', 1),
    ('Fire Hydrant Issue', 1),
    ('Manhole Cover Issue', 1),
    ('Blocked Catch Basin', 1),
    ('DPW - Debris Removal - DPW USE ONLY', 0),
    ('DPW - Other environmental', 0),
    ('Park Issue', 0),
])

# the same SLA table as a two-column dataframe, one row per request type
sla_df = pd.DataFrame({
    'request_type_title': list(sla_days_to_close.keys()),
    'sla_days_to_close': list(sla_days_to_close.values()),
})

# load the SeeClickFix issues csv named in the config into a dataframe
scf_df = pd.read_csv(
    config['import_path'],
    low_memory=False,
)

# keep wide frames on a single line when they are printed to the console
pd.options.display.expand_frame_repr = False

# sanity check: dimensions (rows, cols) of the loaded dataframe
print(scf_df.shape)

(1500, 43)


In [34]:
# CLEAN UP THE DEFAULT SCF DATA FORMATS AND CREATE SOME NEW COLUMNS FOR SIMPLER ANALYSIS

# HELPER FUNCTIONS

# reformats a timestamp string as mm-dd-yyyy
def simpleDate(obj):
    """Return the calendar date of a timestamp string as 'mm-dd-yyyy'.

    The last six characters of ``obj`` are discarded before parsing; the
    remainder must match 'YYYY-MM-DDTHH:MM:SS'.
    """
    stamp_without_suffix = obj[:-6]
    parsed = datetime.strptime(stamp_without_suffix, '%Y-%m-%dT%H:%M:%S')
    return parsed.strftime('%m-%d-%Y')

# converts a timestamp string into a datetime so we can do math on it
def makeDateObj(obj):
    """Parse a timestamp string into a naive ``datetime.datetime``.

    The last six characters of ``obj`` are discarded before parsing; the
    remainder must match 'YYYY-MM-DDTHH:MM:SS'.
    """
    stamp_without_suffix = obj[:-6]
    return datetime.strptime(stamp_without_suffix, '%Y-%m-%dT%H:%M:%S')

# converts a timedelta to fractional days (eg '2 days 12:00:00' becomes 2.5)
def makeFloat(td):
    """Express a timedelta-like value as a float number of days."""
    one_day_seconds = timedelta(days=1).total_seconds()
    elapsed_seconds = td.total_seconds()
    return elapsed_seconds / one_day_seconds

# check if issues were closed within their SLA; returns 1 if yes/under, 0 if no/over, or NaN if not yet closed
def slaCheck(row, sla_lookup=None):
    """Flag whether a closed issue was closed in fewer days than its SLA.

    Args:
        row: mapping-like record (for example a dataframe row) providing
            'days_create_to_close' and 'request_type_title'.
        sla_lookup: optional dict of request type -> committed days. When
            omitted, the notebook-level ``sla_days_to_close`` dict is used.

    Returns:
        1 when the issue closed in strictly fewer days than its SLA, 0 when
        it did not, and NaN when there is nothing to score: the close time
        is missing or not positive, or the request type has no SLA entry.
    """
    if sla_lookup is None:
        sla_lookup = sla_days_to_close

    not_scored = float('nan')
    days_open = row['days_create_to_close']

    # NaN > 0 is False, so issues that are not closed yet land here too
    if not days_open > 0:
        return not_scored

    # .get instead of [] so one unlisted request type cannot abort the whole
    # DataFrame.apply with a KeyError
    committed_days = sla_lookup.get(row['request_type_title'])
    if committed_days is None:
        return not_scored

    return 1 if days_open < committed_days else 0

# NEW COLUMNS

# mm-dd-yyyy string per issue, used later to match the config start and end
scf_df['created_at_simple'] = scf_df['created_at'].apply(simpleDate)

# datetime versions of the timestamps, so we can do math on them
scf_df['created_at_obj'] = scf_df['created_at'].apply(makeDateObj)
# closed_at is null for issues that are not closed; pass those values through untouched
scf_df['closed_at_obj'] = scf_df['closed_at'].apply(
    lambda stamp: stamp if pd.isnull(stamp) else makeDateObj(stamp)
)

# elapsed time from create to close; NaT while the issue is not closed yet
scf_df['diff_create_to_close'] = scf_df['closed_at_obj'] - scf_df['created_at_obj']

# the same elapsed time as fractional days, so we can calculate a median
scf_df['days_create_to_close'] = scf_df['diff_create_to_close'].apply(makeFloat)

# per-issue flag from slaCheck: was the closed issue under its SLA?
scf_df['within_sla_bool'] = scf_df.apply(slaCheck, axis=1)

# sanity check: the frame should now have 6 more columns than when it was loaded
print(scf_df.shape)

(1500, 49)


In [40]:
# FILTER THE FULL DATAFRAME BY THE TIME PARAMETERS WE SET

# parse an mm-dd-yyyy string into a datetime
def convertStringToDate(obj):
    """Return the ``datetime`` (at midnight) for an 'mm-dd-yyyy' string."""
    date_format = '%m-%d-%Y'
    return datetime.strptime(obj, date_format)

# parse the per-issue creation dates (a) and the configured bounds (b, c) into datetimes
a = scf_df['created_at_simple'].apply(convertStringToDate)
b, c = (convertStringToDate(config[bound]) for bound in ('week_start', 'week_end'))

# boolean masks for an inclusive window: start <= created_at_simple <= end
start = a >= b
end = a <= c

# keep only the issues created inside the configured window, as a new dataframe
filtered_df = scf_df.loc[start & end]

# sanity check: row count should have dropped, column count should not
print(filtered_df.shape)

(1027, 49)


In [5]:
# WEEKLY ANALYTICS START HERE

# headline: how many issues fall inside the configured window
print('Stats for {} issues created from {} to {}'.format(
    len(filtered_df),
    config['week_start'],
    config['week_end'],
))

Stats for 1027 issues created from 03-05-2017 to 03-11-2017


In [41]:
# number of issues per status, as of the date the data was exported
status_count_df = (
    filtered_df
    .groupby('status', as_index=False)['created_at']
    .count()
    .rename(columns={'status': 'request_status', 'created_at': 'number_issues'})
)
print(status_count_df)

  request_status  number_issues
0   Acknowledged            383
1       Archived             17
2         Closed            596
3           Open             31


In [42]:
# number of issues per request type (created_at is counted for every row)
type_count_df = (
    filtered_df
    .groupby('request_type_title', as_index=False)['created_at']
    .count()
    .rename(columns={'created_at': 'num_issues'})
)

# number of closed issues per request type: count() skips null closed_at values.
# This is the denominator for median_days_create_to_close.
type_closed_count_df = (
    filtered_df
    .groupby('request_type_title', as_index=False)['closed_at']
    .count()
    .rename(columns={'closed_at': 'num_closed'})
)

# one row per request type with both counts side by side
volume_df = type_count_df.merge(type_closed_count_df, on='request_type_title', how='outer')

In [43]:
# median days from create to close per request type (closed issues only)
med_type_df = (
    filtered_df
    .groupby('request_type_title', as_index=False)['days_create_to_close']
    .median()
    .rename(columns={'days_create_to_close': 'median_days_create_to_close'})
)

# put each type's median next to its SLA.
# NOTE: sla_df must still be the SLA lookup table built from sla_days_to_close;
# the within-SLA cell further down rebinds that name, so run the cells in order.
days_compare_df = med_type_df.merge(sla_df, on='request_type_title', how='outer')
print(days_compare_df)

                      request_type_title  median_days_create_to_close  sla_days_to_close
0                      Abandoned Vehicle                     1.830208                  5
1                    Blocked Catch Basin                          NaN                  1
2             Curbside Solid Waste Issue                     2.085995                  7
3    DPW - Debris Removal - DPW USE ONLY                     6.833490                  0
4              DPW - Other environmental                     1.258438                  0
5                    Dead Animal Removal                     0.000475                  3
6                     Fire Hydrant Issue                          NaN                  1
7   Illegal Dumping / Illegal Dump Sites                     2.105185                 10
8                    Manhole Cover Issue                     0.420816                  1
9               New LED Street Light Out                     1.753183                  7
10                   

In [44]:
# number of issues per request type that were closed within their SLA
num_sla_df = (
    filtered_df
    .groupby('request_type_title', as_index=False)['within_sla_bool']
    .sum()
    .rename(columns={'within_sla_bool': 'num_within_sla'})
)

# attach the within-SLA counts to the volume table.
# NOTE: this rebinds sla_df, which until now held the SLA lookup table.
sla_df = volume_df.merge(num_sla_df, on='request_type_title', how='outer')

# share of closed issues that met the SLA, as a percentage for readability
sla_df['perc_within_sla'] = sla_df['num_within_sla'].div(sla_df['num_closed']) * 100
print(sla_df)

                      request_type_title  num_issues  num_closed  num_within_sla  perc_within_sla
0                      Abandoned Vehicle          57          26            26.0       100.000000
1                    Blocked Catch Basin          36           0             NaN              NaN
2             Curbside Solid Waste Issue          81          59            57.0        96.610169
3    DPW - Debris Removal - DPW USE ONLY          75          14             0.0         0.000000
4              DPW - Other environmental         108          85             0.0         0.000000
5                    Dead Animal Removal          29          29            29.0       100.000000
6                     Fire Hydrant Issue           5           0             NaN              NaN
7   Illegal Dumping / Illegal Dump Sites         102          62            62.0       100.000000
8                    Manhole Cover Issue          10           4             3.0        75.000000
9               New 

In [45]:
# reopened issues per request type: count() only sees rows with a non-null reopened_at
reopened_type_df = (
    filtered_df
    .groupby('request_type_title', as_index=False)['reopened_at']
    .count()
    .rename(columns={'reopened_at': 'num_reopened'})
)

# issues per request type with a non-null canonical_issue_id
# (marked as canonical, aka top-level duplicate)
canonical_type_df = (
    filtered_df
    .groupby('request_type_title', as_index=False)['canonical_issue_id']
    .count()
    .rename(columns={'canonical_issue_id': 'num_canonical'})
)

# both counts side by side, one row per request type
num_compare_df = reopened_type_df.merge(canonical_type_df, on='request_type_title', how='outer')
print(num_compare_df)

                      request_type_title  num_reopened  num_canonical
0                      Abandoned Vehicle             0              0
1                    Blocked Catch Basin             0              2
2             Curbside Solid Waste Issue             1              2
3    DPW - Debris Removal - DPW USE ONLY             1              0
4              DPW - Other environmental             0              0
5                    Dead Animal Removal             0              0
6                     Fire Hydrant Issue             0              0
7   Illegal Dumping / Illegal Dump Sites             2             12
8                    Manhole Cover Issue             0              0
9               New LED Street Light Out             0              0
10                            Park Issue             0              0
11                              Potholes             6              0
12   Running Water in a Home or Building             0              0
13                St

In [46]:
# EXPORT RESULTS

# combine the per-type tables built above into one dataframe.
# NOTE: sla_df here is the within-SLA summary table, not the original SLA lookup.
merge_one = sla_df.merge(num_compare_df, on='request_type_title', how='outer')
merge_all = merge_one.merge(days_compare_df, on='request_type_title', how='outer')

# keep just the columns we care about most
show_less = merge_all[[
    'request_type_title',
    'num_issues',
    'num_closed',
    'median_days_create_to_close',
    'sla_days_to_close',
    'perc_within_sla',
    'num_reopened',
]]

# write the csv: stacked when config['print_stacked'] is set, otherwise
# horizontally aligned with headers in row one
(show_less.stack() if config['print_stacked'] else show_less).to_csv(config['export_path'])