In [59]:
import os
from datetime import datetime, timedelta

import numpy as np
import pandas as pd

# IMPORT OUR DATA AND PREP IT FOR ANALYSIS

# set better console display: do not wrap wide DataFrame reprs across multiple lines
pd.set_option('display.expand_frame_repr', False)

# import the SeeClickFix issues csv as a dataframe
# The file location can be overridden with the SCF_ISSUES_CSV environment variable so
# the notebook can run on another machine without editing code; when the variable is
# not set, the original local path is used unchanged.
SCF_ISSUES_CSV = os.environ.get(
    'SCF_ISSUES_CSV',
    '/Users/jessicamcinchak/Downloads/all_scf_issues.csv',
)
scf_df = pd.read_csv(SCF_ISSUES_CSV, encoding="latin1", low_memory=False)

# Service Level Agreements: request type title -> number of days committed to close
sla_days_to_close = {
    'Abandoned Vehicle': 5,
    'Illegal Dumping / Illegal Dump Sites': 10,
    'Tree Issue': 14,
    'Potholes': 5,
    'Residential Snow Removal Issue': 1,
    'Traffic Signal Issue': 14,
    'Traffic Sign Issue': 7,
    'Street Light Pole Down': 2,
    'New LED Street Light Out': 7,
    'Dead Animal Removal': 3,
    'Curbside Solid Waste Issue': 7,
    'Running Water in a Home or Building': 1,
    'Water Main Break': 1,
    'Fire Hydrant Issue': 1,
    'Manhole Cover Issue': 1,
    'Blocked Catch Basin': 1,
    'DPW - Debris Removal - DPW USE ONLY': 0,
    'DPW - Other environmental': 0,
    'Park Issue': 0,
}

# the same mapping as a two-column dataframe, one row per request type
# (rows follow the dict's insertion order)
sla_df = pd.DataFrame({
    'request_type_title': list(sla_days_to_close.keys()),
    'sla_days_to_close': list(sla_days_to_close.values()),
})

# sanity check: print scf_df's dimensions as a (rows, cols) tuple
print((len(scf_df.index), len(scf_df.columns)))

(6019, 43)


In [60]:
# scf_df['created_at'] and other timestamps are imported as objects by default
# we want to transform them to be datetimes so we can resample and do math later

# HELPER FUNCTIONS

# reformats a timestamp string as mm-dd-yyyy
def getDate(obj):
    """Return the date part of a timestamp string, formatted as mm-dd-yyyy.

    The last six characters of ``obj`` are discarded before parsing
    (presumably a UTC offset such as '-05:00' -- confirm against the data);
    what remains must match 'YYYY-MM-DDTHH:MM:SS'.

    Raises ValueError if the remaining text does not match that format.
    """
    stamp_without_suffix = obj[:-6]
    parsed = datetime.strptime(stamp_without_suffix, '%Y-%m-%dT%H:%M:%S')
    return parsed.strftime('%m-%d-%Y')

# expresses a timedelta as fractional days (eg '2 days 12:00:00' becomes 2.5)
def makeFloat(td):
    """Convert a duration to a float number of days.

    ``td`` must provide ``total_seconds()``; its value is divided by the
    number of seconds in one day.
    """
    elapsed_seconds = td.total_seconds()
    seconds_per_day = timedelta(days=1).total_seconds()
    return elapsed_seconds / seconds_per_day

# checks if issues are closed within their SLA; returns 1 if yes/under, 0 if no/over,
# or NaN if not yet closed (or if the request type has no SLA entry)
def slaCheck(row, sla_lookup=None):
    """Flag whether a closed issue met its Service Level Agreement.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row)
        Must provide 'days_create_to_close' (fractional days, NaN when the
        issue is not closed) and 'request_type_title'.
    sla_lookup : dict, optional
        Maps request type title -> days committed to close. Defaults to the
        module-level ``sla_days_to_close`` dict.

    Returns
    -------
    1 if the issue closed in strictly fewer days than its SLA, 0 if it did
    not, and NaN when the issue has no positive days-to-close value or its
    request type is missing from the lookup.
    """
    if sla_lookup is None:
        sla_lookup = sla_days_to_close

    days_open = row['days_create_to_close']
    # NaN compares False against 0, so unclosed issues take this branch too.
    if not days_open > 0:
        return np.nan

    # Use .get so one unexpected request type yields NaN instead of a KeyError
    # that would abort the whole DataFrame.apply.
    committed_days = sla_lookup.get(row['request_type_title'])
    if committed_days is None:
        return np.nan

    # NOTE(review): because the comparison is strict and days_open is > 0 here,
    # a request type whose SLA is 0 days can never be counted as within SLA --
    # confirm whether 0 is meant to denote "no SLA".
    return 1 if days_open < committed_days else 0

# NEW COLUMNS

# col 1: creation date as an mm-dd-yyyy string, derived from the raw 'created_at' text
scf_df['created_at_date'] = scf_df['created_at'].apply(getDate)

# cols 2-3: the raw created/closed timestamps parsed by pandas into datetime values
scf_df['created_at_datetime'] = pd.to_datetime(scf_df['created_at'])
scf_df['closed_at_datetime'] = pd.to_datetime(scf_df['closed_at'])

# col 4: elapsed time from creation to close, as a timedelta (NaT if the issue is not closed yet)
scf_df['diff_create_to_close'] = scf_df['closed_at_datetime'] - scf_df['created_at_datetime']

# col 5: the same elapsed time in fractional days, as a float (NaN if the issue is not closed yet)
scf_df['days_create_to_close'] = scf_df['diff_create_to_close'].apply(makeFloat)

# col 6: row-wise SLA flag for closed issues, computed by slaCheck
scf_df['within_sla_bool'] = scf_df.apply(slaCheck, axis=1)

# sanity check: the column count should have grown by 6
print(scf_df.shape)

(6019, 49)
