In [1]:
import pandas as pd

ROOT_DATA_DIR = '../in/club_express_exports'
OUT_DATA_DIR = '../out/'

## Read in data
Read in the services data and remove unnecessary columns (Visible? and Notes)

In [2]:
service_df = pd.read_csv(f"{ROOT_DATA_DIR}/service_data.csv")
visible_services_df = service_df.loc[service_df['Visible?']=='Yes']
trimmed_visible_services_df = visible_services_df.drop(['Visible?', 'Notes'], axis=1)
trimmed_visible_services_df.head()

Unnamed: 0,Service,Service Category,Transportation?
0,Alarms/Locks/Security,Professional Home/Garden Servi,No
2,Appliance Repair,Professional Home/Garden Servi,No
3,Auto Repair,Professional In-Home Support,No
4,Bill Paying / Paperwork,Volunteer In-Home Support,No
5,Board Meetings,Village Admin,No


Next, we need the request type that each category belongs to. This is relevant because, when creating a type of request (i.e., Transportation Request), the service categories are filtered to only display the categories belonging to the request type.

In [3]:
completed_and_requested_services_df = pd.read_csv(f"{ROOT_DATA_DIR}/service_provided_service_data.csv")
trimmed_service_info_df = completed_and_requested_services_df[['Service', 'Service Category', 'Request Type']].drop_duplicates()
trimmed_service_info_df

Unnamed: 0,Service,Service Category,Request Type
0,IT Consultant Office,Village Admin,Office
2,Coronavirus Support Phone Call,Volunteer In-Home Support,Member's Home
6,Vol Driver Local Medical Appt,Transportation,Transportation
10,Office Work - Misc.,Village Admin,Office
18,Grocery Shopping (Covid-19),Coronavirus Community Support,Transportation
...,...,...,...
6298,Answering Phones,Village Admin,Office
6340,FB/SocMedia Admin,Professional In-Home Support,Office
6351,Snow Removal,Professional Home/Garden Servi,Contractor Referral
6366,Dog Trainer/Walker - Professio,Professional In-Home Support,Contractor Referral


## Parse data for H4I Services Dataframe

In [4]:
# This H4I dataframe will hold all our relevant information.
h4i_service_info_df = pd.DataFrame(columns=['Service', 'Service Category', 'Request Type'])

# Accumulate columns in these lists
services = []
service_categories = []
request_types = []

# Iterate through all current services and insert relevant information into the lists for H4I dataframe columns
for row in trimmed_visible_services_df[['Service', 'Service Category']].iterrows():
    # Get Service, Service Category, and Service Category ID
    service, service_category = row[1].values
    # Get Request Type
    request_type_values = trimmed_service_info_df[trimmed_service_info_df['Service'] == service]['Request Type'].values
    num_request_type_values = len(request_type_values)
    try:
        assert(num_request_type_values <= 1) # Check that there's either 0 or 1 request types associated with this service
    except AssertionError: 
        if "Contractor Referral" in request_type_values:
            list(request_type_values).remove("Contractor Referral")
            num_request_type_values-=1
            if num_request_type_values == 1: # after removing 'Contractor Referral'
                break
        print("ASSERTION ERROR")
        print(request_type_values)
    request_type = None
    if len(request_type_values) == 1:
        request_type = request_type_values[0]
        
    # Append to lists
    services.append(service)
    service_categories.append(service_category)
    request_types.append(request_type)

Construct our dataframe with **Service Category** and **Request Type** for every **Service**.

In [5]:
h4i_service_info_df['Service'] = services
h4i_service_info_df['Service Category'] = service_categories
h4i_service_info_df['Request Type'] = request_types

In [None]:
h4i_service_info_df.head()

## The request type needs to be filled in for some service categories/services.

In [6]:
h4i_service_info_df.loc[h4i_service_info_df['Request Type'].isnull()]

Unnamed: 0,Service,Service Category,Request Type
0,Alarms/Locks/Security,Professional Home/Garden Servi,
1,Appliance Repair,Professional Home/Garden Servi,
2,Auto Repair,Professional In-Home Support,
5,Cell Phone Help,Technical Support,
6,Chimneys-Repr.&Clean.,Professional Home/Garden Servi,
7,Cloud Storage,Technical Support,
18,Email,Technical Support,
22,Flooring/Carpets,Professional Home/Garden Servi,
24,Garden/Landscape-Prof.,Professional Home/Garden Servi,
28,hack4impact test service,Transportation,


Fix these rows with "None" as Request Type.

In [7]:
h4i_service_info_df.loc[[0, 1, 2, 5, 6, 7, 18, 22, 24], 'Request Type']= "Member's Home"

Drop the "hack4impact test service" we created in the system.

In [8]:
h4i_service_info_df.drop(28, inplace=True); # drop 'hack4impact test service'

Check that there are no more "None" values in Request Type.

In [9]:
h4i_service_info_df.loc[h4i_service_info_df['Request Type'].isnull()]

Unnamed: 0,Service,Service Category,Request Type


Remove "Contractor Referral" Requests

In [10]:
h4i_service_info_df = h4i_service_info_df[h4i_service_info_df['Request Type'] != 'Contractor Referral']
h4i_service_info_df.head()
h4i_service_info_df.to_csv(f'{OUT_DATA_DIR}/all_service_info.csv', index=False)

## Now, let's gather Service Category -> Request mappings, to be used when we create the categories dataframe.

In [29]:
category_and_request_type_info = h4i_service_info_df[['Service Category', 'Request Type']].drop_duplicates()
category_and_request_type_info.set_index('Service Category', inplace=True)
category_and_request_type_info.loc['Village Admin']['Request Type']
# category_and_request_type_info['Village Admin']

'Office'

## Create tables of services, service categories, and request types with their IDs

In [42]:
service_categories_set = list(set(h4i_service_info_df['Service Category']))
services_set = list(set(h4i_service_info_df['Service']))
request_types_set = list(set(h4i_service_info_df['Request Type']))

h4i_service_categories_df = pd.DataFrame()
h4i_services_df = pd.DataFrame()
h4i_request_types_df = pd.DataFrame()

In [45]:
h4i_request_types_df['Name'] = request_types_set
h4i_request_types_df.head()
h4i_request_types_df.to_csv(f'{OUT_DATA_DIR}/request_types.csv', index_label='id')

In [47]:
h4i_request_types_df

Unnamed: 0,Name
0,Office
1,Member's Home
2,Transportation


In [46]:
h4i_services_df['Name'] = services_set
h4i_services_df.head()
h4i_services_df.to_csv(f'{OUT_DATA_DIR}/services.csv', index_label='id')

In [54]:
h4i_service_categories_df['Name'] = service_categories_set
h4i_service_categories_df['Request Type'] = [category_and_request_type_info.loc[category]['Request Type'] for 
                                             category in h4i_service_categories_df['Name']]
h4i_service_categories_df['Request Type ID'] = [h4i_request_types_df[h4i_request_types_df['Name'] == request_type].index[0] for 
                                             request_type in h4i_service_categories_df['Request Type']]
h4i_service_categories_df.head()
h4i_service_categories_df.to_csv(f'{OUT_DATA_DIR}/service_categories.csv', index_label='id')