In [1]:
import pandas as pd

ROOT_DATA_DIR = '../in/club_express_exports'
OUT_DATA_DIR = '../out/'

## Read in data
Read in the services data and remove unnecessary columns (Visible? and Notes)

In [2]:
service_df = pd.read_csv(f"{ROOT_DATA_DIR}/service_data.csv")
visible_services_df = service_df.loc[service_df['Visible?']=='Yes']
trimmed_visible_services_df = visible_services_df.drop(['Visible?', 'Notes'], axis=1)
trimmed_visible_services_df.head()

Unnamed: 0,Service,Service Category,Transportation?
0,Alarms/Locks/Security,Professional Home/Garden Servi,No
2,Appliance Repair,Professional Home/Garden Servi,No
3,Auto Repair,Professional In-Home Support,No
4,Bill Paying / Paperwork,Volunteer In-Home Support,No
5,Board Meetings,Village Admin,No


Next, we need the request type that each category belongs to. This is relevant because, when creating a type of request (i.e., Transportation Request), the service categories are filtered to only display the categories belonging to the request type.

In [3]:
completed_and_requested_services_df = pd.read_csv(f"{ROOT_DATA_DIR}/service_provided_service_data.csv")
trimmed_service_info_df = completed_and_requested_services_df[['Service', 'Service Category', 'Request Type']].drop_duplicates()
trimmed_service_info_df

Unnamed: 0,Service,Service Category,Request Type
0,IT Consultant Office,Village Admin,Office
2,Coronavirus Support Phone Call,Volunteer In-Home Support,Member's Home
6,Vol Driver Local Medical Appt,Transportation,Transportation
10,Office Work - Misc.,Village Admin,Office
18,Grocery Shopping (Covid-19),Coronavirus Community Support,Transportation
...,...,...,...
6298,Answering Phones,Village Admin,Office
6340,FB/SocMedia Admin,Professional In-Home Support,Office
6351,Snow Removal,Professional Home/Garden Servi,Contractor Referral
6366,Dog Trainer/Walker - Professio,Professional In-Home Support,Contractor Referral


## Parse data for H4I Services Dataframe

In [4]:
# This H4I dataframe will hold all our relevant information.
h4i_service_info_df = pd.DataFrame(columns=['Service', 'Service Category', 'Request Type'])

# Accumulate columns in these lists
services = []
service_categories = []
request_types = []

# Iterate through all current services and insert relevant information into the lists for H4I dataframe columns
for row in trimmed_visible_services_df[['Service', 'Service Category']].iterrows():
    # Get Service, Service Category, and Service Category ID
    service, service_category = row[1].values
    # Get Request Type
    request_type_values = trimmed_service_info_df[trimmed_service_info_df['Service'] == service]['Request Type'].values
    num_request_type_values = len(request_type_values)
    try:
        assert(num_request_type_values <= 1) # Check that there's either 0 or 1 request types associated with this service
    except AssertionError: 
        if "Contractor Referral" in request_type_values:
            list(request_type_values).remove("Contractor Referral")
            num_request_type_values-=1
            if num_request_type_values == 1: # after removing 'Contractor Referral'
                break
        print("ASSERTION ERROR")
        print(request_type_values)
    request_type = None
    if len(request_type_values) == 1:
        request_type = request_type_values[0]
        
    # Append to lists
    services.append(service)
    service_categories.append(service_category)
    request_types.append(request_type)

Construct our dataframe with **Service Category** and **Request Type** for every **Service**.

In [5]:
h4i_service_info_df['Service'] = services
h4i_service_info_df['Service Category'] = service_categories
h4i_service_info_df['Request Type'] = request_types

In [6]:
h4i_service_info_df.head()

Unnamed: 0,Service,Service Category,Request Type
0,Alarms/Locks/Security,Professional Home/Garden Servi,
1,Appliance Repair,Professional Home/Garden Servi,
2,Auto Repair,Professional In-Home Support,
3,Bill Paying / Paperwork,Volunteer In-Home Support,Member's Home
4,Board Meetings,Village Admin,Office


## The request type needs to be filled in for some service categories/services.

In [7]:
h4i_service_info_df.loc[h4i_service_info_df['Request Type'].isnull()]

Unnamed: 0,Service,Service Category,Request Type
0,Alarms/Locks/Security,Professional Home/Garden Servi,
1,Appliance Repair,Professional Home/Garden Servi,
2,Auto Repair,Professional In-Home Support,
5,Cell Phone Help,Technical Support,
6,Chimneys-Repr.&Clean.,Professional Home/Garden Servi,
7,Cloud Storage,Technical Support,
18,Email,Technical Support,
22,Flooring/Carpets,Professional Home/Garden Servi,
24,Garden/Landscape-Prof.,Professional Home/Garden Servi,
28,hack4impact test service,Transportation,


Fix these rows with "None" as Request Type.

In [8]:
h4i_service_info_df.loc[[0, 1, 2, 5, 6, 7, 18, 22, 24], 'Request Type']= "Member's Home"

Drop the "hack4impact test service" we created in the system.

In [9]:
h4i_service_info_df.drop(28, inplace=True); # drop 'hack4impact test service'

Check that there are no more "None" values in Request Type.

In [10]:
h4i_service_info_df.loc[h4i_service_info_df['Request Type'].isnull()]

Unnamed: 0,Service,Service Category,Request Type


Remove "Contractor Referral" Requests

In [11]:
h4i_service_info_df = h4i_service_info_df[h4i_service_info_df['Request Type'] != 'Contractor Referral']
h4i_service_info_df.head()

Unnamed: 0,Service,Service Category,Request Type
0,Alarms/Locks/Security,Professional Home/Garden Servi,Member's Home
1,Appliance Repair,Professional Home/Garden Servi,Member's Home
2,Auto Repair,Professional In-Home Support,Member's Home
3,Bill Paying / Paperwork,Volunteer In-Home Support,Member's Home
4,Board Meetings,Village Admin,Office


## Create tables of services, service categories, and request types with their IDs

In [12]:
service_categories_set = list(set(h4i_service_info_df['Service Category']))
services_set = list(set(h4i_service_info_df['Service']))
request_types_set = list(set(h4i_service_info_df['Request Type']))

In [13]:
h4i_request_types_df = pd.DataFrame()
h4i_request_types_df['Name'] = request_types_set
# h4i_request_types_df.to_csv(f'{OUT_DATA_DIR}/request_types.csv', index_label='ID')
h4i_request_types_df.head()

Unnamed: 0,Name
0,Transportation
1,Member's Home
2,Office


In [14]:
h4i_service_categories_df = pd.DataFrame()
h4i_service_categories_df['Name'] = service_categories_set

# Get Category -> Request Type mappings
category_and_request_type_info = h4i_service_info_df[['Service Category', 'Request Type']].drop_duplicates()
category_and_request_type_info.set_index('Service Category', inplace=True)

# Add request type IDs
request_types = [category_and_request_type_info.loc[category]['Request Type'] for category in h4i_service_categories_df['Name']]
print(request_types)
h4i_service_categories_df['Request Type ID'] = [h4i_request_types_df[h4i_request_types_df['Name'] == request_type].index[0] for request_type in request_types]

# h4i_service_categories_df.to_csv(f'{OUT_DATA_DIR}/service_categories.csv', index_label='ID')
h4i_service_categories_df.head()

["Member's Home", "Member's Home", 'Transportation', "Member's Home", "Member's Home", 'Transportation', 'Office', "Member's Home"]


Unnamed: 0,Name,Request Type ID
0,Professional In-Home Support,1
1,Technical Support,1
2,Transportation,0
3,Volunteer In-Home Support,1
4,Professional Home/Garden Servi,1


In [15]:
h4i_services_df = pd.DataFrame()
h4i_services_df['Name'] = services_set

# Get Category IDs of services
categories_of_services = [h4i_service_info_df[h4i_service_info_df['Service'] == service]['Service Category'].values[0] for service in h4i_services_df['Name']]                
h4i_services_df['Category ID'] = [h4i_service_categories_df[h4i_service_categories_df['Name'] == category].index[0] for category in categories_of_services]

h4i_services_df.to_csv(f'{OUT_DATA_DIR}/services.csv', index_label='ID')
h4i_services_df.head()

Unnamed: 0,Name,Category ID
0,Email,1
1,Errands w/out Member,3
2,Flooring/Carpets,4
3,Home Health Care,0
4,Committee,6


## Now write the service info df.

In [16]:
h4i_service_categories_df 

Unnamed: 0,Name,Request Type ID
0,Professional In-Home Support,1
1,Technical Support,1
2,Transportation,0
3,Volunteer In-Home Support,1
4,Professional Home/Garden Servi,1
5,Coronavirus Community Support,0
6,Village Admin,2
7,Volunteer Home/Garden Service,1


In [17]:
[h4i_service_categories_df category in h4i_service_info_df['Service Category']]

SyntaxError: invalid syntax (<ipython-input-17-20f8b813cf47>, line 1)

# People Data

In [196]:
# Read in member data
members_df = pd.read_csv(f"{ROOT_DATA_DIR}/members_including_metro_area_data.csv")
members_df.head()

Unnamed: 0,Member Number,First Name,Last Name,Email,Address 1,Address 2,City,State,Zip,Metro Area,...,Sponsor Name,Chapter,Last Renewal Date,Secondary Type Name,Member Level,Gender,Allow Club Email?,Printed Newsletter?,Mailing Name,Use Alt Address?
0,229,Andrea,Aching,annaching2017@yahoo.com,115 Eddy Street,,Ithaca,NY,14850,,...,,,,Mother,Secondary,Female,Yes,No,Andrea Aching,No
1,228,Gerard,Aching,gerardaching@hotmail.com,115 Eddy Street,,Ithaca,NY,14850,,...,,,,,Primary,Male,Yes,Yes,Gerard L Aching,No
2,230,William,Aching,gerardaching@hotmail.com,115 Eddy Street,,Ithaca,NY,14850,,...,,,,Father,Secondary,Male,Yes,No,,No
3,233,Peggy,Adams,madams@ithaca.edu,417 Hector Street,,Ithaca,NY,14850,,...,,,09/12/2020,,Primary,Female,Yes,No,Margaret Adams,No
4,259,Nancy,Ahlers,freddi1133@gmail.com,814 Handshaw Rd,,Ithaca,NY,14850,,...,,,07/16/2020,,Primary,Female,Yes,No,Nancy Ahlers,No


In [197]:
# Drop irrelevant columns from members_df and rename as necessary
members_df = members_df[['Member Number','First Name', 'Middle Initial','Last Name',
                        'Gender','Nickname','Address 1', 'Address 2', 'City', 
                         'State', 'Zip', 'Country', 'Metro Area', 'Phone', 'Cell Phone',
                        'Email', 'Date Expired']]
members_df = members_df.rename(columns={"Nickname": "Preferred Name", "Phone": "Primary Phone Number", 
                           "Cell Phone": "Secondary Phone Number"})
members_df.head()

Unnamed: 0,Member Number,First Name,Middle Initial,Last Name,Gender,Preferred Name,Address 1,Address 2,City,State,Zip,Country,Metro Area,Primary Phone Number,Secondary Phone Number,Email,Date Expired
0,229,Andrea,,Aching,Female,,115 Eddy Street,,Ithaca,NY,14850,United States of America,,6073194170.0,7073383665.0,annaching2017@yahoo.com,09/09/2020
1,228,Gerard,L,Aching,Male,,115 Eddy Street,,Ithaca,NY,14850,United States of America,,6073194170.0,9175455737.0,gerardaching@hotmail.com,09/09/2020
2,230,William,,Aching,Male,,115 Eddy Street,,Ithaca,NY,14850,United States of America,,6073194170.0,,gerardaching@hotmail.com,09/09/2020
3,233,Peggy,,Adams,Female,,417 Hector Street,,Ithaca,NY,14850,United States of America,,6072776232.0,,madams@ithaca.edu,09/26/2021
4,259,Nancy,,Ahlers,Female,,814 Handshaw Rd,,Ithaca,NY,14850,United States of America,,,,freddi1133@gmail.com,07/20/2021


In [198]:
# Read in emergency contact information and rename columns
members_emergency_contact = pd.read_csv(f"{ROOT_DATA_DIR}/member_emergency_contact.csv")
members_emergency_contact = members_emergency_contact.rename(columns={"Contact Name": "Emergency Contact Name", 
                                                                      "Relationship": "Emergency Contact Relation", 
                                                                      "Contact Email": "Emergency Contact Email Address",
                                                                     "Contact Phone": "Emergency Contact Phone Number"})
members_emergency_contact.head()

Unnamed: 0,Member Name,Emergency Contact Name,Emergency Contact Relation,Emergency Contact Email Address,Emergency Contact Phone Number
0,Peggy Adams,,,,
1,Nancy Ahlers,,,,
2,Dilmeran Akgoze,Anita Racine,close friend and retired from Cornell,,607-273-1235
3,Paula Amols,,,,
4,Wendy Aquadro,,,,


In [199]:
# Add member emergency contact information
members_df['Member Name'] = members_df['First Name'] + ' ' + members_df['Last Name']
members_df = members_df.merge(members_emergency_contact, how='left',
                 left_on='Member Name', right_on='Member Name',)
members_df[members_df['Member Name'] == 'Dilmeran Akgoze']

Unnamed: 0,Member Number,First Name,Middle Initial,Last Name,Gender,Preferred Name,Address 1,Address 2,City,State,...,Metro Area,Primary Phone Number,Secondary Phone Number,Email,Date Expired,Member Name,Emergency Contact Name,Emergency Contact Relation,Emergency Contact Email Address,Emergency Contact Phone Number
5,332,Dilmeran,,Akgoze,Female,,215 North Cayuga Street-DeWitt Mall,Apt. 233,,NY,...,Downtown Ithaca,607-256-8515,,dd292cu@gmail.com,08/02/2021,Dilmeran Akgoze,Anita Racine,close friend and retired from Cornell,,607-273-1235


In [200]:
# Now drop the extra column we created
members_df.drop(['Member Name'],axis=1)

Unnamed: 0,Member Number,First Name,Middle Initial,Last Name,Gender,Preferred Name,Address 1,Address 2,City,State,...,Country,Metro Area,Primary Phone Number,Secondary Phone Number,Email,Date Expired,Emergency Contact Name,Emergency Contact Relation,Emergency Contact Email Address,Emergency Contact Phone Number
0,229,Andrea,,Aching,Female,,115 Eddy Street,,Ithaca,NY,...,United States of America,,6073194170,7073383665,annaching2017@yahoo.com,09/09/2020,,,,
1,228,Gerard,L,Aching,Male,,115 Eddy Street,,Ithaca,NY,...,United States of America,,6073194170,9175455737,gerardaching@hotmail.com,09/09/2020,,,,
2,230,William,,Aching,Male,,115 Eddy Street,,Ithaca,NY,...,United States of America,,6073194170,,gerardaching@hotmail.com,09/09/2020,,,,
3,233,Peggy,,Adams,Female,,417 Hector Street,,Ithaca,NY,...,United States of America,,6072776232,,madams@ithaca.edu,09/26/2021,,,,
4,259,Nancy,,Ahlers,Female,,814 Handshaw Rd,,Ithaca,NY,...,United States of America,,,,freddi1133@gmail.com,07/20/2021,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
247,296,Tori,,Wishart,Female,,40 Horizon Drive,,Ithaca,NY,...,United States of America,,607-279-3497,,tori@twcny.rr.com,07/21/2021,,,,
248,310,Frances,,Withiam,Female,,9 Horizon Drive,,Ithaca,NY,...,United States of America,Lansing,607-273-5375,,Lwithiam@twcny.rr.com,08/03/2021,,,,
249,234,Rochelle,,Woods,Female,,114 Glenside Rd.,,Ithaca,NY,...,United States of America,,607-277-3497,607-339-1956,rewoods112@gmail.com,10/01/2021,,,,
250,43,Eugene,,Yarussi,Male,Gene,56 Waterview Heights Road,,Ithaca,NY,...,United States of America,,(607) 277-6520,(607) 279-6613,eyarussi@gmail.com,07/10/2021,,,,


In [201]:
# Read in member notes information and preprocess out irrelevant columns, rename relevant..
members_notes = pd.read_csv(f"{ROOT_DATA_DIR}/member_notes.csv")
members_notes.reset_index(inplace=True)
members_notes = members_notes.rename(columns={'level_0': 'Member Name', 'Email':'Notes'})
members_notes = members_notes.drop(['level_1', 'Name', 'Phone'], axis=1)
members_notes.head()

Unnamed: 0,Member Name,Notes
0,"Jaquette, John","Emergency Contact is Susan Jaquette, wife\r\ne..."
1,"Regenstein, Carrie",Emergency Contact: \r\nJoe Regenstein husband\...
2,"Lemley, Ann",Committee Member Only
3,"Dubovi, Robin","Prefers not to drive in snowy, bad-weather con..."
4,"Murphy, Rosanne","Emergency Contract: Deb Roe, Partner"


In [202]:
# Add member notes to members df
members_df['Member Name'] = members_df['Last Name'] + ', ' + members_df['First Name']
members_df = members_df.merge(members_notes, how='left',
                 left_on='Member Name', right_on='Member Name',)
members_df.head()
members_df[members_df['Member Name'] == 'Jaquette, John']

Unnamed: 0,Member Number,First Name,Middle Initial,Last Name,Gender,Preferred Name,Address 1,Address 2,City,State,...,Primary Phone Number,Secondary Phone Number,Email,Date Expired,Member Name,Emergency Contact Name,Emergency Contact Relation,Emergency Contact Email Address,Emergency Contact Phone Number,Notes
95,67,John,,Jaquette,Male,,41 Forest Acres Drive,,Ithaca,NY,...,(607) 257-4988,(607) 229-9523,jpj7@cornell.edu,08/28/2021,"Jaquette, John",,,,,"Emergency Contact is Susan Jaquette, wife\r\ne..."


In [203]:
# Now drop the extra column we created
members_df.drop(['Member Name'],axis=1)

Unnamed: 0,Member Number,First Name,Middle Initial,Last Name,Gender,Preferred Name,Address 1,Address 2,City,State,...,Metro Area,Primary Phone Number,Secondary Phone Number,Email,Date Expired,Emergency Contact Name,Emergency Contact Relation,Emergency Contact Email Address,Emergency Contact Phone Number,Notes
0,229,Andrea,,Aching,Female,,115 Eddy Street,,Ithaca,NY,...,,6073194170,7073383665,annaching2017@yahoo.com,09/09/2020,,,,,
1,228,Gerard,L,Aching,Male,,115 Eddy Street,,Ithaca,NY,...,,6073194170,9175455737,gerardaching@hotmail.com,09/09/2020,,,,,
2,230,William,,Aching,Male,,115 Eddy Street,,Ithaca,NY,...,,6073194170,,gerardaching@hotmail.com,09/09/2020,,,,,
3,233,Peggy,,Adams,Female,,417 Hector Street,,Ithaca,NY,...,,6072776232,,madams@ithaca.edu,09/26/2021,,,,,
4,259,Nancy,,Ahlers,Female,,814 Handshaw Rd,,Ithaca,NY,...,,,,freddi1133@gmail.com,07/20/2021,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
247,296,Tori,,Wishart,Female,,40 Horizon Drive,,Ithaca,NY,...,,607-279-3497,,tori@twcny.rr.com,07/21/2021,,,,,
248,310,Frances,,Withiam,Female,,9 Horizon Drive,,Ithaca,NY,...,Lansing,607-273-5375,,Lwithiam@twcny.rr.com,08/03/2021,,,,,
249,234,Rochelle,,Woods,Female,,114 Glenside Rd.,,Ithaca,NY,...,,607-277-3497,607-339-1956,rewoods112@gmail.com,10/01/2021,,,,,
250,43,Eugene,,Yarussi,Male,Gene,56 Waterview Heights Road,,Ithaca,NY,...,,(607) 277-6520,(607) 279-6613,eyarussi@gmail.com,07/10/2021,,,,,


In [204]:
# Combine address lines to one address
members_df['Address'] = members_df[['Address 1','Address 2']].apply(lambda x: ','.join(x.dropna()),axis=1)
members_df = members_df.drop(['Address 1', 'Address 2'], axis=1)
members_df.head()

Unnamed: 0,Member Number,First Name,Middle Initial,Last Name,Gender,Preferred Name,City,State,Zip,Country,...,Secondary Phone Number,Email,Date Expired,Member Name,Emergency Contact Name,Emergency Contact Relation,Emergency Contact Email Address,Emergency Contact Phone Number,Notes,Address
0,229,Andrea,,Aching,Female,,Ithaca,NY,14850,United States of America,...,7073383665.0,annaching2017@yahoo.com,09/09/2020,"Aching, Andrea",,,,,,115 Eddy Street
1,228,Gerard,L,Aching,Male,,Ithaca,NY,14850,United States of America,...,9175455737.0,gerardaching@hotmail.com,09/09/2020,"Aching, Gerard",,,,,,115 Eddy Street
2,230,William,,Aching,Male,,Ithaca,NY,14850,United States of America,...,,gerardaching@hotmail.com,09/09/2020,"Aching, William",,,,,,115 Eddy Street
3,233,Peggy,,Adams,Female,,Ithaca,NY,14850,United States of America,...,,madams@ithaca.edu,09/26/2021,"Adams, Peggy",,,,,,417 Hector Street
4,259,Nancy,,Ahlers,Female,,Ithaca,NY,14850,United States of America,...,,freddi1133@gmail.com,07/20/2021,"Ahlers, Nancy",,,,,,814 Handshaw Rd


In [205]:
list(members_df.columns)

['Member Number',
 'First Name',
 'Middle Initial',
 'Last Name',
 'Gender',
 'Preferred Name',
 'City',
 'State',
 'Zip',
 'Country',
 'Metro Area',
 'Primary Phone Number',
 'Secondary Phone Number',
 'Email',
 'Date Expired',
 'Member Name',
 'Emergency Contact Name',
 'Emergency Contact Relation',
 'Emergency Contact Email Address',
 'Emergency Contact Phone Number',
 'Notes',
 'Address']

In [206]:
# Read in service provider data
service_provider_df = pd.read_csv(f"{ROOT_DATA_DIR}/service_provider_data.csv")
service_provider_df.head()

Unnamed: 0,Service Provider First Name,Service Provider Laast Name,Type,Company,Address,City,State,Zip,Phone,Cell Phone,Emergency Contact Phone #,Email,Preferred Contact Method,Website,Fully Vetted?,Visible?,Discount Notes
0,,,Non-Member Contractor,Ace Security,720 W. Green St.,Ithaca,NY,14850,6072738840,,,,Phone,,No,No,
1,,,Non-Member Contractor,ACME Pest Control,359 Dryden-Harford Rd.,Dryden,NY,13053,607-844-8689,,,,Phone,,No,No,
2,,,Non-Member Contractor,Bailey Construction,5139 Jacksonville Road,Trumansburg,NY,14886,607-209-4114,,,,Phone,www.baileyconstruction.vpweb.com,No,No,
3,,,Non-Member Contractor,Bell's Auto Service,945 Dryden Rd.,Ithaca,NY,14850,607-273-9325,,,,Phone,www.bellsauto.com,No,No,
4,,,Non-Member Contractor,Bumblebee Painters,210 E. Falls St.,Ithaca,NY,14850,607-273-6521,,,info@bumblebeepainters.com,Email,www.bumblebeepainters.com,No,Yes,


In [207]:
# Take volunteer data out
volunteer_df = service_provider_df[service_provider_df.Type != 'Non-Member Contractor']
volunteer_df.head()

Unnamed: 0,Service Provider First Name,Service Provider Laast Name,Type,Company,Address,City,State,Zip,Phone,Cell Phone,Emergency Contact Phone #,Email,Preferred Contact Method,Website,Fully Vetted?,Visible?,Discount Notes
41,Paula,Amols,Member Volunteer,,370 Snyder Hill Rd.,Ithaca,NY,14850,(607) 592-1166,,,pamols54@gmail.com,Email,,Yes,Yes,
42,Wendy,Aquadro,Member Volunteer,,283 ELLIS HOLLOW CREEK RD,ITHACA,NY,14850-9619,6072733049,6073516275,,gsa8@cornell.edu,Phone,,Yes,Yes,
46,Eileen,Berlow,Member Volunteer,,2444 Perry City Road,Ithaca,NY,14850,(607) 379-3565,(607) 387-6890,,eileenberlow@yahoo.com,Phone,,No,Yes,
47,Deborah,Berman,Non-Member Volunteer,,110 Hancock St,Ithaca,NY,14850,(607) 273-2901,,(607) 342-7841,skaboooch@gmail.com,Email,,Yes,Yes,
48,Cassie,Besemer,Member Volunteer,,,,NY,,6073192866,,,office@lovelivingathome.org,Phone,,Yes,Yes,


In [208]:
list(volunteer_df.columns)

['Service Provider First Name',
 'Service Provider Laast Name',
 'Type',
 'Company',
 'Address',
 'City',
 'State',
 'Zip',
 'Phone',
 'Cell Phone',
 'Emergency Contact Phone #',
 'Email',
 'Preferred Contact Method',
 'Website',
 'Fully Vetted?',
 'Visible?',
 'Discount Notes']

In [209]:
# Preprocess volunteer data by dropping irrelevant data, renaming..
volunteer_df = volunteer_df.drop(['Type', 'Company', 'Website','Visible?','Discount Notes'],axis=1)
volunteer_df = volunteer_df.rename(columns={'Service Provider First Name': 'First Name', 
                                            'Service Provider Laast Name': 'Last Name',
                                           'Emergency Contact Phone #':'Emergency Contact Phone Number',
                                           'Fully Vetted?': 'Vetting',
                                           'Phone': 'Primary Phone Number',
                                           'Cell Phone': 'Secondary Phone Number'})
volunteer_df.head()

Unnamed: 0,First Name,Last Name,Address,City,State,Zip,Primary Phone Number,Secondary Phone Number,Emergency Contact Phone Number,Email,Preferred Contact Method,Vetting
41,Paula,Amols,370 Snyder Hill Rd.,Ithaca,NY,14850,(607) 592-1166,,,pamols54@gmail.com,Email,Yes
42,Wendy,Aquadro,283 ELLIS HOLLOW CREEK RD,ITHACA,NY,14850-9619,6072733049,6073516275,,gsa8@cornell.edu,Phone,Yes
46,Eileen,Berlow,2444 Perry City Road,Ithaca,NY,14850,(607) 379-3565,(607) 387-6890,,eileenberlow@yahoo.com,Phone,No
47,Deborah,Berman,110 Hancock St,Ithaca,NY,14850,(607) 273-2901,,(607) 342-7841,skaboooch@gmail.com,Email,Yes
48,Cassie,Besemer,,,NY,,6073192866,,,office@lovelivingathome.org,Phone,Yes


In [210]:
list(volunteer_df.columns)

['First Name',
 'Last Name',
 'Address',
 'City',
 'State',
 'Zip',
 'Primary Phone Number',
 'Secondary Phone Number',
 'Emergency Contact Phone Number',
 'Email',
 'Preferred Contact Method',
 'Vetting']

In [211]:
# Take out local resource data
local_resource_df = service_provider_df[service_provider_df.Type == 'Non-Member Contractor']
local_resource_df.head()

Unnamed: 0,Service Provider First Name,Service Provider Laast Name,Type,Company,Address,City,State,Zip,Phone,Cell Phone,Emergency Contact Phone #,Email,Preferred Contact Method,Website,Fully Vetted?,Visible?,Discount Notes
0,,,Non-Member Contractor,Ace Security,720 W. Green St.,Ithaca,NY,14850,6072738840,,,,Phone,,No,No,
1,,,Non-Member Contractor,ACME Pest Control,359 Dryden-Harford Rd.,Dryden,NY,13053,607-844-8689,,,,Phone,,No,No,
2,,,Non-Member Contractor,Bailey Construction,5139 Jacksonville Road,Trumansburg,NY,14886,607-209-4114,,,,Phone,www.baileyconstruction.vpweb.com,No,No,
3,,,Non-Member Contractor,Bell's Auto Service,945 Dryden Rd.,Ithaca,NY,14850,607-273-9325,,,,Phone,www.bellsauto.com,No,No,
4,,,Non-Member Contractor,Bumblebee Painters,210 E. Falls St.,Ithaca,NY,14850,607-273-6521,,,info@bumblebeepainters.com,Email,www.bumblebeepainters.com,No,Yes,


In [212]:
list(local_resource_df.columns)

['Service Provider First Name',
 'Service Provider Laast Name',
 'Type',
 'Company',
 'Address',
 'City',
 'State',
 'Zip',
 'Phone',
 'Cell Phone',
 'Emergency Contact Phone #',
 'Email',
 'Preferred Contact Method',
 'Website',
 'Fully Vetted?',
 'Visible?',
 'Discount Notes']

In [213]:
local_resource_df = local_resource_df.drop(['Type','Fully Vetted?', 'Visible?'],axis=1)
local_resource_df = local_resource_df.rename(columns={'Service Provider First Name': 'Contact First Name', 
                                            'Service Provider Laast Name': 'Contact Last Name',
                                           'Emergency Contact Phone #':'Emergency Contact Phone Number',
                                           'Phone': 'Primary Phone Number',
                                           'Cell Phone': 'Secondary Phone Number',
                                            'Discount Notes': 'Notes'})
local_resource_df.head()

Unnamed: 0,Contact First Name,Contact Last Name,Company,Address,City,State,Zip,Primary Phone Number,Secondary Phone Number,Emergency Contact Phone Number,Email,Preferred Contact Method,Website,Notes
0,,,Ace Security,720 W. Green St.,Ithaca,NY,14850,6072738840,,,,Phone,,
1,,,ACME Pest Control,359 Dryden-Harford Rd.,Dryden,NY,13053,607-844-8689,,,,Phone,,
2,,,Bailey Construction,5139 Jacksonville Road,Trumansburg,NY,14886,607-209-4114,,,,Phone,www.baileyconstruction.vpweb.com,
3,,,Bell's Auto Service,945 Dryden Rd.,Ithaca,NY,14850,607-273-9325,,,,Phone,www.bellsauto.com,
4,,,Bumblebee Painters,210 E. Falls St.,Ithaca,NY,14850,607-273-6521,,,info@bumblebeepainters.com,Email,www.bumblebeepainters.com,


In [214]:
list(local_resource_df.columns)

['Contact First Name',
 'Contact Last Name',
 'Company',
 'Address',
 'City',
 'State',
 'Zip',
 'Primary Phone Number',
 'Secondary Phone Number',
 'Emergency Contact Phone Number',
 'Email',
 'Preferred Contact Method',
 'Website',
 'Notes']

## Metro Areas

In [215]:
# Create and save metro area df
metro_areas = pd.DataFrame(['Brooktondale', 'Caroline', 'Danby', 'Downtown Ithaca', 'Dryden', 'Enfield', 'Freeville', 'Groton', 'Lansing', 'Newfield', 'Outside Tompkins County', 'Slaterville Springs', 'South Hill', 'Tompkins County', 'Trumansburg', 'Jacksonville', 'Ulysses', 'Vana', 'West Hill'])
metro_areas.to_csv(OUT_DATA_DIR+'metro_areas.csv')

## Address ID Replacement

In [216]:
# Add volunteer and local resource addresses to address book
column_names = ['Address','City','State','Zip']
address_data = []
for idx, row in members_df.iterrows():
    d = {}
    for c in column_names:
        d[c] = row[c]
    address_data.append(d)
for idx, row in local_resource_df.iterrows():
    d = {}
    for c in column_names:
        d[c] = row[c]
    address_data.append(d)
for idx, row in volunteer_df.iterrows():
    d = {}
    for c in column_names:
        d[c] = row[c]
    address_data.append(d)
address_df = pd.DataFrame(address_data)
address_df = address_df.drop_duplicates()
address_df = address_df.reset_index(drop=True)
address_df = address_df.reset_index()
address_df = address_df.rename(columns={'index': 'Address ID'})
address_df.head()

Unnamed: 0,Address ID,Address,City,State,Zip
0,0,115 Eddy Street,Ithaca,NY,14850
1,1,417 Hector Street,Ithaca,NY,14850
2,2,814 Handshaw Rd,Ithaca,NY,14850
3,3,"215 North Cayuga Street-DeWitt Mall,Apt. 233",,NY,14850
4,4,370 Snyder Hill Rd.,Ithaca,NY,14850


In [217]:
# Export addresses
address_df.to_csv(OUT_DATA_DIR+'addresses.csv')

In [218]:
# Merge address IDs in to volunteer and local resource data
volunteer_df = volunteer_df.merge(address_df, how='left', on=["Address","City","State","Zip"])
local_resource_df = local_resource_df.merge(address_df, how='left', on=["Address","City","State","Zip"])
members_df = members_df.merge(address_df, how='left', on=["Address","City","State","Zip"])

In [219]:
# Replace addresses in members with address ID
volunteer_df = volunteer_df.drop(['Address','City','State','Zip'], axis=1)
local_resource_df = local_resource_df.drop(['Address','City','State','Zip'], axis=1)
members_df = members_df.drop(['Address','City','State','Zip'], axis=1)
volunteer_df.head()

Unnamed: 0,First Name,Last Name,Primary Phone Number,Secondary Phone Number,Emergency Contact Phone Number,Email,Preferred Contact Method,Vetting,Address ID
0,Paula,Amols,(607) 592-1166,,,pamols54@gmail.com,Email,Yes,4
1,Wendy,Aquadro,6072733049,6073516275,,gsa8@cornell.edu,Phone,Yes,5
2,Eileen,Berlow,(607) 379-3565,(607) 387-6890,,eileenberlow@yahoo.com,Phone,No,8
3,Deborah,Berman,(607) 273-2901,,(607) 342-7841,skaboooch@gmail.com,Email,Yes,267
4,Cassie,Besemer,6073192866,,,office@lovelivingathome.org,Phone,Yes,204


In [220]:
# Phone Number df
phone_numbers = list(members_df['Primary Phone Number'])
phone_numbers.extend(list(members_df['Secondary Phone Number']))
phone_numbers.extend(list(members_df['Emergency Contact Phone Number']))
phone_numbers.extend(list(volunteer_df['Primary Phone Number']))
phone_numbers.extend(list(volunteer_df['Secondary Phone Number']))
phone_numbers.extend(list(volunteer_df['Emergency Contact Phone Number']))
phone_numbers.extend(list(local_resource_df['Primary Phone Number']))
phone_numbers.extend(list(local_resource_df['Secondary Phone Number']))
phone_numbers.extend(list(local_resource_df['Emergency Contact Phone Number']))
phone_df = pd.DataFrame(phone_numbers)
phone_df = phone_df.drop_duplicates()
phone_df = phone_df[phone_df[0].notna()]
phone_df = phone_df.reset_index(drop=True)
phone_df = phone_df.reset_index()
phone_df = phone_df.rename(columns={'index': 'Phone ID', 0:'Phone Number'})
phone_df.head()

Unnamed: 0,Phone ID,Phone Number
0,0,6073194170
1,1,6072776232
2,2,607-256-8515
3,3,(607) 592-1166
4,4,6072733049


In [221]:
# Export phone numbers
phone_df.to_csv(OUT_DATA_DIR+'phone_numbers.csv')

In [222]:
volunteer_df = volunteer_df.merge(phone_df, how='left', left_on=["Primary Phone Number"],right_on=["Phone Number"])
local_resource_df = local_resource_df.merge(phone_df, how='left', left_on=["Primary Phone Number"],right_on=["Phone Number"])
members_df = members_df.merge(phone_df, how='left', left_on=["Primary Phone Number"],right_on=["Phone Number"])
volunteer_df['Primary Phone Number'] = volunteer_df['Phone ID']
volunteer_df = volunteer_df.drop(['Phone ID','Phone Number'],axis=1)
local_resource_df['Primary Phone Number'] = local_resource_df['Phone ID']
local_resource_df = local_resource_df.drop(['Phone ID','Phone Number'],axis=1)
members_df['Primary Phone Number'] = members_df['Phone ID']
members_df = members_df.drop(['Phone ID','Phone Number'],axis=1)
volunteer_df.head()

Unnamed: 0,First Name,Last Name,Primary Phone Number,Secondary Phone Number,Emergency Contact Phone Number,Email,Preferred Contact Method,Vetting,Address ID
0,Paula,Amols,3.0,,,pamols54@gmail.com,Email,Yes,4
1,Wendy,Aquadro,4.0,6073516275,,gsa8@cornell.edu,Phone,Yes,5
2,Eileen,Berlow,7.0,(607) 387-6890,,eileenberlow@yahoo.com,Phone,No,8
3,Deborah,Berman,297.0,,(607) 342-7841,skaboooch@gmail.com,Email,Yes,267
4,Cassie,Besemer,8.0,,,office@lovelivingathome.org,Phone,Yes,204


In [223]:
volunteer_df = volunteer_df.merge(phone_df, how='left', left_on=["Secondary Phone Number"],right_on=["Phone Number"])
local_resource_df = local_resource_df.merge(phone_df, how='left', left_on=["Secondary Phone Number"],right_on=["Phone Number"])
members_df = members_df.merge(phone_df, how='left', left_on=["Secondary Phone Number"],right_on=["Phone Number"])
volunteer_df['Secondary Phone Number'] = volunteer_df['Phone ID']
volunteer_df = volunteer_df.drop(['Phone ID','Phone Number'],axis=1)
local_resource_df['Secondary Phone Number'] = local_resource_df['Phone ID']
local_resource_df = local_resource_df.drop(['Phone ID','Phone Number'],axis=1)
members_df['Secondary Phone Number'] = members_df['Phone ID']
members_df = members_df.drop(['Phone ID','Phone Number'],axis=1)
volunteer_df = volunteer_df.merge(phone_df, how='left', left_on=["Emergency Contact Phone Number"],right_on=["Phone Number"])
local_resource_df = local_resource_df.merge(phone_df, how='left', left_on=["Emergency Contact Phone Number"],right_on=["Phone Number"])
members_df = members_df.merge(phone_df, how='left', left_on=["Emergency Contact Phone Number"],right_on=["Phone Number"])
volunteer_df['Emergency Contact Phone Number'] = volunteer_df['Phone ID']
volunteer_df = volunteer_df.drop(['Phone ID','Phone Number'],axis=1)
local_resource_df['Emergency Contact Phone Number'] = local_resource_df['Phone ID']
local_resource_df = local_resource_df.drop(['Phone ID','Phone Number'],axis=1)
members_df['Emergency Contact Phone Number'] = members_df['Phone ID']
members_df = members_df.drop(['Phone ID','Phone Number'],axis=1)
volunteer_df.head()

Unnamed: 0,First Name,Last Name,Primary Phone Number,Secondary Phone Number,Emergency Contact Phone Number,Email,Preferred Contact Method,Vetting,Address ID
0,Paula,Amols,3.0,,,pamols54@gmail.com,Email,Yes,4
1,Wendy,Aquadro,4.0,187.0,,gsa8@cornell.edu,Phone,Yes,5
2,Eileen,Berlow,7.0,189.0,,eileenberlow@yahoo.com,Phone,No,8
3,Deborah,Berman,297.0,,363.0,skaboooch@gmail.com,Email,Yes,267
4,Cassie,Besemer,8.0,,,office@lovelivingathome.org,Phone,Yes,204


In [224]:
# Export people data
members_df.to_csv(OUT_DATA_DIR+'members.csv')
volunteer_df.to_csv(OUT_DATA_DIR+'volunteers.csv')
local_resource_df.to_csv(OUT_DATA_DIR+'local_resources.csv')