In [None]:
import pandas as pd

ROOT_DATA_DIR = '../in/club_express_exports'
OUT_DATA_DIR = '../out/'

## Read in data
Read in the services data and remove unnecessary columns (Visible? and Notes)

In [None]:
service_df = pd.read_csv(f"{ROOT_DATA_DIR}/service_data.csv")
visible_services_df = service_df.loc[service_df['Visible?']=='Yes']
trimmed_visible_services_df = visible_services_df.drop(['Visible?', 'Notes'], axis=1)
trimmed_visible_services_df.head()

Next, we need the request type that each category belongs to. This is relevant because, when creating a type of request (i.e., Transportation Request), the service categories are filtered to only display the categories belonging to the request type.

In [None]:
completed_and_requested_services_df = pd.read_csv(f"{ROOT_DATA_DIR}/service_provided_service_data.csv")
trimmed_service_info_df = completed_and_requested_services_df[['Service', 'Service Category', 'Request Type']].drop_duplicates()
trimmed_service_info_df

## Parse data for H4I Services Dataframe

In [None]:
# This H4I dataframe will hold all our relevant information.
h4i_service_info_df = pd.DataFrame(columns=['Service', 'Service Category', 'Request Type'])

# Accumulate columns in these lists
services = []
service_categories = []
request_types = []

# Iterate through all current services and insert relevant information into the lists for H4I dataframe columns
for row in trimmed_visible_services_df[['Service', 'Service Category']].iterrows():
    # Get Service, Service Category, and Service Category ID
    service, service_category = row[1].values
    # Get Request Type
    request_type_values = trimmed_service_info_df[trimmed_service_info_df['Service'] == service]['Request Type'].values
    num_request_type_values = len(request_type_values)
    try:
        assert(num_request_type_values <= 1) # Check that there's either 0 or 1 request types associated with this service
    except AssertionError: 
        if "Contractor Referral" in request_type_values:
            list(request_type_values).remove("Contractor Referral")
            num_request_type_values-=1
            if num_request_type_values == 1: # after removing 'Contractor Referral'
                break
        print("ASSERTION ERROR")
        print(request_type_values)
    request_type = None
    if len(request_type_values) == 1:
        request_type = request_type_values[0]
        
    # Append to lists
    services.append(service)
    service_categories.append(service_category)
    request_types.append(request_type)

Construct our dataframe with **Service Category** and **Request Type** for every **Service**.

In [None]:
h4i_service_info_df['Service'] = services
h4i_service_info_df['Service Category'] = service_categories
h4i_service_info_df['Request Type'] = request_types

In [None]:
h4i_service_info_df.head()

## The request type needs to be filled in for some service categories/services.

In [None]:
h4i_service_info_df.loc[h4i_service_info_df['Request Type'].isnull()]

Fix these rows with "None" as Request Type.

In [None]:
h4i_service_info_df.loc[[0, 1, 2, 5, 6, 7, 18, 22, 24], 'Request Type']= "Member's Home"

Drop the "hack4impact test service" we created in the system.

In [None]:
h4i_service_info_df.drop(28, inplace=True); # drop 'hack4impact test service'

Check that there are no more "None" values in Request Type.

In [None]:
h4i_service_info_df.loc[h4i_service_info_df['Request Type'].isnull()]

Remove "Contractor Referral" Requests

In [None]:
h4i_service_info_df = h4i_service_info_df[h4i_service_info_df['Request Type'] != 'Contractor Referral']
h4i_service_info_df.head()

## Create tables of services, service categories, and request types with their IDs

In [None]:
service_categories_set = list(set(h4i_service_info_df['Service Category']))
services_set = list(set(h4i_service_info_df['Service']))
request_types_set = list(set(h4i_service_info_df['Request Type']))

In [None]:
h4i_request_types_df = pd.DataFrame()
h4i_request_types_df['Name'] = request_types_set
# h4i_request_types_df.to_csv(f'{OUT_DATA_DIR}/request_types.csv', index_label='ID')
h4i_request_types_df.head()

In [None]:
h4i_service_categories_df = pd.DataFrame()
h4i_service_categories_df['Name'] = service_categories_set

# Get Category -> Request Type mappings
category_and_request_type_info = h4i_service_info_df[['Service Category', 'Request Type']].drop_duplicates()
category_and_request_type_info.set_index('Service Category', inplace=True)

# Add request type IDs
request_types = [category_and_request_type_info.loc[category]['Request Type'] for category in h4i_service_categories_df['Name']]
print(request_types)
h4i_service_categories_df['Request Type ID'] = [h4i_request_types_df[h4i_request_types_df['Name'] == request_type].index[0] for request_type in request_types]

# h4i_service_categories_df.to_csv(f'{OUT_DATA_DIR}/service_categories.csv', index_label='ID')
h4i_service_categories_df.head()

In [None]:
h4i_services_df = pd.DataFrame()
h4i_services_df['Name'] = services_set

# Get Category IDs of services
categories_of_services = [h4i_service_info_df[h4i_service_info_df['Service'] == service]['Service Category'].values[0] for service in h4i_services_df['Name']]                
h4i_services_df['Category ID'] = [h4i_service_categories_df[h4i_service_categories_df['Name'] == category].index[0] for category in categories_of_services]

h4i_services_df.to_csv(f'{OUT_DATA_DIR}/services.csv', index_label='ID')
h4i_services_df.head()

## Now write the service info df.

In [None]:
h4i_service_categories_df 

In [None]:
[h4i_service_categories_df category in h4i_service_info_df['Service Category']]

# People Data

In [111]:
# Read in member data
members_df_input = pd.read_csv(f"{ROOT_DATA_DIR}/members_including_metro_area_data.csv")
members_df_input.head()

Unnamed: 0,Member Number,First Name,Last Name,Email,Address 1,Address 2,City,State,Zip,Metro Area,...,Sponsor Name,Chapter,Last Renewal Date,Secondary Type Name,Member Level,Gender,Allow Club Email?,Printed Newsletter?,Mailing Name,Use Alt Address?
0,229,Andrea,Aching,annaching2017@yahoo.com,115 Eddy Street,,Ithaca,NY,14850,,...,,,,Mother,Secondary,Female,Yes,No,Andrea Aching,No
1,228,Gerard,Aching,gerardaching@hotmail.com,115 Eddy Street,,Ithaca,NY,14850,,...,,,,,Primary,Male,Yes,Yes,Gerard L Aching,No
2,230,William,Aching,gerardaching@hotmail.com,115 Eddy Street,,Ithaca,NY,14850,,...,,,,Father,Secondary,Male,Yes,No,,No
3,233,Peggy,Adams,madams@ithaca.edu,417 Hector Street,,Ithaca,NY,14850,Downtown Ithaca,...,,,09/20/2021,,Primary,Female,Yes,No,Margaret Adams,No
4,259,Nancy,Ahlers,baselahlers@gmail.com,2 Horizon Drive,,Ithaca,NY,14850,Lansing,...,,,08/05/2021,,Primary,Female,Yes,No,Nancy Ahlers,No


In [112]:
# Drop irrelevant columns from members_df and rename as necessary
members_df = members_df_input[['Member Number','First Name', 'Middle Initial','Last Name',
                        'Gender','Nickname', 'Metro Area', 'Phone', 'Cell Phone',
                        'Email', 'Date Expired']]
members_df = members_df.rename(columns={"Nickname": "Preferred Name", "Phone": "Primary Phone Number", 
                           "Cell Phone": "Secondary Phone Number", "Email": "Email Address", "Date Expired": "Membership Expiration Date", "Metro Area": "Metro Area Id"})
members_df['Salutation'] = ""
members_df['Birthdate'] = ""
members_df['Volunteer Id'] = ""
members_df['Primary Address Id'] = ""
members_df['Secondary Address Id'] = ""
members_df['Preferred Contact Method']  = ""
members_df.head()

Unnamed: 0,Member Number,First Name,Middle Initial,Last Name,Gender,Preferred Name,Metro Area Id,Primary Phone Number,Secondary Phone Number,Email Address,Membership Expiration Date,Salutation,Birthdate,Volunteer Id,Primary Address Id,Secondary Address Id,Preferred Contact Method
0,229,Andrea,,Aching,Female,,,6073194170.0,7073383665.0,annaching2017@yahoo.com,09/09/2020,,,,,,
1,228,Gerard,L,Aching,Male,,,6073194170.0,9175455737.0,gerardaching@hotmail.com,09/09/2020,,,,,,
2,230,William,,Aching,Male,,,6073194170.0,,gerardaching@hotmail.com,09/09/2020,,,,,,
3,233,Peggy,,Adams,Female,,Downtown Ithaca,6078821659.0,6078821659.0,madams@ithaca.edu,09/26/2022,,,,,,
4,259,Nancy,,Ahlers,Female,,Lansing,,,baselahlers@gmail.com,07/20/2022,,,,,,


In [113]:
# Read in emergency contact information and rename columns
members_emergency_contact = pd.read_csv(f"{ROOT_DATA_DIR}/member_emergency_contact.csv")
members_emergency_contact = members_emergency_contact.rename(columns={"Contact Name": "Emergency Contact Name", 
                                                                      "Relationship": "Emergency Contact Relationship", 
                                                                      "Contact Email": "Emergency Contact Email Address",
                                                                     "Contact Phone": "Emergency Contact Phone Number"})
members_emergency_contact.head()

Unnamed: 0,Member Name,Emergency Contact Name,Emergency Contact Relationship,Emergency Contact Email Address,Emergency Contact Phone Number
0,Peggy Adams,,,,
1,Nancy Ahlers,,,,
2,Dilmeran Akgoze,Anita Racine,close friend and retired from Cornell,,607-273-1235
3,Paula Amols,,,,
4,Wendy Aquadro,,,,


In [114]:
# Add member emergency contact information
members_df['Member Name'] = members_df['First Name'] + ' ' + members_df['Last Name']
members_df = members_df.merge(members_emergency_contact, how='left',
                 left_on='Member Name', right_on='Member Name')
members_df.head()

Unnamed: 0,Member Number,First Name,Middle Initial,Last Name,Gender,Preferred Name,Metro Area Id,Primary Phone Number,Secondary Phone Number,Email Address,...,Birthdate,Volunteer Id,Primary Address Id,Secondary Address Id,Preferred Contact Method,Member Name,Emergency Contact Name,Emergency Contact Relationship,Emergency Contact Email Address,Emergency Contact Phone Number
0,229,Andrea,,Aching,Female,,,6073194170.0,7073383665.0,annaching2017@yahoo.com,...,,,,,,Andrea Aching,,,,
1,228,Gerard,L,Aching,Male,,,6073194170.0,9175455737.0,gerardaching@hotmail.com,...,,,,,,Gerard Aching,,,,
2,230,William,,Aching,Male,,,6073194170.0,,gerardaching@hotmail.com,...,,,,,,William Aching,,,,
3,233,Peggy,,Adams,Female,,Downtown Ithaca,6078821659.0,6078821659.0,madams@ithaca.edu,...,,,,,,Peggy Adams,,,,
4,259,Nancy,,Ahlers,Female,,Lansing,,,baselahlers@gmail.com,...,,,,,,Nancy Ahlers,,,,


In [115]:
# Now drop the extra column we created
members_df.drop(['Member Name'],axis=1)

Unnamed: 0,Member Number,First Name,Middle Initial,Last Name,Gender,Preferred Name,Metro Area Id,Primary Phone Number,Secondary Phone Number,Email Address,...,Salutation,Birthdate,Volunteer Id,Primary Address Id,Secondary Address Id,Preferred Contact Method,Emergency Contact Name,Emergency Contact Relationship,Emergency Contact Email Address,Emergency Contact Phone Number
0,229,Andrea,,Aching,Female,,,6073194170,7073383665,annaching2017@yahoo.com,...,,,,,,,,,,
1,228,Gerard,L,Aching,Male,,,6073194170,9175455737,gerardaching@hotmail.com,...,,,,,,,,,,
2,230,William,,Aching,Male,,,6073194170,,gerardaching@hotmail.com,...,,,,,,,,,,
3,233,Peggy,,Adams,Female,,Downtown Ithaca,6078821659,6078821659,madams@ithaca.edu,...,,,,,,,,,,
4,259,Nancy,,Ahlers,Female,,Lansing,,,baselahlers@gmail.com,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
325,420,Linda,,Woodard,Female,,Cayuga Heights,6072279798,,lab6@cornell.edu,...,,,,,,,Allison Pfuntner,Daughter,allisonpeters3@yahoo.com,301-503-1123
326,234,Rochelle,,Woods,Female,,,607-277-3497,607-339-1956,rewoods112@gmail.com,...,,,,,,,,,,
327,43,Eugene,,Yarussi,Male,Gene,,(607) 277-6520,(607) 279-6613,eyarussi@gmail.com,...,,,,,,,,,,
328,44,Jeanne,,Yarussi,Female,,Caroline,(607) 277-6520,(607) 279-6613,eyarussi@gmail.com,...,,,,,,,,,,


In [116]:
# Read in member notes information and preprocess out irrelevant columns, rename relevant..
members_notes = pd.read_csv(f"{ROOT_DATA_DIR}/member_notes.csv")
members_notes.reset_index(inplace=True)
members_notes = members_notes.drop(['level_1', 'Name', 'Phone'], axis=1)
members_notes = members_notes.rename(columns={'level_0': 'Member Name', 'Email':'Staffer Notes'})
members_notes["Volunteer Notes"] = ""
members_notes.head()

Unnamed: 0,Member Name,Staffer Notes,Volunteer Notes
0,"Dubovi, Robin","Prefers not to drive in snowy, bad-weather con...",
1,"Lemley, Ann",Committee Member Only,
2,"Cathles, Mary Helen",Larry Cathles spouse\r\n607-533-7589 (h)\r\n60...,
3,"Regenstein, Carrie",Emergency Contact: \r\nJoe Regenstein husband\...,
4,"Jaquette, John","Emergency Contact is Susan Jaquette, wife\r\ne...",


In [117]:
# Add member notes to members df
members_df['Member Name'] = members_df['Last Name'] + ', ' + members_df['First Name']
members_df = members_df.merge(members_notes, how='left',
                 left_on='Member Name', right_on='Member Name',)
members_df.head()

Unnamed: 0,Member Number,First Name,Middle Initial,Last Name,Gender,Preferred Name,Metro Area Id,Primary Phone Number,Secondary Phone Number,Email Address,...,Primary Address Id,Secondary Address Id,Preferred Contact Method,Member Name,Emergency Contact Name,Emergency Contact Relationship,Emergency Contact Email Address,Emergency Contact Phone Number,Staffer Notes,Volunteer Notes
0,229,Andrea,,Aching,Female,,,6073194170.0,7073383665.0,annaching2017@yahoo.com,...,,,,"Aching, Andrea",,,,,,
1,228,Gerard,L,Aching,Male,,,6073194170.0,9175455737.0,gerardaching@hotmail.com,...,,,,"Aching, Gerard",,,,,,
2,230,William,,Aching,Male,,,6073194170.0,,gerardaching@hotmail.com,...,,,,"Aching, William",,,,,,
3,233,Peggy,,Adams,Female,,Downtown Ithaca,6078821659.0,6078821659.0,madams@ithaca.edu,...,,,,"Adams, Peggy",,,,,,
4,259,Nancy,,Ahlers,Female,,Lansing,,,baselahlers@gmail.com,...,,,,"Ahlers, Nancy",,,,,,


In [118]:
# Now drop the extra column we created
members_df.drop(['Member Name'],axis=1)

Unnamed: 0,Member Number,First Name,Middle Initial,Last Name,Gender,Preferred Name,Metro Area Id,Primary Phone Number,Secondary Phone Number,Email Address,...,Volunteer Id,Primary Address Id,Secondary Address Id,Preferred Contact Method,Emergency Contact Name,Emergency Contact Relationship,Emergency Contact Email Address,Emergency Contact Phone Number,Staffer Notes,Volunteer Notes
0,229,Andrea,,Aching,Female,,,6073194170,7073383665,annaching2017@yahoo.com,...,,,,,,,,,,
1,228,Gerard,L,Aching,Male,,,6073194170,9175455737,gerardaching@hotmail.com,...,,,,,,,,,,
2,230,William,,Aching,Male,,,6073194170,,gerardaching@hotmail.com,...,,,,,,,,,,
3,233,Peggy,,Adams,Female,,Downtown Ithaca,6078821659,6078821659,madams@ithaca.edu,...,,,,,,,,,,
4,259,Nancy,,Ahlers,Female,,Lansing,,,baselahlers@gmail.com,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
325,420,Linda,,Woodard,Female,,Cayuga Heights,6072279798,,lab6@cornell.edu,...,,,,,Allison Pfuntner,Daughter,allisonpeters3@yahoo.com,301-503-1123,,
326,234,Rochelle,,Woods,Female,,,607-277-3497,607-339-1956,rewoods112@gmail.com,...,,,,,,,,,,
327,43,Eugene,,Yarussi,Male,Gene,,(607) 277-6520,(607) 279-6613,eyarussi@gmail.com,...,,,,,,,,,,
328,44,Jeanne,,Yarussi,Female,,Caroline,(607) 277-6520,(607) 279-6613,eyarussi@gmail.com,...,,,,,,,,,,


In [122]:
# Create an ID column
members_df['ID'] = range(0, len(members_df))
members_df.head()

Unnamed: 0,ID,First Name,Middle Initial,Last Name,Gender,Preferred Name,Metro Area Id,Primary Phone Number,Secondary Phone Number,Email Address,...,Primary Address Id,Secondary Address Id,Preferred Contact Method,Member Name,Emergency Contact Name,Emergency Contact Relationship,Emergency Contact Email Address,Emergency Contact Phone Number,Staffer Notes,Volunteer Notes
0,0,Andrea,,Aching,Female,,,6073194170.0,7073383665.0,annaching2017@yahoo.com,...,,,,"Aching, Andrea",,,,,,
1,1,Gerard,L,Aching,Male,,,6073194170.0,9175455737.0,gerardaching@hotmail.com,...,,,,"Aching, Gerard",,,,,,
2,2,William,,Aching,Male,,,6073194170.0,,gerardaching@hotmail.com,...,,,,"Aching, William",,,,,,
3,3,Peggy,,Adams,Female,,Downtown Ithaca,6078821659.0,6078821659.0,madams@ithaca.edu,...,,,,"Adams, Peggy",,,,,,
4,4,Nancy,,Ahlers,Female,,Lansing,,,baselahlers@gmail.com,...,,,,"Ahlers, Nancy",,,,,,


In [130]:
final_members_df = members_df.rename(columns={"ID": "id", "Salutation": "salutation", "First Name": "first_name", "Middle Initial" : "middle_initial", "Last Name" : "last_name", "Preferred Name" : "preferred_name", "Gender" : "gender", "Birthdate" : "birthdate", "Volunteer Id" : "volunteer_id", "Member Number" : "member_number", "Membership Expiration Date" : "membership_expiration_date", "Primary Address Id" : "primary_address_id", "Secondary Address Id" : "secondary_address_id", "Primary Phone Number": "primary_phone_number", "Secondary Phone Number" : "secondary_phone_number", "Email Address" : "email_address", "Preferred Contact Method" : "preferred_contact_method", "Emergency Contact Name" : "emergency_contact_name", "Emergency Contact Phone Number" : "emergency_contact_phone_number", "Emergency Contact Email Address" : "emergency_contact_email_address", "Emergency Contact Relationship" : "emergency_contact_relationship", "Volunteer Notes" : "volunteer_notes", "Staffer Notes" : "staffer_notes"})

In [131]:
list(final_members_df.columns)

['id',
 'first_name',
 'middle_initial',
 'last_name',
 'gender',
 'preferred_name',
 'Metro Area Id',
 'primary_phone_number',
 'secondary_phone_number',
 'email_address',
 'membership_expiration_date',
 'salutation',
 'birthdate',
 'volunteer_id',
 'primary_address_id',
 'secondary_address_id',
 'preferred_contact_method',
 'Member Name',
 'emergency_contact_name',
 'emergency_contact_relationship',
 'emergency_contact_email_address',
 'emergency_contact_phone_number',
 'staffer_notes',
 'volunteer_notes']

In [95]:
# Read in service provider data
service_provider_df = pd.read_csv(f"{ROOT_DATA_DIR}/service_provider_data.csv")
service_provider_df.head()

Unnamed: 0,Service Provider First Name,Service Provider Laast Name,Type,Company,Address,City,State,Zip,Phone,Cell Phone,Emergency Contact Phone #,Email,Preferred Contact Method,Website,Fully Vetted?,Visible?,Discount Notes
0,,,Non-Member Contractor,AccuFab,232 Cherry St.,Ithaca,NY,14850,(607)273-3706,,,mike@accufabinc.com,Both,http://accufabinc.com/,No,No,
1,,,Non-Member Contractor,Ace Security,720 W. Green St.,Ithaca,NY,14850,6072738840,,,,Phone,,No,No,
2,,,Non-Member Contractor,ACME Pest Control,359 Dryden-Harford Rd.,Dryden,NY,13053,607-844-8689,,,,Phone,,No,No,
3,,,Non-Member Contractor,Austin's Helping Hands LLC,225 S. Fulton St.,Ithaca,NY,14850,6073399321,,,,Phone,,No,No,
4,,,Non-Member Contractor,Bailey Construction,5139 Jacksonville Road,Trumansburg,NY,14886,607-209-4114,,,,Phone,www.baileyconstruction.vpweb.com,No,No,


In [96]:
# Take volunteer data out
volunteer_df = service_provider_df[service_provider_df.Type != 'Non-Member Contractor']
volunteer_df.head()

Unnamed: 0,Service Provider First Name,Service Provider Laast Name,Type,Company,Address,City,State,Zip,Phone,Cell Phone,Emergency Contact Phone #,Email,Preferred Contact Method,Website,Fully Vetted?,Visible?,Discount Notes
51,Paula,Amols,Member Volunteer,,370 Snyder Hill Rd.,Ithaca,NY,14850,(607) 592-1166,,,pamols54@gmail.com,Email,,Yes,Yes,
52,Wendy,Aquadro,Member Volunteer,,283 ELLIS HOLLOW CREEK RD,ITHACA,NY,14850-9619,6072733049,6073516275,,gsa8@cornell.edu,Phone,,Yes,Yes,
55,Jill,Baldwin,Non-Member Volunteer,Favorite Life Farm,7619 POTTER MORGAN RD,INTERLAKEN,NY,14847-9665,3155214574,3155214574,,jmbaldwin211@gmail.com,Email,,No,No,
57,Eileen,Berlow,Member Volunteer,,2444 Perry City Road,Ithaca,NY,14850,(607) 379-3565,(607) 387-6890,,eileenberlow@yahoo.com,Phone,,No,Yes,
58,Deborah,Berman,Non-Member Volunteer,,110 Hancock St,Ithaca,NY,14850,(607) 273-2901,,(607) 342-7841,skaboooch@gmail.com,Email,,Yes,Yes,


In [97]:
list(volunteer_df.columns)

['Service Provider First Name',
 'Service Provider Laast Name',
 'Type',
 'Company',
 'Address',
 'City',
 'State',
 'Zip',
 'Phone',
 'Cell Phone',
 'Emergency Contact Phone #',
 'Email',
 'Preferred Contact Method',
 'Website',
 'Fully Vetted?',
 'Visible?',
 'Discount Notes']

In [98]:
# Preprocess volunteer data by dropping irrelevant data, renaming..
volunteer_df = volunteer_df.drop(['Type', 'Company', 'Website','Visible?','Discount Notes'],axis=1)
volunteer_df = volunteer_df.rename(columns={'Service Provider First Name': 'First Name', 
                                            'Service Provider Laast Name': 'Last Name',
                                           'Emergency Contact Phone #':'Emergency Contact Phone Number',
                                           'Fully Vetted?': 'Vetting',
                                           'Phone': 'Primary Phone Number',
                                           'Cell Phone': 'Secondary Phone Number'})
volunteer_df.head()

Unnamed: 0,First Name,Last Name,Address,City,State,Zip,Primary Phone Number,Secondary Phone Number,Emergency Contact Phone Number,Email,Preferred Contact Method,Vetting
51,Paula,Amols,370 Snyder Hill Rd.,Ithaca,NY,14850,(607) 592-1166,,,pamols54@gmail.com,Email,Yes
52,Wendy,Aquadro,283 ELLIS HOLLOW CREEK RD,ITHACA,NY,14850-9619,6072733049,6073516275,,gsa8@cornell.edu,Phone,Yes
55,Jill,Baldwin,7619 POTTER MORGAN RD,INTERLAKEN,NY,14847-9665,3155214574,3155214574,,jmbaldwin211@gmail.com,Email,No
57,Eileen,Berlow,2444 Perry City Road,Ithaca,NY,14850,(607) 379-3565,(607) 387-6890,,eileenberlow@yahoo.com,Phone,No
58,Deborah,Berman,110 Hancock St,Ithaca,NY,14850,(607) 273-2901,,(607) 342-7841,skaboooch@gmail.com,Email,Yes


In [99]:
list(volunteer_df.columns)

['First Name',
 'Last Name',
 'Address',
 'City',
 'State',
 'Zip',
 'Primary Phone Number',
 'Secondary Phone Number',
 'Emergency Contact Phone Number',
 'Email',
 'Preferred Contact Method',
 'Vetting']

In [107]:
# Take out local resource data
local_resource_df = service_provider_df[service_provider_df.Type == 'Non-Member Contractor']
local_resource_df.head()

Unnamed: 0,Service Provider First Name,Service Provider Laast Name,Type,Company,Address,City,State,Zip,Phone,Cell Phone,Emergency Contact Phone #,Email,Preferred Contact Method,Website,Fully Vetted?,Visible?,Discount Notes
0,,,Non-Member Contractor,AccuFab,232 Cherry St.,Ithaca,NY,14850,(607)273-3706,,,mike@accufabinc.com,Both,http://accufabinc.com/,No,No,
1,,,Non-Member Contractor,Ace Security,720 W. Green St.,Ithaca,NY,14850,6072738840,,,,Phone,,No,No,
2,,,Non-Member Contractor,ACME Pest Control,359 Dryden-Harford Rd.,Dryden,NY,13053,607-844-8689,,,,Phone,,No,No,
3,,,Non-Member Contractor,Austin's Helping Hands LLC,225 S. Fulton St.,Ithaca,NY,14850,6073399321,,,,Phone,,No,No,
4,,,Non-Member Contractor,Bailey Construction,5139 Jacksonville Road,Trumansburg,NY,14886,607-209-4114,,,,Phone,www.baileyconstruction.vpweb.com,No,No,


In [103]:
list(local_resource_df.columns)

['Service Provider First Name',
 'Service Provider Laast Name',
 'Type',
 'Company',
 'Address',
 'City',
 'State',
 'Zip',
 'Phone',
 'Cell Phone',
 'Emergency Contact Phone #',
 'Email',
 'Preferred Contact Method',
 'Website',
 'Fully Vetted?',
 'Visible?',
 'Discount Notes']

In [104]:
local_resource_df = local_resource_df.drop(['Type','Fully Vetted?', 'Visible?'],axis=1)
local_resource_df = local_resource_df.rename(columns={'Service Provider First Name': 'Contact First Name', 
                                            'Service Provider Laast Name': 'Contact Last Name',
                                           'Emergency Contact Phone #':'Emergency Contact Phone Number',
                                           'Phone': 'Primary Phone Number',
                                           'Cell Phone': 'Secondary Phone Number',
                                            'Discount Notes': 'Notes'})
local_resource_df.head()

Unnamed: 0,Contact First Name,Contact Last Name,Company,Address,City,State,Zip,Primary Phone Number,Secondary Phone Number,Emergency Contact Phone Number,Email,Preferred Contact Method,Website,Notes
0,,,AccuFab,232 Cherry St.,Ithaca,NY,14850,(607)273-3706,,,mike@accufabinc.com,Both,http://accufabinc.com/,
1,,,Ace Security,720 W. Green St.,Ithaca,NY,14850,6072738840,,,,Phone,,
2,,,ACME Pest Control,359 Dryden-Harford Rd.,Dryden,NY,13053,607-844-8689,,,,Phone,,
3,,,Austin's Helping Hands LLC,225 S. Fulton St.,Ithaca,NY,14850,6073399321,,,,Phone,,
4,,,Bailey Construction,5139 Jacksonville Road,Trumansburg,NY,14886,607-209-4114,,,,Phone,www.baileyconstruction.vpweb.com,


In [110]:
list(local_resource_df.columns)

['Service Provider First Name',
 'Service Provider Laast Name',
 'Type',
 'Company',
 'Address',
 'City',
 'State',
 'Zip',
 'Phone',
 'Cell Phone',
 'Emergency Contact Phone #',
 'Email',
 'Preferred Contact Method',
 'Website',
 'Fully Vetted?',
 'Visible?',
 'Discount Notes']

## Metro Areas

In [None]:
# Create and save metro area df
metro_areas = pd.DataFrame(['Brooktondale', 'Caroline', 'Danby', 'Downtown Ithaca', 'Dryden', 'Enfield', 'Freeville', 'Groton', 'Lansing', 'Newfield', 'Outside Tompkins County', 'Slaterville Springs', 'South Hill', 'Tompkins County', 'Trumansburg', 'Jacksonville', 'Ulysses', 'Vana', 'West Hill'])
metro_areas.to_csv(OUT_DATA_DIR+'metro_areas.csv')

## Address ID Replacement

In [None]:
# Add volunteer and local resource addresses to address book
column_names = ['Address','City','State','Zip']
address_data = []
for idx, row in members_df.iterrows():
    d = {}
    for c in column_names:
        d[c] = row[c]
    address_data.append(d)
for idx, row in local_resource_df.iterrows():
    d = {}
    for c in column_names:
        d[c] = row[c]
    address_data.append(d)
for idx, row in volunteer_df.iterrows():
    d = {}
    for c in column_names:
        d[c] = row[c]
    address_data.append(d)
address_df = pd.DataFrame(address_data)
address_df = address_df.drop_duplicates()
address_df = address_df.reset_index(drop=True)
address_df = address_df.reset_index()
address_df = address_df.rename(columns={'index': 'Address ID'})
address_df.head()

In [109]:
# Export addresses
address_df.to_csv(OUT_DATA_DIR+'addresses.csv')

NameError: name 'address_df' is not defined

In [108]:
# Merge address IDs in to volunteer and local resource data
volunteer_df = volunteer_df.merge(address_df, how='left', on=["Address","City","State","Zip"])
local_resource_df = local_resource_df.merge(address_df, how='left', on=["Address","City","State","Zip"])
members_df = members_df.merge(address_df, how='left', on=["Address","City","State","Zip"])

NameError: name 'address_df' is not defined

In [91]:
# Replace addresses in members with address ID
volunteer_df = volunteer_df.drop(['Address','City','State','Zip'], axis=1)
local_resource_df = local_resource_df.drop(['Address','City','State','Zip'], axis=1)
members_df = members_df.drop(['Address','City','State','Zip'], axis=1)
volunteer_df.head()

NameError: name 'volunteer_df' is not defined

In [None]:
# Phone Number df
phone_numbers = list(members_df['Primary Phone Number'])
phone_numbers.extend(list(members_df['Secondary Phone Number']))
phone_numbers.extend(list(members_df['Emergency Contact Phone Number']))
phone_numbers.extend(list(volunteer_df['Primary Phone Number']))
phone_numbers.extend(list(volunteer_df['Secondary Phone Number']))
phone_numbers.extend(list(volunteer_df['Emergency Contact Phone Number']))
phone_numbers.extend(list(local_resource_df['Primary Phone Number']))
phone_numbers.extend(list(local_resource_df['Secondary Phone Number']))
phone_numbers.extend(list(local_resource_df['Emergency Contact Phone Number']))
phone_df = pd.DataFrame(phone_numbers)
phone_df = phone_df.drop_duplicates()
phone_df = phone_df[phone_df[0].notna()]
phone_df = phone_df.reset_index(drop=True)
phone_df = phone_df.reset_index()
phone_df = phone_df.rename(columns={'index': 'Phone ID', 0:'Phone Number'})
phone_df.head()

In [None]:
# Export phone numbers
phone_df.to_csv(OUT_DATA_DIR+'phone_numbers.csv')

In [90]:
volunteer_df = volunteer_df.merge(phone_df, how='left', left_on=["Primary Phone Number"],right_on=["Phone Number"])
local_resource_df = local_resource_df.merge(phone_df, how='left', left_on=["Primary Phone Number"],right_on=["Phone Number"])
members_df = members_df.merge(phone_df, how='left', left_on=["Primary Phone Number"],right_on=["Phone Number"])
volunteer_df['Primary Phone Number'] = volunteer_df['Phone ID']
volunteer_df = volunteer_df.drop(['Phone ID','Phone Number'],axis=1)
local_resource_df['Primary Phone Number'] = local_resource_df['Phone ID']
local_resource_df = local_resource_df.drop(['Phone ID','Phone Number'],axis=1)
members_df['Primary Phone Number'] = members_df['Phone ID']
members_df = members_df.drop(['Phone ID','Phone Number'],axis=1)
volunteer_df.head()

NameError: name 'volunteer_df' is not defined

In [89]:
volunteer_df = volunteer_df.merge(phone_df, how='left', left_on=["Secondary Phone Number"],right_on=["Phone Number"])
local_resource_df = local_resource_df.merge(phone_df, how='left', left_on=["Secondary Phone Number"],right_on=["Phone Number"])
members_df = members_df.merge(phone_df, how='left', left_on=["Secondary Phone Number"],right_on=["Phone Number"])
volunteer_df['Secondary Phone Number'] = volunteer_df['Phone ID']
volunteer_df = volunteer_df.drop(['Phone ID','Phone Number'],axis=1)
local_resource_df['Secondary Phone Number'] = local_resource_df['Phone ID']
local_resource_df = local_resource_df.drop(['Phone ID','Phone Number'],axis=1)
members_df['Secondary Phone Number'] = members_df['Phone ID']
members_df = members_df.drop(['Phone ID','Phone Number'],axis=1)
volunteer_df = volunteer_df.merge(phone_df, how='left', left_on=["Emergency Contact Phone Number"],right_on=["Phone Number"])
local_resource_df = local_resource_df.merge(phone_df, how='left', left_on=["Emergency Contact Phone Number"],right_on=["Phone Number"])
members_df = members_df.merge(phone_df, how='left', left_on=["Emergency Contact Phone Number"],right_on=["Phone Number"])
volunteer_df['Emergency Contact Phone Number'] = volunteer_df['Phone ID']
volunteer_df = volunteer_df.drop(['Phone ID','Phone Number'],axis=1)
local_resource_df['Emergency Contact Phone Number'] = local_resource_df['Phone ID']
local_resource_df = local_resource_df.drop(['Phone ID','Phone Number'],axis=1)
members_df['Emergency Contact Phone Number'] = members_df['Phone ID']
members_df = members_df.drop(['Phone ID','Phone Number'],axis=1)
volunteer_df.head()

NameError: name 'volunteer_df' is not defined

In [126]:
# Export people data
final_members_df.to_csv(OUT_DATA_DIR+'members.csv')
volunteer_df.to_csv(OUT_DATA_DIR+'volunteers.csv')
local_resource_df.to_csv(OUT_DATA_DIR+'local_resources.csv')