In [1]:
import pandas as pd
import numpy as np

In [2]:
# Empty master tables; each loader cell below adds rows to them through the
# find_or_add_* / create_contact_point helpers.

# One row per distinct address.
residences = pd.DataFrame(columns=[
    'address',
    'zipcode',
    'latitude',
    'longitude',
])

# One row per distinct person, with up to three contact details.
survivors = pd.DataFrame(columns=[
    'name',
    'contact_1',
    'contact_2',
    'contact_3',
])

# One row per report; residence_id / survivor_id hold row ids of the tables above.
contact_points = pd.DataFrame(columns=[
    'residence_id',
    'survivor_id',
    'date',
    'water_lvl',
    'damage_lvl',
    'notes1',
    'notes2',
    'notes3',
    'intaker',
])

In [3]:
def find_or_add_residence(**kwargs):
    """Return the row id of a residence, adding it to ``residences`` if new.

    A residence is matched on an exact ``address`` string comparison against
    the module-level ``residences`` table. When no row matches, ``kwargs`` is
    appended as a new row and the table is rebound to the enlarged frame.

    Parameters
    ----------
    **kwargs
        Column values for the residence. ``address`` is required; the other
        keys are only used when a new row is created.

    Returns
    -------
    int
        Index label of the first matching row, or of the newly added row.

    Raises
    ------
    KeyError
        If ``address`` is not supplied.
    """
    global residences
    address = kwargs['address']

    # Single scan: the same mask answers "is it known?" and "which row?".
    matching_ids = residences.loc[residences['address'] == address].index.tolist()
    if matching_ids:
        # If the address somehow appears more than once, the first row wins.
        return matching_ids[0]

    # DataFrame.append was removed in pandas 2.0; pd.concat is its replacement.
    residences = pd.concat([residences, pd.DataFrame([kwargs])], ignore_index=True)
    # ignore_index renumbers rows 0..n-1, so the new row is the last one.
    return residences.shape[0] - 1

In [4]:
def find_or_add_survivor(**kwargs):
    """Return the row id of a survivor, adding them to ``survivors`` if new.

    A survivor is matched on an exact ``name`` string comparison against the
    module-level ``survivors`` table. When no row matches, ``kwargs`` is
    appended as a new row and the table is rebound to the enlarged frame.

    Parameters
    ----------
    **kwargs
        Column values for the survivor. ``name`` is required; the other keys
        (e.g. ``contact_1``) are only used when a new row is created.

    Returns
    -------
    int
        Index label of the first matching row, or of the newly added row.

    Raises
    ------
    KeyError
        If ``name`` is not supplied.
    """
    global survivors
    name = kwargs['name']

    matching_ids = survivors.loc[survivors['name'] == name].index.tolist()
    if matching_ids:
        # TODO: contact details passed for an already-known survivor are
        # ignored; merging them into the first empty contact_* column of the
        # existing row is not implemented.
        return matching_ids[0]

    # DataFrame.append was removed in pandas 2.0; pd.concat is its replacement.
    survivors = pd.concat([survivors, pd.DataFrame([kwargs])], ignore_index=True)
    # ignore_index renumbers rows 0..n-1, so the new row is the last one.
    return survivors.shape[0] - 1
        

In [5]:
def create_contact_point(**kwargs):
    """Append one record to the module-level ``contact_points`` table.

    Parameters
    ----------
    **kwargs
        Column values for the new row (e.g. ``residence_id``, ``survivor_id``,
        ``date``, ``intaker``). Columns that are not supplied are left missing.

    Returns
    -------
    None
        The table is rebound to a new, enlarged frame with a fresh 0..n-1 index.
    """
    global contact_points
    # DataFrame.append was removed in pandas 2.0; pd.concat is its replacement.
    contact_points = pd.concat([contact_points, pd.DataFrame([kwargs])], ignore_index=True)

    

In [6]:
# Load the Nashville Codes damage assessment export. The dtype key for the
# notes column must be 'Notes' (the column read below), not 'Note'.
nash_codes = pd.read_csv('../data/CodesDamageAssessment_4_15_21.csv', parse_dates=['CreationDate'], 
                         dtype={'Address':'object','Extent of Damage':'object','Type of Structure':'object',
                                'Water Level':'float64','Notes':'object','Creator':'object'})
# A row without an address cannot be tied to a residence.
nash_codes = nash_codes.dropna(subset=['Address'])
nash_codes = nash_codes.drop_duplicates(ignore_index=True)

# Keep residential structures whose damage is anything other than 'None'.
# NOTE(review): newer pandas versions may parse the literal string 'None' as
# NaN by default, in which case this filter would no longer drop those rows -
# confirm against the installed pandas version.
mask = (nash_codes['Type of Structure']=='Residential') & (nash_codes['Extent of Damage'] != 'None')
for _, row in nash_codes[mask].iterrows():
    # Normalise case/whitespace so the same address from other sources matches.
    address = row['Address'].upper().strip()
    res_id = find_or_add_residence(address=address)
    # This source carries no resident information.
    surv_id = None
    create_contact_point(residence_id = res_id, survivor_id = surv_id, 
                         date = row['CreationDate'], water_lvl = row['Water Level'], 
                         damage_lvl = row['Extent of Damage'], notes1=row['Notes'], 
                         intaker='Nashville Codes: ' + row['Creator'])

In [7]:
# Load the OEM damage export; every column used below is read as text.
oem = pd.read_csv('../data/OEM_4_12_21.csv', 
                         dtype={'PropHouse':'object','PropZip':'object','PropStreet':'object',
                                'Extent of Damage':'object','Water Level':'object','Notes':'object',
                                'Damage 2':'object'})

# Blank cells become '' so the string operations below never see NaN.
oem = oem.fillna('')
oem = oem.drop_duplicates(ignore_index=True)

# NOTE(review): newer pandas versions may parse the literal string 'None' as
# NaN by default (then turned into '' above), in which case this filter would
# no longer drop those rows - confirm against the installed pandas version.
mask = oem['Extent of Damage'] != 'None'
for _, row in oem[mask].iterrows():
    house = row['PropHouse'].upper().strip()
    street = row['PropStreet'].upper().strip()
    # Strip the joined value too: when either part is blank the join would
    # otherwise leave a stray space and the address would not match the same
    # address coming from another source.
    address = (house + ' ' + street).strip()
    res_id = find_or_add_residence(address=address)
    # This source carries no resident information.
    surv_id = None
    create_contact_point(residence_id = res_id, survivor_id = surv_id, date = pd.to_datetime('2021-04-12'), 
                         water_lvl = row['Water Level'], damage_lvl = row['Extent of Damage'], 
                         notes1='NOTES: '+row['Notes'], notes2='DAMAGE 2: ' + row['Damage 2'], intaker='OEM')

In [8]:
# Load the NERVE self-report export. Column 10 is parsed as a date.
# NOTE(review): presumably column 10 is 'CreationDate' (the date read below);
# confirm, and consider naming the column instead of using its position.
nerve_4_9 = pd.read_csv('../data/NERVE_040921.csv', parse_dates=[10],
                         dtype={'Street No.':'object','Street':'object','First Name':'object','Last Name':'object',
                                'Email':'object','Phone Number':'object','Structure Damage Level':'object',
                                'Property Damage Described':'object','Comments':'object',
                                'Other - Property Damage Described':'object'})
# Blank cells become '' so the string operations below never see NaN.
nerve_4_9 = nerve_4_9.fillna('')
nerve_4_9 = nerve_4_9.drop_duplicates(ignore_index=True)

for _, row in nerve_4_9.iterrows():
    # Strip the joined values as well: a blank part would otherwise leave a
    # leading/trailing space (' MAIN ST', 'JOHN ') that defeats exact matching.
    address = (row['Street No.'].upper().strip() + ' ' + row['Street'].upper().strip()).strip()
    res_id = find_or_add_residence(address=address)
    name = (row['First Name'].upper().strip() + ' ' + row['Last Name'].upper().strip()).strip()
    
    if name == '':
        # No name given: record the contact point without a survivor.
        surv_id = None
    else:
        surv_id = find_or_add_survivor(name = name, contact_1 = row['Phone Number'].strip(), contact_2 = row['Email'].strip())
    
    create_contact_point(residence_id = res_id, survivor_id = surv_id, date = row['CreationDate'], 
                         damage_lvl = row['Structure Damage Level'], intaker='NERVE: Self Reported',
                         notes1='Property Damage Described: ' + row['Property Damage Described'], 
                         notes2 = 'Comments: ' + row['Comments'], 
                         notes3 = 'Other - Property Damage Described: ' + row['Other - Property Damage Described'])

In [9]:
# Sanity check: row count, non-null count and dtype of every contact_points column.
contact_points.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 852 entries, 0 to 851
Data columns (total 9 columns):
 #   Column        Non-Null Count  Dtype         
---  ------        --------------  -----         
 0   residence_id  852 non-null    object        
 1   survivor_id   26 non-null     object        
 2   date          852 non-null    datetime64[ns]
 3   water_lvl     500 non-null    object        
 4   damage_lvl    852 non-null    object        
 5   notes1        509 non-null    object        
 6   notes2        462 non-null    object        
 7   notes3        220 non-null    object        
 8   intaker       852 non-null    object        
dtypes: datetime64[ns](1), object(8)
memory usage: 60.0+ KB


In [10]:
# Load the 3/30 canvass sheet; every column used below is read as text.
canvass_3_30 = pd.read_csv('../data/canvass_3_30.csv',
                         dtype={'Street Number':'object','Street Name':'object','Name':'object',
                                'Language Preference':'object','Saturday Help':'object',
                                'Phone Number':'object','Notes':'object'})
# Blank cells become '' so the string operations below never see NaN.
canvass_3_30 = canvass_3_30.fillna('')
canvass_3_30 = canvass_3_30.drop_duplicates(ignore_index=True)

for _, row in canvass_3_30.iterrows():
    # Strip the joined value as well: a blank part would otherwise leave a
    # leading/trailing space that defeats exact address matching.
    address = (row['Street Number'].upper().strip() + ' ' + row['Street Name'].upper().strip()).strip()
    res_id = find_or_add_residence(address=address)
    name = row['Name'].upper().strip()
    if name == '':
        # No name given: record the contact point without a survivor.
        surv_id = None
    else:
        surv_id = find_or_add_survivor(name = name, contact_1 = row['Phone Number'].strip())
    # NOTE(review): the file is named canvass_3_30 but the contact date is
    # 2021-03-31 - confirm which date is intended.
    create_contact_point(residence_id = res_id, survivor_id = surv_id, date = pd.to_datetime('2021-03-31'),
                         intaker='Canvass Volunteer from Hands On Nashville',
                         notes1= 'Language Preference: ' + row['Language Preference'], 
                         notes2 = 'Notes: ' + row['Notes'], 
                         notes3 ='Followup: ' + row['Saturday Help'])

In [11]:
# Sanity check: row count, non-null count and dtype of every contact_points column.
contact_points.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 9 columns):
 #   Column        Non-Null Count  Dtype         
---  ------        --------------  -----         
 0   residence_id  891 non-null    object        
 1   survivor_id   59 non-null     object        
 2   date          891 non-null    datetime64[ns]
 3   water_lvl     500 non-null    object        
 4   damage_lvl    852 non-null    object        
 5   notes1        548 non-null    object        
 6   notes2        501 non-null    object        
 7   notes3        259 non-null    object        
 8   intaker       891 non-null    object        
dtypes: datetime64[ns](1), object(8)
memory usage: 62.8+ KB


In [12]:
# Load the 4/2 canvass form export. Column 2 is parsed as a date.
# NOTE(review): presumably column 2 is 'Completion time' (the date read
# below); confirm, and consider naming the column instead of its position.
canvass_4_2 = pd.read_csv('../data/canvass_4_2.csv', parse_dates=[2],
                         dtype={'What is the house number?':'object','What is the street name?':'object',
                                'Is there visible debris on the property?':'object','Resident Name':'object',
                                'Resident phone number':'object','Language?':'object',
                                'Was the home impacted? (Either can you see this visually or did the resident disclose?)':'object',
                                'Does the resident want volunteers to assist with response?':'object',
                                'Does the resident need any of the following?':'object'})

# Blank cells become '' so the string operations below never see NaN.
canvass_4_2 = canvass_4_2.fillna('')
canvass_4_2 = canvass_4_2.drop_duplicates(ignore_index=True)

# Only homes explicitly marked as impacted are recorded.
impacted_col = 'Was the home impacted? (Either can you see this visually or did the resident disclose?)'
mask = canvass_4_2[impacted_col] == 'Yes'
for _, row in canvass_4_2[mask].iterrows():
    # Strip the joined value as well: a blank part would otherwise leave a
    # leading/trailing space that defeats exact address matching.
    address = (row['What is the house number?'].upper().strip() + ' ' + row['What is the street name?'].upper().strip()).strip()
    res_id = find_or_add_residence(address=address)
    name = row['Resident Name'].upper().strip()
    if name == '':
        # No name given: record the contact point without a survivor.
        surv_id = None
    else:
        surv_id = find_or_add_survivor(name = name, contact_1 = row['Resident phone number'].strip())
    create_contact_point(residence_id = res_id, survivor_id = surv_id, date = row['Completion time'],
                         intaker='Canvass Volunteer from Hands On Nashville',
                         notes1= 'Language: ' + row['Language?'], 
                         notes2 = 'Requested Assistance: ' + row['Does the resident want volunteers to assist with response?'], 
                         notes3 ='Specific Needs: ' + row['Does the resident need any of the following?'])

In [13]:
# Load the Nashville Responds self-report export; every column used below is
# read as text.
nash_responds = pd.read_csv('../data/nash_responds_4_6.csv',
                         dtype={'First Name':'object','Last Name':'object','Address 1':'object',
                                'Email':'object','Phone':'object','Request Details':'object'})
# Blank cells become '' (as in the other loaders); without this a missing
# value is a float NaN and .upper()/.strip()/string concatenation would raise.
nash_responds = nash_responds.fillna('')
nash_responds = nash_responds.drop_duplicates(ignore_index=True)



for _, row in nash_responds.iterrows():
    address = row['Address 1'].upper().strip()
    res_id = find_or_add_residence(address=address)
    # Strip the joined value as well: a blank first or last name would
    # otherwise leave a leading/trailing space that defeats name matching.
    name = (row['First Name'].upper().strip() + ' ' + row['Last Name'].upper().strip()).strip()
    
    if name == '':
        # No name given: record the contact point without a survivor.
        surv_id = None
    else:
        surv_id = find_or_add_survivor(name = name, contact_1 = row['Phone'].strip(), contact_2 = row['Email'].strip())
    
    create_contact_point(residence_id = res_id, survivor_id = surv_id, date = pd.to_datetime('2021-04-06'), 
                         intaker='Nashville Responds: Self Reported',
                         notes1='Request Details: ' + row['Request Details'])

In [28]:
# Number of contact points that carry no damage level.
contact_points['damage_lvl'].isnull().sum()

300

In [26]:
# Spot check: look up one survivor by exact (upper-cased) name.
survivors.loc[survivors['name'].eq('DEBORAH GAINS')]

Unnamed: 0,name,contact_1,contact_2,contact_3
