In [359]:
import pandas as pd
import numpy as np

In [360]:
# Column layouts of the three in-memory tables built up by this notebook.
RESIDENCE_COLUMNS = ['address', 'zipcode', 'latitude', 'longitude']
SURVIVOR_COLUMNS = ['name', 'contact_1', 'contact_2', 'contact_3']
CONTACT_POINT_COLUMNS = [
    'residence_id', 'survivor_id', 'date',
    'water_lvl', 'damage_lvl',
    'notes1', 'notes2', 'notes3',
    'intaker',
]

# One row per distinct address.
residences = pd.DataFrame(columns=RESIDENCE_COLUMNS)

# One row per distinct person name, with up to three contact details.
survivors = pd.DataFrame(columns=SURVIVOR_COLUMNS)

# One row per report, pointing at a residence (and optionally a survivor).
contact_points = pd.DataFrame(columns=CONTACT_POINT_COLUMNS)

In [361]:
def find_or_add_residence(**kwargs):
    """Return the row id of the residence at ``kwargs['address']``, adding it if new.

    The lookup is an exact string match against the ``address`` column of the
    module-level ``residences`` frame. When no row matches, every keyword
    argument is stored as a column value of a new row appended at the end.

    Parameters
    ----------
    **kwargs
        Column values for the residence. ``address`` is required.

    Returns
    -------
    int
        Index label of the existing or newly added row.

    Raises
    ------
    KeyError
        If ``address`` is not supplied.
    """
    global residences
    address = kwargs['address']
    matches = residences.index[residences['address'] == address].tolist()
    if matches:
        return matches[0]
    # DataFrame.append was removed in pandas 2.0; pd.concat is the supported
    # replacement and also works on older versions.
    new_row = pd.DataFrame([kwargs])
    residences = pd.concat([residences, new_row], ignore_index=True)
    # ignore_index=True renumbers rows 0..n-1, so the new row is the last one.
    return len(residences) - 1

In [362]:
def find_or_add_survivor(**kwargs):
    """Return the row id of the survivor named ``kwargs['name']``, adding them if new.

    The lookup is an exact string match against the ``name`` column of the
    module-level ``survivors`` frame.

    * No match: a new row is appended holding every keyword argument.
    * Match: each supplied value that is not already present anywhere in that
      survivor's row is written into the first column of the row that is
      still missing (NaN). The keyword *name* is not used for placement, only
      the value. Values are dropped silently once no empty column remains.

    Blank values (``None`` or empty/whitespace-only strings) are never written
    into an existing row, so they cannot use up a free contact slot.

    Parameters
    ----------
    **kwargs
        Column values for the survivor. ``name`` is required.

    Returns
    -------
    int
        Index label of the existing (first matching) or newly added row.

    Raises
    ------
    KeyError
        If ``name`` is not supplied.
    """
    global survivors
    name = kwargs['name']
    matches = survivors.index[survivors['name'] == name].tolist()
    if not matches:
        # DataFrame.append was removed in pandas 2.0; use pd.concat instead.
        new_row = pd.DataFrame([kwargs])
        survivors = pd.concat([survivors, new_row], ignore_index=True)
        return len(survivors) - 1

    surv_id = matches[0]
    for value in kwargs.values():
        if value is None or (isinstance(value, str) and not value.strip()):
            continue  # nothing worth recording
        # Re-read the row on each pass so slots filled earlier in this loop
        # are seen as taken.
        record = survivors.loc[surv_id]
        if (record == value).any():
            continue  # already known (this also covers the name itself)
        open_slots = record.index[record.isna()]
        if len(open_slots) > 0:
            survivors.loc[surv_id, open_slots[0]] = value
    return surv_id
        

In [363]:
def create_contact_point(**kwargs):
    """Append one contact-point record to the module-level ``contact_points`` frame.

    Parameters
    ----------
    **kwargs
        Column values for the new row (e.g. ``residence_id``, ``survivor_id``,
        ``date``, ``intaker``). Columns that are not supplied are left missing.

    Returns
    -------
    None
    """
    global contact_points
    # DataFrame.append was removed in pandas 2.0; pd.concat is the supported
    # replacement and also works on older versions.
    new_row = pd.DataFrame([kwargs])
    contact_points = pd.concat([contact_points, new_row], ignore_index=True)

    

In [364]:
# Load the Nashville Codes damage-assessment export, drop rows without an
# address and exact duplicate rows.
nash_codes = (
    pd.read_csv(
        '../data/CodesDamageAssessment_4_15_21.csv',
        parse_dates=['CreationDate'],
        dtype={
            'Address': 'object',
            'Extent of Damage': 'object',
            'Type of Structure': 'object',
            'Water Level': 'float64',
            # The loop below reads row['Notes']; the mapping previously named
            # 'Note', so the intended dtype was never applied to this column.
            'Notes': 'object',
            'Creator': 'object',
        },
    )
    .dropna(subset=['Address'])
    .drop_duplicates(ignore_index=True)
)

# Keep residential structures whose damage extent is not the literal 'None'.
# NOTE(review): pandas >= 2.0 appears to treat the string 'None' as a missing
# value when reading CSVs by default; if so this comparison no longer filters
# those rows out -- confirm against the pandas version in use.
mask = (nash_codes['Type of Structure'] == 'Residential') & (nash_codes['Extent of Damage'] != 'None')

for ind, row in nash_codes[mask].iterrows():
    add = row['Address'].upper().strip()
    res_id = find_or_add_residence(address=add)
    surv_id = None  # no survivor is attached to these contact points
    # NOTE(review): only 'Address' is checked for missing values above; a
    # missing 'Creator' would make the string concatenation below raise.
    create_contact_point(
        residence_id=res_id,
        survivor_id=surv_id,
        date=row['CreationDate'],
        water_lvl=row['Water Level'],
        damage_lvl=row['Extent of Damage'],
        notes1=row['Notes'],
        intaker='Nashville Codes: ' + row['Creator'],
    )

In [365]:
# Load the OEM export with every listed column read as text, blank out
# missing values so string concatenation below is safe, and drop exact
# duplicate rows.
oem = (
    pd.read_csv(
        '../data/OEM_4_12_21.csv',
        dtype=dict.fromkeys(
            ['PropHouse', 'PropZip', 'PropStreet', 'Extent of Damage',
             'Water Level', 'Notes', 'Damage 2'],
            'object',
        ),
    )
    .fillna('')
    .drop_duplicates(ignore_index=True)
)

# Skip rows whose damage extent is the literal string 'None'.
mask = oem['Extent of Damage'] != 'None'

for ind, row in oem[mask].iterrows():
    # Address is "<house number> <street>", upper-cased, each part trimmed.
    add = ' '.join([row['PropHouse'].upper().strip(), row['PropStreet'].upper().strip()])
    res_id = find_or_add_residence(address=add)
    surv_id = None  # no survivor is attached to these contact points
    create_contact_point(
        residence_id=res_id,
        survivor_id=surv_id,
        date=pd.to_datetime('2021-04-12'),
        water_lvl=row['Water Level'],
        damage_lvl=row['Extent of Damage'],
        notes1='NOTES: ' + row['Notes'],
        notes2='DAMAGE 2: ' + row['Damage 2'],
        intaker='OEM',
    )

In [366]:
# Load the NERVE export. The column at position 10 is parsed as dates, the
# listed columns are read as text, and missing values become '' so the string
# concatenations below are safe.
nerve_4_9 = pd.read_csv(
    '../data/NERVE_040921.csv',
    parse_dates=[10],
    dtype=dict.fromkeys(
        ['Street No.', 'Street', 'First Name', 'Last Name', 'Email',
         'Phone Number', 'Structure Damage Level',
         'Property Damage Described', 'Comments',
         'Other - Property Damage Described'],
        'object',
    ),
).fillna('')

for ind, row in nerve_4_9.iterrows():
    add = ' '.join([row['Street No.'].upper().strip(), row['Street'].upper().strip()])
    res_id = find_or_add_residence(address=add)

    name = ' '.join([row['First Name'].upper().strip(), row['Last Name'].upper().strip()])
    # When both name parts are blank only the joining space is left; such rows
    # get no survivor record.
    surv_id = (
        None
        if name == ' '
        else find_or_add_survivor(
            name=name,
            contact_1=row['Phone Number'].strip(),
            contact_2=row['Email'].strip(),
        )
    )

    create_contact_point(
        residence_id=res_id,
        survivor_id=surv_id,
        date=row['CreationDate'],
        damage_lvl=row['Structure Damage Level'],
        intaker='NERVE: Self Reported',
        notes1='Property Damage Described: ' + row['Property Damage Described'],
        notes2='Comments:' + row['Comments'],
        notes3='Other - Property Damage Described: ' + row['Other - Property Damage Described'],
    )

In [367]:
# Summarise the contact_points frame (row count, non-null counts, dtypes)
# as it stands at this point in the notebook.
contact_points.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 852 entries, 0 to 851
Data columns (total 9 columns):
 #   Column        Non-Null Count  Dtype         
---  ------        --------------  -----         
 0   residence_id  852 non-null    object        
 1   survivor_id   26 non-null     object        
 2   date          852 non-null    datetime64[ns]
 3   water_lvl     500 non-null    object        
 4   damage_lvl    852 non-null    object        
 5   notes1        509 non-null    object        
 6   notes2        462 non-null    object        
 7   notes3        220 non-null    object        
 8   intaker       852 non-null    object        
dtypes: datetime64[ns](1), object(8)
memory usage: 60.0+ KB


In [368]:
# Load the canvass sheet with every listed column read as text; missing
# values become '' so the string concatenations below are safe.
canvass_3_30 = pd.read_csv(
    '../data/canvass_3_30.csv',
    dtype={
        'Street Number': 'object',
        'Street Name': 'object',
        'Name': 'object',
        'Language Preference': 'object',
        'Saturday Help': 'object',
        'Phone Number': 'object',
        'Notes': 'object',
    },
).fillna('')

for ind, row in canvass_3_30.iterrows():
    add = row['Street Number'].upper().strip() + ' ' + row['Street Name'].upper().strip()
    res_id = find_or_add_residence(address=add)

    name = row['Name'].upper().strip()
    if name == '':
        surv_id = None  # nobody named on this row
    else:
        surv_id = find_or_add_survivor(name=name, contact_1=row['Phone Number'].strip())

    # NOTE(review): the file is named canvass_3_30 but contact points are
    # dated 2021-03-31 -- confirm which date is intended.
    create_contact_point(
        residence_id=res_id,
        survivor_id=surv_id,
        date=pd.to_datetime('2021-03-31'),
        intaker='Canvass Volunteer from Hands On Nashville',
        notes1='Language Preference: ' + row['Language Preference'],
        # The label was 'Notes' with no separator, producing "Notes<text>";
        # use the same "Label: " form as the neighbouring fields.
        notes2='Notes: ' + row['Notes'],
        notes3='Followup: ' + row['Saturday Help'],
    )

In [260]:
# Display the survivors table.
# NOTE(review): the rendered output contains names, phone numbers and e-mail
# addresses; consider clearing this output before sharing the notebook.
survivors

Unnamed: 0,name,contact_1,contact_2,contact_3
0,ALE MEDINA,6155781419,ale06m@yahoo.com,
1,MARIA RODRIGUEZ-CHAPMAN,6159390648,animallvrlincoln@gmail.com,
2,LINDA SLAYTON,615-832-4065,mismusic@comcast.net,
3,CASSIE LONG,6152102008,Cassielong1008@gmail.com,
4,JOHNNY GRIFFIN,6153304896,gjohnny00@gmail.com,
5,SHARI SMITH,16159443626,sharismith4550@att.net,
6,SHANNON BANKS,5746067065,Shannonekennedy@gmail.com,
7,L CURRY,615-415-9142,ncmailbox@aol.com,
8,JOHN WISEMAN,931-638-3241,johndillonwiseman@gmail.com,
9,STEPHANIE OWENS,4349071295,stephaniekowens@yahoo.com,


In [369]:
# Summarise contact_points again to see how the counts changed after the
# canvass rows were added.
contact_points.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 9 columns):
 #   Column        Non-Null Count  Dtype         
---  ------        --------------  -----         
 0   residence_id  891 non-null    object        
 1   survivor_id   59 non-null     object        
 2   date          891 non-null    datetime64[ns]
 3   water_lvl     500 non-null    object        
 4   damage_lvl    852 non-null    object        
 5   notes1        548 non-null    object        
 6   notes2        501 non-null    object        
 7   notes3        259 non-null    object        
 8   intaker       891 non-null    object        
dtypes: datetime64[ns](1), object(8)
memory usage: 62.8+ KB


In [263]:
# Show the residence stored at row position 187.
residences.iloc[187]

address      521 CATHY JO CIR
zipcode                   NaN
latitude                  NaN
longitude                 NaN
Name: 187, dtype: object

In [261]:
# Count how many contact points reference each residence id, most frequent first.
contact_points['residence_id'].value_counts()

153    3
633    3
64     3
15     3
187    2
      ..
308    1
309    1
310    1
311    1
805    1
Name: residence_id, Length: 806, dtype: int64