In [1]:
import pandas as pd

In [2]:
# Load the five raw source tables, one CSV per kind of health facility.
nmc, chc, hos, pharm, pp = (
    pd.read_csv(csv_name)
    for csv_name in (
        'barrio_medical_center.csv',
        'community_health_center.csv',
        'hospitals.csv',
        'pharmacies.csv',
        'private_hospital.csv',
    )
)

In [3]:
# Show the column names of each raw table to see how the schemas differ.
for raw_table in (nmc, chc, hos, pharm, pp):
    print(raw_table.columns)

Index(['long', 'lat', 'objectid', 'nombre', 'domicilio', 'piso_dto',
       'telefono', 'area_prog', 'region_san', 'calle', 'calle2', 'altura',
       'altura2', 'barrio', 'comuna', 'codigo_postal',
       'codigo_postal_argentino', 'especialid', 'dom_mapa', 'dom_geo'],
      dtype='object')
Index(['long', 'lat', 'id', 'nombre', 'telefono', 'jefe', 'area_progr',
       'region_san', 'comuna', 'barrio', 'codigo_postal',
       'codigo_postal_argentino', 'especialid', 'calle', 'altura', 'calle2',
       'direccion', 'observacio', 'nom_ante', 'vih', 'servicios_', 'efe_salud',
       'vacunat_'],
      dtype='object')
Index(['long', 'lat', 'id', 'nombre', 'nom_map', 'objeto', 'calle', 'altura',
       'dom_geo', 'telefono', 'guardia', 'fax', 'web', 'tipo', 'tipo_espec',
       'mod_at_1', 'mod_at_2', 'depend_adm', 'director', 'dom_norma', 'barrio',
       'comuna', 'codigo_postal', 'codigo_postal_argentino', 'vhi',
       'servicios_', 'efe_salud', 'vacunat_'],
      dtype='object')
Index(

In [4]:
# Bring a raw facility table down to the four columns shared by every source.

def cleanData(df):
    """Return a copy of ``df`` holding only Barrio, Name, Longitude and Latitude.

    The source tables spell these columns differently, so the known
    spellings are first renamed to the unified names. Every other column
    is discarded, and rows with a missing value in any of the four kept
    columns are dropped. The input dataframe itself is not modified and
    the surviving rows keep their original index labels.
    """
    # Unified column name -> spellings used by the different source tables.
    aliases = {
        'Longitude': ('long', 'LNG'),
        'Latitude': ('lat', 'LAT'),
        'Barrio': ('barrio', 'BARRIO'),
        'Name': ('nombre', 'NOMBRE', 'calle_nombre'),
    }
    # Invert the table into the {old: new} form expected by DataFrame.rename.
    renames = {old: new for new, olds in aliases.items() for old in olds}

    renamed = df.rename(columns=renames)

    # Keep only the shared columns, in this fixed order.
    wanted = ['Barrio', 'Name', 'Longitude', 'Latitude']
    trimmed = renamed[wanted]

    return trimmed.dropna()

In [5]:
# Reduce every raw table to the shared Barrio / Name / Longitude / Latitude layout.
nmc, chc, hos, pharm, pp = map(cleanData, (nmc, chc, hos, pharm, pp))

In [6]:
# Label every row with its facility category, then preview the table.
nmc = nmc.assign(Type='barrio medical center')
nmc.head()

Unnamed: 0,Barrio,Name,Longitude,Latitude,Type
0,Villa Santa Rita,CMB N° 2,-58.482987,-34.622156,barrio medical center
1,Liniers,CMB N° 3,-58.526465,-34.634808,barrio medical center
2,Flores,CMB N° 4,-58.452554,-34.635635,barrio medical center
3,San Cristobal,CMB N° 6,-58.393664,-34.619627,barrio medical center
4,Liniers,CMB N° 7,-58.519241,-34.641822,barrio medical center


In [7]:
# Label every row with its facility category, then preview the table.
chc = chc.assign(Type='community health center')
chc.head()

Unnamed: 0,Barrio,Name,Longitude,Latitude,Type
0,RETIRO,Centro de Salud Nivel 1 Retiro 3 (CeSAC Nº 47),-58.379536,-34.584619,community health center
1,VILLA LUGANO,Centro de Salud Nivel 1 Villa Lugano 6 (CeSAC ...,-58.466629,-34.677809,community health center
2,VILLA LUGANO,Centro de Salud Nivel 1 Villa Lugano 7 (CeSAC ...,-58.467364,-34.66073,community health center
3,ALMAGRO,Centro de Salud Nivel 1 Almagro (CeSAC N° 38),-58.42157,-34.606588,community health center
4,BOCA,Centro de Salud Nivel 1 La Boca 2 (CeSAC N° 41),-58.357925,-34.631327,community health center


In [8]:
# Label every row with its facility category, then preview the table.
hos = hos.assign(Type='public hospital')
hos.head()

Unnamed: 0,Barrio,Name,Longitude,Latitude,Type
0,BARRACAS,HOSPITAL GENERAL DE NIÑOS PEDRO DE ELIZALDE,-58.377551,-34.628847,public hospital
1,RECOLETA,HOSPITAL GENERAL DE NIÑOS RICARDO GUTIERREZ,-58.41207,-34.594192,public hospital
2,RECOLETA,HOSPITAL DE ODONTOLOGIA DR. RAMON CARRILLO (EX...,-58.400514,-34.584767,public hospital
3,BARRACAS,HOSPITAL DE SALUD MENTAL BRAULIO MOYANO,-58.385156,-34.639404,public hospital
4,PARQUE PATRICIOS,HOSPITAL DE GASTROENTEROLOGIA B. UDAONDO,-58.391311,-34.634154,public hospital


In [9]:
# Label every row with its facility category, then preview the table.
pharm = pharm.assign(Type='pharmacy')
pharm.head()

Unnamed: 0,Barrio,Name,Longitude,Latitude,Type
0,Monte Castro,AV LOPE DE VEGA,-58.508072,-34.626815,pharmacy
1,Villa Gral. Mitre,AV JUAN B. JUSTO,-58.468976,-34.611707,pharmacy
2,Recoleta,GUEMES,-58.409305,-34.591761,pharmacy
3,Liniers,CARHUE,-58.524821,-34.639616,pharmacy
4,Monte Castro,AV ALVAREZ JONTE,-58.50665,-34.620429,pharmacy


In [10]:
# Label every row with its facility category, then preview the table.
pp = pp.assign(Type='private hospital')
pp.head()

Unnamed: 0,Barrio,Name,Longitude,Latitude,Type
0,Palermo,ALPI - INSTITUTO DE REHABILITACION MARCELO FIT...,-58.417333,-34.59163,private hospital
1,Almagro,CAMI - MEDICINA DEL TRABAJO,-58.420476,-34.612884,private hospital
2,Recoleta,CEMIC - Sede Las Heras,-58.402039,-34.583653,private hospital
3,Saavedra,CEMIC - Sede Saavedra,-58.495217,-34.556983,private hospital
4,Balvanera,CENTRO GALLEGO DE BUENOS AIRES,-58.397299,-34.613983,private hospital


In [11]:
# Stack the five labelled tables into one; each row keeps the index label it had in its own table.
facility_tables = [nmc, chc, hos, pharm, pp]
df = pd.concat(facility_tables)

In [12]:
# Upper-case the barrio names so the sources no longer differ by letter case.
df = df.assign(Barrio=df['Barrio'].str.upper())

In [13]:
# The sources spell some barrios differently (or with broken characters);
# rewrite those variants so every barrio ends up with a single name.

# Turn the current index into a regular 'index' column and renumber the rows.
df = df.reset_index()

# (wrong, right) pairs, applied in this order as plain substring replacements.
# Order matters: 'PARQUE AVELLANED' also matches names that are already
# complete, and the resulting 'PARQUE AVELLANEDAA' is repaired by a later pair.
barrio_fixes = (
    ('AGRONOMÍA', 'AGRONOMIA'),
    ('NUNEZ', 'NUÑEZ'),
    ('NU?ÆEZ', 'NUÑEZ'),
    ('NU?EZ', 'NUÑEZ'),
    ('NU�EZ', 'NUÑEZ'),
    ('GOGHLAND', 'COGHLAN'),
    ('PARQUE AVELLANED', 'PARQUE AVELLANEDA'),
    ('SARSFIELD', 'SARFIELD'),
    ('VILLA GRAL. MITRE', 'VILLA GENERAL MITRE'),
    ('VILLA GRAL. MITR', 'VILLA GENERAL MITRE'),
    ('VILLA GRAL MITRE', 'VILLA GENERAL MITRE'),
    ('VILLA PUYRREDON', 'VILLA PUEYRREDON'),
    ('PARQUE AVELLANEDAA', 'PARQUE AVELLANEDA'),
    ('VERSALLES', 'VERSAILLES'),
    ('MONSERRAT', 'MONTSERRAT'),
)


def _apply_barrio_fixes(name):
    """Run every (wrong, right) replacement over one barrio name, in order."""
    for wrong, right in barrio_fixes:
        name = str(name.replace(wrong, right))
    return name


df['Barrio'] = df['Barrio'].apply(_apply_barrio_fixes)

In [14]:
# Count the distinct barrio names left after the corrections, then list them alphabetically.
barrio_names = df['Barrio'].unique()
print(len(barrio_names))
sorted(barrio_names)

50


['AGRONOMIA',
 'ALMAGRO',
 'BALVANERA',
 'BARRACAS',
 'BELGRANO',
 'BOCA',
 'BOEDO',
 'CABALLITO',
 'CHACARITA',
 'COGHLAN',
 'COLEGIALES',
 'CONSTITUCION',
 'FLORES',
 'FLORESTA',
 'LA BOCA',
 'LINIERS',
 'MATADEROS',
 'MONTE CASTRO',
 'MONTSERRAT',
 'NUEVA POMPEYA',
 'NUÑEZ',
 'PALERMO',
 'PARQUE AVELLANEDA',
 'PARQUE CHACABUCO',
 'PARQUE CHAS',
 'PARQUE PATRICIOS',
 'PATERNAL',
 'PUERTO MADERO',
 'RAMON CARRILLO',
 'RECOLETA',
 'RETIRO',
 'SAAVEDRA',
 'SAN CRISTOBAL',
 'SAN NICOLAS',
 'SAN TELMO',
 'VELEZ SARFIELD',
 'VERSAILLES',
 'VILLA CRESPO',
 'VILLA DEL PARQUE',
 'VILLA DEVOTO',
 'VILLA GENERAL MITRE',
 'VILLA LUGANO',
 'VILLA LURO',
 'VILLA ORTUZAR',
 'VILLA PUEYRREDON',
 'VILLA REAL',
 'VILLA RIACHUELO',
 'VILLA SANTA RITA',
 'VILLA SOLDATI',
 'VILLA URQUIZA']

In [15]:
# Replace each facility's barrio name with the numeric id from the shared barrio table.
barrio_df = pd.read_csv('../../barrio_table.csv')

# Name -> id lookup built once, instead of filtering barrio_df again for every
# row and calling int() on the resulting one-element Series (deprecated in
# recent pandas).
# Assumes each Barrio appears only once in barrio_table.csv -- TODO confirm.
barrio_ids = barrio_df.set_index('Barrio')['id']

# Rows whose barrio name is not present in the table are discarded here.
# Building the result with assign() on the filtered frame avoids writing a new
# column into a slice of the original dataframe.
df = (
    df[df['Barrio'].isin(barrio_ids.index)]
    # After the filter every name has a match, so the integer cast is safe.
    .assign(b_id=lambda known: known['Barrio'].map(barrio_ids).astype('int64'))
    .drop(columns='Barrio')
)

In [16]:
# Preview the final table; the Barrio column has been replaced by b_id.
df.head()

Unnamed: 0,index,Name,Longitude,Latitude,Type,b_id
0,0,CMB N° 2,-58.482987,-34.622156,barrio medical center,31
1,1,CMB N° 3,-58.526465,-34.634808,barrio medical center,21
2,2,CMB N° 4,-58.452554,-34.635635,barrio medical center,19
3,3,CMB N° 6,-58.393664,-34.619627,barrio medical center,5
4,4,CMB N° 7,-58.519241,-34.641822,barrio medical center,21


In [17]:
# Save the table as health_table.csv; index=False leaves out the dataframe's row labels.
# NOTE(review): the 'index' column added by reset_index() earlier is a regular
# column and is still written to the file -- confirm that is intended.
df.to_csv('health_table.csv', index=False)