In [0]:
from requests import get
from requests.exceptions import RequestException
from contextlib import closing
from bs4 import BeautifulSoup
import re
import pandas as pd

In [0]:
def simple_get(url):
    """
    Attempts to get the content at `url` by making an HTTP GET request.
    If the content-type of response is some kind of HTML/XML, return the
    text content, otherwise return None.
    """
    try:
        with closing(get(url, stream=True)) as resp:
            if is_good_response(resp):
                return resp.content
            else:
                return None
    except RequestException as e:
        log_error('Error during requests to {0} : {1}'.format(url, str(e)))
        return None

def is_good_response(resp):
    """
    Returns True if the response seems to be HTML, False otherwise.
    """
    content_type = resp.headers['Content-Type'].lower()
    return (resp.status_code == 200 
            and content_type is not None 
            and content_type.find('html') > -1)

def log_error(e):
    """
    It is always a good idea to log errors. 
    This function just prints them, but you can
    make it do anything.
    """
    print(e)

In [0]:
def get_Barrio(soup, key):
  barrio = []
  if key == 'Zona Cerro-Argüello': 
    for i in str(soup).splitlines():
      if 'Barrio' in i and 'title' in i and 'href' in i:
        for j in i.split('>'):
          if 'Barrio' in j and '</a'in j:
            j = j.replace('Barrio ','')
            j = j.replace('</a','')
            j = j.replace('Ã³','ó')
            j = j.replace('Ã¡','á')
            j = j.replace('Ã­','í')
            j = j.replace('Ã©','é')
            j = j.replace('Ã¼','ü')
            barrio.append(j)
  else:
    for i in str(soup).splitlines():
      if "sortable wikitable smwtable" in i:
        for i in i.split('>'):
          if 'Barrio' in i and not 'title'in i:
            i = i.replace('</a','')
            i = i.replace('Barrio ','')
            barrio.append(i)      
  return list(set(barrio))

In [0]:
def get_Barrios():
  main_url='https://nuestraciudad.info/'
  zonas = {'Zona Central':'portal/Zona_Central',
           'Zona Norte':'portal/Zona_Norte',
           'Zona Sur':'portal/Zona_Sur',
           'Zona Este':'portal/Zona_Este',
           'Zona Oeste':'portal/Zona_Oeste',
           'Zona Nordeste':'portal/Zona_Nordeste',
           'Zona Noroeste':'portal/Zona_Noroeste',
           'Zona Sudeste':'portal/Zona_Sudeste',
           'Zona Sudoeste':'portal/Zona_Sudoeste',
           'Zona Cerro-Argüello':'portal/Zona_Cerro_/_Argüello', 
           'Zona Guiñazú':'portal/Zona_Guiñazú',
          }
  
  barrios = {}
  for key, value in zonas.items():
    soup = BeautifulSoup(simple_get(main_url+value))
    barrios[key] = get_Barrio(soup, key) 
  return barrios

In [0]:
def get_DataFrame_Barrios():
  barrios = get_Barrios()
  num = len(barrios['Zona Sudeste'])
  for key, value in barrios.items():
    r = num - len(value)
    for _ in range(r):
      barrios[key].append('NaN')
  col = ['Zona Central',
         'Zona Norte',
         'Zona Sur',
         'Zona Este',
         'Zona Oeste',
         'Zona Nordeste',
         'Zona Noroeste',
         'Zona Sudeste',
         'Zona Sudoeste',
         'Zona Cerro-Argüello', 
         'Zona Guiñazú',
        ]
  return pd.DataFrame(barrios, columns=col)

In [108]:
df = get_DataFrame_Barrios()
df.head()

Unnamed: 0,Zona Central,Zona Norte,Zona Sur,Zona Este,Zona Oeste,Zona Nordeste,Zona Noroeste,Zona Sudeste,Zona Sudoeste,Zona Cerro-Argüello,Zona Guiñazú
0,Cáceres,Parque Liceo Sección 1,Mirizzi,General Pueyrredón,Ampliación Las Palmas,La Hortensia,San Martín Anexo,José Ignacio Díaz Sección 5,Guarnición Aérea Córdoba,Villa Serrana,Guiñazú Sud
1,Güemes,Cofico,Irupé,Yofre Sud,Chateau Carreras,Residencial Aragón,San Martín Norte,Mirador,Ampliación Rosedal,Parque Modelo,Guiñazú
2,Nueva Córdoba,Jerónimo Luis de Cabrera,San Antonio,Hogar Propio,Puente Blanco,Vivero Norte,Las Margaritas,Ampliación San Pablo,Parque Republica,UOCRA,Remedios de Escalada
3,Observatorio,Ampliación Panamericano,Cabo Farina,Juniors,Country Lomas de la Carolina,Villa Retiro,Lomas de San Martín,Villa Revol,Achával Peña,Granja de Funes,Recreo del Norte
4,Paso de los Andes,Panamericano,Vicor,Ampliación Yapeyú,Ampliación Los Plátanos,Parque Liceo Sección 3,Altos de Villa Cabrera,Ferroviario Mitre,Residencial Olivos,Villa 9 de Julio,Liceo General Paz


In [0]:
df.to_csv('Barrios.csv', encoding='utf_8_sig')