In [1]:
import os
import re
import urllib.request

import pandas as pd
from bs4 import BeautifulSoup

In [2]:
# Get the greatest value of bedrooms, bathrooms, garage, area, etc.
# This only matters when there is more than one result; otherwise the single value is returned.
# icons: list of icon elements whose values are compared to find the greatest one
def getGreatestValue(icons):
    """Return the largest numeric value displayed next to any of the given icons.

    Each icon's value is read from the text of the ``span`` reached through
    the icon's parent element. Every non-digit character is stripped and the
    remaining digits are compared as integers.

    icons: iterable of icon elements exposing ``.parent.span``.

    Returns the winning value as a string of digits, or the integer 0 when no
    icon yields a usable number (this includes an empty ``icons``).
    """
    greatestValue = None

    for icon in icons:
        span = icon.parent.span
        if span is None:
            # No value label beside this icon, so there is nothing to compare.
            continue

        # NOTE(review): stripping every non-digit also removes decimal
        # separators, so a label such as "126,03" would become "12603" --
        # confirm against the number format used on the page.
        value = re.sub("[^0-9]", "", span.text)
        if not value:
            # Label without digits: int('') would raise ValueError, skip it.
            continue

        if greatestValue is None or int(value) > int(greatestValue):
            greatestValue = value

    # 0 remains the "nothing found" result so existing callers keep working.
    return greatestValue if greatestValue is not None else 0

# Get property info using the parent element and a single icon class (bedrooms, bathrooms, area, etc.)
# element: parent div that contains all the expected information
# icon: class of the icon whose value should be read
def getPropertyInfoByIcon(element, icon):
    """Look up the value associated with an icon class inside ``element``.

    element: parent element expected to contain the ``<i>`` icons; ``None``
        is tolerated (e.g. when the info block was not found on the page).
    icon: value of the ``class`` attribute identifying the icon.

    Returns '' when ``element`` is None or holds no matching icon; otherwise
    the result of ``getGreatestValue`` over all matching icons.
    """
    if element is None:
        # The lookup that produced ``element`` found nothing; report "no value".
        return ''

    # find_all is the current name of the deprecated findAll alias.
    icons = element.find_all('i', attrs={'class': icon})

    if not icons:
        return ''

    return getGreatestValue(icons)

# Extract information about each property
# allProperties: all property div elements from which the information about each property is extracted
def extractAllPropertyInfo(allProperties):
    """Fetch each property's detail page and collect its data.

    allProperties: iterable of listing elements from the results page. Each
        one supplies the id, type, prices and the link to its detail page.

    For every listing the detail page is downloaded and parsed to obtain the
    location and the icon-based figures (bedrooms, bathrooms, suites, garage
    spaces, area). Each record is printed as it is built, and a final count
    is printed once all listings are processed.

    Returns a list of dicts, one per listing.
    """
    propertiesResult = []

    for item in allProperties:
        itemUrl = 'https://www.cciimoveismg.com.br' + item.find('div', attrs={'class': 'dados'}).parent['href']

        # Parse inside a ``with`` block so the HTTP response is closed as soon
        # as it has been read instead of leaking one connection per listing.
        with urllib.request.urlopen(itemUrl) as page:
            result = BeautifulSoup(page, 'html.parser')

        content = result.find('div', attrs={'class': 'conteudo_imovel'})
        info = content.find('div', attrs={'class': 'infos_imovel'})

        # The rent box is optional; look it up once and reuse the result.
        rentPriceBox = item.find('div', attrs={'class': 'valor sep_valor'})

        # NOTE(review): in the rental run some records carry the same amount
        # in sellPrice and rentPrice -- the 'valor' lookup may be matching the
        # rent box on rent-only listings; confirm against the page markup.
        defaultResult = {
            'id': item['id'],
            'type': item.find('h3', attrs={'class': 'tipo'}).text,
            'location': content.find('h2', attrs={'class': 'localizacao'}).span.text,
            'badroom': getPropertyInfoByIcon(info, 'icon bed'),
            'bathroom': getPropertyInfoByIcon(info, 'icon shower'),
            'suite': getPropertyInfoByIcon(info, 'icon bath'),
            'garage': getPropertyInfoByIcon(info, 'icon car'),
            'area': getPropertyInfoByIcon(info, 'icon expand'),
            'sellPrice': item.find('div', attrs={'class': 'valor'}).find('h5').text,
            'rentPrice': rentPriceBox.find('h5').text if rentPriceBox is not None else ''
        }

        propertiesResult.append(defaultResult)

        print(defaultResult)

    print(str(len(propertiesResult)) + ' properties finded')

    return propertiesResult

# Get All properties of the page
# pageLink: link of the current page to get all properties inside it
def getAllProperties(pageLink):
    """Download a results page and extract the data of every listing on it.

    pageLink: URL of the results page to scrape.

    Returns the list of dicts produced by ``extractAllPropertyInfo`` for the
    listing elements found inside the ``todos_imoveis`` / ``lista`` container.
    If that container is absent, ``find`` returns None and the following
    lookup raises AttributeError.
    """
    # Close the HTTP response once the page has been parsed.
    with urllib.request.urlopen(pageLink) as page:
        soup = BeautifulSoup(page, 'html.parser')

    listContainer = soup.find('div', attrs={'class': 'todos_imoveis', 'id': 'lista'})
    # find_all is the current name of the deprecated findAll alias.
    allProperties = listContainer.find_all(
        'div',
        attrs={'class': ['resultado resultado_lista resultado_', 'resultado resultado_lista resultado_last']})

    return extractAllPropertyInfo(allProperties)

In [3]:
# Accumulator for every scraped listing; the scraping cells below append to it.
resultantProperties = list()

In [4]:
# Scrape the for-sale listings and append them to the shared accumulator.
# The list is extended in place, so re-running this cell adds the same rows again.
# NOTE(review): "quantidade-197" presumably asks for 197 listings on one page -- confirm.
resultantProperties += getAllProperties('https://www.cciimoveismg.com.br/comprar/mg/santa-rita-do-sapucai/quantidade-197/')

{'id': '69918379', 'type': 'CASA', 'location': 'SANTANA II - SANTA RITA DO SAPUCAÍ/MG', 'badroom': '3', 'bathroom': '2', 'suite': '1', 'garage': '2', 'area': '300', 'sellPrice': 'R$ 1.100,00', 'rentPrice': ''}
{'id': '2458644', 'type': 'CHACARA', 'location': 'Proximo ao balaio - Santa Rita do Sapucai/MG', 'badroom': '', 'bathroom': '', 'suite': '', 'garage': '', 'area': '1000', 'sellPrice': 'R$ 30.000,00', 'rentPrice': ''}
{'id': '68744642', 'type': 'TERRENO', 'location': 'JARDIM INTERLAGOS - SANTA RITA DO SAPUCAI/MG', 'badroom': '', 'bathroom': '', 'suite': '', 'garage': '', 'area': '250', 'sellPrice': 'Valor sob consulta', 'rentPrice': ''}
{'id': '4588211', 'type': 'TERRENO', 'location': 'SANTANA II - santa rita do sapucaí/MG', 'badroom': '', 'bathroom': '', 'suite': '', 'garage': '', 'area': '300', 'sellPrice': 'R$ 63.000,00', 'rentPrice': ''}
{'id': '68578026', 'type': 'CHACARA', 'location': 'SAO JOSE - SANTA RITA DO SAPUCAI/MG', 'badroom': '', 'bathroom': '', 'suite': '', 'garage'

{'id': '68515287', 'type': 'TERRENO', 'location': 'SANTANA II - SANTA RITA DO SAPUCAI/MG', 'badroom': '', 'bathroom': '', 'suite': '', 'garage': '', 'area': '606', 'sellPrice': 'R$ 160.000,00', 'rentPrice': ''}
{'id': '34122374', 'type': 'TERRENO', 'location': 'SANTANA I - Santa Rita do Sapucaí/MG', 'badroom': '', 'bathroom': '', 'suite': '', 'garage': '', 'area': '342', 'sellPrice': 'R$ 160.000,00', 'rentPrice': ''}
{'id': '4519331', 'type': 'TERRENO', 'location': 'Vila das Fontes - Santa Rita do Sapucai/MG', 'badroom': '', 'bathroom': '', 'suite': '', 'garage': '', 'area': '574', 'sellPrice': 'R$ 160.000,00', 'rentPrice': ''}
{'id': '69863666', 'type': 'TERRENO', 'location': 'SANTANA - SANTA RITA DO SAPUCAÍ/MG', 'badroom': '', 'bathroom': '', 'suite': '', 'garage': '', 'area': '341', 'sellPrice': 'R$ 160.000,00', 'rentPrice': ''}
{'id': '34274641', 'type': 'APARTAMENTO', 'location': 'são jose - santa rita do sapucaí/MG', 'badroom': '2', 'bathroom': '1', 'suite': '1', 'garage': '', 'a

{'id': '68371605', 'type': 'CASA', 'location': 'Vista Alegre - Santa Rita do Sapucaí/MG', 'badroom': '3', 'bathroom': '1', 'suite': '', 'garage': '2', 'area': '200', 'sellPrice': 'R$ 300.000,00', 'rentPrice': ''}
{'id': '69521396', 'type': 'CASA', 'location': 'VISTA ALEGRE - SANTA RITA DO SAPUCAI/MG', 'badroom': '3', 'bathroom': '2', 'suite': '', 'garage': '2', 'area': '200', 'sellPrice': 'R$ 300.000,00', 'rentPrice': ''}
{'id': '3306517', 'type': 'CASA', 'location': 'Santana II - Santa Rita do Sapucai/MG', 'badroom': '3', 'bathroom': '2', 'suite': '', 'garage': '2', 'area': '373', 'sellPrice': 'R$ 300.000,00', 'rentPrice': ''}
{'id': '2467493', 'type': 'GALPAO', 'location': 'Monte Belo - Santa Rita do Sapucai/MG', 'badroom': '', 'bathroom': '', 'suite': '', 'garage': '', 'area': '300', 'sellPrice': 'R$ 310.000,00', 'rentPrice': ''}
{'id': '68106291', 'type': 'APARTAMENTO', 'location': 'centro - Santa Rita do Sapucaí/MG', 'badroom': '2', 'bathroom': '1', 'suite': '', 'garage': '1', 'ar

{'id': '68900602', 'type': 'CASA', 'location': 'MARISTELA - SANTA RITA DO SAPUCAI/MG', 'badroom': '3', 'bathroom': '', 'suite': '1', 'garage': '3', 'area': '200', 'sellPrice': 'R$ 430.000,00', 'rentPrice': ''}
{'id': '2343068', 'type': 'SITIO', 'location': 'Balaio - Santa Rita do Sapucai/MG', 'badroom': '', 'bathroom': '', 'suite': '', 'garage': '', 'area': '2', 'sellPrice': 'R$ 440.000,00', 'rentPrice': ''}
{'id': '3300210', 'type': 'CASA', 'location': 'Vianna - Santa Rita do Sapucai/MG', 'badroom': '3', 'bathroom': '2', 'suite': '1', 'garage': '2', 'area': '240', 'sellPrice': 'R$ 440.000,00', 'rentPrice': ''}
{'id': '2899406', 'type': 'SITIO', 'location': 'São jose - Santa Rita do Sapucai/MG', 'badroom': '3', 'bathroom': '3', 'suite': '', 'garage': '', 'area': '6000', 'sellPrice': 'Valor sob consulta', 'rentPrice': ''}
{'id': '69948831', 'type': 'CASA', 'location': 'MONTE VERDE - SANTA RITA DO SAPUCAÍ/MG', 'badroom': '3', 'bathroom': '1', 'suite': '1', 'garage': '2', 'area': '297', '

{'id': '3002344', 'type': 'CASA', 'location': 'Fernandes - Santa Rita do Sapucai/MG', 'badroom': '3', 'bathroom': '2', 'suite': '1', 'garage': '2', 'area': '250', 'sellPrice': 'R$ 650.000,00', 'rentPrice': ''}
{'id': '68134742', 'type': 'CASA', 'location': 'SANTANA I - Santa Rita do Sapucaí/MG', 'badroom': '3', 'bathroom': '1', 'suite': '1', 'garage': '4', 'area': '383', 'sellPrice': 'R$ 650.000,00', 'rentPrice': ''}
{'id': '2681977', 'type': 'APARTAMENTO', 'location': 'Centro - Santa Rita do Sapucai/MG', 'badroom': '3', 'bathroom': '1', 'suite': '1', 'garage': '2', 'area': '12603', 'sellPrice': 'R$ 690.000,00', 'rentPrice': ''}
{'id': '2889329', 'type': 'CASA', 'location': 'Centro - Santa Rita do Sapucai/MG', 'badroom': '3', 'bathroom': '2', 'suite': '', 'garage': '', 'area': '540', 'sellPrice': 'R$ 700.000,00', 'rentPrice': ''}
{'id': '2563321', 'type': 'CASA', 'location': 'Santana I - Santa Rita do Sapucai/MG', 'badroom': '4', 'bathroom': '2', 'suite': '1', 'garage': '', 'area': '36

{'id': '33903905', 'type': 'CASA', 'location': 'centro - santa rita do sapucai/MG', 'badroom': '4', 'bathroom': '', 'suite': '', 'garage': '', 'area': '3950', 'sellPrice': 'R$ 3.000.000,00', 'rentPrice': ''}
{'id': '67731070', 'type': 'FAZENDA', 'location': 'Sertaozinho/Balaio - Santa Rita do Sapucai/MG', 'badroom': '', 'bathroom': '', 'suite': '', 'garage': '', 'area': '500', 'sellPrice': 'R$ 3.800.000,00', 'rentPrice': ''}
{'id': '3494883', 'type': 'SITIO', 'location': 'Sao Jose - Santa Rita do Sapucai/MG', 'badroom': '15', 'bathroom': '3', 'suite': '2', 'garage': '', 'area': '200', 'sellPrice': 'Valor sob consulta', 'rentPrice': ''}
197 properties finded


In [5]:
# Scrape the for-rent listings and append them to the shared accumulator.
# The list is extended in place, so re-running this cell adds the same rows again.
# NOTE(review): "quantidade-33" presumably asks for 33 listings on one page -- confirm.
resultantProperties += getAllProperties('https://www.cciimoveismg.com.br/alugar/mg/santa-rita-do-sapucai/quantidade-33/')

{'id': '67731262', 'type': 'SALA', 'location': 'centro - Santa Rita do Sapucai/MG', 'badroom': '', 'bathroom': '', 'suite': '', 'garage': '', 'area': '20', 'sellPrice': 'R$ 700,00', 'rentPrice': ''}
{'id': '67724120', 'type': 'APARTAMENTO', 'location': 'fernandes - santa rita do sapucai/MG', 'badroom': '2', 'bathroom': '1', 'suite': '', 'garage': '1', 'area': '46', 'sellPrice': 'R$ 180.000,00', 'rentPrice': 'R$ 750,00'}
{'id': '69783147', 'type': 'APARTAMENTO', 'location': 'JARDIM BEIRA RIO - SANTA RITA DO SAPUCAÍ/MG', 'badroom': '2', 'bathroom': '1', 'suite': '', 'garage': '1', 'area': '50', 'sellPrice': 'R$ 760,00', 'rentPrice': 'R$ 760,00'}
{'id': '32673389', 'type': 'SALA', 'location': 'Centro - Santa Rita do Sapucai/MG', 'badroom': '', 'bathroom': '1', 'suite': '', 'garage': '', 'area': '18', 'sellPrice': 'R$ 800,00', 'rentPrice': 'R$ 800,00'}
{'id': '69802564', 'type': 'APARTAMENTO', 'location': 'MONTE VERDE II - SANTA RITA DO SAPUCAÍ/MG', 'badroom': '2', 'bathroom': '1', 'suite'

In [6]:
# Map each scraped record key to its display label. Renaming by key (instead
# of assigning a positional list to df.columns) keeps labels attached to the
# right data regardless of key order.
columnLabels = {
    'id': 'ID',
    'type': 'Tipo',
    'location': 'Localização',
    'badroom': 'Quant. Quarto',
    'bathroom': 'Quant. Banheiro',
    'suite': 'Quant. Suite',
    'garage': 'Quant. Vagas Garagem',
    'area': 'Area Total',
    'sellPrice': 'Preço de Venda',
    'rentPrice': 'Preço de aluguel',
}

# Passing columns= up front yields a correctly shaped (empty) frame even when
# nothing was scraped, where the positional assignment raised a length mismatch.
df = pd.DataFrame(resultantProperties, columns=list(columnLabels)).rename(columns=columnLabels)
df

Unnamed: 0,ID,Tipo,Localização,Quant. Quarto,Quant. Banheiro,Quant. Suite,Quant. Vagas Garagem,Area Total,Preço de Venda,Preço de aluguel
0,69918379,CASA,SANTANA II - SANTA RITA DO SAPUCAÍ/MG,3,2,1,2,300,"R$ 1.100,00",
1,2458644,CHACARA,Proximo ao balaio - Santa Rita do Sapucai/MG,,,,,1000,"R$ 30.000,00",
2,68744642,TERRENO,JARDIM INTERLAGOS - SANTA RITA DO SAPUCAI/MG,,,,,250,Valor sob consulta,
3,4588211,TERRENO,SANTANA II - santa rita do sapucaí/MG,,,,,300,"R$ 63.000,00",
4,68578026,CHACARA,SAO JOSE - SANTA RITA DO SAPUCAI/MG,,,,,1000,"R$ 75.000,00",
...,...,...,...,...,...,...,...,...,...,...
225,69413267,CASA,CENTRO - SANTA RITA DO SAPUCAI/MG,4,2,1,2,600,"R$ 3.200,00","R$ 3.200,00"
226,4901318,CASA,Centro - Santa Rita do Sapucaí/MG,5,3,1,5,840,"R$ 2.300.000,00","R$ 3.500,00"
227,68168324,GALPAO,Fernandes - Santa Rita do Sapucaí/MG,,,,,400,"R$ 4.000,00","R$ 4.000,00"
228,68658073,GALPAO,FERNANDES - SANTA RITA DO SAPUCAI/MG,,2,,2,400,"R$ 4.000,00","R$ 4.000,00"


In [7]:
# Write the table to disk. to_csv does not create missing parent directories,
# so make sure the target folder exists before the (slow) scrape result is saved.
outputPath = r'datasets/data.csv'
os.makedirs(os.path.dirname(outputPath), exist_ok=True)
df.to_csv(outputPath, index=False, header=True)