# Data Acquisition

In [2]:
import requests
from bs4 import BeautifulSoup
import pandas as pd 

In [3]:
# Search-results URL for residential apartment rentals in São Paulo.
# It deliberately ends with 'pagina=' so the page number can be appended directly.
base_URL = 'https://www.vivareal.com.br/aluguel/sp/sao-paulo/apartamento_residencial/?pagina='

In [4]:
# fetch ads from several result pages and return them as a single DataFrame
def scrap(url, pages=2, timeout=30):
    '''Scrape rental listings from consecutive search-result pages.

    Parameters
    ----------
    url : str
        Search URL ending with the page query parameter; the page number is
        appended to it directly (``url + str(page_number)``).
    pages : int, default 2
        Exclusive upper bound of the page range: pages ``1 .. pages - 1`` are
        requested, so the default fetches only page 1.
    timeout : float, default 30
        Seconds to wait for each HTTP response before giving up on that page.

    Returns
    -------
    pandas.DataFrame
        One row per ad with the text columns ``address``, ``rent``, ``rooms``,
        ``bathroom``, ``parking`` and ``area``. Per-page frames are
        concatenated as-is, so the index restarts at 0 for every page. An
        empty frame with these columns is returned when no page could be
        fetched.

    Raises
    ------
    ValueError
        If ``pages`` is smaller than 2, or if a page yields a different number
        of values per column (the columns cannot be aligned row by row).
    '''
    if pages < 2:
        raise ValueError('number of pages must be at least 2')

    # column name -> (tag name, exact class string, child tag holding the text or None)
    fields = {
        'address': ('span', 'property-card__address', None),
        'rent': ('div', 'property-card__price js-property-card-prices js-property-card__price-small', 'p'),
        'rooms': ('li', 'property-card__detail-item property-card__detail-room js-property-detail-rooms', 'span'),
        'bathroom': ('li', 'property-card__detail-item property-card__detail-bathroom js-property-detail-bathroom', 'span'),
        'parking': ('li', 'property-card__detail-item property-card__detail-garage js-property-detail-garages', 'span'),
        'area': ('span', 'property-card__detail-value js-property-card-value property-card__detail-area js-property-card-detail-area', None),
    }

    scrap_df = []  # one DataFrame per successfully scraped page
    for p_no in range(1, pages):
        search_url = url + str(p_no)
        try:
            response = requests.get(search_url, timeout=timeout)
            response.raise_for_status()
        except requests.RequestException as e:
            print(f'Failed to gather data from {search_url}')
            print(e)
            # Skip this page: parsing without a fresh response would either
            # crash or silently re-use the previous page's content.
            continue
        print(search_url)

        # Name the parser explicitly so results do not depend on which
        # optional parsers happen to be installed.
        soup = BeautifulSoup(response.content, 'html.parser')

        data = {}
        for column, (tag_name, css_class, child) in fields.items():
            tags = soup.find_all(tag_name, {'class': css_class})
            data[column] = [
                (getattr(tag, child) if child else tag).text for tag in tags
            ]

        # Each column is collected independently, so unequal counts mean the
        # values can no longer be matched to the same ad.
        lengths = {column: len(values) for column, values in data.items()}
        if len(set(lengths.values())) > 1:
            raise ValueError(
                f'Field counts differ on {search_url}: {lengths}'
            )

        df = pd.DataFrame(data)
        print(df)
        scrap_df.append(df)

    if not scrap_df:
        # Every request failed; pd.concat([]) would raise, so return an empty
        # frame with the expected schema instead.
        return pd.DataFrame(columns=list(fields))

    final_data = pd.concat(scrap_df)
    return final_data

In [6]:
# scrap() iterates range(1, pages), so pages=8 requests result pages 1 through 7
final_data = scrap(base_URL, 8)

eiros, São Pau...     R$ 4.300 /Mês    
11     Rua Viradouro, 96 - Itaim Bibi, São Paulo - SP     R$ 4.574 /Mês    
12  Rua José Antônio Coelho, 407 - Vila Mariana, S...     R$ 1.900 /Mês    
13     Rua Tamandaré, 464 - Liberdade, São Paulo - SP     R$ 1.800 /Mês    
14  Rua Comendador Miguel Calfat, 62 - Vila Nova C...     R$ 2.000 /Mês    
15  Rua da Consolação, 1515 - Consolação, São Paul...     R$ 2.700 /Mês    
16  Avenida Moaci, 780 - Planalto Paulista, São Pa...     R$ 2.900 /Mês    
17  Rua Desembargador do Vale, 350 - Perdizes, São...     R$ 6.500 /Mês    
18  Avenida Onze de Junho, 730 - Vila Clementino, ...     R$ 2.290 /Mês    
19     Rua Pamplona - Jardim Paulista, São Paulo - SP     R$ 2.600 /Mês    
20      Rua Inhambú - Vila Uberabinha, São Paulo - SP    R$ 55.000 /Mês    
21                          Pinheiros, São Paulo - SP     R$ 3.800 /Mês    
22  Rua Padre Carvalho, 68 - Pinheiros, São Paulo ...     R$ 2.950 /Mês    
23  Rua Coronel Camisão, 409 - Vila Gomes, São P

In [7]:
# show the concatenated listings (the index restarts for each scraped page)
final_data

Unnamed: 0,address,rent,rooms,bathroom,parking,area
0,"Rua Camillo Nader, 175 - Vila Morumbi, São Pau...",R$ 2.500 /Mês,3,3,2,125
1,"Rua Volta Redonda, 757 - Campo Belo, São Paulo...",R$ 7.000 /Mês,2,5,3,276
2,"Rua Cristiano Viana, 463 - Cerqueira César, Sã...",R$ 2.400 /Mês,1,1,1,45
3,"Rua Itararé - Bela Vista, São Paulo - SP",R$ 2.200 /Mês,1,1,1,48
4,"Avenida São João, 97 - Centro, São Paulo - SP",R$ 3.300 /Mês,2,1,1,50
...,...,...,...,...,...,...
31,"Rua Castelhano, 60 - Vila Andrade, São Paulo - SP",R$ 2.400 /Mês,1,2,1,49
32,"Rua Eli - Vila Maria Baixa, São Paulo - SP",R$ 2.000 /Mês,3,2,1,73
33,"Rua Ipanema, 67 - Mooca, São Paulo - SP",R$ 2.500 /Mês,3,2,1,63
34,"Rua Caiubi, 1159 - Perdizes, São Paulo - SP",R$ 10.000 /Mês,4,5,4,180


In [8]:
# Addresses look like '<street>[, <number>] - <suburb>, <city> - <state>', but the
# street part can be missing (e.g. 'Pinheiros, São Paulo - SP'). Peel the state
# and the city off the right-hand side, then keep whatever follows the last
# ' - ', so the suburb is found in both layouts and carries no leading space.
final_data['suburb'] = (
    final_data.address
    .str.strip()
    .str.rsplit(' - ', n=1).str[0]   # drop ' - <state>'
    .str.rsplit(', ', n=1).str[0]    # drop ', <city>'
    .str.rsplit(' - ', n=1).str[-1]  # drop '<street> - ' when present
    .str.strip()
)

In [9]:
# show the listings again, now including the derived 'suburb' column
final_data

Unnamed: 0,address,rent,rooms,bathroom,parking,area,suburb
0,"Rua Camillo Nader, 175 - Vila Morumbi, São Pau...",R$ 2.500 /Mês,3,3,2,125,Vila Morumbi
1,"Rua Volta Redonda, 757 - Campo Belo, São Paulo...",R$ 7.000 /Mês,2,5,3,276,Campo Belo
2,"Rua Cristiano Viana, 463 - Cerqueira César, Sã...",R$ 2.400 /Mês,1,1,1,45,Cerqueira César
3,"Rua Itararé - Bela Vista, São Paulo - SP",R$ 2.200 /Mês,1,1,1,48,Bela Vista
4,"Avenida São João, 97 - Centro, São Paulo - SP",R$ 3.300 /Mês,2,1,1,50,Centro
...,...,...,...,...,...,...,...
31,"Rua Castelhano, 60 - Vila Andrade, São Paulo - SP",R$ 2.400 /Mês,1,2,1,49,Vila Andrade
32,"Rua Eli - Vila Maria Baixa, São Paulo - SP",R$ 2.000 /Mês,3,2,1,73,Vila Maria Baixa
33,"Rua Ipanema, 67 - Mooca, São Paulo - SP",R$ 2.500 /Mês,3,2,1,63,Mooca
34,"Rua Caiubi, 1159 - Perdizes, São Paulo - SP",R$ 10.000 /Mês,4,5,4,180,Perdizes
