# imports

In [1]:
from bs4 import BeautifulSoup
from datetime import date
from requests import get
import pandas as pd
today = date.today()

# crawler

## defining url

In [2]:
url = 'https://www.pararius.com/apartments/nederland/page-'
response = get(url+'1')
html_soup = BeautifulSoup(response.text, 'html.parser')

## get numbers to define iteractions

In [3]:
num_pages = html_soup.find_all('a',{"class":"pagination__link"})[4].text
num_pages = int(num_pages)

num_rents = html_soup.find_all('div',{"class":"pagination__summary"})[0].text
num_rents = int(num_rents.split()[-2])

In [4]:
print(url+'1')
print(num_pages)
print(num_rents)

https://www.pararius.com/apartments/nederland/page-1
176
5269


## get total html soup

In [5]:
houses = []
for count in range(1, num_pages+1):
    try:
        response = get(url + str(count))
        html_soup = BeautifulSoup(response.text, 'html.parser')
        house_data = html_soup.find_all('li', class_="search-list__item search-list__item--listing")
        houses.append(house_data)
    except:
        pass

## get data

In [6]:
# description_=[]
price_=[]
address_=[]
street_=[]
agency_=[]
irl_=[]
image_=[]
description_2=[]

for n_pages in range (num_pages):
    n_pages_len = len(houses[n_pages])
    for n_rents in range (n_pages_len):
        num = (houses[n_pages][n_rents])
        list_li = num.find_all('li',{"class":"illustrated-features__item"})
        description_1={}
        
        try:
            p5_value = num.find_all('span',{"class":"listing-label listing-label--under-option"})[0].text
        except:
            p5_value = None
        
        for i in range(len(list_li)):
            a = (list_li[i]["class"][1][28:])
            b = (list_li[i].text)
            description_1.update({
                a:b,
                'status':p5_value,
            })
        description_2.append(description_1)
        
        price = num.find_all('div',{"class":"listing-search-item__price"})[0].text
        price_.append(price)

        address = num.find('div',{"class":"listing-search-item__location"}).text
        address_.append(address)

        street = num.find_all('a',{"class":"listing-search-item__link listing-search-item__link--title"})[0].text
        street_.append(street)

        irl = num.find_all('a',{"class":"listing-search-item__link listing-search-item__link--title"})[0]['href']
        irl_.append(irl)
        
        image = num.find_all('img')[0]['src']
        image_.append(image)

        agency = num.find_all('a', href=True)[2].text
        agency_.append(agency)

# Into DataFrame

In [7]:
df_pararius_1 = pd.DataFrame({
    'price':price_,
    'address':address_,
    'street':street_,
    'agency':agency_,
    'irl':irl_,
    'image':image_,
})
df_pararius_2 = pd.DataFrame(description_2)

## Unify dataframes

In [8]:
df_pararius = pd.concat([df_pararius_1, df_pararius_2],1)

  df_pararius = pd.concat([df_pararius_1, df_pararius_2],1)


# fixing text

In [9]:
df_pararius['surface-area']        = df_pararius['surface-area'].str.replace("\D","",regex=True)
df_pararius['number-of-rooms']     = df_pararius['number-of-rooms'].str.replace("\D","",regex=True)
df_pararius['garden-surface-area'] = df_pararius['garden-surface-area'].str.replace("\D","",regex=True)
df_pararius['plot-size']           = df_pararius['plot-size'].str.replace("\D","",regex=True)

In [10]:
df_pararius = df_pararius.dropna(axis=1, how='all')
df_pararius['price'] = df_pararius['price'].str.replace("\D","",regex=True)
df_pararius['address'] = df_pararius['address'].str.replace('\n|new|  ',"",regex=True)
df_pararius['postcode'] = df_pararius['address'].str.replace("\s","",regex=True).str[0:6]
df_pararius['status'] = df_pararius['status'].str.replace('\n',"",regex=True)
df_pararius['date'] = str(today)

In [11]:
df_pararius = df_pararius.drop_duplicates(subset=['irl']).reset_index(drop=True)

# save data

In [12]:
today_csv = ("df_pararius_"+ str(today)+'.csv')
df_pararius.to_csv(f'data/temp/{today_csv}')

In [13]:
df_pararius

Unnamed: 0,price,address,street,agency,irl,image,surface-area,status,number-of-rooms,interior,construction-period,garden-surface-area,plot-size,postcode,date
0,3150,3072 AP Rotterdam (Kop van Zuid),Apartment Wilhelminakade,IRIS Housing Rotterdam,/apartment-for-rent/rotterdam/8c05cc3f/wilhelm...,https://media.pararius.nl/image/PR0001657000/P...,127,,3,Furnished,,,,3072AP,2021-10-28
1,1350,5582 HH Waalre (Aalst),House Gestelsestraat,123Wonen Eindhoven,/house-for-rent/waalre/3e9b6198/gestelsestraat,https://casco-media-prod.global.ssl.fastly.net...,138,,4,Upholstered,,,,5582HH,2021-10-28
2,1600,1108 GC Amsterdam (Nellestein),Apartment Leksmondhof,Expats. Amsterdam Rentals,/apartment-for-rent/amsterdam/23fea928/leksmon...,https://media.pararius.nl/image/PR0001659000/P...,75,,3,Upholstered,,,,1108GC,2021-10-28
3,875,6812 AB Arnhem (Klingelbeek),Apartment Utrechtseweg,Domica Arnhem,/apartment-for-rent/arnhem/01c8a3d8/utrechtseweg,https://casco-media-prod.global.ssl.fastly.net...,65,,2,Upholstered,,,,6812AB,2021-10-28
4,1350,1055 ZR Amsterdam (Landlust),Apartment Solebaystraat,Aham Vastgoed,/apartment-for-rent/amsterdam/8cbcd9ab/solebay...,https://casco-media-prod.global.ssl.fastly.net...,45,,3,Upholstered,,,,1055ZR,2021-10-28
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5264,1500,1505 TJ Zaandam (Bomenbuurt),House Lijsterbesstraat,Uw Partner in Onroerende Zaken,/house-for-rent/zaandam/1b23bcc5/lijsterbesstraat,https://casco-media-prod.global.ssl.fastly.net...,86,Under option,4,Upholstered,,,,1505TJ,2021-10-28
5265,3450,3045 LJ Rotterdam (Zestienhoven),House Overschiese Kleiweg,BenHousing,/house-for-rent/rotterdam/e5d0ae4e/overschiese...,https://casco-media-prod.global.ssl.fastly.net...,294,Under option,6,Upholstered or furnished,,,,3045LJ,2021-10-28
5266,960,3762 GK Soest (Klaarwater),Apartment Wiardi Beckmanstraat,Regio Vastgoedmakelaars,/apartment-for-rent/soest/2e33b708/wiardi-beck...,https://casco-media-prod.global.ssl.fastly.net...,90,Under option,4,Shell,,,,3762GK,2021-10-28
5267,1350,1567 LD Assendelft (Parkrijk),Apartment Wisselwachter,Uw Partner in Onroerende Zaken,/apartment-for-rent/assendelft/829433fa/wissel...,https://casco-media-prod.global.ssl.fastly.net...,76,Under option,3,Upholstered,,,,1567LD,2021-10-28
