In [1]:
# Imports

import csv
import os

import bs4
import pandas as pd
import requests
from bs4 import BeautifulSoup

In [2]:
# Print the versions of the imported libraries

# Report the version of each imported library, one per line.
print(
    'pandas version: {}'.format(pd.__version__),
    'bs4 version: {}'.format(bs4.__version__),
    'requests version: {}'.format(requests.__version__),
    'csv version: {}'.format(csv.__version__),
    sep='\n',
)

pandas version: 1.5.3
bs4 version: 4.11.2
requests version: 2.31.0
csv version: 1.0


In [3]:
def get_url(city, pages):

    """
    Build the list of paginated search urls for one city.

    :city: The city name or search expression to insert into the url path
    :pages: How many result pages to generate urls for, numbered from 1
            (should not exceed the number of pages the site shows)
    :return: A list with one url string per page, in ascending page order
    """

    url_pattern = 'https://www.pararius.com/apartments/{}/page-{}'
    # Page numbering is 1-based, hence range(1, pages + 1).
    return [url_pattern.format(city, page_number) for page_number in range(1, pages + 1)]


In [4]:
def _listing_text(item, tag, css_class, default='Undefined', drop=None):
    """Return the stripped text of the first ``tag`` with class ``css_class`` inside ``item``
    (with the substring ``drop`` removed, if given), or ``default`` when no such element exists."""
    element = item.find(tag, {'class': css_class})
    if element is None:
        return default
    text = element.text.strip()
    if drop:
        # Removing the unit text leaves surrounding whitespace behind, so strip again.
        text = text.replace(drop, '').strip()
    return text


def scrape_results(urls):

    """

    The scrape_results function loops over the urls, scrapes all real estate data
    and writes the collected rows (with a header row) to data/real_estate.csv

    For every listing, scrape_contact(link) is called to obtain the contact value;
    that function is not defined in this cell and must already exist in the notebook.

    :urls: list of urls generated by calling the get_url function
    :return: list of tuples
             (title, address, rent price, surface, rooms, interior status, agency, contact, link)
    :raises requests.exceptions.HTTPError: when a page answers with an error status
    :raises requests.exceptions.Timeout: when a page does not answer within the timeout

    Fields missing from a listing are recorded as 'Undefined' ('None' for the agency)
    instead of aborting the whole scrape.

    """

    link_template = 'https://www.pararius.com{}'
    output_path = "data/real_estate.csv"
    records = []

    for url in urls:
        # Without a timeout a stalled connection would hang the notebook indefinitely.
        page = requests.get(url, timeout=30)
        # Fail loudly on 4xx/5xx rather than silently parsing an error page into zero listings.
        page.raise_for_status()
        soup = BeautifulSoup(page.content, 'html.parser')

        for item in soup.find_all('section', {'class': 'listing-search-item'}):

            title = _listing_text(item, 'a', 'listing-search-item__link listing-search-item__link--title')

            price_text = _listing_text(item, 'div', 'listing-search-item__price', default=None, drop='per month')
            # [1:] drops the leading character (presumably the currency symbol - TODO confirm).
            rent_price = 'Undefined' if price_text is None else price_text[1:].strip()

            address = _listing_text(item, 'div', 'listing-search-item__sub-title')
            surface = _listing_text(
                item, 'li', 'illustrated-features__item illustrated-features__item--surface-area', drop='m²')
            rooms = _listing_text(
                item, 'li', 'illustrated-features__item illustrated-features__item--number-of-rooms', drop='rooms')
            interior_status = _listing_text(
                item, 'li', 'illustrated-features__item illustrated-features__item--interior')
            agency = _listing_text(item, 'div', 'listing-search-item__info', default='None')

            # The first anchor inside the listing carries the relative detail-page path.
            link = link_template.format(item.a['href'])
            contact = scrape_contact(link)

            records.append(
                (title, address, rent_price, surface, rooms, interior_status, agency, contact, link))

    # Create the output folder on first use so the write cannot fail on a fresh checkout.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    with open(output_path, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['Title', 'Address', 'Rent Price', 'Surface', 'Rooms', 'Interior Status', 'Agency', 'Contact', 'Link'])
        writer.writerows(records)

    return records


In [7]:
def scrape_results(urls):
    """
    Scrape the listing summary fields from every url and return them as dictionaries.

    NOTE: an earlier cell of this notebook also defines scrape_results; running this
    cell rebinds the name to this variant, which returns dicts and does not write a CSV.

    :urls: list of search-result urls (as produced by get_url)
    :return: list of dicts with the keys 'Title', 'Rent Price', 'Address', 'Surface', 'Rooms'
    :raises requests.exceptions.HTTPError: when a page answers with an error status
    :raises requests.exceptions.Timeout: when a page does not answer within the timeout

    A field whose element is missing from a listing is filled with a '<field> not found'
    placeholder instead of raising AttributeError and aborting the scrape.
    """

    def text_or(item, tag, css_class, missing, drop=None):
        # Stripped text of the first matching element (minus the `drop` substring),
        # or the `missing` placeholder when the element is absent.
        node = item.find(tag, {'class': css_class})
        if node is None:
            return missing
        text = node.text.strip()
        if drop:
            # Removing the unit text leaves whitespace behind, so strip again.
            text = text.replace(drop, '').strip()
        return text

    records = []
    for url in urls:
        # Without a timeout a stalled connection would hang the notebook indefinitely.
        response = requests.get(url, timeout=30)
        # Fail loudly on 4xx/5xx rather than silently parsing an error page into zero listings.
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')

        for item in soup.find_all('div', {'class': 'listing-search-item'}):
            price_text = text_or(item, 'div', 'listing-search-item__price', None, drop='per month')
            if price_text is None:
                rent_price = 'Rent price not found'
            else:
                # [1:] drops the leading character (presumably the currency symbol - TODO confirm).
                rent_price = price_text[1:].strip()

            records.append({
                'Title': text_or(
                    item, 'a', 'listing-search-item__link listing-search-item__link--title',
                    'Title not found'),
                'Rent Price': rent_price,
                'Address': text_or(item, 'div', 'listing-search-item__sub-title', 'Address not found'),
                'Surface': text_or(
                    item, 'li', 'illustrated-features__item illustrated-features__item--surface-area',
                    'Surface not found', drop='m²'),
                'Rooms': text_or(
                    item, 'li', 'illustrated-features__item illustrated-features__item--number-of-rooms',
                    'Rooms not found', drop='rooms'),
            })
    return records

# Build the urls for 17 result pages of Amsterdam listings, then scrape each of them.
urls = get_url(city='amsterdam', pages=17)
records = scrape_results(urls=urls)


In [8]:
# Load the scraped listings from CSV into a DataFrame.
# NOTE(review): the first scrape_results cell writes to 'data/real_estate.csv', while this
# cell reads '/content/real_estate.csv' - confirm this is the intended file.
df= pd.read_csv('/content/real_estate.csv')
# Bare last expression so the notebook renders the frame.
df

Unnamed: 0,Title,Address,Rent Price,Surface,Rooms,Interior Status,Agency,Contact,Link
0,Apartment Hoofdweg,1058 BC Amsterdam (Westindische Buurt),3000,155,6,Upholstered,Out Amsterdam,+31208118850,https://www.pararius.com/apartment-for-rent/am...
1,Apartment Grootzeilhof,1034 MC Amsterdam (Banne Buiksloot),2200,80,3,Furnished,Expat Rental Services BV,/real-estate-agents/amsterdam/expat-rental-ser...,https://www.pararius.com/apartment-for-rent/am...
2,House Edammerstraat 54,1023 VC Amsterdam (Tuindorp Nieuwendam),1700,64,4,Furnished,Amsterdam Wonen,+31202440146,https://www.pararius.com/house-for-rent/amster...
3,Apartment Eerste Jan van der Heijdenstraat 116 H,1072 VB Amsterdam (Oude Pijp),4500,83,3,Furnished,ViaDaan,/real-estate-agents/eindhoven/viadaan-nijmegen,https://www.pararius.com/apartment-for-rent/am...
4,Apartment Van Bossestraat,1051 JS Amsterdam (Staatsliedenbuurt),1450,40,2,Upholstered,Sellaspace Real Estate,+31352031063,https://www.pararius.com/apartment-for-rent/am...
...,...,...,...,...,...,...,...,...,...
522,Apartment NDSM-kade,1033 PG Amsterdam (Noordelijke IJ-oevers West),1568,64,2,Upholstered,"FRIS Woningmakelaars Amsterdam, ...",+31203017715,https://www.pararius.com/apartment-for-rent/am...
523,Apartment NDSM-kade,1033 PG Amsterdam (Noordelijke IJ-oevers West),1460,63,2,Upholstered,"FRIS Woningmakelaars Amsterdam, ...",+31203017715,https://www.pararius.com/apartment-for-rent/am...
524,Apartment NDSM-kade,1033 PG Amsterdam (Noordelijke IJ-oevers West),1487,71,2,Upholstered,"FRIS Woningmakelaars Amsterdam, ...",+31203017715,https://www.pararius.com/apartment-for-rent/am...
525,Apartment Grasweg 168,1031 HX Amsterdam (Noordelijke IJ-oevers West),1550,62,3,Undefined,Uppmark,+31853032852,https://www.pararius.com/apartment-for-rent/am...


In [9]:
# (number of rows, number of columns) of the loaded frame
df.shape

(527, 9)