## Scraping one page (Immovlan)

In [None]:
import requests
from bs4 import BeautifulSoup
from fake_useragent import UserAgent
import pandas as pd

In [None]:
# Detail page of the single Immovlan listing scraped in this section
url = "https://immovlan.be/en/detail/residence/for-sale/2500/lier/rbt69212"


In [None]:
# fake_useragent helper; its `.random` attribute is read when the request headers are built
ua = UserAgent()

In [None]:
# Browser-like request headers; the User-Agent value is taken from `ua.random`
# once, when this cell runs.
# NOTE(review): 'br' is advertised in Accept-Encoding — requests presumably
# needs a brotli package installed to decode such responses; confirm for this
# environment.
headers = {
    "User-Agent": ua.random,
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
    "Accept-Language": "en-US,en;q=0.5",
    "Accept-Encoding": "gzip, deflate, br",
    "Connection": "keep-alive",
    "Upgrade-Insecure-Requests": "1",
    "Sec-Fetch-Dest": "document",
    "Sec-Fetch-Mode": "navigate",
    "Sec-Fetch-Site": "none",
    "Sec-Fetch-User": "?1",
    "TE": "trailers",
}


In [None]:
# A single Session, so the headers dict is attached to every request made through it
s = requests.Session()
s.headers.update(headers)

In [None]:
# Fetch the listing page. The timeout keeps the cell from hanging forever on a
# stalled connection; raise_for_status() stops here on a 4xx/5xx answer instead
# of letting an error page be parsed as if it were a listing.
response = s.get(url, timeout=30)
print(response)  # shows the HTTP status before any error is raised
response.raise_for_status()
content = response.content

In [None]:
# Parse the downloaded bytes with Python's built-in 'html.parser' backend
soup = BeautifulSoup(content, 'html.parser')
# Preview only the beginning of the document: echoing the whole soup floods the
# notebook output and bloats the saved file.
print(soup.prettify()[:2000])

##### Get the characteristics of the property

In [None]:
# Collects every scraped field of this listing, keyed by field name
property_characteristics = {}

In [None]:
# The listing's reference code is read from the first element with class 'vlancode'
code = soup.find(class_='vlancode')
property_characteristics['property_code'] = code.text

In [None]:
# Only the first whitespace-separated word of the main title text is stored as the type
type_of_property = soup.find(class_= 'detail__header_title_main')
property_characteristics['type_of_property'] = type_of_property.text.split()[0]

In [None]:
# Price is kept as the raw text of the element (no stripping or numeric parsing here)
price = soup.find(class_='detail__header_price_data')
property_characteristics['price'] = price.text

In [None]:
# Locality is the raw text of the first element with class 'city-line'
locality = soup.find(class_='city-line')
property_characteristics['locality'] = locality.text

In [None]:
# Each <h4> found with class_=False is used as a characteristic label: its words
# are lower-cased and joined with underscores to form the key, and the text of
# the next element in the document becomes the value.
for tag in soup.find_all('h4', class_=False):
    label_words = tag.text.split()
    characteristic_name = "_".join(word.lower() for word in label_words)
    property_characteristics[characteristic_name] = tag.find_next().text

In [None]:
# Show every collected field, one "name: value" pair per line
for k, v in property_characteristics.items():
    print(k, v, sep=": ")

#### Use pandas to create a table with all properties

In [None]:
# Wrap the single listing's dict in a list so it becomes a one-row DataFrame
single_property_records = [property_characteristics]
properties = pd.DataFrame.from_records(single_property_records)
properties

## Scraping a list of properties

In [1]:
import requests
from bs4 import BeautifulSoup
from fake_useragent import UserAgent
import random
from time import time, sleep
import pandas as pd


In [2]:
# One dict per scraped listing is appended to this list
properties = list()

In [3]:
# Read at most the first 50 rows of the links CSV and keep the 'link' column as a plain list
urls_df = pd.read_csv("final_listing_links.csv", nrows=50, header=0)
urls = urls_df["link"].to_list()
urls

['https://immovlan.be/en/detail/residence/for-sale/2500/lier/rbt69212',
 'https://immovlan.be/en/detail/residence/for-sale/2440/geel/rbt67563',
 'https://immovlan.be/en/detail/apartment/for-sale/2260/zoerle-parwijs/rbt67578',
 'https://immovlan.be/en/detail/apartment/for-sale/2500/lier/rbt67552',
 'https://immovlan.be/en/detail/apartment/for-sale/2500/lier/rbt67551',
 'https://immovlan.be/en/detail/residence/for-sale/2300/turnhout/rbt66812',
 'https://immovlan.be/en/detail/ground-floor/for-sale/2650/edegem/rbt66872',
 'https://immovlan.be/en/detail/apartment/for-sale/2300/turnhout/rbt66859',
 'https://immovlan.be/en/detail/residence/for-sale/2300/turnhout/rbt66819',
 'https://immovlan.be/en/detail/residence/for-sale/2300/turnhout/rbt66818',
 'https://immovlan.be/en/detail/residence/for-sale/2800/mechelen/rbt66782',
 'https://immovlan.be/en/detail/residence/for-sale/2830/willebroek/rbt66249',
 'https://immovlan.be/en/detail/residence/for-sale/2860/sint-katelijne-waver/rbt66214',
 'https

In [4]:
## Function definition to obtain individual properties

def get_raw_property(url: str, session:requests.Session, timeout: float = 30) -> "BeautifulSoup | None":
    """Download one listing page and parse it with BeautifulSoup.

    A random pause of 1.5 to 3 seconds is taken before the request. The
    response object and the total elapsed time (pause included) are printed.

    Args:
        url: Address of the listing page to download.
        session: Session used to perform the GET request.
        timeout: Seconds to wait for the server before giving up, so a stalled
            connection cannot block the whole scraping run.

    Returns:
        The parsed page, or None when the request failed (connection error,
        timeout, or a 4xx/5xx status). Callers must check for None.
    """
    start_time = time()

    # Random pause so consecutive requests are not fired back to back
    sleep(random.uniform(1.5, 3))

    # Bound up front: the original left `soup` undefined when the request raised,
    # which turned every network error into an UnboundLocalError at `return`.
    soup = None
    try:
        response = session.get(url, timeout=timeout)
        print(response)
        # Do not parse error pages as if they were listings
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')
    except requests.RequestException as e:
        print(f"Error {e}")

    duration = time() - start_time
    print(f"This scrap has taken {duration}")

    return soup


In [5]:
## Function to obtain a dictionary with the properties
def get_property_characteristics(url:str, soup:BeautifulSoup) -> dict:
    """Extract the characteristics of one listing from its parsed page.

    The fixed fields (code, type, price, locality) are looked up by CSS class.
    Every <h4> found with class_=False is then used as a label: its words are
    lower-cased and joined with underscores to form the key, and the text of
    the next element in the document is stored as the value. All collected
    pairs are printed before returning.

    Args:
        url: Address the page was downloaded from; stored under 'property_url'.
        soup: Parsed listing page.

    Returns:
        dict mapping field names to the raw text found on the page. A field
        whose element is missing is stored as None instead of raising, so one
        incomplete page cannot abort a whole scraping run.
    """

    def _text_or_none(element):
        # find()/find_next() return None when nothing matches
        return element.text if element is not None else None

    property_characteristics = dict()

    property_characteristics['property_code'] = _text_or_none(soup.find(class_='vlancode'))

    # Only the first word of the title is kept as the type.
    # NOTE(review): this cuts multi-word types short — the 'ground-floor' listing
    # shows up as 'Ground' in the notebook's sample output; confirm if intended.
    title_text = _text_or_none(soup.find(class_='detail__header_title_main'))
    title_words = title_text.split() if title_text else []
    property_characteristics['type_of_property'] = title_words[0] if title_words else None

    property_characteristics['price'] = _text_or_none(soup.find(class_='detail__header_price_data'))

    property_characteristics['locality'] = _text_or_none(soup.find(class_='city-line'))

    for tag in soup.find_all('h4', class_=False):
        characteristic_name = "_".join(word.lower() for word in tag.text.split())
        # find_next() is None when the <h4> is the last element of the document
        property_characteristics[characteristic_name] = _text_or_none(tag.find_next())

    property_characteristics['property_url'] = url

    for k, v in property_characteristics.items():
        print(f"{k}: {v}")

    return property_characteristics

In [6]:
# Build the session used for every listing request, with browser-like headers.
# The User-Agent is drawn from `ua.random` once, when this cell runs.
# NOTE(review): 'br' is advertised in Accept-Encoding — requests presumably
# needs a brotli package installed to decode such responses; confirm for this
# environment.
ua = UserAgent()
headers = {
    "User-Agent": ua.random,
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
    "Accept-Language": "en-US,en;q=0.5",
    "Accept-Encoding": "gzip, deflate, br",
    "Connection": "keep-alive",
    "Upgrade-Insecure-Requests": "1",
    "Sec-Fetch-Dest": "document",
    "Sec-Fetch-Mode": "navigate",
    "Sec-Fetch-Site": "none",
    "Sec-Fetch-User": "?1",
    "TE": "trailers",
}

s = requests.Session()
s.headers.update(headers)


In [7]:

# Start from an empty list so that re-running this cell does not append the
# same listings a second time.
properties = []

for url in urls:
    soup = get_raw_property(url, s)
    if soup is None:
        # Guard for a fetch that reports failure as None: skip this URL
        # rather than abort the whole run.
        continue
    properties.append(get_property_characteristics(url, soup))


<Response [200]>
This scrap has taken 1.9402260780334473
property_code: RBT69212
type_of_property: Residence
price:   239 000 €  
locality: 2500 Lier
state_of_the_property: To be renovated
build_year: 1899
number_of_bedrooms: 3
livable_surface: 99 m²
furnished: No
attic: Yes
kitchen_equipment: Partially equipped
number_of_bathrooms: 1
type_of_heating: Gas
elevator: No
orientation_of_the_front_facade: North-west
number_of_facades: 2
garden: Yes
surface_garden: 18 m²
terrace: Yes
surface_terrace: 18 m²
total_land_surface: 81 m²
buildable_surface: 61 m²
ground_depth: 20 m
gas: Yes
running_water: Yes
specific_primary_energy_consumption: 526 kWh/m²/year
epc/peb_reference: 20250227-0003540517-RES-1
certification_-_electrical_installation: Not applicable
building_permission_granted: No
planning_permission_granted: No
preemption_right: Yes
urbanism_affectation: Residential zone (residential, urban)
flooding_area_type: no flooding area
demarcated_flooding_area: (information not available)
prope

In [8]:
# Preview only the first records; echoing the full list floods the notebook output
properties[:3]


[{'property_code': 'RBT69212',
  'type_of_property': 'Residence',
  'price': '  239\u202f000 €  ',
  'locality': '2500 Lier',
  'state_of_the_property': 'To be renovated',
  'build_year': '1899',
  'number_of_bedrooms': '3',
  'livable_surface': '99 m²',
  'furnished': 'No',
  'attic': 'Yes',
  'kitchen_equipment': 'Partially equipped',
  'number_of_bathrooms': '1',
  'type_of_heating': 'Gas',
  'elevator': 'No',
  'orientation_of_the_front_facade': 'North-west',
  'number_of_facades': '2',
  'garden': 'Yes',
  'surface_garden': '18 m²',
  'terrace': 'Yes',
  'surface_terrace': '18 m²',
  'total_land_surface': '81 m²',
  'buildable_surface': '61 m²',
  'ground_depth': '20 m',
  'gas': 'Yes',
  'running_water': 'Yes',
  'specific_primary_energy_consumption': '526 kWh/m²/year',
  'epc/peb_reference': '20250227-0003540517-RES-1',
  'certification_-_electrical_installation': 'Not applicable',
  'building_permission_granted': 'No',
  'planning_permission_granted': 'No',
  'preemption_right'

##### Create a dataframe with all properties

In [9]:
# Turn the list of per-listing dicts into one table, one row per listing
df_properties = pd.json_normalize(properties)
df_properties

Unnamed: 0,property_code,type_of_property,price,locality,state_of_the_property,build_year,number_of_bedrooms,livable_surface,furnished,attic,...,5eac6ba7cc054493bc01156c52f523f4.pdf,96fd644af5ea4249beaada48c384a3a3.pdf,f26105c6771c4cc8bb4fb17eaa91dad7.pdf,5eec9cc49d7a477cacea349a321c69a7.pdf,abebf272e3c54660811902d214fb81d1.pdf,surface_kitchen,security_door,surface_bedroom_4,surface_bedroom_5,terrain_width_at_the_roadside
0,RBT69212,Residence,239 000 €,2500 Lier,To be renovated,1899.0,3.0,99 m²,No,Yes,...,,,,,,,,,,
1,RBT67563,Residence,310 000 €,2440 Geel,Normal,1980.0,3.0,123 m²,No,No,...,,,,,,,,,,
2,RBT67578,Apartment,195 000 €,2260 Zoerle-Parwijs,Normal,1967.0,2.0,102 m²,No,No,...,,,,,,,,,,
3,RBT67552,Apartment,302 000 €,2500 Lier,New,2023.0,2.0,93 m²,No,No,...,,,,,,,,,,
4,RBT67551,Apartment,361 200 €,2500 Lier,New,2023.0,2.0,116 m²,No,No,...,,,,,,,,,,
5,RBT66812,Residence,239 000 €,2300 Turnhout,Normal,1973.0,2.0,126 m²,No,No,...,,,,,,,,,,
6,RBT66872,Ground,275 000 €,2650 Edegem,,1954.0,2.0,71 m²,No,No,...,,,,,,,,,,
7,RBT66859,Apartment,169 000 €,2300 Turnhout,To be renovated,1972.0,3.0,118 m²,No,No,...,,,,,,,,,,
8,RBT66819,Residence,275 000 €,2300 Turnhout,Normal,,2.0,133 m²,No,No,...,,,,,,,,,,
9,RBT66818,Residence,239 000 €,2300 Turnhout,,1899.0,3.0,155 m²,No,No,...,,,,,,,,,,


In [11]:
# Save the table as a UTF-8 CSV; index=False leaves the row index out of the file
df_properties.to_csv('out.csv', index=False, encoding = "utf-8")