## Scrapping one page (Immovlan)

In [None]:
import requests
from bs4 import BeautifulSoup
from fake_useragent import UserAgent
import pandas as pd

In [None]:
# Url of website
url = 'https://immovlan.be/en/detail/residence/for-sale/2500/lier/rbt69212'


In [None]:
ua = UserAgent()

In [None]:
headers =  {

        "User-Agent": ua.random,
       
        'Accept': (
            'text/html,application/xhtml+xml,application/xml;q=0.9,'
            'image/avif,image/webp,*/*;q=0.8'
        ),
        'Accept-Language': 'en-US,en;q=0.5',
        'Accept-Encoding': 'gzip, deflate, br',
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1',
        'Sec-Fetch-Dest': 'document',
        'Sec-Fetch-Mode': 'navigate',
        'Sec-Fetch-Site': 'none',
        'Sec-Fetch-User': '?1',
        'TE': 'trailers',
    }


In [None]:
s = requests.Session()
s.headers.update(headers)

In [None]:
response = s.get(url)
content =response.content
print(response)

In [None]:
soup = BeautifulSoup(content, 'html.parser')
soup

##### Get characteristics for property

In [None]:
property_characteristics = dict()

In [None]:
code = soup.find(class_='vlancode')
property_characteristics['property_code'] = code.text

In [None]:
type_of_property = soup.find(class_= 'detail__header_title_main')
property_characteristics['type_of_property'] = type_of_property.text.split()[0]

In [None]:
price = soup.find(class_='detail__header_price_data')
property_characteristics['price'] = price.text

In [None]:
locality = soup.find(class_='city-line')
property_characteristics['locality'] = locality.text

In [None]:
for tag in soup.find_all('h4', class_ =False):
    characteristic_name = "_".join(list(map(str.lower, tag.text.split())))
    property_characteristics[characteristic_name] = tag.find_next().text

In [None]:
for k,v in property_characteristics.items():
    print(f"{k}: {v}")

#### pandas to create a file with all properties

In [None]:
properties = pd.DataFrame.from_records([property_characteristics])
properties

## Scrapping a list of properties

In [1]:
import requests
from bs4 import BeautifulSoup
from fake_useragent import UserAgent
import random
from time import time, sleep
import pandas as pd


In [2]:
properties = []

In [3]:

urls =['https://immovlan.be/en/detail/residence/for-sale/2018/antwerp/rbt72057', 'https://immovlan.be/en/detail/residence/for-sale/2500/lier/rbt69212', 'https://immovlan.be/en/detail/studio/for-sale/2000/antwerp/rbt71832',
      'https://immovlan.be/en/detail/villa/for-sale/2400/mol/rbt71735', 'https://immovlan.be/en/detail/mixed-building/for-sale/2170/merksem/rbt71857', 'https://immovlan.be/en/detail/chalet/for-sale/2235/hulshout/rbt71880', 
      'https://immovlan.be/en/detail/ground-floor/for-sale/2100/deurne/rbt71973', 'https://immovlan.be/en/detail/penthouse/for-sale/2500/lier/rbt71732', 'https://immovlan.be/en/detail/loft/for-sale/2500/lier/rbt71731']


In [4]:
## Function definition to obtain individual properties

def get_raw_property(url: str, session:requests.Session) -> BeautifulSoup:

    start_time = time()

    sleep(random.uniform(1.5, 3))
    
    try: 
        response = session.get(url)
        content =response.content
        print(response)
        soup = BeautifulSoup(content, 'html.parser')
    except Exception as e:
        print(f"Error {e}")
    
    end_time = time()
    duration = end_time - start_time
    print(f"This scrap has taken {duration}")
    
    return soup    


In [5]:
## Function to obtain a dictionary with the properties
def get_property_characteristics(soup:BeautifulSoup) -> dict:
    
    property_characteristics = dict()

    code = soup.find(class_='vlancode')
    property_characteristics['property_code'] = code.text

    type_of_property = soup.find(class_= 'detail__header_title_main')
    property_characteristics['type_of_property'] = type_of_property.text.split()[0]

    price = soup.find(class_='detail__header_price_data')
    property_characteristics['price'] = price.text

    locality = soup.find(class_='city-line')
    property_characteristics['locality'] = locality.text

    for tag in soup.find_all('h4', class_ =False):
        characteristic_name = "_".join(list(map(str.lower, tag.text.split())))
        property_characteristics[characteristic_name] = tag.find_next().text
        
    for k,v in property_characteristics.items():
        print(f"{k}: {v}")

    return property_characteristics

In [6]:
# Establishing session with headers for connection
ua = UserAgent()
headers =  {

        "User-Agent": ua.random,
       
        'Accept': (
            'text/html,application/xhtml+xml,application/xml;q=0.9,'
            'image/avif,image/webp,*/*;q=0.8'
        ),
        'Accept-Language': 'en-US,en;q=0.5',
        'Accept-Encoding': 'gzip, deflate, br',
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1',
        'Sec-Fetch-Dest': 'document',
        'Sec-Fetch-Mode': 'navigate',
        'Sec-Fetch-Site': 'none',
        'Sec-Fetch-User': '?1',
        'TE': 'trailers',
    }

s = requests.Session()
s.headers.update(headers)


In [7]:

for url in urls:
    soup = get_raw_property(url, s)
    properties.append(get_property_characteristics(soup))


<Response [200]>
This scrap has taken 3.01963210105896
property_code: RBT72057
type_of_property: Residence
price:   490 000 €  
locality: 2018 Antwerp
state_of_the_property: New
currently_leased: No
build_year: 2023
availability: Immediately
number_of_bedrooms: 3
livable_surface: 117 m²
furnished: No
cellar: No
attic: No
veranda: No
diningroom: No
dressing: No
wash_room: No
bike_storage: No
garage: Yes
number_of_bathrooms: 1
number_of_toilets: 2
elevator: No
orientation_of_the_front_facade: South-west
number_of_facades: 3
number_of_floors: 2
garden: Yes
terrace: Yes
surface_terrace: 28 m²
running_water: Yes
certification_-_electrical_installation: yes, certificate in accordance
preemption_right: No
urbanism_affectation: Residential zone (residential, urban)
flooding_area_type: no flooding area
demarcated_flooding_area: (information not available)
<Response [200]>
This scrap has taken 2.1304314136505127
property_code: RBT69212
type_of_property: Residence
price:   239 000 €  
locality: 2

In [8]:
properties


[{'property_code': 'RBT72057',
  'type_of_property': 'Residence',
  'price': '  490\u202f000 €  ',
  'locality': '2018 Antwerp',
  'state_of_the_property': 'New',
  'currently_leased': 'No',
  'build_year': '2023',
  'availability': 'Immediately',
  'number_of_bedrooms': '3',
  'livable_surface': '117 m²',
  'furnished': 'No',
  'cellar': 'No',
  'attic': 'No',
  'veranda': 'No',
  'diningroom': 'No',
  'dressing': 'No',
  'wash_room': 'No',
  'bike_storage': 'No',
  'garage': 'Yes',
  'number_of_bathrooms': '1',
  'number_of_toilets': '2',
  'elevator': 'No',
  'orientation_of_the_front_facade': 'South-west',
  'number_of_facades': '3',
  'number_of_floors': '2',
  'garden': 'Yes',
  'terrace': 'Yes',
  'surface_terrace': '28 m²',
  'running_water': 'Yes',
  'certification_-_electrical_installation': 'yes, certificate in accordance',
  'preemption_right': 'No',
  'urbanism_affectation': 'Residential zone (residential, urban)',
  'flooding_area_type': 'no flooding area',
  'demarcated_

##### Create a dataframe with all properties

In [10]:
df_properties = pd.DataFrame([properties])
df_properties

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,"{'property_code': 'RBT72057', 'type_of_propert...","{'property_code': 'RBT69212', 'type_of_propert...","{'property_code': 'RBT71832', 'type_of_propert...","{'property_code': 'RBT71735', 'type_of_propert...","{'property_code': 'RBT71857', 'type_of_propert...","{'property_code': 'RBT71880', 'type_of_propert...","{'property_code': 'RBT71973', 'type_of_propert...","{'property_code': 'RBT71732', 'type_of_propert...","{'property_code': 'RBT71731', 'type_of_propert..."
