In [1]:
from bs4 import BeautifulSoup
import pandas as pd
import requests
from typing import List

In [2]:
class RealEstateScrapper:
    """Scrape real-estate listings into a pandas DataFrame.

    Each parsed ad becomes one row of ``self.df``; the columns are listed in
    ``_COLUMNS``. Features that are not present on an ad are left empty.
    """

    # Column layout of the result table (one row per ad).
    _COLUMNS = [
        'title', 'url', 'price', 'rooms', 'area', 'floor', 'bathrooms',
        'energy_class', 'year_built', 'current_conditions', 'type',
        'other_info',
    ]

    def __init__(self, base_url: str) -> None:
        """Store the search URL and create an empty result table.

        Args:
            base_url: URL of the first result page. Further pages are derived
                by appending ``&page=N`` to it.
        """
        self.base_url = base_url
        self.df = pd.DataFrame(columns=self._COLUMNS)

    def paginate_urls(self, num_pages: int) -> List[str]:
        """Return the URLs of the first ``num_pages`` result pages.

        The first page is ``base_url`` itself; page ``i`` (from 2 upwards) is
        ``base_url`` followed by ``&page=i``. For ``num_pages`` below 2 only
        ``base_url`` is returned.
        """
        return [self.base_url] + [
            self.base_url + f"&page={i}" for i in range(2, num_pages + 1)
        ]

    @staticmethod
    def _classify_feature(text: str, aria_label: str) -> str:
        """Return the result column a main-feature ``<li>`` belongs to.

        Args:
            text: Visible text of the list item.
            aria_label: Value of its ``aria-label`` attribute, or ``""`` when
                the attribute is absent.
        """
        if "€" in text:
            return "price"
        if "locali" in aria_label or "locale" in aria_label:
            return "rooms"
        # Remaining text-based features are matched in order; first hit wins.
        for keyword, column in (
            ("m²", "area"),
            ("piano", "floor"),
            ("bagni", "bathrooms"),
            ("classe", "energy_class"),
            ("anno", "year_built"),
            ("stato", "current_conditions"),
        ):
            if keyword in text:
                return column
        if "tipologie" in aria_label:
            return "type"
        return "other_info"

    def _append_record(self, record: dict) -> None:
        """Append ``record`` as a new row; columns missing from it stay empty."""
        self.df.loc[len(self.df)] = [record.get(col) for col in self.df.columns]

    def get_individual_ad_main_info(self, url: str, soup: "BeautifulSoup") -> None:
        """Parse one ad page and append its main features as a row of ``self.df``.

        Args:
            url: Address of the ad, stored in the ``url`` column.
            soup: Parsed HTML of the ad page.

        Raises:
            Exception: If the title block or the main-feature list cannot be
                read from ``soup``. The underlying error is chained as the
                cause.
        """
        try:
            title = (
                soup.find("div", "in-titleBlock__content")
                .find("h1", "in-titleBlock__title")
                .text.strip()
            )
            raw_ad_main_info = soup.find(
                "ul",
                attrs="nd-list nd-list--pipe in-feat in-feat--full in-feat__mainProperty in-landingDetail__mainFeatures",
            ).find_all("li")

            record = {"title": title, "url": url}
            for info in raw_ad_main_info:
                text = info.text.strip()
                # Not every <li> carries an aria-label; a missing one must not
                # abort the whole ad.
                aria_label = info.get("aria-label") or ""
                column = self._classify_feature(text, aria_label)
                if column == "area":
                    text = text.replace("da", "").strip()
                record[column] = text
        except Exception as err:
            raise Exception("The page main info scrapping failed") from err

        self._append_record(record)

    def main_scrapper(self, num_pages: int = 2) -> pd.DataFrame:
        """Download and parse the result pages, then return the result table.

        Pages that fail to download or parse are reported with ``print`` and
        skipped.

        Args:
            num_pages: Number of result pages to request.

        Returns:
            ``self.df``.
        """
        for url in self.paginate_urls(num_pages=num_pages):
            try:
                # A timeout keeps one stalled connection from hanging the run.
                response = requests.get(url, timeout=30)
                if response.status_code != 200:
                    raise Exception("The page request failed")

                soup = BeautifulSoup(response.text, "lxml")
            except Exception as e:
                print(e)
                continue

            # TODO(review): the parsed result page is not used yet. Presumably
            # the ad links should be extracted from `soup` and each ad page
            # passed to get_individual_ad_main_info -- confirm intended flow.

        return self.df

In [None]:
# Build a scraper for the Bologna "houses for sale" search, ordered by relevance.
scrapper = RealEstateScrapper(
    base_url="https://www.immobiliare.it/vendita-case/bologna/?criterio=rilevanza",
)

# Run the scrape; the trailing bare expression makes the notebook display the result.
results = scrapper.main_scrapper()
results