# URL, przetwarzanie stron

In [1]:
import requests
from bs4 import BeautifulSoup
from tabulate import tabulate

# Search-results URL for flats for sale in Kraków. It deliberately ends with
# "page=" so that a page number can be appended directly to it.
url = (
    "https://www.otodom.pl/pl/wyniki/sprzedaz/mieszkanie/malopolskie/krakow/krakow/krakow"
    "?limit=36&ownerTypeSingleSelect=ALL&by=DEFAULT&direction=DESC&viewType=listing"
    "&page="
)

# HTTP request headers sent with every page download.
headers = {"User-Agent": "Mozilla/5.0"}

# District names, kept as a set.
# NOTE(review): not referenced by any cell visible here; presumably meant for
# validating the parsed district values - confirm before removing.
neighborhoods = {
    'Grzegórzki',
    'Swoszowice',
    'Stare Miasto',
    'Prądnik Czerwony',
    'Zwierzyniec',
    'Bronowice',
    'Prądnik Biały',
    'Dębniki',
    'Krowodrza',
    'Łagiewniki-Borek Fałęcki',
    'Podgórze Duchackie',
    'Bieżanów-Prokocim',
    'Podgórze',
    'Czyżyny',
    'Mistrzejowice',
    'Bieńczyce',
    'Wzgórza Krzesławickie',
    'Nowa Huta',
}


In [2]:
# Download result pages 1..9 and keep the parsed HTML of each successful
# response in `pages` (one BeautifulSoup object per page, in page order).
pages = []

for page in range(1, 10):
    # `url` ends with "page=", so appending the number selects the page.
    ready_url = f"{url}{page}"
    # requests never times out unless told to; without this a single stalled
    # connection would block the cell indefinitely.
    response = requests.get(ready_url, headers=headers, timeout=30)
    if response.status_code == 200:
        soup = BeautifulSoup(response.content, "html.parser")
        pages.append(soup)

        print(f"Status: {response.status_code}. Przetwarzam strone: {page}")
    else:
        # Report skipped pages instead of dropping them silently, so a gap in
        # `pages` is visible in the cell output.
        print(f"Status: {response.status_code}. Pomijam strone: {page}")

Status: 200. Przetwarzam strone: 1
Status: 200. Przetwarzam strone: 2
Status: 200. Przetwarzam strone: 3
Status: 200. Przetwarzam strone: 4
Status: 200. Przetwarzam strone: 5
Status: 200. Przetwarzam strone: 6
Status: 200. Przetwarzam strone: 7
Status: 200. Przetwarzam strone: 8
Status: 200. Przetwarzam strone: 9


In [6]:
import re

def _first_group(pattern, text):
    """Return capture group 1 of the first match of `pattern` in `text`, or None if nothing matches."""
    match = re.search(pattern, text)
    return match.group(1) if match else None


# Maps "<page index>_<item index>" to a dict of the fields parsed from one
# listing item. Items without a details list (<dl>) are not recorded.
flat_info = {}

for page_index, page in enumerate(pages):
    listing = page.find('div', attrs={'data-cy': 'search.listing.organic'})
    if listing is None:
        # find() returns None when the container is absent; skip the page
        # instead of failing on None.find_all.
        continue

    for flat_index, flat in enumerate(listing.find_all('li')):
        unique_key = f"{page_index}_{flat_index}"

        # Reset both values for every item: otherwise an item without a title
        # would raise NameError (first item) or silently inherit the district
        # of the previously processed listing.
        title = None
        district = None
        title_tag = flat.find('div', class_="css-12h460e efr035y1")
        if title_tag is not None:
            title = title_tag.text.strip()
            # The district is taken as the comma-separated segment directly
            # before ", Kraków" in the title text.
            district = _first_group(r', ([^,]+), Kraków', title)
            if district is not None:
                district = district.strip()

        price_tag = flat.find('span', class_="css-1uwck7i e1a3ad6s0")
        price = price_tag.text.strip() if price_tag is not None else None

        details_tag = flat.find('dl', class_="css-uki0wd e12r8p6s1")
        if details_tag is None:
            continue
        information_of_flat = details_tag.text.strip()

        num_of_rooms = _first_group(r'Liczba pokoi\s*(\d+)', information_of_flat)

        # NOTE: the 'perimeter' field holds the value matched after
        # "Powierzchnia" (in m²), i.e. the floor area. The name is kept
        # because later cells read this key.
        perimeter = _first_group(r'Powierzchnia\s*([\d,]+)\s*m²', information_of_flat)
        if perimeter is not None:
            # Decimal comma -> decimal point.
            perimeter = perimeter.replace(',', '.')

        price_per_meter_squared = _first_group(
            r'Cena za metr kwadratowy\s*([\d\s]+)zł/m²', information_of_flat
        )
        if price_per_meter_squared is not None:
            # Drop thousands separators (regular and non-breaking spaces).
            price_per_meter_squared = price_per_meter_squared.replace('\xa0', '').replace(' ', '')

        # Stored under a local name: assigning to `url` here would overwrite
        # the module-level search URL and break a re-run of the download cell.
        link_tag = flat.find('a', attrs={'data-cy': 'listing-item-link'})
        href = link_tag.get('href') if link_tag is not None else None
        flat_url = 'https://www.otodom.pl' + href if href else None

        flat_info[unique_key] = {
            'title': title,
            'price': price,
            'num_of_rooms': num_of_rooms,
            'perimeter': perimeter,
            'price_per_meter_squared': price_per_meter_squared,
            'url': flat_url,
            'district': district,
        }


# Dump every parsed listing as a block of "Label: value" lines; the trailing
# "\n" on the last field leaves one blank line between listings.
for unique_key, info in flat_info.items():
    print(
        f"Unique Key: {unique_key}",
        f"Title: {info['title']}",
        f"Price: {info['price']}",
        f"Number of Rooms: {info['num_of_rooms']}",
        f"Perimeter: {info['perimeter']}",
        f"Price per Meter Squared: {info['price_per_meter_squared']}",
        f"URL: {info['url']}",
        f"District: {info['district']}\n",
        sep="\n",
    )
    

# Wizualizacja w postaci tabeli

In [4]:
# Convert the parsed listings into a list of rows for tabulate:
# the unique key first, then the fields in the order of the column headers.
table_columns = ['title', 'price', 'num_of_rooms', 'perimeter', 'price_per_meter_squared', 'url', 'district']
table = [
    [unique_key, *(info[column] for column in table_columns)]
    for unique_key, info in flat_info.items()
]

# Render the data as a pipe-formatted table.
# The column titles live in `table_headers`, not `headers`: `headers` is the
# HTTP request-headers dict used by the download cell, and rebinding it to this
# list would break a re-run of that cell.
table_headers = ['Unique Key', 'Title', 'Price', 'Number of Rooms', 'Perimeter', 'Price per Meter Squared', 'URL', 'District']
print(tabulate(table, headers=table_headers, tablefmt="pipe"))


| Unique Key   | Title   | Price   | Number of Rooms   | Perimeter   | Price per Meter Squared   | URL   | District   |
|--------------|---------|---------|-------------------|-------------|---------------------------|-------|------------|


In [5]:
import matplotlib.pyplot as plt
