In [16]:
import requests
from bs4 import BeautifulSoup
import re
import pandas as pd
import unicodedata 

def scrape_court_prices(url, csv_filename="cennik_kortow_czyste_ascii.csv"):

    all_court_pricing_data = []

    try:
        response = requests.get(url)
        response.raise_for_status() 
        html_content = response.text

        soup = BeautifulSoup(html_content, 'html.parser')

        korty_heading_strong = soup.find('strong', string='Korty:')

        if korty_heading_strong:
            common_parent_of_pricing_blocks = korty_heading_strong.find_parent('div').find_parent('div')

            if common_parent_of_pricing_blocks:
                pricing_blocks = common_parent_of_pricing_blocks.find_all(
                    lambda tag: tag.name == 'div' and tag.find('strong', string=re.compile(r'Poniedziałek – Piątek|Weekendy i święta'))
                )
            else:
                print("Nie znaleziono wspolnego rodzica dla blokow cenowych.")
                return []
        else:
            print("Nie znaleziono naglowka 'Korty:'.")
            return []

        if not pricing_blocks:
            print("Nie znaleziono blokow cenowych. Sprawdz selektory lub strukture strony.")
            return []

   
        def clean_to_ascii(text):
            if text is None:
                return None
            text = str(text)
            
        
            text = unicodedata.normalize('NFKD', text).encode('ascii', 'ignore').decode('utf-8')
            
          
            text = text.replace(' – ', ' - ').replace('zł/h', '').replace('/h', '').replace('zł', '') 
            text = text.strip() 
           
            
            return text

        def extract_price_number(price_text):
            if price_text is None:
                return None
            
            cleaned_price = re.sub(r'[^\d,.]', '', price_text)
            
           
            if ',' in cleaned_price and '.' not in cleaned_price:
                cleaned_price = cleaned_price.replace(',', '.')
            
            try:
                return float(cleaned_price)
            except ValueError:
                return None


        for block in pricing_blocks:
            data = {}

            days_strong_tag = block.find('strong')
            if days_strong_tag:
                data['days'] = clean_to_ascii(days_strong_tag.get_text(strip=True))

                time_node = days_strong_tag.next_sibling
                if time_node and isinstance(time_node, str):
                    time_raw = time_node.replace('\xa0', ' ').replace('&nbsp;', ' ').strip()
                    time_match = re.search(r'(\d{1,2}:\d{2} – \d{1,2}:\d{2})', time_raw)
                    if time_match:
                        data['time_range'] = time_match.group(1).replace(' – ', ' - ')
                    else:
                        data['time_range'] = None
                else:
                    data['time_range'] = None

            all_strong_tags_in_block = block.find_all('strong')

            if len(all_strong_tags_in_block) >= 2:
                price_regular_raw = all_strong_tags_in_block[1].get_text(strip=True)
                data['price_regular'] = extract_price_number(price_regular_raw)

                discount_text_node = all_strong_tags_in_block[1].next_sibling
                if discount_text_node and isinstance(discount_text_node, str):
                    match = re.search(r'\(w karnecie\s*(\d+[,.]?\d*\s*zł/h)\)', discount_text_node) 
                    if match:
                        price_discounted_raw = match.group(1)
                        data['price_discounted'] = extract_price_number(price_discounted_raw)
                    else:
                        data['price_discounted'] = None
                else:
                    data['price_discounted'] = None
            else:
                data['price_regular'] = None
                data['price_discounted'] = None

            if data:
                all_court_pricing_data.append(data)

        df = pd.DataFrame(all_court_pricing_data)
        

        df.to_csv(csv_filename, index=False, encoding='utf-8')
        print(f"\nDane zostaly zapisane do pliku '{csv_filename}'")

    except requests.exceptions.RequestException as e:
        print(f"Blad podczas pobierania strony: {e}")
    except Exception as e:
        print(f"Wystapil nieoczekiwany blad: {e}")

    return all_court_pricing_data


url = "https://www.kortypraga.pl/cennik-2-2/"


pricing_info = scrape_court_prices(url)

if pricing_info:
    print("Zeskrobane dane o cenach kortow (rowniez wyswietlone w konsoli):")
    for item in pricing_info:
        print(item)
else:
    print("Nie udalo sie zeskrobac danych o cenach kortow.")


Dane zostaly zapisane do pliku 'cennik_kortow_czyste_ascii.csv'
Zeskrobane dane o cenach kortow (rowniez wyswietlone w konsoli):
{'days': 'Poniedziaek  Piatek', 'time_range': '7:00 - 17:00', 'price_regular': 65.0, 'price_discounted': 55.0}
{'days': 'Poniedziaek  Piatek', 'time_range': '17:00 - 23:00', 'price_regular': 95.0, 'price_discounted': 85.0}
{'days': 'Weekendy i swieta', 'time_range': '8:00 - 22:00', 'price_regular': 80.0, 'price_discounted': 70.0}
