# Import

In [1]:
import requests
import re
from bs4 import BeautifulSoup
import pandas as pd
from tqdm import tqdm
import time
import warnings
warnings.filterwarnings('ignore')

In [2]:
import math
import ast

# Parser

In [3]:
def parse_bazaraki_property(html: str, page_url: str) -> dict:
    soup = BeautifulSoup(html, 'html.parser')
    
    def get_text_by_label(label):
        tag = soup.find('span', text=label)
        if tag:
            value_span = tag.find_next('span', class_='value-chars')
            if value_span:
                return value_span.get_text(strip=True)
        return None

    data = {
        'Link': page_url,
        'District': soup.find('a', class_='announcement__location').get_text(strip=True) if soup.find('a', class_='announcement__location') else None,
        'CountOfRooms': get_text_by_label('Bedrooms:'),
        'Floor': get_text_by_label('Floor:'),
        'Square': get_text_by_label('Property area:'),
        'Furnishing': get_text_by_label('Furnishing:'),
        'Parking': get_text_by_label('Parking:'),
        'Balcony': 'Balcony' in soup.get_text(),  # fuzzy match
        'AC': get_text_by_label('Air conditioning:'),
        'EnergyEfficiency': get_text_by_label('Energy Efficiency:'),
        'DistanceToJob': '100 meters from the sea' in soup.get_text(),  # example heuristic
        'DistanceToBeach': '100 meters from the sea' in soup.get_text(),
        'DoubleGlazedWindows': 'double glazed' in soup.get_text().lower(),
        'Elevator': 'elevator' in soup.get_text().lower(),
        'WiFi': 'wifi' in soup.get_text().lower() or 'wi-fi' in soup.get_text().lower(),
        'SoundProofing': 'soundproof' in soup.get_text().lower(),
        'GeoDot': str(get_geo_dot(soup))
    }

    # Extracting price
    price_block = soup.select_one('.announcement-price__cost')
    if price_block:
        price_text = price_block.get_text(strip=True)
        price_match = re.search(r'€\s?([\d,.]+)', price_text)
        data['Price'] = price_match.group(1).replace(',', '') if price_match else None
    else:
        data['Price'] = None

    return data

In [4]:
def get_geo_dot(soup):
    """
    Retrieve geodot from Show on the map script
    :param soup: 
    :return: 
    """
    tag = soup.find('a', class_='js-open-announcement-location', attrs={'data-coords': True})
    
    if tag:
        coords = tag['data-coords']  # e.g. "SRID=4326;POINT (33.0278635 34.6880101)"
        match = re.search(r'POINT\s*\(([\d.]+)\s+([\d.]+)\)', coords)
        if match:
            lon, lat = float(match.group(1)), float(match.group(2))
            return {'lat': lat, 'lon': lon}
    return None

In [17]:
url = 'https://www.bazaraki.com/adv/5694629_1-bedroom-apartment-to-rent/?p=1'
headers = {'User-Agent': 'Mozilla/5.0'}
response = requests.get(url, headers=headers)
listing_data = parse_bazaraki_property(response.text, url)


In [18]:
listing_data

{'Link': 'https://www.bazaraki.com/adv/5694629_1-bedroom-apartment-to-rent/?p=1',
 'District': 'Limassol, Germasogeia',
 'CountOfRooms': '1',
 'Floor': '1st',
 'Square': '1st',
 'Furnishing': 'Full, all rooms',
 'Parking': 'No',
 'Balcony': True,
 'AC': 'Full, all rooms',
 'EnergyEfficiency': 'A',
 'DistanceToJob': False,
 'DistanceToBeach': False,
 'DoubleGlazedWindows': False,
 'Elevator': True,
 'WiFi': False,
 'SoundProofing': False,
 'GeoDot': "{'lat': 34.700778, 'lon': 33.055763}",
 'Price': '1.690'}

In [19]:
listing_data['GeoDot'].replace("{", "").\
    replace("}", "").replace("'", "").replace(":", "").\
    replace("lat", "").replace("lon", "")

' 34.700778,  33.055763'

In [ ]:
links_str = f"""
             https://www.bazaraki.com/adv/5711809_1-bedroom-apartment-to-rent/?p=1
             
             
             
             """