# Import

In [1]:
import requests
import re
from bs4 import BeautifulSoup
import pandas as pd
from tqdm import tqdm
import time
import warnings
warnings.filterwarnings('ignore')

In [42]:
import math
import ast

# Parser

In [24]:
def parse_bazaraki_property(html: str, page_url: str) -> dict:
    """Extract listing attributes from the HTML of a Bazaraki advert page.

    :param html: raw HTML of the advert page
    :param page_url: URL the HTML was fetched from; stored under 'Link'
    :return: dict with the keys Link, District, CountOfRooms, Floor, Square,
        Furnishing, Parking, Balcony, AC, EnergyEfficiency, DistanceToJob,
        DistanceToBeach, DoubleGlazedWindows, Elevator, WiFi, SoundProofing,
        GeoDot and Price. Labelled fields are strings or None, keyword flags
        are booleans, GeoDot is ``str()`` of the ``get_geo_dot`` result (so a
        missing location is the text 'None') and Price is the digits/separators
        captured after the euro sign, or None.

    NOTE(review): labelled values are taken from the first 'value-chars' span
    that follows the label span. The saved outputs in this notebook show Square
    mirroring Floor, which suggests this pairing is not reliable for every
    label — verify against the page markup.
    """
    soup = BeautifulSoup(html, 'html.parser')

    # Flatten the document text once; every keyword heuristic below reuses it
    # instead of re-walking the whole tree per flag.
    page_text = soup.get_text()
    page_text_lower = page_text.lower()

    def get_text_by_label(label):
        """Return the text of the first 'value-chars' span after the span whose text is ``label``."""
        # `string=` is the current name of the legacy `text=` filter.
        tag = soup.find('span', string=label)
        if tag:
            value_span = tag.find_next('span', class_='value-chars')
            if value_span:
                return value_span.get_text(strip=True)
        return None

    location_tag = soup.find('a', class_='announcement__location')
    # Both distance flags currently share this single phrase check (placeholder heuristic).
    near_sea = '100 meters from the sea' in page_text

    data = {
        'Link': page_url,
        'District': location_tag.get_text(strip=True) if location_tag else None,
        'CountOfRooms': get_text_by_label('Bedrooms:'),
        'Floor': get_text_by_label('Floor:'),
        'Square': get_text_by_label('Property area:'),
        'Furnishing': get_text_by_label('Furnishing:'),
        'Parking': get_text_by_label('Parking:'),
        'Balcony': 'Balcony' in page_text,  # fuzzy, case-sensitive match
        'AC': get_text_by_label('Air conditioning:'),
        'EnergyEfficiency': get_text_by_label('Energy Efficiency:'),
        'DistanceToJob': near_sea,  # example heuristic
        'DistanceToBeach': near_sea,
        'DoubleGlazedWindows': 'double glazed' in page_text_lower,
        'Elevator': 'elevator' in page_text_lower,
        'WiFi': 'wifi' in page_text_lower or 'wi-fi' in page_text_lower,
        'SoundProofing': 'soundproof' in page_text_lower,
        'GeoDot': str(get_geo_dot(soup))
    }

    # Extracting price: keep whatever digits/separators follow the euro sign and
    # drop commas only. Dots are left in place (the saved outputs show values
    # such as '1.350'), so callers must strip them before converting to a number.
    price_block = soup.select_one('.announcement-price__cost')
    if price_block:
        price_text = price_block.get_text(strip=True)
        price_match = re.search(r'€\s?([\d,.]+)', price_text)
        data['Price'] = price_match.group(1).replace(',', '') if price_match else None
    else:
        data['Price'] = None

    return data

In [23]:
def get_geo_dot(soup):
    """
    Retrieve the listing coordinates from the "Show on the map" link.

    Looks for an ``<a class="js-open-announcement-location">`` element carrying
    a ``data-coords`` attribute in the form
    ``"SRID=4326;POINT (33.0278635 34.6880101)"`` (first number is read as
    longitude, second as latitude).

    :param soup: parsed page exposing ``find(...)`` (a BeautifulSoup document)
    :return: ``{'lat': float, 'lon': float}``, or None when the link is absent
        or its coordinates cannot be parsed
    """
    tag = soup.find('a', class_='js-open-announcement-location', attrs={'data-coords': True})
    if not tag:
        return None

    coords = tag['data-coords']  # e.g. "SRID=4326;POINT (33.0278635 34.6880101)"

    # Signed decimal number: accepts negative coordinates and rejects strings
    # with several dots that would make float() raise.
    number = r'[-+]?(?:\d+\.?\d*|\.\d+)'
    match = re.search(rf'POINT\s*\(\s*({number})\s+({number})\s*\)', coords)
    if not match:
        return None

    lon, lat = float(match.group(1)), float(match.group(2))
    return {'lat': lat, 'lon': lon}

# Links

In [3]:
# Raw BazarakiBot messages pasted from the chat; the advert URLs are pulled out of
# this text by the regex in the following cell.
# Plain (non-f) string on purpose: there are no placeholders, and an f-string
# would fail as soon as pasted text contained a literal brace.
link_text = """
             BazarakiBot, [2 мар. 2025 в 13:05]
Subscription saved to: https://www.bazaraki.com/real-estate-to-rent/apartments-flats/area_min---50/furnishing---1/number-of-bedrooms---2/?ordering=cheapest&price_max=1750&polygon=%7Br%7BrEaj%7BgEbiHx%7BD%7BjAikSdhE%7DvAwoJseWs%7BK~%7BEuW%7CkKrsApoG

BazarakiBot, [2 мар. 2025 в 13:07]
€1350 2-bedroom apartment to rent
Limassol, Limassol - Petrou Kai Pavlou
https://www.bazaraki.com/adv/5645424_2-bedroom-apartment-to-rent/

BazarakiBot, [2 мар. 2025 в 13:07]
€1550 2-bedroom apartment to rent
Limassol, Ypsonas
https://www.bazaraki.com/adv/5590302_2-bedroom-apartment-to-rent/

BazarakiBot, [2 мар. 2025 в 13:07]
€1590 2-bedroom apartment to rent
Limassol, Limassol - Zakaki
https://www.bazaraki.com/adv/5660339_2-bedroom-apartment-to-rent/"""

In [7]:
# Advert URLs look like https://www.bazaraki.com/adv/<numeric id>_<slug>/ ;
# collect every occurrence from the pasted bot messages, duplicates included.
pattern = r'https://www\.bazaraki\.com/adv/\d+_[\w-]+/'
links_list = re.compile(pattern).findall(link_text)

In [8]:
# Number of advert URLs found in the pasted text (duplicates included).
len(links_list)

3

In [9]:
# Number of distinct advert URLs; equal to the count above when nothing repeats.
len(set(links_list))

3

In [10]:
# De-duplicate while keeping first-seen order. list(set(...)) would yield the
# URLs in an order that changes between kernel restarts (string hashing is
# randomized), which reshuffles the scraped table on every run.
unique_list = list(dict.fromkeys(links_list))

# Parse Loops

In [25]:
# Scrape the sample links. A failure on one link is printed and skipped so that
# a single bad page does not abort the whole run (deliberate best effort).
headers = {'User-Agent': 'Mozilla/5.0'}  # identical for every request, so built once
listing_dicts = []
for link in tqdm(unique_list):
    try:
        url = link
        # Without a timeout, requests waits indefinitely on a stalled
        # connection and the loop would hang; a timeout surfaces here as an
        # exception and the link is skipped.
        response = requests.get(url, headers=headers, timeout=30)
        listing_data = parse_bazaraki_property(response.text, url)
        listing_dicts.append(listing_data)
        time.sleep(0.4)  # short pause between requests
    except Exception as e:
        print(f'Exception on {link}: {e}\n')

100%|██████████| 3/3 [00:03<00:00,  1.33s/it]


In [26]:
# One row per link that was fetched and parsed without raising.
prices_df = pd.DataFrame(listing_dicts)

In [27]:
# Inspect the parsed sample listings.
prices_df

Unnamed: 0,Link,District,CountOfRooms,Floor,Square,Furnishing,Parking,Balcony,AC,EnergyEfficiency,DistanceToJob,DistanceToBeach,DoubleGlazedWindows,Elevator,WiFi,SoundProofing,GeoDot,Price
0,https://www.bazaraki.com/adv/5645424_2-bedroom...,"Limassol, Limassol - Petrou Kai Pavlou",,,Covered,"Full, all rooms",Covered,True,"Full, all rooms",B,False,False,False,True,False,False,"{'lat': 34.6880101, 'lon': 33.0278635}",1.35
1,https://www.bazaraki.com/adv/5660339_2-bedroom...,"Limassol, Limassol - Zakaki",2.0,,Covered,3046,Covered,True,"Full, all rooms",A,False,False,False,False,False,False,"{'lat': 34.656304, 'lon': 33.002983}",1.49
2,https://www.bazaraki.com/adv/5590302_2-bedroom...,"Limassol, Ypsonas",2.0,1st,1st,"Full, all rooms",Uncovered,True,"Full, all rooms",A,False,False,False,True,False,False,"{'lat': 34.697375, 'lon': 32.968702}",1.55


# Parse All Links

In [28]:
# Load the full list of advert links; the scrape loop below reads its 'link' column.
# Plain string: the path has no placeholders, so the f-prefix was dropped.
links_df = pd.read_excel('../data/unique_links290325.xlsx')

In [32]:
# Scrape every link from the spreadsheet. A failure on one link is printed and
# skipped so that a single bad page does not abort the run (deliberate best effort).
headers = {'User-Agent': 'Mozilla/5.0'}  # identical for every request, so built once
listing_dicts = []
for link in tqdm(links_df['link'].to_list()):
    try:
        url = link
        # Without a timeout, requests waits indefinitely on a stalled
        # connection and the loop would hang; a timeout surfaces here as an
        # exception and the link is skipped.
        response = requests.get(url, headers=headers, timeout=30)
        listing_data = parse_bazaraki_property(response.text, url)
        listing_dicts.append(listing_data)
        time.sleep(0.1)  # short pause between requests
    except Exception as e:
        print(f'Exception on {link}: {e}\n')
# One row per link that was fetched and parsed without raising.
prices_df = pd.DataFrame(listing_dicts)

100%|██████████| 155/155 [02:24<00:00,  1.08it/s]


In [37]:
# Shape of the rows that have coordinates. GeoDot holds str(get_geo_dot(...)),
# so a missing location is the literal text 'None', not a null value.
prices_df.loc[prices_df['GeoDot']!='None'].shape

(128, 18)

In [38]:
# Persist the scraped table. Plain string: the path has no placeholders, so the
# f-prefix was dropped.
prices_df.to_excel('../data/prices_df290325.xlsx', index=False)

In [40]:
# Peek at the stringified coordinate dicts ('None' where no map link was found).
prices_df['GeoDot']

0                                        None
1                                        None
2        {'lat': 34.68205, 'lon': 33.0006363}
3        {'lat': 34.656304, 'lon': 33.002983}
4        {'lat': 34.710598, 'lon': 33.133668}
                        ...                  
150    {'lat': 34.6657927, 'lon': 33.0034017}
151      {'lat': 34.703698, 'lon': 32.996048}
152    {'lat': 34.6880101, 'lon': 33.0278635}
153     {'lat': 34.677058, 'lon': 33.0073458}
154      {'lat': 34.656304, 'lon': 33.002983}
Name: GeoDot, Length: 155, dtype: object

In [41]:
# Inspect the full scraped table.
prices_df

Unnamed: 0,Link,District,CountOfRooms,Floor,Square,Furnishing,Parking,Balcony,AC,EnergyEfficiency,DistanceToJob,DistanceToBeach,DoubleGlazedWindows,Elevator,WiFi,SoundProofing,GeoDot,Price
0,https://www.bazaraki.com/adv/5678254_2-bedroom...,,,,,,,False,,,False,False,False,False,False,False,,
1,https://www.bazaraki.com/adv/5683377_2-bedroom...,,,,,,,False,,,False,False,False,False,False,False,,
2,https://www.bazaraki.com/adv/5692081_2-bedroom...,"Limassol, Polemidia - Apostolos Varnavas",2,Ground floor,Ground floor,"Full, all rooms",Uncovered,False,"Full, all rooms",,False,False,False,False,False,False,"{'lat': 34.68205, 'lon': 33.0006363}",1.100
3,https://www.bazaraki.com/adv/5675339_2-bedroom...,"Limassol, Limassol - Zakaki",2,3rd,3rd,3046,Covered,True,"Full, all rooms",A,False,False,False,True,False,False,"{'lat': 34.656304, 'lon': 33.002983}",1.350
4,https://www.bazaraki.com/adv/5631055_2-bedroom...,"Limassol, Agios Tychon Tourist Area",1,1st,1st,4531,Uncovered,False,"Full, all rooms",B,False,False,False,True,False,False,"{'lat': 34.710598, 'lon': 33.133668}",1.300
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
150,https://www.bazaraki.com/adv/5642861_2-bedroom...,"Limassol, Limassol - Omonia",2,2nd,2nd,"Full, all rooms",Uncovered,True,"Full, all rooms",A,False,False,False,False,False,False,"{'lat': 34.6657927, 'lon': 33.0034017}",1.100
151,https://www.bazaraki.com/adv/5712047_2-bedroom...,"Limassol, Polemidia Pano",1,1st,1st,"Full, all rooms",Covered,True,"Full, all rooms",B+,False,False,False,True,False,False,"{'lat': 34.703698, 'lon': 32.996048}",1.200
152,https://www.bazaraki.com/adv/5677113_2-bedroom...,"Limassol, Limassol - Petrou Kai Pavlou",1,2nd,2nd,"Full, all rooms",Covered,True,"Full, all rooms",A,False,False,False,True,False,False,"{'lat': 34.6880101, 'lon': 33.0278635}",1.200
153,https://www.bazaraki.com/adv/5642123_2-bedroom...,"Limassol, Limassol - Agios Spyridon",1,,Uncovered,3051,Uncovered,True,"Full, all rooms",,False,False,False,True,False,False,"{'lat': 34.677058, 'lon': 33.0073458}",1.350


# Search for Closest locations

In [45]:
def add_distance_column(df: pd.DataFrame, target_point: dict, geo_col='GeoDot') -> pd.DataFrame:
    """Return a copy of ``df`` with a 'DistanceKm' column, sorted nearest first.

    Each cell of ``geo_col`` may be a ``{'lat': ..., 'lon': ...}`` dict or its
    string form (as produced by ``str(dict)``). Cells that are missing, equal
    to the text 'None', or cannot be parsed into coordinates get a distance of
    None and are sorted last.

    :param df: listings table; it is not modified
    :param target_point: dict with 'lat' and 'lon' of the reference location
    :param geo_col: name of the column holding the coordinates
    :return: new DataFrame with 'DistanceKm' added, ascending by distance
    :raises KeyError: if ``target_point`` lacks 'lat' or 'lon' and at least one
        row has usable coordinates

    Only coordinate-parsing problems are turned into None. Errors from the
    distance computation itself propagate. In particular, ``haversine`` is
    defined in a later cell of this notebook, so it must have been run first;
    the previous blanket ``except Exception`` would have hidden the resulting
    NameError and silently produced an all-None column.
    """
    def _coords(val):
        """Return (lat, lon) as floats for one cell, or None when unusable."""
        if isinstance(val, str):
            if val == "None":
                return None
            try:
                # literal_eval only evaluates Python literals, never arbitrary code.
                val = ast.literal_eval(val)
            except (ValueError, SyntaxError):
                return None
        if not isinstance(val, dict):
            # Covers None, NaN and any other non-dict value.
            return None
        try:
            return float(val['lat']), float(val['lon'])
        except (KeyError, TypeError, ValueError):
            return None

    distances = []
    for val in df[geo_col]:
        coords = _coords(val)
        if coords is None:
            distances.append(None)
            continue

        lat, lon = coords
        distances.append(
            haversine(
                lat1=lat,
                lon1=lon,
                lat2=target_point['lat'],
                lon2=target_point['lon']
            )
        )

    df = df.copy()
    df["DistanceKm"] = distances
    df = df.sort_values(by="DistanceKm", ascending=True, na_position='last')
    return df

In [156]:
# Distance from every listing to a fixed reference point, nearest first.
# NOTE(review): the coordinates are hard-coded; presumably the location of
# interest (e.g. a workplace) — confirm and consider naming it as a constant.
new_df = add_distance_column(prices_df.copy(), 
                             target_point={'lat': 34.711125, 'lon': 33.059491}, 
                             geo_col='GeoDot')

In [157]:
# Numeric price. The scraped Price text uses '.' as a separator (e.g. '1.350'),
# so dots are removed before the float conversion. regex=False makes both
# replacements literal regardless of the pandas version's default: as a regex,
# '.' would match every character.
new_df['PriceFl'] = (
    new_df['Price']
    .astype(str)
    .str.replace('.', '', regex=False)
    .str.replace('None', '0', regex=False)  # missing prices become 0
    .astype(float)
)

In [158]:
# Listings within 2 km of the reference point. Rows with a missing distance
# compare False and are therefore excluded.
new_df1 = new_df.loc[new_df['DistanceKm']<=2].copy()

In [159]:
# How many listings fall inside the 2 km radius.
new_df1.shape[0]

12

In [160]:
# Links of the nearby listings, nearest first (new_df is sorted by DistanceKm).
new_df1['Link'].to_list()

['https://www.bazaraki.com/adv/5690474_2-bedroom-apartment-to-rent/',
 'https://www.bazaraki.com/adv/5669603_2-bedroom-apartment-to-rent/',
 'https://www.bazaraki.com/adv/5517035_2-bedroom-detached-house-to-rent/',
 'https://www.bazaraki.com/adv/5708926_2-bedroom-apartment-to-rent/',
 'https://www.bazaraki.com/adv/5715425_2-bedroom-penthouse-to-rent/',
 'https://www.bazaraki.com/adv/5624938_2-bedroom-apartment-to-rent/',
 'https://www.bazaraki.com/adv/5492010_2-bedroom-apartment-to-rent/',
 'https://www.bazaraki.com/adv/5665425_2-bedroom-apartment-to-rent/',
 'https://www.bazaraki.com/adv/5708561_2-bedroom-apartment-to-rent/',
 'https://www.bazaraki.com/adv/5598297_2-bedroom-apartment-to-rent/',
 'https://www.bazaraki.com/adv/5634085_2-bedroom-apartment-to-rent/',
 'https://www.bazaraki.com/adv/5705448_2-bedroom-apartment-to-rent/']

In [161]:
# Summary statistics of the numeric price for the nearby listings.
# Note: listings with a missing price were mapped to 0 in PriceFl and would
# pull min/mean down if any were present in this subset.
print(f"Price min: {new_df1['PriceFl'].min()}")
print(f"Price max: {new_df1['PriceFl'].max()}")
print(f"Price mean: {new_df1['PriceFl'].mean()}")
print(f"Price median: {new_df1['PriceFl'].median()}")

Price min: 1000.0
Price max: 1700.0
Price mean: 1300.0
Price median: 1300.0


In [142]:
# Inspect the nearby listings with their distance and numeric price.
new_df1

Unnamed: 0,Link,District,CountOfRooms,Floor,Square,Furnishing,Parking,Balcony,AC,EnergyEfficiency,DistanceToJob,DistanceToBeach,DoubleGlazedWindows,Elevator,WiFi,SoundProofing,GeoDot,Price,DistanceKm,PriceFl
62,https://www.bazaraki.com/adv/5665425_2-bedroom...,"Limassol, Limassol - Linopetra",1,2nd,2nd,"Full, all rooms",Uncovered,True,"Full, all rooms",,False,False,False,True,False,False,"{'lat': 34.6973675, 'lon': 33.0675682}",1.2,0.875105,1200.0
44,https://www.bazaraki.com/adv/5682838_2-bedroom...,"Limassol, Limassol - Neapolis",1,5th,5th,"Full, all rooms",Uncovered,True,"Full, all rooms",,False,False,False,True,False,False,"{'lat': 34.6893639, 'lon': 33.0539774}",1.2,0.963298,1200.0
131,https://www.bazaraki.com/adv/5684644_2-bedroom...,"Limassol, Limassol - Neapolis",1,5th,5th,"Full, all rooms",Covered,True,"Full, all rooms",A,False,False,False,True,False,False,"{'lat': 34.6893639, 'lon': 33.0539774}",1.4,0.963298,1400.0
97,https://www.bazaraki.com/adv/5667831_2-bedroom...,"Limassol, Limassol - Neapolis",1,2nd,2nd,3107,Uncovered,True,"Full, all rooms",B+,False,False,False,False,False,False,"{'lat': 34.6893639, 'lon': 33.0539774}",1.3,0.963298,1300.0
129,https://www.bazaraki.com/adv/5708036_2-bedroom...,"Limassol, Limassol - Neapolis",1,2nd,2nd,"Full, all rooms",Uncovered,True,"Full, all rooms",B,True,True,False,False,False,False,"{'lat': 34.6893639, 'lon': 33.0539774}",1.3,0.963298,1300.0
128,https://www.bazaraki.com/adv/5703404_2-bedroom...,"Limassol, Limassol - Neapolis",1,5th,5th,"Full, all rooms",Covered,True,"Full, all rooms",,False,False,False,True,False,False,"{'lat': 34.6893639, 'lon': 33.0539774}",1.15,0.963298,1150.0
92,https://www.bazaraki.com/adv/5669795_2-bedroom...,"Limassol, Limassol - Neapolis",1,2nd,2nd,"Full, all rooms",Uncovered,True,"Full, all rooms",B,True,True,False,False,False,False,"{'lat': 34.6893639, 'lon': 33.0539774}",1.3,0.963298,1300.0
88,https://www.bazaraki.com/adv/5685682_2-bedroom...,"Limassol, Limassol - Neapolis",1,2nd,2nd,3107,Uncovered,True,"Full, all rooms",B,False,False,False,False,False,False,"{'lat': 34.6893639, 'lon': 33.0539774}",1.3,0.963298,1300.0


np.float64(1326.6666666666667)

np.float64(1300.0)

# Drafts

In [43]:
def haversine(lat1, lon1, lat2, lon2, radius_km=6371.0):
    """Great-circle distance between two points given in decimal degrees.

    :param lat1: latitude of the first point, degrees
    :param lon1: longitude of the first point, degrees
    :param lat2: latitude of the second point, degrees
    :param lon2: longitude of the second point, degrees
    :param radius_km: sphere radius; defaults to 6371.0 (Earth radius in km)
    :return: distance along the sphere, in the units of ``radius_km``
    """
    phi1 = math.radians(lat1)
    phi2 = math.radians(lat2)
    delta_phi = math.radians(lat2 - lat1)
    delta_lambda = math.radians(lon2 - lon1)

    a = (
        math.sin(delta_phi / 2) ** 2
        + math.cos(phi1) * math.cos(phi2) * math.sin(delta_lambda / 2) ** 2
    )
    # Rounding can push `a` a hair above 1 for (near-)antipodal points, which
    # would make sqrt(1 - a) raise. A plain comparison is used instead of
    # min()/max() so that NaN inputs still propagate as NaN.
    if a > 1.0:
        a = 1.0

    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))

    return radius_km * c

In [76]:
# Transposed view: one column per listing, which is easier to scan for a wide frame.
new_df1.T

Unnamed: 0,152,20,138,40,144,47,48,16,123,107,46,25,130,103,141,94,51
Link,https://www.bazaraki.com/adv/5677113_2-bedroom...,https://www.bazaraki.com/adv/5645424_2-bedroom...,https://www.bazaraki.com/adv/5697264_2-bedroom...,https://www.bazaraki.com/adv/5635691_2-bedroom...,https://www.bazaraki.com/adv/5692055_2-bedroom...,https://www.bazaraki.com/adv/5627688_2-bedroom...,https://www.bazaraki.com/adv/5685131_2-bedroom...,https://www.bazaraki.com/adv/5689660_2-bedroom...,https://www.bazaraki.com/adv/5610072_2-bedroom...,https://www.bazaraki.com/adv/5664577_2-bedroom...,https://www.bazaraki.com/adv/5697467_2-bedroom...,https://www.bazaraki.com/adv/5704773_2-bedroom...,https://www.bazaraki.com/adv/5696839_2-bedroom...,https://www.bazaraki.com/adv/5710260_2-bedroom...,https://www.bazaraki.com/adv/5613596_1-bedroom...,https://www.bazaraki.com/adv/5707844_2-bedroom...,https://www.bazaraki.com/adv/5461258_2-bedroom...
District,"Limassol, Limassol - Petrou Kai Pavlou","Limassol, Limassol - Petrou Kai Pavlou","Limassol, Limassol - Agios Nektarios","Limassol, Limassol - Agia Zoni","Limassol, Polemidia Kato","Limassol, Polemidia Kato","Limassol, Polemidia Kato","Limassol, Polemidia Kato","Limassol, Polemidia Kato","Limassol, Polemidia Kato","Limassol, Limassol - Katholiki","Limassol, Limassol - Katholiki","Limassol, Limassol - Katholiki","Limassol, Limassol - Katholiki","Limassol, Limassol - Katholiki","Limassol, Limassol - Katholiki","Limassol, Limassol - Katholiki"
CountOfRooms,1,,1,1,2,2,1,1,1,2,1,1,1,1,1,1,2
Floor,2nd,,Ground floor,1st,3rd,2nd,Ground floor,1st,3rd,1st,3rd,3rd,3rd,3rd,Ground floor,3rd,1st
Square,2nd,Covered,Ground floor,1st,3rd,2nd,Ground floor,1st,3rd,1st,3rd,3rd,3rd,3rd,Ground floor,3rd,1st
Furnishing,"Full, all rooms","Full, all rooms",3090,"Full, all rooms","Full, all rooms","Full, all rooms","Full, all rooms","Full, all rooms",,"Full, all rooms","Full, all rooms","Full, all rooms",3020,"Full, all rooms",3075,3020,"Full, all rooms"
Parking,Covered,Covered,Covered,Covered,Uncovered,Covered,Uncovered,Covered,Covered,Covered,Covered,Covered,Covered,Covered,Uncovered,No,Uncovered
Balcony,True,True,True,False,True,True,False,True,False,False,False,True,True,False,True,True,True
AC,"Full, all rooms","Full, all rooms",Partly,"Full, all rooms","Full, all rooms","Full, all rooms","Full, all rooms","Full, all rooms",,"Full, all rooms","Full, all rooms","Full, all rooms","Full, all rooms","Full, all rooms","Full, all rooms","Full, all rooms","Full, all rooms"
EnergyEfficiency,A,B,B+,C,A,,,A,,A,B+,B,B,,,A,B+
