In [2]:
%load_ext autoreload
%autoreload 2
%xmode Plain

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Exception reporting mode: Plain


In [3]:
import pandas as pd
import plotly
import plotly.express as px 
import dtale
import numpy as np
import random
from bazaraki import utils
from tqdm import tqdm
import swifter
from pathlib import Path
from datetime import datetime

In [4]:
tqdm.pandas()
pd.set_option('display.max_rows', 100)  # Disable row limit
pd.set_option('display.max_columns', 60)  # Disable column limit
pd.set_option('display.width', 20)  # Disable line width limit
pd.set_option('display.max_colwidth', 100)  # Disable column width limit
pd.set_option('display.precision', 2)  
pd.set_option('display.float_format', '{:.4f}'.format)  
np.random.seed(42)
random.seed(42)
pd.options.plotting.backend = "plotly"
plotly.io.renderers.default = "notebook_connected"
from pandarallel import pandarallel

pandarallel.initialize(progress_bar=True)

INFO: Pandarallel will run on 8 workers.
INFO: Pandarallel will use Memory file system to transfer data between the main process and workers.


In [5]:
df = utils.read_dfs("output/*.parquet")

Reading output/2024-12-12 18:34:25 real-estate-to-rent_real-estate-for-sale.parquet
Reading output/2024-12-14 11:44:22 real-estate-to-rent_real-estate-for-sale.parquet
Total: 35559 read: 34669 new: 745 deleted: 890
Reading output/2024-12-15 18:00:14 real-estate-to-rent_real-estate-for-sale.parquet
Total: 35734 read: 34398 new: 175 deleted: 490
Reading output/2024-12-16 23:13:52 real-estate-to-rent_real-estate-for-sale.parquet
Total: 36281 read: 34510 new: 547 deleted: 647
Reading output/2024-12-17 21:31:02 real-estate-to-rent_real-estate-for-sale.parquet
Total: 37085 read: 34681 new: 804 deleted: 800
Reading output/2024-12-18 23:01:10 real-estate-to-rent_real-estate-for-sale.parquet
Total: 37577 read: 34809 new: 492 deleted: 464
Reading output/2024-12-19 22:09:26 real-estate-to-rent_real-estate-for-sale.parquet
Total: 38047 read: 33723 new: 470 deleted: 1661
Reading output/2024-12-22 21:49:27 real-estate-to-rent_real-estate-for-sale.parquet
Total: 39161 read: 33255 new: 1114 deleted: 1

In [6]:
df.iloc[0].T

url                                                 https://www.bazaraki.com/adv/5415213_4-bedroom-detached-house-to-rent/
title                                                                                     4-bedroom detached house to rent
price                                                                                                            1650.0000
original_price                                                                                                         NaN
price_per_sqm                                                                                                          NaN
location                                                                                               Larnaca, Dromolaxia
posted                                                                                                     Yesterday 20:57
reference_number                                                                                                       NaN
views           

In [7]:
df["price_per_sqm"] = df.price / (df["Property area"]).round(2)

In [8]:
def add_city_disctrict_cols(df):
    df[["city", "district"]] = df.location.str.split(",", expand=True)
    return df
df = add_city_disctrict_cols(df)


In [9]:
df = utils.filter_in(df, "`Property area` > 10 and `Property area` < 300")

removing 21108/86756 rows


In [10]:
saledf = df.query("cat1 == 'Apartments, flats for sale' and (Condition == 'Brand new' or Condition == 'Resale' or Condition.isna()) and Bedrooms in ['1', '2', 'Studio']")
rentdf = df.query("cat1 == 'Apartments, flats to rent'")


In [11]:
saledf = utils.filter_in(saledf, "delete_date.isna()")

removing 7057/9792 rows


In [12]:

import math  


def haversine(lat1, lon1, lat2, lon2):  
    """  
    Calculate the great-circle distance between two points on the Earth (specified in decimal degrees).  
    Returns the distance in kilometers.  
    """  
    # Convert decimal degrees to radians  
    lat1, lon1, lat2, lon2 = map(math.radians, [lat1, lon1, lat2, lon2])  
    # Haversine formula  
    dlat = lat2 - lat1  
    dlon = lon2 - lon1  
    a = math.sin(dlat / 2)**2 + math.cos(lat1) * math.cos(lat2) * math.sin(dlon / 2)**2  
    c = 2 * math.asin(math.sqrt(a))  
    # Radius of Earth in kilometers (mean radius)  
    r = 6371.0  
    return c * r  

lat1, lon1 = 52.5200, 13.4050  # Berlin  
lat2, lon2 = 52.5205, 13.4095  # Nearby point in Berlin  
print(haversine(lat1, lon1, lat2, lon2))


0.3095045367196025


In [13]:

def rent_filter(rentdf, radius_km, sale_row):
    return rentdf[rentdf.apply(lambda r: haversine(r.lat, r.lng, sale_row.lat, sale_row.lng), axis=1) < radius_km]

def reduce_count(df, sale_row):
    if len(df) < 10:
        return df

    new_df = df[df.Bedrooms == sale_row.Bedrooms]
    if len(new_df) < 3:
        return df
    else:
        return new_df
        
def rent_finder(rentdf, radius_km, verbose, sale_row):
    if verbose:
        print(sale_row.url)
    # Step 1: Calculate the bounding box
    lat_diff = radius_km / 111.32  # Approx. 1 degree latitude = 111.32 km
    lng_diff = radius_km / (111.32 * np.cos(np.radians(sale_row.lat)))  # Adjust for latitude

    # Bounding box
    min_lat = sale_row.lat - lat_diff
    max_lat = sale_row.lat + lat_diff
    min_lng = sale_row.lng - lng_diff
    max_lng = sale_row.lng + lng_diff
    
    # Step 2: Filter rows within the bounding box
    filtered_df = rentdf[(rentdf['lat'] >= min_lat) & (rentdf['lat'] <= max_lat) & (rentdf['lng'] >= min_lng) & (rentdf['lng'] <= max_lng)]    
    filtered = rent_filter(rentdf=filtered_df, radius_km=radius_km, sale_row=sale_row)
    reduced_filtered = reduce_count(df=filtered, sale_row=sale_row)
    filtered = reduced_filtered.sort_values("delete_date", ascending=False, na_position="first")
    min_price_per_sqm = filtered.price_per_sqm.min()
    mean_price_per_sqm = filtered.price_per_sqm.median()
    avg_price_per_sqm = filtered.price_per_sqm.mean()
    max_price_per_sqm = filtered.price_per_sqm.max()
    count = filtered.price_per_sqm.count()
    mean_yield = mean_price_per_sqm * 12 / sale_row.price_per_sqm * 100
    lines = [f"{sale_row.url} {sale_row.city} price={sale_row.price} area={sale_row['Property area']} price_per_sqm={int(sale_row.price_per_sqm)} mean_yield={mean_yield:.2f} " \
                f"max_price_per_sqm:{max_price_per_sqm:.2f} min_price_per_sqm={min_price_per_sqm:.2f} avg_price_per_sqm={avg_price_per_sqm:.2f} " \
                f"mean_price_per_sqm={mean_price_per_sqm:.2f} count={count}"]
    for index, rent_row in list(filtered.iterrows())[:10]:
        lines.append(f"\t{rent_row.url} price={int(rent_row.price)} price_per_sqm={int(rent_row.price_per_sqm)}")
    return dict(mean_yield=mean_yield, text="\n".join(lines))


report_df = saledf.apply(lambda r: rent_finder(rentdf=rentdf, radius_km=1, sale_row=r, verbose=False), axis=1, result_type='expand')
report_df   

Unnamed: 0_level_0,mean_yield,text
ad_id,Unnamed: 1_level_1,Unnamed: 2_level_1
5529817,4.7059,https://www.bazaraki.com/adv/5529817_1-bedroom-apartment-for-sale/ Limassol price=255000.0 area=...
5528482,,https://www.bazaraki.com/adv/5528482_2-bedroom-apartment-for-sale/ Limassol price=360000.0 area=...
5291456,,https://www.bazaraki.com/adv/5291456_studio-apartment-for-sale/ Nicosia price=85000.0 area=37.0 ...
5530886,,https://www.bazaraki.com/adv/5530886_1-bedroom-apartment-for-sale/ Nicosia price=139000.0 area=6...
5486323,6.1647,https://www.bazaraki.com/adv/5486323_1-bedroom-apartment-for-sale/ Limassol price=225000.0 area=...
...,...,...
5749228,,https://www.bazaraki.com/adv/5749228_2-bedroom-apartment-for-sale/ Nicosia price=291000.0 area=8...
5749229,,https://www.bazaraki.com/adv/5749229_2-bedroom-apartment-for-sale/ Larnaca price=221000.0 area=7...
5749236,7.1053,https://www.bazaraki.com/adv/5749236_2-bedroom-apartment-for-sale/ Limassol price=350000.0 area=...
5749238,7.3913,https://www.bazaraki.com/adv/5749238_2-bedroom-apartment-for-sale/ Larnaca price=165000.0 area=8...


In [14]:
now = datetime.now().isoformat(sep=" ", timespec="seconds")
file_path = Path(f"output/reports/{now}_report.txt")
file_path.parent.mkdir(parents=True, exist_ok=True)
with file_path.open("w") as fp:
    for _, row in report_df.sort_values("mean_yield", ascending=False).iterrows():
        fp.write(row.text + "\n")
        

In [15]:
saledf.iloc[1:2].apply(lambda r: rent_finder(rentdf=rentdf, radius_km=1, sale_row=r, verbose=False), axis=1, result_type='expand')

Unnamed: 0_level_0,mean_yield,text
ad_id,Unnamed: 1_level_1,Unnamed: 2_level_1
5528482,,https://www.bazaraki.com/adv/5528482_2-bedroom-apartment-for-sale/ Limassol price=360000.0 area=...


In [16]:
# ENRICHING
def enrich(df):
    resultdf = saledf.parallel_apply(lambda r: rent_finder(rentdf=rentdf, radius_km=1, sale_row=r, verbose=False), axis=1, result_type='expand')
    return pd.concat([df, resultdf], axis=1)

enriched = enrich(saledf)
enriched

VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=342), Label(value='0 / 342'))), HB…

Unnamed: 0_level_0,url,title,price,original_price,price_per_sqm,location,posted,reference_number,views,lat,lng,sold,cat0,cat1,Property area,Pets,Type,Parking,Plot area,Furnishing,Included,Online viewing,Air conditioning,Energy Efficiency,Bedrooms,Bathrooms,images,description,Floor,Postal code,Construction year,Reference number,Condition,Square meter price,Minimum stay,Land type,Plot Type,Parcel number,Planning zone,Registration number,Share,Density,Coverage,Registration block,Area,Pick a point,posted_dt,delete_date,city,district,mean_yield,text
ad_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1
5529817,https://www.bazaraki.com/adv/5529817_1-bedroom-apartment-for-sale/,1-bedroom apartment fоr sаle,255000.0000,,5100.0000,"Limassol, Limassol - Agios Ioannis",06.12.2024 16:56,,136,34.6774,33.0228,False,Cyprus real estate for sale,"Apartments, flats for sale",50.0000,,Apartment,No,,,Balcony,,"Full, all rooms",A,1,1,"[https://cdn1.bazaraki.com/media/cache1/4b/ed/4beddcd595c0cd8c2e98f1e1023820f9.webp, https://cdn...","Apartment in Limassol, Agios Ioannis\n\n✨ Cozy 1-bedroom apartment with all essentials:\n✅ Inte...",,,,233492,Brand new,€5.100 /,,,,,,,,,,,,,2024-12-06T16:56:00,,Limassol,Limassol - Agios Ioannis,4.7059,https://www.bazaraki.com/adv/5529817_1-bedroom-apartment-for-sale/ Limassol price=255000.0 area=...
5528482,https://www.bazaraki.com/adv/5528482_2-bedroom-apartment-for-sale/,2-bedroom apartment fоr sаle,360000.0000,,3302.7523,"Limassol, Limassol - Agia Fyla",14.11.2024 08:37,,35,,,False,Cyprus real estate for sale,"Apartments, flats for sale",109.0000,,Apartment,Covered,,Semi-Furnished,"Elevator, Alarm, Balcony, Storage room",No,"Full, all rooms",A,2,2,"[https://cdn1.bazaraki.com/media/cache1/4f/61/4f61e35d0ea3268ba46dfbdcb9c085bf.webp, https://cdn...",Indulge in luxury living in Limassol City with this 2-bedroom apartment presented by TriBro.\nOu...,2nd,3117.0000,2024,,Brand new,€3.303 /,,,,,,,,,,,,,2024-11-14T08:37:00,,Limassol,Limassol - Agia Fyla,,https://www.bazaraki.com/adv/5528482_2-bedroom-apartment-for-sale/ Limassol price=360000.0 area=...
5291456,https://www.bazaraki.com/adv/5291456_studio-apartment-for-sale/,Studio apartment fоr sаle,85000.0000,,2297.2973,"Nicosia, Geri",14.11.2024 09:28,,1179,,,False,Cyprus real estate for sale,"Apartments, flats for sale",37.0000,,Apartment,Covered,,Fully Furnished,"Elevator, Storage room",No,"Full, all rooms",,Studio,1,"[https://cdn1.bazaraki.com/media/cache1/e5/d8/e5d8c177aa738bf3b9d9893019d8b6b7.webp, https://cdn...","Apartment Studio for sale - Geri, Nicosia - Title deed.\nΉσυχη περιοχή με εύκολη πρόσβαση στο Πα...",2nd,,2007,,Resale,€2.297 /,,,,,,,,,,,,,2024-11-14T09:28:00,,Nicosia,Geri,,https://www.bazaraki.com/adv/5291456_studio-apartment-for-sale/ Nicosia price=85000.0 area=37.0 ...
5530886,https://www.bazaraki.com/adv/5530886_1-bedroom-apartment-for-sale/,1-bedroom apartment fоr sаle,139000.0000,,2171.8750,"Nicosia, Latsia - Agios Georgios",15.11.2024 18:36,,93,,,False,Cyprus real estate for sale,"Apartments, flats for sale",64.0000,,Apartment,Covered,,Unfurnished,"Garden, Elevator, Balcony, Storage room",No,No,A,1,1,"[https://cdn1.bazaraki.com/media/cache1/95/8a/958a5e5fa8e171854f400f79c3cdfd11.webp, https://cdn...",🏡 Spacious Apartment for Sale in Latsia – Ready to move in!\nSpacious one bedroom apartment read...,1st,,2024,,Brand new,€2.172 /,,,,,,,,,,,,,2024-11-15T18:36:00,,Nicosia,Latsia - Agios Georgios,,https://www.bazaraki.com/adv/5530886_1-bedroom-apartment-for-sale/ Nicosia price=139000.0 area=6...
5486323,https://www.bazaraki.com/adv/5486323_1-bedroom-apartment-for-sale/,1-bedroom apartment fоr sаle,225000.0000,,4500.0000,"Limassol, Agios Tychon Tourist Area",14.11.2024 23:49,,93,34.7097,33.1280,False,Cyprus real estate for sale,"Apartments, flats for sale",50.0000,,Apartment,No,,,"Pool, Elevator, Balcony",,,A,1,1,"[https://cdn1.bazaraki.com/media/cache1/14/d4/14d4d8f27af67bdbe3554ae7ea1ba019.webp, https://cdn...",For sale 1 bedroom apartment in the tourist area of Limassol. The apartments are located in a ga...,,4532.0000,,233246,Resale,€4.500 /,,,,,,,,,,,,,2024-11-14T23:49:00,,Limassol,Agios Tychon Tourist Area,6.1647,https://www.bazaraki.com/adv/5486323_1-bedroom-apartment-for-sale/ Limassol price=225000.0 area=...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5749228,https://www.bazaraki.com/adv/5749228_2-bedroom-apartment-for-sale/,2-bedroom apartment fоr sаle,291000.0000,,3423.5294,"Nicosia, Egkomi",01:11,,0,,,False,Cyprus real estate for sale,"Apartments, flats for sale",85.0000,,Apartment,Covered,,Unfurnished,Storage room,,"Full, all rooms",A,2,2,"[https://cdn1.bazaraki.com/media/cache1/0a/73/0a73c20eec2348fde3f7cd80ec5beab5.webp, https://cdn...","Καινούργιο διαμέρισμα 2 υπνοδωματίων με εσωτερικό εμβαδόν 85 τμ, περιοχή Έγκωμη, επαρχία Λευκωσί...",2nd,2413.0000,2025,145175,Brand new,€3.424 /,,,,,,,,,,,,,2025-04-12T01:11:00,,Nicosia,Egkomi,,https://www.bazaraki.com/adv/5749228_2-bedroom-apartment-for-sale/ Nicosia price=291000.0 area=8...
5749229,https://www.bazaraki.com/adv/5749229_2-bedroom-apartment-for-sale/,2-bedroom apartment fоr sаle,221000.0000,,2946.6667,"Larnaca, Livadia Larnakas",01:11,,0,,,False,Cyprus real estate for sale,"Apartments, flats for sale",75.0000,,Apartment,Uncovered,,Unfurnished,Balcony,,"Full, all rooms",A,2,2,"[https://cdn1.bazaraki.com/media/cache1/ba/1f/ba1f872bcced0b7d7709ed3bcdc37c3d.webp, https://cdn...","New apartment 2 bedrooms with built area of 75 sqrm, in the area of Leivadia, Livadia within Lar...",1st,7060.0000,2025,145928,Brand new,€2.947 /,,,,,,,,,,,,,2025-04-12T01:11:00,,Larnaca,Livadia Larnakas,,https://www.bazaraki.com/adv/5749229_2-bedroom-apartment-for-sale/ Larnaca price=221000.0 area=7...
5749236,https://www.bazaraki.com/adv/5749236_2-bedroom-apartment-for-sale/,2-bedroom apartment fоr sаle,350000.0000,,3333.3333,"Limassol, Agios Athanasios",01:15,,0,34.7149,33.0630,False,Cyprus real estate for sale,"Apartments, flats for sale",105.0000,,Apartment,Covered,,,Storage room,,,A,2,2,"[https://cdn1.bazaraki.com/media/cache1/c7/56/c75657979f817f4716f4ff4c36cf1a5d.webp, https://cdn...","We are happy to present this new spacious apartment located conveniently on Agios Athanasios, ju...",,,,480031143-90,Resale,€3.333 /,,,,,,,,,,,,,2025-04-12T01:15:00,,Limassol,Agios Athanasios,7.1053,https://www.bazaraki.com/adv/5749236_2-bedroom-apartment-for-sale/ Limassol price=350000.0 area=...
5749238,https://www.bazaraki.com/adv/5749238_2-bedroom-apartment-for-sale/,2-bedroom apartment fоr sаle,165000.0000,,1941.1765,"Larnaca, Larnaka - Sotiros",01:15,,0,34.9174,33.6133,False,Cyprus real estate for sale,"Apartments, flats for sale",85.0000,,Apartment,Covered,,,Storage room,,,C,2,1,"[https://cdn1.bazaraki.com/media/cache1/dd/d3/ddd364fd8b3b144d7bba2d2a13ac17fb.webp, https://cdn...","🏡 For Sale: 2-Bedroom Apartment in Central Drosia, Larnaca\nA fantastic opportunity to own a bri...",,6036.0000,,480091019-286,Resale,€1.941 /,,,,,,,,,,,,,2025-04-12T01:15:00,,Larnaca,Larnaka - Sotiros,7.3913,https://www.bazaraki.com/adv/5749238_2-bedroom-apartment-for-sale/ Larnaca price=165000.0 area=8...


In [17]:
def report(fp, df, rentdf):
    for index, row in tqdm(enriched.sort_values("mean_yield", ascending=False).iterrows()):
        row_str = f"{row.url} {row.city} price={row.price} area={row['Property area']} price_per_sqm={int(row['price_per_sqm'])} mean_yield={row['mean_yield']:.2f} " \
                  f"max_price_per_sqm:{row.max_price_per_sqm:.2f} min_price_per_sqm={row.min_price_per_sqm:.2f} avg_price_per_sqm={row.avg_price_per_sqm:.2f} " \
                  f"mean_price_per_sqm={row.mean_price_per_sqm:.2f} count={row['count']}"
        print(row_str, file=fp)
        for url in row.urls[:10]:
            result = rentdf[rentdf.url == url]
            if not result.empty:
                print(f"\t{url} price={int(result.price)} price_per_sqm={int(result.price_per_sqm)}", file=fp)

now = datetime.now().isoformat(sep=" ", timespec="seconds")
file_path = Path(f"output/reports/{now}_report.txt")
file_path.parent.mkdir(parents=True, exist_ok=True)
with file_path.open("w") as fp:
    report(fp, df=enriched, rentdf=rentdf)

0it [00:00, ?it/s]


AttributeError: 'Series' object has no attribute 'max_price_per_sqm'

In [None]:
enriched.sort_values("yield", price_per_sqm=False).head(20)

TypeError: DataFrame.sort_values() got an unexpected keyword argument 'price_per_sqm'