In [1]:
import pandas as pd
import re

def parse_price(raw):
    """Convert a raw price value into a whole number.

    Numeric inputs are rounded to the nearest integer. Any other input is
    converted with ``str()``, stripped of everything except digits, ``.``
    and ``,``, and then interpreted as follows:

    * both ``,`` and ``.`` present: whichever occurs first is treated as the
      thousands separator and the other as the decimal mark;
    * only one kind of separator present: it is a thousands separator when
      it occurs more than once or when it splits the number into a 1-3
      digit group (not starting with 0) followed by exactly three digits
      (``"450,000"``, ``"1.200"``); otherwise it is the decimal mark.

    Args:
        raw: The raw price (number, string, or any object with a ``str()``
            form).

    Returns:
        The price rounded to an ``int``, or ``None`` when no number can be
        extracted (including NaN / infinite numeric input).
    """
    if isinstance(raw, (int, float)):
        try:
            return int(round(raw))
        except (ValueError, OverflowError):
            # round() raises ValueError for NaN and OverflowError for +/-inf.
            return None

    # Keep only digits and separators, then drop separators left dangling at
    # either end by surrounding text such as "Fr. 1200" or "1200.-".
    s = re.sub(r"[^\d\.,]", "", str(raw)).strip(".,")

    if "," in s and "." in s:
        if s.find(",") < s.find("."):
            # "1,234.56": comma groups thousands, dot is the decimal mark.
            s = s.replace(",", "")
        else:
            # "1.234,56": dot groups thousands, comma is the decimal mark.
            s = s.replace(".", "").replace(",", ".")
    elif "," in s or "." in s:
        sep = "," if "," in s else "."
        # A repeated separator, or a single one followed by exactly three
        # digits, is read as thousands grouping rather than a decimal mark.
        if s.count(sep) > 1 or re.fullmatch(r"[1-9]\d{0,2}[.,]\d{3}", s):
            s = s.replace(sep, "")
        else:
            s = s.replace(sep, ".")

    try:
        return int(round(float(s)))
    except (ValueError, OverflowError):
        # ValueError: nothing numeric left (e.g. "on request").
        # OverflowError: digit string too large for a finite float.
        return None

# Read the raw listings (JSON Lines: one record per line).
df = pd.read_json("data/raw_listings.jsonl", lines=True)

# Add a numeric price column derived from the raw "price" field.
df["numeric_price"] = df["price"].apply(parse_price)

# Separate listings by sale type, discarding rows whose price did not parse.
is_sale = df["sale_type"] == "buy"
is_rent = df["sale_type"] == "rent"
sale = df.loc[is_sale, "numeric_price"].dropna()
rent = df.loc[is_rent, "numeric_price"].dropna()

# Report the median of each group as a whole number of CHF.
median_sale = int(sale.median())
print(f"Median sale price: CHF {median_sale:,}")
median_rent = int(rent.median())
print(f"Median rent price: CHF {median_rent:,} / month")


Median sale price: CHF 1,000,000
Median rent price: CHF 1,420 / month
