# 04 Risk & Yield Mapping
Combine hedonic residuals, official sales and rents to compute district yields and risk quadrants.

### Load hedonic residuals and official sales and rents data

In [1]:
import sys
import warnings
from pathlib import Path

import pandas as pd

# Resolve the project root: when the working directory is notebooks/, use its parent.
ROOT = Path.cwd()
if ROOT.name == 'notebooks':
    ROOT = ROOT.parent

sale_path = ROOT / 'data/official/official_sale_flat_final.csv'
rent_path = ROOT / 'data/official/official_rent_flat_final.csv'
resid_path = ROOT / 'data/processed/district_residuals.csv'
df_sale = pd.read_csv(sale_path)
df_rent = pd.read_csv(rent_path)
if resid_path.exists():
    df_resid = pd.read_csv(resid_path)
else:
    # The residual file is optional: fall back to an empty frame with the
    # expected columns, but report it instead of degrading silently.
    warnings.warn(f'{resid_path} not found; continuing without residuals')
    df_resid = pd.DataFrame(columns=['district','resid'])

# Strip surrounding whitespace so district names compare equal across sources.
for df in (df_sale, df_rent, df_resid):
    if 'district' in df.columns:
        df['district'] = df['district'].str.strip()

# Districts present in every available source; residuals only narrow the set
# when at least one residual row was loaded.
sale_set = set(df_sale['district'])
rent_set = set(df_rent['district'])
resid_set = set(df_resid['district']) if not df_resid.empty else set()
common = sale_set & rent_set
if resid_set:
    common &= resid_set

if common:
    df_sale = df_sale[df_sale['district'].isin(common)].copy()
    df_rent = df_rent[df_rent['district'].isin(common)].copy()
    if not df_resid.empty:
        df_resid = df_resid[df_resid['district'].isin(common)].copy()
else:
    # Preserve the original behaviour (frames stay unfiltered) but make the
    # mismatch visible, since it usually means the district names disagree.
    warnings.warn('no district is shared by the loaded sources; frames left unfiltered')


### Build the district metrics table

In [2]:
# One numeric price column per source; values that fail to parse become NaN.
sale_metrics = df_sale[['district']].assign(
    sale_ppm2=pd.to_numeric(df_sale['ppm2_all'], errors='coerce')
)
rent_metrics = df_rent[['district']].assign(
    rent_ppm2=pd.to_numeric(df_rent['rent_ppm2_all'], errors='coerce')
)
# Outer-join sale and rent figures per district, then attach residuals where available.
metrics = (
    sale_metrics
    .merge(rent_metrics, on='district', how='outer')
    .merge(df_resid[['district','resid']], on='district', how='left')
)
metrics.head()


Unnamed: 0,district,sale_ppm2,rent_ppm2,resid
0,Банишора,2470,8.6,3.077277e-14
1,Белите брези,3088,9.25,3.65422e-14
2,Борово,2861,8.0,3.274675e-14
3,Бояна,2690,10.02,4.413564e-14
4,Бъкстон,2505,8.95,5.03724e-14


### Compute yield and fill missing residuals

In [None]:
# Keep only districts that have both a sale and a rent figure. The explicit
# copy makes the column assignments below act on an independent frame, which
# avoids SettingWithCopyWarning on pandas versions that track derived frames.
metrics = metrics.dropna(subset=['sale_ppm2','rent_ppm2']).copy()
# A non-positive sale price cannot give a meaningful ratio; masking it to NaN
# keeps inf out of yield_pct (classify reports a NaN yield as 'unknown').
valid_sale_ppm2 = metrics['sale_ppm2'].where(metrics['sale_ppm2'] > 0)
# NOTE(review): if rent_ppm2 is a monthly figure this is a monthly yield;
# an annual yield would need a factor of 12 - confirm the intended unit.
metrics['yield_pct'] = (metrics['rent_ppm2'] / valid_sale_ppm2 * 100).round(2)
# Districts without a residual are treated as having a residual of zero.
metrics['resid'] = metrics['resid'].fillna(0)
metrics[['district','sale_ppm2','rent_ppm2','yield_pct','resid']].head()

Unnamed: 0,district,sale_ppm2,rent_ppm2,yield_pct,resid
0,Банишора,2470,8.6,0.35,3.077277e-14
1,Белите брези,3088,9.25,0.3,3.65422e-14
2,Борово,2861,8.0,0.28,3.274675e-14
3,Бояна,2690,10.02,0.37,4.413564e-14
4,Бъкстон,2505,8.95,0.36,5.03724e-14


### Combine and classify

In [4]:
# Quadrant boundaries: the median of each axis over all districts.
# NOTE(review): the residuals shown in this notebook's outputs are on the
# order of 1e-14 (numerically zero); confirm that a median split of them is
# meaningful.
yield_med, resid_med = (metrics[axis].median() for axis in ('yield_pct', 'resid'))
def classify(row, yield_threshold=None, resid_threshold=None):
    """Label one district row with its yield/residual quadrant.

    Args:
        row: Mapping-like object (for example a DataFrame row) providing
            'yield_pct' and 'resid'.
        yield_threshold: Split point on the yield axis. Defaults to the
            module-level ``yield_med``.
        resid_threshold: Split point on the residual axis. Defaults to the
            module-level ``resid_med``.

    Returns:
        'unknown' when either value is missing. Otherwise one of
        'overvalued_low_yield', 'high_price_high_yield',
        'low_price_low_yield' or 'undervalued_high_yield' when the row lies
        strictly on one side of both thresholds, and 'speculative' when it
        does not (a value equal to a threshold, or a NaN threshold).
    """
    # Fall back to the notebook-level medians so existing single-argument
    # callers such as DataFrame.apply keep working unchanged.
    if yield_threshold is None:
        yield_threshold = yield_med
    if resid_threshold is None:
        resid_threshold = resid_med

    yield_value = row['yield_pct']
    resid_value = row['resid']
    if pd.isna(yield_value) or pd.isna(resid_value):
        return 'unknown'

    # Comparisons are strict on purpose: a point sitting exactly on a
    # threshold belongs to no quadrant and falls through to 'speculative'.
    low_yield = yield_value < yield_threshold
    high_yield = yield_value > yield_threshold
    low_resid = resid_value < resid_threshold
    high_resid = resid_value > resid_threshold

    if low_yield and high_resid:
        return 'overvalued_low_yield'
    if high_yield and high_resid:
        return 'high_price_high_yield'
    if low_yield and low_resid:
        return 'low_price_low_yield'
    if high_yield and low_resid:
        return 'undervalued_high_yield'
    return 'speculative'
# Classify every district row, then preview the labelled columns.
quadrant_labels = metrics.apply(classify, axis=1)
metrics['quadrant'] = quadrant_labels
metrics.loc[:, ['district','yield_pct','resid','quadrant']].head()


Unnamed: 0,district,yield_pct,resid,quadrant
0,Банишора,0.35,3.077277e-14,low_price_low_yield
1,Белите брези,0.3,3.65422e-14,low_price_low_yield
2,Борово,0.28,3.274675e-14,low_price_low_yield
3,Бояна,0.37,4.413564e-14,high_price_high_yield
4,Бъкстон,0.36,5.03724e-14,speculative


### Interactive visualization

In [5]:
import plotly.express as px

# Renumber the rows from zero and expose a 1-based id for the hover tooltip.
metrics = metrics.reset_index(drop=True)
metrics = metrics.assign(plot_id=metrics.index + 1)

# One fixed colour per quadrant label produced by classify.
palette = {
    'overvalued_low_yield': '#d95f02',
    'high_price_high_yield': '#1b9e77',
    'low_price_low_yield': '#7570b3',
    'undervalued_high_yield': '#e7298a',
    'speculative': '#999999',
}
hover_fields = dict.fromkeys(('yield_pct', 'resid', 'plot_id'), True)
axis_labels = {'resid': 'Mean residual', 'yield_pct': 'Yield (%)'}

px_fig = px.scatter(
    metrics,
    x='yield_pct',
    y='resid',
    color='quadrant',
    hover_name='district',
    hover_data=hover_fields,
    color_discrete_map=palette,
    labels=axis_labels,
    title='Interactive Yield vs Residual',
)
px_fig.update_traces(marker={'size': 10})

# Dashed guides at the two medians mark the quadrant boundaries.
median_line_style = {'line_dash': 'dash', 'line_color': 'black'}
px_fig.add_vline(x=yield_med, **median_line_style)
px_fig.add_hline(y=resid_med, **median_line_style)
px_fig


### Save results

In [6]:
# Write the district metrics table to data/processed/.
output = ROOT / 'data/processed/district_metrics.csv'
# Create the target directory first so to_csv does not fail when
# data/processed/ does not exist yet.
output.parent.mkdir(parents=True, exist_ok=True)
metrics.to_csv(output, index=False)
output

PosixPath('/home/gogo/dev/sofia-real-estate-risk-map/data/processed/district_metrics.csv')