In [20]:
# ---- Update these paths ----
INPUT_CSV = "G:\Real Estate Market Analysis\Model_2\Model2_AcquisitionPrices_combined.csv"
OUTPUT_CSV = "G:\Real Estate Market Analysis\Model_3\Model3_SellingPrices_combined2.csv"

# Reproducibility
SEED = 42


  INPUT_CSV = "G:\Real Estate Market Analysis\Model_2\Model2_AcquisitionPrices_combined.csv"
  OUTPUT_CSV = "G:\Real Estate Market Analysis\Model_3\Model3_SellingPrices_combined2.csv"


In [21]:
import numpy as np
import pandas as pd
from typing import Dict, Tuple


In [22]:
# -----------------------------
# Model 3 v3 Assumptions
# -----------------------------

# Base tier assignment by age
AGE_TIERS = [
    (20, "Light"),
    (50, "Medium"),
    (10**9, "Heavy"),
]

DISTRESS_BUMP_ENABLED = True

# ↓ Lower & tighter cost-per-sqft bands
COST_PSF_RANGES: Dict[str, Tuple[float, float]] = {
    "Light": (10, 18),
    "Medium": (18, 32),
    "Heavy": (32, 55),
}

# ↓ Smaller bath premiums
BATH_PREMIUM: Dict[str, float] = {
    "Light": 800,
    "Medium": 1400,
    "Heavy": 2200,
}

# ↑ Better aligned uplift ranges
RENO_UPLIFT_RANGES: Dict[str, Tuple[float, float]] = {
    "Light": (0.06, 0.10),
    "Medium": (0.12, 0.20),
    "Heavy": (0.20, 0.32),
}

# Eco + CX uplift ranges
ECO_UPLIFT_RANGE = (0.01, 0.035)
WARRANTY_UPLIFT_RANGE = (0.008, 0.018)
FAST_CLOSE_UPLIFT_RANGE = (0.006, 0.012)

# Strategy policy
DEFAULT_WARRANTY_FLAG = 1
FAST_CLOSE_SCORE_THRESHOLD = 0.50  # slightly easier
ECO_SCORE_THRESHOLD = 0.50         # market-driven eco strategy

# Scenario adjustments
SCENARIO_ADJ = {
    "base": 0.00,
    "bull": 0.04,
    "bear": -0.04,
}

# ★ Stronger cost cap to improve feasible deals
# This is the major lever to reach ~20% BUY.
RENO_COST_CAP_PCT_OF_FAIR = 0.20   # was ~0.25–0.28 previously


In [23]:
def assign_base_tier(age: float) -> str:
    if pd.isna(age):
        return "Medium"
    for max_age, tier in AGE_TIERS:
        if age <= max_age:
            return tier
    return "Medium"


def bump_tier(tier: str) -> str:
    order = ["Light", "Medium", "Heavy"]
    if tier not in order:
        return tier
    idx = order.index(tier)
    return order[min(idx + 1, len(order) - 1)]


def random_uniform_by_tier(tiers: pd.Series, ranges: Dict[str, Tuple[float, float]], rng: np.random.Generator):
    out = np.empty(len(tiers), dtype=float)
    for i, t in enumerate(tiers):
        lo, hi = ranges.get(t, (np.nan, np.nan))
        out[i] = rng.uniform(lo, hi) if not (np.isnan(lo) or np.isnan(hi)) else np.nan
    return out


In [24]:
def build_model3_v3(df: pd.DataFrame, seed: int = 42) -> pd.DataFrame:
    rng = np.random.default_rng(seed)
    df = df.copy()

    # 1) Base tier from age
    if "age" in df.columns:
        df["reno_tier"] = df["age"].apply(assign_base_tier)
    else:
        df["reno_tier"] = "Medium"

    # 2) Distress bump (still strict)
    if DISTRESS_BUMP_ENABLED and "closed_price" in df.columns and "fair_low_95" in df.columns:
        distress_mask = (
            df["closed_price"].notna() &
            df["fair_low_95"].notna() &
            (df["closed_price"] < 0.90 * df["fair_low_95"])
        )
        df.loc[distress_mask, "reno_tier"] = df.loc[distress_mask, "reno_tier"].apply(bump_tier)

    # 3) cost_per_sqft
    df["cost_per_sqft"] = random_uniform_by_tier(df["reno_tier"], COST_PSF_RANGES, rng)

    # 4) Base reno cost
    if "sqft" in df.columns:
        df["estimated_reno_cost"] = df["sqft"].fillna(0) * df["cost_per_sqft"]
    else:
        df["estimated_reno_cost"] = np.nan

    # 5) Bath premium
    if "baths" in df.columns:
        bath_premium = df["reno_tier"].map(BATH_PREMIUM).fillna(0)
        df["estimated_reno_cost"] = df["estimated_reno_cost"] + df["baths"].fillna(0) * bath_premium

    # 6) ★ Strong reno cost cap
    if "predicted_fair_price" in df.columns:
        cap = df["predicted_fair_price"].astype(float) * RENO_COST_CAP_PCT_OF_FAIR
        df["estimated_reno_cost"] = np.where(
            cap.notna(),
            np.minimum(df["estimated_reno_cost"].astype(float), cap),
            df["estimated_reno_cost"]
        )

    # 7) Renovation days
    days_ranges = {
        "Light": (14, 28),
        "Medium": (25, 50),
        "Heavy": (45, 90),
    }
    days = random_uniform_by_tier(df["reno_tier"], days_ranges, rng)
    df["estimated_reno_days"] = pd.Series(days).round().astype("Int64")

    # 8) Market-driven CX/eco flags
    score = df["Final_City_Score"].fillna(0) if "Final_City_Score" in df.columns else 0

    df["warranty_flag"] = DEFAULT_WARRANTY_FLAG
    df["fast_close_flag"] = (score >= FAST_CLOSE_SCORE_THRESHOLD).astype(int)
    df["eco_upgrade_flag"] = (score >= ECO_SCORE_THRESHOLD).astype(int)

    # 9) Uplift components
    df["reno_uplift_pct"] = random_uniform_by_tier(df["reno_tier"], RENO_UPLIFT_RANGES, rng)

    eco_uplift = rng.uniform(ECO_UPLIFT_RANGE[0], ECO_UPLIFT_RANGE[1], len(df))
    df["eco_uplift_pct"] = np.where(df["eco_upgrade_flag"] == 1, eco_uplift, 0.0)

    warranty_uplift = rng.uniform(WARRANTY_UPLIFT_RANGE[0], WARRANTY_UPLIFT_RANGE[1], len(df))
    fast_uplift = rng.uniform(FAST_CLOSE_UPLIFT_RANGE[0], FAST_CLOSE_UPLIFT_RANGE[1], len(df))

    df["cx_uplift_pct"] = 0.0
    df.loc[df["warranty_flag"] == 1, "cx_uplift_pct"] += warranty_uplift[df["warranty_flag"] == 1]
    df.loc[df["fast_close_flag"] == 1, "cx_uplift_pct"] += fast_uplift[df["fast_close_flag"] == 1]

    df["total_uplift_pct"] = df["reno_uplift_pct"] + df["eco_uplift_pct"] + df["cx_uplift_pct"]

    # 10) Sell prices
    if "predicted_fair_price" in df.columns:
        base = df["predicted_fair_price"].astype(float) * (1 + df["total_uplift_pct"].astype(float))
        df["predicted_sell_price_base"] = base
        df["predicted_sell_price_bull"] = base * (1 + SCENARIO_ADJ["bull"])
        df["predicted_sell_price_bear"] = base * (1 + SCENARIO_ADJ["bear"])
    else:
        df["predicted_sell_price_base"] = np.nan
        df["predicted_sell_price_bull"] = np.nan
        df["predicted_sell_price_bear"] = np.nan

    return df


In [25]:
df2 = pd.read_csv(INPUT_CSV)
print("Model 2 shape:", df2.shape)
df2.head()


Model 2 shape: (1562, 19)


Unnamed: 0,property_id,metro,city,state,zip,beds,baths,sqft,lot_size,age,closed_price,predicted_fair_price,fair_price_low_95,fair_price_high_95,max_offer_price,review_flag,CBSA,CBSA_NAME,Final_City_Score
0,1,Peoria,Peoria,IL,61615,3,1.0,1897,9170,56,176500,186893.9,140170.425,233617.375,177549.205,REVIEW_REQUIRED,37900,"Peoria, IL",0.595813
1,2,Peoria,Morton,IL,61550,4,2.5,2168,13200,50,330000,264995.3828,198746.5371,331244.2285,251745.6137,OK,37900,"Peoria, IL",0.595813
2,3,Peoria,Washington,IL,61571,3,1.0,874,11600,77,115000,113478.8966,85109.17246,141848.6208,107804.9518,REVIEW_REQUIRED,37900,"Peoria, IL",0.595813
3,4,Peoria,Pekin,IL,61554,3,2.5,1928,17859,63,159900,225758.2468,169318.6851,282197.8085,214470.3344,OK,37900,"Peoria, IL",0.595813
4,5,Peoria,Peoria Heights,IL,61616,2,1.0,720,6098,75,105000,82298.07925,61723.55944,102872.5991,78183.17529,OK,37900,"Peoria, IL",0.595813


In [26]:
df3 = build_model3_v3(df2, seed=SEED)
print("Model 3 v3 shape:", df3.shape)

df3[[
    "reno_tier", "cost_per_sqft", "estimated_reno_cost",
    "eco_upgrade_flag", "fast_close_flag", "warranty_flag",
    "reno_uplift_pct", "eco_uplift_pct", "cx_uplift_pct", "total_uplift_pct",
    "predicted_sell_price_base"
]].head(10)


Model 3 v3 shape: (1562, 33)


Unnamed: 0,reno_tier,cost_per_sqft,estimated_reno_cost,eco_upgrade_flag,fast_close_flag,warranty_flag,reno_uplift_pct,eco_uplift_pct,cx_uplift_pct,total_uplift_pct,predicted_sell_price_base
0,Heavy,49.800989,37378.78,1,1,1,0.22285,0.017138,0.02096,0.260948,235663.544904
1,Medium,24.144298,52999.07656,1,1,1,0.135441,0.021327,0.021319,0.178087,312187.549508
2,Heavy,51.747752,22695.77932,1,1,1,0.265009,0.031414,0.026541,0.322964,150128.477328
3,Heavy,48.039465,45151.64936,1,1,1,0.215465,0.028215,0.017551,0.261231,284733.395803
4,Heavy,34.166079,16459.61585,1,1,1,0.282668,0.021791,0.021618,0.326077,109133.55598
5,Light,17.804979,46806.841206,1,1,1,0.097758,0.016114,0.01948,0.133352,407480.940862
6,Heavy,49.506213,26517.39108,1,1,1,0.251215,0.028175,0.026452,0.305842,173137.668514
7,Heavy,50.079479,49468.4558,1,1,1,0.308133,0.031363,0.027154,0.36665,338030.214167
8,Medium,19.793591,38970.114817,1,1,1,0.127865,0.019999,0.026346,0.17421,345741.925395
9,Medium,24.305403,46637.233866,1,1,1,0.123437,0.026715,0.017842,0.167993,332881.933602


In [27]:
df3["reno_to_fair_ratio"] = df3["estimated_reno_cost"] / df3["predicted_fair_price"]
df3["reno_to_fair_ratio"].describe()


count    1562.000000
mean        0.174903
std         0.044111
min         0.039378
25%         0.165140
50%         0.200000
75%         0.200000
max         0.200000
Name: reno_to_fair_ratio, dtype: float64

In [28]:
df3.to_csv(OUTPUT_CSV, index=False)
print("Saved:", OUTPUT_CSV)


Saved: G:\Real Estate Market Analysis\Model_3\Model3_SellingPrices_combined2.csv
