In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

# Load Zillow house price data and Federal Reserve economic data
fed_data = ["CPIAUCSL.csv", "RRVRUSQ156N.csv", "MORTGAGE30US.csv"]
zillow_data = "Metro_zhvi_uc_sfrcondo_tier_0.33_0.67_month.csv"

# Read every file with its 'DATE' column as the index; parse_dates=True asks
# pandas to parse that index as dates. (The keyword is `index_col`; the
# previous `index_index` spelling is rejected by read_csv with a TypeError.)
dfs = [pd.read_csv(f, parse_dates=True, index_col='DATE') for f in fed_data]
zillow = pd.read_csv(zillow_data, parse_dates=True, index_col='DATE')

# Merge datasets on the shared date index: place the frames side by side,
# forward-fill gaps left by index values that not every series reports, then
# drop rows that are still incomplete.
# NOTE(review): the feature-engineering cell reads a "price" column - confirm
# the Zillow CSV really exposes both a 'DATE' column and a "price" column.
price_data = pd.concat(dfs + [zillow], axis=1).ffill().dropna()

## 2. Settlyfe Spatio-Temporal Feature Engineering

In [None]:
def create_settlyfe_features(df):
    """Build the seasonal, trend and target columns used by the model.

    The input frame is left untouched; all new columns are added to a copy.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a "price" column and have an index exposing ``.month``
        (e.g. a ``DatetimeIndex``).

    Returns
    -------
    pandas.DataFrame
        A new frame with the original columns plus ``month``, ``month_sin``,
        ``month_cos``, ``price_change_3m``, ``price_change_12m`` and
        ``target``. Rows containing any NaN (the leading rows that lack a
        12-row look-back and the trailing rows that lack a 3-row look-ahead)
        are dropped.
    """
    # Work on a copy so the caller's DataFrame does not silently gain columns.
    features = df.copy()

    # 1. Temporal: encode the month on a circle (sine/cosine) so that
    #    December and January end up adjacent.
    features["month"] = features.index.month
    features["month_sin"] = np.sin(2 * np.pi * features["month"] / 12)
    features["month_cos"] = np.cos(2 * np.pi * features["month"] / 12)

    # 2. Market trend: percentage change over the previous 3 and 12 rows
    #    (months, assuming monthly data).
    features["price_change_3m"] = features["price"].pct_change(3)
    features["price_change_12m"] = features["price"].pct_change(12)

    # 3. Target: the price 3 rows (months) in the future.
    features["target"] = features["price"].shift(-3)
    return features.dropna()

# Rebind price_data to the feature-engineered frame. create_settlyfe_features
# ends with dropna(), so rows missing any feature or the target are removed.
# NOTE(review): re-running this cell feeds the already-engineered frame back
# in - confirm that is acceptable, or keep the merged frame under its own name.
price_data = create_settlyfe_features(price_data)

## 3. Backtesting engine

In [None]:
def backtest(data, model, predictors, start=24, step=6, gap=0):
    """Walk-forward (expanding-window) backtest of ``model`` on ``data``.

    Starting at row ``start``, the model is refit on the rows before the
    current position and used to predict the next ``step`` rows; the position
    then advances by ``step`` until the data is exhausted.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the ``predictors`` columns and a "target" column.
    model : object
        Estimator exposing ``fit(X, y)`` and ``predict(X)``.
    predictors : list of str
        Feature columns passed to the model.
    start : int, default 24
        Number of initial rows reserved for the first training window.
    step : int, default 6
        Number of rows predicted per fold.
    gap : int, default 0
        Number of rows dropped from the end of each training window. When the
        target is a value shifted ``h`` rows into the future, the last ``h``
        training rows carry targets that overlap the test window; passing
        ``gap=h`` removes that look-ahead. The default keeps the original
        behaviour (no rows dropped).

    Returns
    -------
    pandas.DataFrame
        Columns "actual" and "prediction", indexed like the predicted rows.
        Empty (with those two columns) when no fold could be evaluated, e.g.
        when ``data`` has ``start`` rows or fewer.

    Raises
    ------
    ValueError
        If ``gap`` is negative.
    """
    if gap < 0:
        raise ValueError("gap must be non-negative")

    all_predictions = []

    for i in range(start, data.shape[0], step):
        # Train on everything before the test window, minus the purged gap.
        train = data.iloc[0:max(i - gap, 0)]
        test = data.iloc[i:(i + step)]

        # Nothing to learn from yet (start/gap leave no training rows).
        if train.empty:
            continue

        model.fit(train[predictors], train["target"])
        preds = model.predict(test[predictors])

        # Pair each prediction with its actual value by position, keeping the
        # test rows' index.
        combined = pd.DataFrame(
            {"actual": test["target"].to_numpy(), "prediction": preds},
            index=test.index,
        )
        all_predictions.append(combined)

    # pd.concat raises on an empty list; return an empty, well-formed frame.
    if not all_predictions:
        return pd.DataFrame(
            columns=["actual", "prediction"], index=data.index[:0], dtype=float
        )

    return pd.concat(all_predictions)

## 4. Training and Evaluation

We evaluate using Median Absolute Percentage Error (MdAPE) to mirror Zillow's accuracy metrics.

In [None]:
# Feature columns fed to the model.
predictors = [
    "CPIAUCSL",
    "MORTGAGE30US",
    "month_sin",
    "month_cos",
    "price_change_12m",
]
# Fixed random_state keeps the forest reproducible between runs.
model = RandomForestRegressor(
    random_state=1,
    min_samples_split=10,
    n_estimators=100,
)

# Run the walk-forward backtest with its default window settings.
predictions = backtest(price_data, model, predictors)

# Calculate Accuracy (MdAPE): median of |actual - prediction| / actual, in percent.
mdape = np.median(
    predictions["actual"]
    .sub(predictions["prediction"])
    .div(predictions["actual"])
    .abs()
) * 100
print(f"Settlyfe LyfeEstimate MdAPE: {mdape:.2f}%")

## 5. Diagnostics & Visualization

In [None]:
# Draw the actual and predicted series from the backtest on one chart.
ax = predictions.plot(
    title="Actual vs Predicted House Prices (LyfeEstimate)",
    figsize=(10, 5),
)
ax.set_ylabel("Price Index")
plt.show()