In [None]:
'''
1. Data Ingestion Module: Raw market and security data.

2. Stochastic Interest Rate Model (The "Economy"): Generates future rate paths.

3. Prepayment Model (The "Behavior"): Predicts borrower speeds (CPR) on those paths.

4. Cash Flow Engine: Applies CPR to loan balances to generate cash flows.

5. Valuation & Reporting: Calculates Price, OAS, and Risk Metrics (Greeks).
'''


In [None]:
"""
MBS Pricing Engine (Tranche Support)
===============================================
Full-stack quantitative finance engine for valuing Sequential Pay CMOs (Tranches).

Components:
1. Market Data: SABR Volatility Calibration
2. Security Master: Pool Construction from Loan Level Data
3. Economy: Hull-White 1-Factor Stochastic Rate Simulation (constant mean-reversion level; normal volatility derived from the SABR calibration)
4. Behavior: Dynamic S-Curve Prepayment Model
5. Structuring: Sequential Pay Waterfall (Tranche A, B, C)
6. Valuation: Tranche-level Pricing, OAS, and Greeks

"""

import numpy as np
import pandas as pd
from scipy.optimize import minimize, brentq
import time

# =============================================================================
# SECTION 1: MARKET DATA ANALYTICS (SABR MODEL)
# =============================================================================
class SABRCalibrator:
    """Hagan's 2002 SABR Stochastic Volatility Model.

    Evaluates Hagan's closed-form implied-volatility approximation for a
    fixed ``beta`` and calibrates ``alpha``, ``rho`` and ``nu`` to a strip of
    quoted volatilities by least squares.
    """
    def __init__(self, beta=0.5):
        """Store the CEV exponent ``beta``, which is held fixed during calibration."""
        self.beta = beta

    def _sabr_vol(self, k, f, t, alpha, rho, nu):
        """Return the SABR implied volatility for a single strike.

        Args:
            k: Strike.
            f: Forward rate.
            t: Time to expiry (same time unit as the quoted vols).
            alpha: Initial volatility level.
            rho: Correlation between forward and volatility (-1 < rho < 1).
            nu: Volatility of volatility.

        Returns:
            The implied volatility from Hagan's expansion.
        """
        beta = self.beta
        if abs(k - f) < 1e-5:
            # At-the-money limit of the expansion (log(f/k) -> 0).
            term1 = (1 - beta)**2 / 24 * alpha**2 / (f**(2 - 2*beta))
            term2 = (rho * beta * nu * alpha) / (4 * f**(1 - beta))
            term3 = (2 - 3 * rho**2) / 24 * nu**2
            vol = (alpha / f**(1 - beta)) * (1 + (term1 + term2 + term3) * t)
            return vol

        log_fk = np.log(f / k)
        fk_beta = (f * k)**((1 - beta) / 2)
        z = (nu / alpha) * fk_beta * log_fk

        # z / x(z) tends to 1 as z -> 0. Use the limit directly for tiny z,
        # otherwise the 0/0 form either loses precision or (as before this
        # fix) multiplies the whole volatility by z ~ 0.
        if abs(z) < 1e-7:
            z_over_xz = 1.0
        else:
            xz = np.log((np.sqrt(1 - 2*rho*z + z**2) + z - rho) / (1 - rho))
            z_over_xz = z / xz

        numerator = alpha * (1 + ( ((1-beta)**2)/24 * alpha**2 / (f*k)**(1-beta) +
                                   (rho*beta*nu*alpha)/(4*fk_beta) +
                                   (nu**2 * (2-3*rho**2))/24 ) * t)
        denominator = fk_beta * (1 + ((1-beta)**2)/24 * log_fk**2 + ((1-beta)**4)/1920 * log_fk**4)
        return (numerator / denominator) * z_over_xz

    def calibrate(self, strikes, market_vols, F, T):
        """Fit ``alpha``, ``rho`` and ``nu`` to quoted volatilities.

        Args:
            strikes: Sequence of strikes.
            market_vols: Quoted implied vols, one per strike.
            F: Forward rate.
            T: Time to expiry.

        Returns:
            Dict with keys ``'alpha'``, ``'rho'``, ``'nu'`` and ``'success'``
            (the optimiser's convergence flag).
        """
        strike_arr = np.asarray(strikes, dtype=float)
        quote_arr = np.asarray(market_vols, dtype=float)

        def objective(params):
            alpha, rho, nu = params
            model_vols = np.array([self._sabr_vol(k, F, T, alpha, rho, nu) for k in strike_arr])
            return np.sum((model_vols - quote_arr)**2)

        # Seed alpha from the quote closest to the forward (the ATM relation
        # vol ~ alpha / F**(1 - beta)) rather than assuming the middle quote
        # is at the money.
        atm_vol = quote_arr[int(np.argmin(np.abs(strike_arr - F)))]
        initial_guess = [atm_vol * (F**(1-self.beta)), -0.2, 0.5]
        bounds = [(1e-5, None), (-0.999, 0.999), (1e-5, None)]
        result = minimize(objective, initial_guess, bounds=bounds, method='L-BFGS-B')
        return {'alpha': result.x[0], 'rho': result.x[1], 'nu': result.x[2], 'success': result.success}

    def get_vol(self, k, F, T, params):
        """Return the model vol at strike ``k`` for a parameter dict from ``calibrate``."""
        return self._sabr_vol(k, F, T, params['alpha'], params['rho'], params['nu'])


# =============================================================================
# SECTION 2: DATA INGESTION (POOL CONSTRUCTION)
# =============================================================================
class FreddiePoolConstructor:
    """Builds a collateral pool from loan-level origination data.

    Reads a pipe-delimited loan file (or generates mock loans if the file is
    missing), derives loan age, remaining term and current unpaid balance as
    of the valuation date, and exposes cohort filtering plus balance-weighted
    pool statistics.

    Required columns: ``FIRST_PAYMENT_DATE`` (YYYYMM), ``ORIGINAL_UPB``,
    ``ORIGINAL_INTEREST_RATE`` (in percent) and ``ORIGINAL_LOAN_TERM`` (months).
    """
    def __init__(self, filepath, valuation_date='2021-01-01'):
        """Load and preprocess the loan tape.

        Args:
            filepath: Path to the pipe-delimited loan file.
            valuation_date: Date (anything ``pd.to_datetime`` accepts) used to
                age the loans.
        """
        self.val_date = pd.to_datetime(valuation_date)
        # Start with an empty pool so get_pricing_stats() raises the intended
        # "Pool Empty" error if it is called before filter_cohort().
        self.pool_loans = pd.DataFrame()
        try:
            self.df = pd.read_csv(filepath, sep='|')
            print(f"Data Loaded: {len(self.df)} loans.")
        except FileNotFoundError:
            print("Warning: File not found. Creating Mock Data.")
            self.df = self._create_mock_data()
        self._preprocess_data()

    def _create_mock_data(self):
        """Return 1000 synthetic 30-year fixed-rate loans around a 4.5% coupon."""
        N = 1000
        return pd.DataFrame({
            'FIRST_PAYMENT_DATE': [201801]*N,
            'ORIGINAL_UPB': np.random.uniform(200000, 400000, N),
            'ORIGINAL_INTEREST_RATE': np.random.normal(4.5, 0.125, N),
            'ORIGINAL_LOAN_TERM': [360]*N,
            'AMORTIZATION_TYPE': ['FRM']*N
        })

    def _preprocess_data(self):
        """Add ``Loan_Age``, ``Rem_Term`` and ``Current_UPB`` columns to ``self.df``.

        Loans whose first payment is after the valuation date, or that have
        no remaining term, are dropped.
        """
        self.df['first_pay_dt'] = pd.to_datetime(self.df['FIRST_PAYMENT_DATE'].astype(str), format='%Y%m')
        self.df['Loan_Age'] = ((self.val_date.year - self.df['first_pay_dt'].dt.year) * 12 +
                               (self.val_date.month - self.df['first_pay_dt'].dt.month))
        self.df = self.df[self.df['Loan_Age'] >= 0].copy()
        self.df['Rem_Term'] = self.df['ORIGINAL_LOAN_TERM'] - self.df['Loan_Age']
        self.df = self.df[self.df['Rem_Term'] > 0].copy()

        # Scheduled balance factor of a level-pay loan after `age` payments:
        #   ((1+r)^n - (1+r)^age) / ((1+r)^n - 1)
        monthly_rate = (self.df['ORIGINAL_INTEREST_RATE'].to_numpy(dtype=float) / 100) / 12
        term = self.df['ORIGINAL_LOAN_TERM'].to_numpy(dtype=float)
        age = self.df['Loan_Age'].to_numpy(dtype=float)
        growth_full = np.power(1 + monthly_rate, term)
        growth_aged = np.power(1 + monthly_rate, age)
        denominator = growth_full - 1
        # A zero-rate loan makes the formula 0/0; its balance amortises
        # linearly, so pre-fill with (n - age) / n and only overwrite where
        # the closed form is defined. term > 0 is guaranteed by the filters
        # above (Rem_Term > 0 and Loan_Age >= 0).
        factor = (term - age) / term
        np.divide(growth_full - growth_aged, denominator, out=factor, where=denominator != 0)
        self.df['Current_UPB'] = self.df['ORIGINAL_UPB'].to_numpy(dtype=float) * factor

    def filter_cohort(self, target_coupon, tolerance=0.25):
        """Select loans whose note rate is within ``tolerance`` of ``target_coupon``.

        Both values are in percent. The selection is stored on
        ``self.pool_loans`` and returned.
        """
        mask = (self.df['ORIGINAL_INTEREST_RATE'] >= target_coupon - tolerance) & \
               (self.df['ORIGINAL_INTEREST_RATE'] <= target_coupon + tolerance)
        self.pool_loans = self.df[mask].copy()
        return self.pool_loans

    def get_pricing_stats(self):
        """Return balance-weighted pool statistics for the filtered cohort.

        Returns:
            Dict with ``'Balance'`` (sum of current UPB), ``'WAC'`` (decimal),
            ``'WAM'`` and ``'WALA'`` (months, truncated to int) and ``'ID'``.

        Raises:
            ValueError: If no cohort has been selected, the cohort is empty,
                or its total balance is not positive.
        """
        if self.pool_loans.empty: raise ValueError("Pool Empty")
        df = self.pool_loans
        total_bal = df['Current_UPB'].sum()
        if not total_bal > 0:
            # The weighted averages below would divide by zero.
            raise ValueError("Pool has no outstanding balance")
        wa_wac = (df['ORIGINAL_INTEREST_RATE'] * df['Current_UPB']).sum() / total_bal
        wa_wam = (df['Rem_Term'] * df['Current_UPB']).sum() / total_bal
        wa_wala = (df['Loan_Age'] * df['Current_UPB']).sum() / total_bal
        return {'Balance': total_bal, 'WAC': wa_wac / 100.0, 'WAM': int(wa_wam),
                'WALA': int(wa_wala), 'ID': f"MBS_{wa_wac:.2f}"}


# =============================================================================
# SECTION 3: ECONOMY (HULL-WHITE)
# =============================================================================
class HullWhiteSimulator:
    """Monte Carlo short-rate simulator with mean reversion towards ``r0``.

    Each step applies ``dr = a * (r0 - r) * dt + sigma * sqrt(dt) * Z`` with
    standard-normal ``Z`` (Euler discretisation).
    """
    def __init__(self, a=0.03, sigma=0.012, r0=0.01, T=30, dt=1/12):
        """
        Args:
            a: Mean-reversion speed.
            sigma: Absolute (additive) short-rate volatility.
            r0: Initial short rate, also used as the reversion level.
            T: Horizon in years.
            dt: Step size in years.
        """
        self.a = a
        self.sigma = sigma
        self.r0 = r0
        self.T = T
        self.dt = dt
        # Round rather than truncate: T / dt is often a hair below the
        # intended integer in floating point (e.g. 0.3 / 0.1).
        self.n_steps = int(round(T / dt))

    def simulate_paths(self, n_paths, seed=42):
        """Simulate short-rate paths and their path-wise discount factors.

        Args:
            n_paths: Number of Monte Carlo paths.
            seed: Seed for the path generator. The default reproduces the
                draws previously obtained via ``np.random.seed(42)``.

        Returns:
            Tuple ``(rates, discount_factors)``, both of shape
            ``(n_paths, n_steps)``. ``rates[:, t]`` is the rate in force over
            step ``t``; ``discount_factors[:, t]`` discounts to the end of
            step ``t``.
        """
        # A private RandomState yields the same stream as seeding the global
        # generator, without clobbering global RNG state for other code.
        rng = np.random.RandomState(seed)
        rates = np.zeros((n_paths, self.n_steps + 1))
        rates[:, 0] = self.r0
        Z = rng.normal(0, 1, size=(n_paths, self.n_steps))
        sqrt_dt = np.sqrt(self.dt)
        for t in range(self.n_steps):
            drift = (self.a * self.r0 - self.a * rates[:, t]) * self.dt
            rates[:, t+1] = rates[:, t] + drift + self.sigma * sqrt_dt * Z[:, t]
        # Left-point Riemann sum of the short rate along each path.
        integral_r = np.cumsum(rates[:, :-1], axis=1) * self.dt
        return rates[:, :-1], np.exp(-integral_r)


# =============================================================================
# SECTION 4: BEHAVIOR (PREPAYMENT)
# =============================================================================
class PrepaymentModel:
    """Logistic (S-curve) refinancing model with a linear seasoning ramp."""

    def __init__(self, k=10.0, center=0.015, min_cpr=0.03, max_cpr=0.60):
        """
        Args:
            k: Steepness of the logistic curve.
            center: Rate incentive at which the curve is at its midpoint.
            min_cpr: CPR floor (incentive far below ``center``).
            max_cpr: CPR ceiling (incentive far above ``center``).
        """
        self.k = k
        self.center = center
        self.min_cpr = min_cpr
        self.max_cpr = max_cpr

    def calculate_cpr_vector(self, wac, mortgage_rates, loan_age_start):
        """Return one annualised CPR per entry of ``mortgage_rates``.

        Args:
            wac: Pool coupon, in the same units as ``mortgage_rates``.
            mortgage_rates: Array of prevailing mortgage rates along a path.
            loan_age_start: Loan age (months) before the first projected month.

        Returns:
            Array of CPRs: the S-curve value scaled by a ramp that grows
            linearly with loan age and is capped at 1 from month 30 onward.
        """
        horizon = len(mortgage_rates)

        # Refinancing incentive pushed through a logistic; the argument is
        # clipped so np.exp cannot overflow.
        rate_gap = wac - mortgage_rates
        logit_arg = np.clip(-self.k * (rate_gap - self.center), -50, 50)
        s_curve = 1.0 / (1.0 + np.exp(logit_arg))
        seasoned_cpr = self.min_cpr + (self.max_cpr - self.min_cpr) * s_curve

        # Seasoning: the first projected month is one month older than the start age.
        first_age = loan_age_start + 1
        month_ages = np.arange(first_age, first_age + horizon)
        seasoning = np.minimum(month_ages / 30.0, 1.0)

        return seasoned_cpr * seasoning


# =============================================================================
# SECTION 5: STRUCTURED CASH FLOW ENGINE (WATERFALL)
# =============================================================================
class SequentialPayEngine:
    """
    Handles Cash Flow Waterfall for 3 Tranches (A, B, C).
    - Pro-Rata Interest
    - Sequential Principal (A -> B -> C)

    The pool is amortised as a single level-pay loan at the pool WAC with
    prepayments applied after scheduled principal. Every tranche accrues
    interest at the pool WAC on its own outstanding balance.
    """
    def __init__(self, pool_stats, structure_pcts=None):
        """
        Args:
            pool_stats: Dict with ``'Balance'``, ``'WAC'`` (annual, decimal)
                and ``'WAM'`` (months).
            structure_pcts: Dict with keys ``'A'``, ``'B'``, ``'C'`` giving
                each tranche's share of the pool balance. Defaults to
                60% / 30% / 10% when omitted or ``None``.
        """
        # Build the default per call instead of sharing one dict object
        # across all instances via a mutable default argument.
        if structure_pcts is None:
            structure_pcts = {'A': 0.60, 'B': 0.30, 'C': 0.10}

        self.pool_bal = pool_stats['Balance']
        self.wac = pool_stats['WAC']
        self.wam = pool_stats['WAM']

        # Initialize Tranche Balances
        self.tranches = {
            'A': {'Balance': self.pool_bal * structure_pcts['A'], 'Name': 'Senior'},
            'B': {'Balance': self.pool_bal * structure_pcts['B'], 'Name': 'Mezzanine'},
            'C': {'Balance': self.pool_bal * structure_pcts['C'], 'Name': 'Subordinate'}
        }

    def get_waterfall_pv(self, cpr_vector, discount_factors, oas=0.0):
        """Calculates PV for each tranche for a single path.

        Args:
            cpr_vector: Annualised CPR per month along the path.
            discount_factors: Path discount factor for each month's payment;
                must be at least as long as the number of months projected.
            oas: Continuously compounded spread added to discounting
                (annual, decimal).

        Returns:
            Dict ``{'A': pv, 'B': pv, 'C': pv}`` of present values.

        Note:
            Projection stops after ``min(len(cpr_vector), WAM)`` months; any
            balance still outstanding at that point is not paid out.
        """
        months = min(len(cpr_vector), self.wam)

        # Reset Balances for this simulation path (instance state is never mutated)
        bal_A = self.tranches['A']['Balance']
        bal_B = self.tranches['B']['Balance']
        bal_C = self.tranches['C']['Balance']

        curr_pool_bal = self.pool_bal
        gwac_monthly = self.wac / 12.0

        pv_A, pv_B, pv_C = 0.0, 0.0, 0.0

        for t in range(months):
            if curr_pool_bal < 0.01: break

            # --- 1. Pool Level Calculation ---
            # Re-level the payment each month on the surviving balance.
            rem_term = self.wam - t
            if gwac_monthly == 0: pmt = curr_pool_bal / rem_term
            else:
                factor = (1 + gwac_monthly)**rem_term
                pmt = curr_pool_bal * (gwac_monthly * factor) / (factor - 1)

            int_pool = curr_pool_bal * gwac_monthly
            prin_sched = max(0, pmt - int_pool)

            # Convert annual CPR to a single-month mortality rate.
            smm = 1 - (1 - cpr_vector[t])**(1/12)
            bal_after_sched = curr_pool_bal - prin_sched
            prin_prepay = bal_after_sched * smm

            total_prin_available = prin_sched + prin_prepay

            # --- 2. Waterfall Allocation ---

            # A. Interest (Pro-Rata / Based on Outstanding Balance)
            # Assuming all tranches pay the same Net WAC for simplicity
            cf_int_A = bal_A * gwac_monthly
            cf_int_B = bal_B * gwac_monthly
            cf_int_C = bal_C * gwac_monthly

            # B. Principal (Sequential: A -> B -> C)
            pay_A = min(bal_A, total_prin_available)
            remaining_prin = total_prin_available - pay_A

            pay_B = min(bal_B, remaining_prin)
            remaining_prin = remaining_prin - pay_B

            pay_C = min(bal_C, remaining_prin) # C takes the rest

            # --- 3. Discounting ---
            # Month t's cash flow is paid at time (t + 1) / 12 years.
            oas_factor = np.exp(-oas * (t + 1)/12.0)
            df = discount_factors[t] * oas_factor

            pv_A += (cf_int_A + pay_A) * df
            pv_B += (cf_int_B + pay_B) * df
            pv_C += (cf_int_C + pay_C) * df

            # --- 4. Update State ---
            bal_A -= pay_A
            bal_B -= pay_B
            bal_C -= pay_C
            curr_pool_bal = bal_A + bal_B + bal_C

        return {'A': pv_A, 'B': pv_B, 'C': pv_C}


# =============================================================================
# SECTION 6: ANALYTICS & VALUATION (MULTI-TRANCHE)
# =============================================================================
class StructuredAnalyticsEngine:
    """Monte Carlo pricing and rate sensitivities for the A/B/C tranches."""

    def __init__(self, pool_stats, rates, discount_factors, structure_pcts):
        """
        Args:
            pool_stats: Pool statistics dict; ``'Balance'``, ``'WAC'`` and
                ``'WALA'`` are read here, the rest by the cash-flow engine.
            rates: 2-D array of simulated short rates, one row per path.
            discount_factors: Array of path discount factors aligned with ``rates``.
            structure_pcts: Dict mapping ``'A'``, ``'B'``, ``'C'`` to their
                share of the pool balance.
        """
        self.structure_pcts = structure_pcts
        self.pool_stats = pool_stats
        self.rates = rates
        self.dfs = discount_factors
        self.prepay_model = PrepaymentModel()
        self.cf_engine = SequentialPayEngine(pool_stats, structure_pcts)

    def calculate_prices(self, oas=0.0, rate_shift=0.0):
        """Price each tranche as the path-average PV per 100 of tranche face.

        Args:
            oas: Spread passed through to the waterfall discounting.
            rate_shift: Parallel shift added to every simulated rate; the
                discount factors are adjusted consistently, assuming monthly steps.

        Returns:
            Dict ``{tranche: {'Price': ..., 'PV': ...}}`` for A, B and C.
        """
        tranche_ids = ('A', 'B', 'C')
        path_pvs = {tid: [] for tid in tranche_ids}
        spread_to_mortgage = 0.025  # short rate -> mortgage rate

        for path_idx in range(self.rates.shape[0]):
            bumped_short = self.rates[path_idx] + rate_shift
            cpr_path = self.prepay_model.calculate_cpr_vector(
                self.pool_stats['WAC'],
                bumped_short + spread_to_mortgage,
                self.pool_stats['WALA'],
            )

            discounting = self.dfs[path_idx]
            if rate_shift != 0:
                # A parallel shift s scales the factor for month m by exp(-s * m / 12).
                years = np.arange(1, len(discounting) + 1) / 12.0
                discounting = discounting * np.exp(-rate_shift * years)

            single_path = self.cf_engine.get_waterfall_pv(cpr_path, discounting, oas)
            for tid in tranche_ids:
                path_pvs[tid].append(single_path[tid])

        # Average across paths and quote per 100 of original tranche face.
        pool_balance = self.pool_stats['Balance']
        summary = {}
        for tid in tranche_ids:
            mean_pv = np.mean(path_pvs[tid])
            face = pool_balance * self.structure_pcts[tid]
            if face > 0:
                quote = (mean_pv / face) * 100
            else:
                quote = 0
            summary[tid] = {'Price': quote, 'PV': mean_pv}

        return summary

    def calculate_greeks(self, oas_base):
        """Return price, effective duration and effective convexity per tranche.

        Reprices under parallel shifts of +/-25bp and uses central
        differences. Convexity uses the ``2 * P0 * dy**2`` denominator.
        Tranches priced at or below 1 report zero for both measures.
        """
        shock = 0.0025
        base = self.calculate_prices(oas=oas_base, rate_shift=0)
        bumped_up = self.calculate_prices(oas=oas_base, rate_shift=shock)
        bumped_down = self.calculate_prices(oas=oas_base, rate_shift=-shock)

        report = {}
        for tid in ('A', 'B', 'C'):
            px = base[tid]['Price']
            px_up = bumped_up[tid]['Price']
            px_dn = bumped_down[tid]['Price']

            duration, convexity = 0, 0
            if px > 1:  # guards the divisions below
                duration = (px_dn - px_up) / (2 * px * shock)
                convexity = (px_dn + px_up - 2*px) / (2 * px * shock**2)

            report[tid] = {
                'Price': px,
                'Eff_Duration': duration,
                'Eff_Convexity': convexity,
            }
        return report


# =============================================================================
# MAIN EXECUTION
# =============================================================================
if __name__ == "__main__":
    # End-to-end demo: build the pool, calibrate SABR, simulate rates, then
    # price the three tranches and print a report.
    print("--- STARTING STRUCTURED MBS ENGINE (3-TRANCHE) ---")
    start_time = time.time()

    # 1. SETUP
    # NOTE(review): machine-specific absolute path. FreddiePoolConstructor
    # falls back to mock data when the file is not found.
    DATA_DIR = "C:/Users/hansh/OneDrive/Desktop/OBITS Lab/MBS Simulate/data/Freddie data/"
    csv_path = DATA_DIR + 'combined_sampled_mortgages_2017_2020.csv'
    print("\n[1] Constructing Collateral Pool...")
    pool_ctor = FreddiePoolConstructor(csv_path)
    pool_loans = pool_ctor.filter_cohort(target_coupon=4.5)
    pool_stats = pool_ctor.get_pricing_stats()
    print(f"    Collateral: ${pool_stats['Balance']:,.0f} | WAC: {pool_stats['WAC']*100:.2f}% | WAM: {pool_stats['WAM']}m")

    # 2. CALIBRATION (SABR)
    print("\n[2] Calibrating Volatility...")
    F_curr = 0.04
    expiry_years = 5.0
    quote_strikes = [0.02, 0.03, 0.04, 0.05, 0.06]
    quote_vols = [0.35, 0.28, 0.25, 0.24, 0.26]
    sabr = SABRCalibrator()
    sabr_params = sabr.calibrate(quote_strikes, quote_vols, F_curr, expiry_years)
    if not sabr_params['success']:
        # Previously the optimiser's convergence flag was silently ignored.
        print("    Warning: SABR calibration did not converge; using last iterate.")

    # NOTE(review): the vol is read at strike 0.02 but scaled by the forward
    # (0.04). The lognormal -> normal approximation sigma_N ~ sigma_LN * F is
    # an at-the-money relation - confirm whether the off-ATM strike is intended.
    vol_strike = 0.02
    vol_lognormal = sabr.get_vol(vol_strike, F_curr, expiry_years, sabr_params)
    sigma_normal = vol_lognormal * F_curr # Conversion to Normal Vol
    print(f"    SABR Implied Vol (Normal): {sigma_normal*10000:.1f} bps")

    # 3. ECONOMY
    print("\n[3] Simulating Rates (Hull-White)...")
    hw = HullWhiteSimulator(a=0.05, sigma=sigma_normal, r0=0.01, T=30)
    rates, dfs = hw.simulate_paths(n_paths=100) # 100 paths for demo

    # 4. VALUATION (STRUCTURED)
    print("\n[4] Structuring & Pricing Tranches...")
    # Define Structure: 50% Senior (A), 30% Mezz (B), 20% Sub (C)
    structure = {'A': 0.50, 'B': 0.30, 'C': 0.20}
    oas_base = 0.0050

    engine = StructuredAnalyticsEngine(pool_stats, rates, dfs, structure)
    greeks = engine.calculate_greeks(oas_base=oas_base)

    print("\n" + "="*50)
    # Header is derived from the OAS actually used, so the two cannot drift apart.
    print(f" FINAL PRICING REPORT (OAS {oas_base * 1e4:+.0f}bps)")
    print("="*50)
    print(f"{'Tranche':<10} | {'Type':<10} | {'Price':<10} | {'Duration':<10} | {'Convexity':<10}")
    print("-" * 65)

    types = {'A': 'Senior', 'B': 'Mezz', 'C': 'Sub/Equity'}
    for tr in ['A', 'B', 'C']:
        d = greeks[tr]
        print(f" Class {tr:<5} | {types[tr]:<10} | {d['Price']:<10.3f} | {d['Eff_Duration']:<10.3f} | {d['Eff_Convexity']:<10.3f}")

    print("-" * 65)
    print(f"Total Runtime: {time.time() - start_time:.2f}s")

--- STARTING STRUCTURED MBS ENGINE (3-TRANCHE) ---

[1] Constructing Collateral Pool...


  self.df = pd.read_csv(filepath, sep='|')


Data Loaded: 80000 loans.
    Collateral: $5,588,631,083 | WAC: 4.49% | WAM: 319m

[2] Calibrating Volatility...
    SABR Implied Vol (Normal): 140.4 bps

[3] Simulating Rates (Hull-White)...

[4] Structuring & Pricing Tranches...

 FINAL PRICING REPORT (OAS +50bps)
Tranche    | Type       | Price      | Duration   | Convexity 
-----------------------------------------------------------------
 Class A     | Senior     | 102.398    | 0.713      | -3.251    
 Class B     | Mezz       | 107.657    | 2.355      | -8.866    
 Class C     | Sub/Equity | 117.643    | 5.160      | -6.102    
-----------------------------------------------------------------
Total Runtime: 1.54s
