In [None]:
# Clear the interactive namespace without asking for confirmation (-f), so the
# cells below start from a clean slate even when the kernel was not restarted.
%reset -f

In [None]:
import pandas as pd
import csv
import pickle
import pandas as pd
from sklearn.linear_model import Lasso
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import numpy as np
import pyarrow as pa
import pyarrow.parquet as pq
from pathlib import Path
import os

from stage1 import lasso_rolling_window, create_lagged_features, get_coefficient_dataframe, analyze_results
from stage2 import estimate_kappa

In [None]:
# Directory holding the pipeline artifacts; override it with the
# LASSO_OUTPUT_DIR environment variable, otherwise "output" is used.
base_dir = Path(os.environ.get("LASSO_OUTPUT_DIR", "output"))

In [None]:
# Resolve the pickled inputs relative to `base_dir`, so that the
# LASSO_OUTPUT_DIR override configured above is actually honoured
# (with the variable unset this is still "output/...").
features_path = base_dir / "features.pkl"
response_path = base_dir / "response.pkl"

# The raw return series lives in a separate data directory; LASSO_DATA_DIR
# lets other machines point at their own copy (default: "data").
return_path = Path(os.getenv("LASSO_DATA_DIR", "data")) / "return84_20.csv"

# SECURITY: pickle.load can execute arbitrary code. Only load files that were
# produced by this project's own preprocessing step.
with features_path.open("rb") as f:
    X = pickle.load(f)

with response_path.open("rb") as f:
    y = pickle.load(f)

if not return_path.exists():
    # FIXME: machine-specific fallback kept only for backward compatibility;
    # prefer setting LASSO_DATA_DIR instead of relying on this path.
    return_path = Path(r"C:\Users\jonat\Lasso_paper\Empirical\data\return84_20.csv")
    print("Using absolute path:", return_path)

if not return_path.exists():
    # Fail here with a clear message rather than later with a NameError.
    raise FileNotFoundError(
        f"Return data not found at {return_path}; set LASSO_DATA_DIR to the "
        "directory containing return84_20.csv"
    )

# The second-stage cells use `returns` (its DATE and vwretd columns), but the
# file was never read anywhere in the notebook; load it here.
returns = pd.read_csv(return_path)


In [None]:
# First-stage run with lambda_mode="cv" (penalty picked via 5 CV folds)
res_cv = lasso_rolling_window(
    X,
    y,
    window_size=60,
    n_lags=3,
    lambda_mode="cv",
    cv_folds=5,
)

In [None]:
# First-stage run with a fixed, user-supplied penalty (lambda_mode="fixed")
res_fixed = lasso_rolling_window(
    X,
    y,
    window_size=60,
    n_lags=3,
    lambda_mode="fixed",
    fixed_lambda=0.0005,
)


In [None]:

# Pull the penalty path and the coefficient estimates out of the fixed-lambda run
lambdas, coefficients = (res_fixed[key] for key in ('lambdas', 'coefficients'))

In [None]:
# `results` is consumed here and by the second-stage cells further down, but it
# was never assigned, so a fresh "Restart & Run All" stopped with a NameError.
# Bind it explicitly to the fixed-lambda run (the one inspected in the cell
# above); switch to `res_cv` to carry the cross-validated fit forward instead.
results = res_fixed

df_coefs = get_coefficient_dataframe(results)
analysis = analyze_results(results)

### We have now estimated the 1st stage under the assumption that the agent's PLM is estimated by LASSO.

The next step is now to use these forecasted returns to estimate the ALM.
The ALM in the 2nd stage is specified as: 

$$
r_{t+1} = \log(\varepsilon_{t+1}) 
+ \log(1 - \kappa e^{x'_t \beta}) 
- \log(1 - \kappa e^{x'_{t+1} \beta})
$$

$$
\kappa := \delta a^{-\gamma} \phi
$$

Here, $x'_t \beta$ and $x'_{t+1} \beta$ are the agent's return forecasts for periods $t$ and $t+1$ from the 1st stage.

In [None]:
# First-stage forecasts as a one-column frame keyed by their (datetime) dates
stage2 = pd.DataFrame(
    {"predictions": results["predictions"]},
    index=pd.to_datetime(results["prediction_dates"]),
)

# Put the realised returns on a datetime index as well, so the join below
# matches rows on dates
returns.index = pd.to_datetime(returns["DATE"])

# Keep only dates present in both frames, then add the forecast error
# epsilon = realised return (vwretd) minus forecast
stage2 = (
    returns
    .join(stage2, how='inner')
    .assign(epsilon=lambda joined: joined["vwretd"] - joined["predictions"])
)


In [None]:
import numpy as np
from scipy.optimize import minimize

def estimate_kappa(stage2):
    """
    Estimate kappa from the ALM by least squares:

        r_{t+1} = log(eps_{t+1}) + log(1 - kappa * exp(pred_t)) - log(1 - kappa * exp(pred_{t+1}))

    Consecutive rows of ``stage2`` are treated as periods t and t+1. Pairs in
    which the return, the residual or either forecast is NaN/inf are skipped,
    because a single non-finite value would otherwise turn the bounds and the
    starting value of the optimizer into NaN.

    NOTE: defining this function in the notebook shadows the ``estimate_kappa``
    that the imports cell pulls in from the ``stage2`` module.

    Parameters
    ----------
    stage2 : pandas.DataFrame
        Must provide the columns ``vwretd`` (realised return), ``predictions``
        (first-stage forecast) and ``epsilon`` (residual).

    Returns
    -------
    float
        The minimiser of the sum of squared ALM errors within
        ``[1e-6, kappa_max]``, where ``kappa_max`` keeps every log argument
        strictly positive.

    Raises
    ------
    KeyError
        If one of the required columns is missing.
    ValueError
        If no complete (t, t+1) pair of finite observations is available.
    """

    returns_all = stage2['vwretd'].to_numpy(dtype=float)
    eps_all = stage2['epsilon'].to_numpy(dtype=float)
    pred_all = stage2['predictions'].to_numpy(dtype=float)

    # align data for t and t+1
    r = returns_all[1:]          # r_{t+1}
    eps = eps_all[1:]            # eps_{t+1}
    pred_t = pred_all[:-1]       # x'_t beta
    pred_t1 = pred_all[1:]       # x'_{t+1} beta

    # keep only pairs in which every ingredient is finite
    usable = (
        np.isfinite(r)
        & np.isfinite(eps)
        & np.isfinite(pred_t)
        & np.isfinite(pred_t1)
    )
    r, eps, pred_t, pred_t1 = r[usable], eps[usable], pred_t[usable], pred_t1[usable]
    if r.size == 0:
        raise ValueError(
            "estimate_kappa needs at least one complete (t, t+1) pair of finite "
            "observations in 'vwretd', 'epsilon' and 'predictions'"
        )

    # These terms do not depend on kappa, so compute them once instead of on
    # every objective evaluation.
    log_eps = np.log(np.abs(eps) + 1e-8)   # |eps| + 1e-8 keeps the log defined
    exp_t = np.exp(pred_t)
    exp_t1 = np.exp(pred_t1)

    def objective(kappa):
        # The optimizer passes a length-1 array; extract the scalar explicitly
        # (calling float() on an array with ndim > 0 is deprecated in NumPy).
        k = float(np.ravel(kappa)[0])
        gap_t = 1 - k * exp_t
        gap_t1 = 1 - k * exp_t1
        # infeasible kappa: a log argument would be non-positive
        if np.any(gap_t <= 0) or np.any(gap_t1 <= 0):
            return 1e10
        r_hat = log_eps + np.log(gap_t) - np.log(gap_t1)
        val = np.sum((r - r_hat) ** 2)
        return val if np.isfinite(val) else 1e10

    # choose a safe upper bound for kappa so the log arguments stay positive
    kappa_max = 1 / np.exp(max(pred_t.max(), pred_t1.max())) - 1e-8
    kappa_max = max(kappa_max, 1e-6)

    # minimize the objective
    res = minimize(objective, x0=kappa_max / 2, bounds=[(1e-6, kappa_max)], method='L-BFGS-B')
    if not res.success:
        # Surface optimizer problems instead of silently returning the last iterate.
        import warnings
        warnings.warn(
            f"estimate_kappa: optimizer did not report success ({res.message})",
            RuntimeWarning,
            stacklevel=2,
        )

    return res.x[0]

In [None]:
# Second stage: fit kappa on the joined returns/forecast frame and report it
kappa_hat = estimate_kappa(stage2)
print("Estimated κ =", str(kappa_hat))