In [1]:
%reset -f

In [2]:
import pandas as pd
import csv
import pickle
import pandas as pd
from sklearn.linear_model import Lasso
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import numpy as np
import pyarrow as pa
import pyarrow.parquet as pq


In [3]:
# Read the topic-attention CSV and rename its 'date' column to 'DATE',
# the column name used when matching against the returns table below.
topics = pd.read_csv(
    r'C:\Users\jonat\Lasso_paper\Empirical\data\Daily_Topic_Attention_Theta.csv'
).rename(columns={'date': 'DATE'})

In [4]:
# Load the returns table; later cells read its 'DATE' and 'vwretd' columns.
returns = pd.read_csv(r'C:\Users\jonat\Lasso_paper\Empirical\data\return84_20.csv')

In [5]:
# Keep only the topic rows whose DATE value also occurs in the returns table,
# then renumber the rows from zero.
keep = topics['DATE'].isin(returns['DATE'])
topics = topics.loc[keep].reset_index(drop=True)

In [6]:
# Every column other than the 'DATE' key is treated as a topic series.
topics_cols = list(topics.columns)
topics_cols.remove('DATE')

In [7]:
# Quasi-difference each topic series with its own lag-1 autocorrelation:
#   x_t  ->  x_t - phi * x_{t-1}
# The first row of every column becomes NaN because it has no predecessor.
# NOTE: this overwrites `topics` in place, so re-running the cell applies the filter again.
for col in topics_cols:
    x = topics[col]
    phi = x.autocorr(lag=1)      # Series method; lag-1 autocorrelation of this column
    topics[col] = x.sub(x.shift(1).mul(phi))

In [8]:
# Drop topic rows containing NaN, then restrict returns to the dates that survive.
topics = topics.dropna().reset_index(drop=True)
keep = returns['DATE'].isin(topics['DATE'])
returns = returns.loc[keep].reset_index(drop=True)

# Keep the DATE column as loaded, captured before it is parsed to datetimes.
dates = returns['DATE']

# Index returns by the parsed dates and sort chronologically.
returns = (
    returns
    .assign(DATE=pd.to_datetime(returns['DATE']))
    .set_index('DATE')
    .sort_index()
)

In [9]:
from stage1 import rolling_window_lasso, create_lagged_features

In [23]:
# Stage-1 inputs: the first ten topic columns as regressors and the
# 'vwretd' return series as the target.
y = returns['vwretd']                 # shape: (n_samples,)
X = topics.loc[:, topics_cols[:10]]   # shape: (n_samples, n_features)

# Rolling-window LASSO (30-observation windows, one lag of the features).
results = rolling_window_lasso(
    X=X, y=y,
    window_size=30, n_lags=1,
    standardize=True,
    cv_folds=2, alphas=10,
)

# Pull the two result entries used further down.
lambdas, coefficients = results['lambdas'], results['coefficients']

Creating lagged features with 1 lags...
Running 8409 rolling windows of size 30...


Rolling windows: 100%|██████████| 8409/8409 [03:37<00:00, 38.63it/s]

Rolling LASSO complete!
Average lambda: 0.002578





In [11]:
from stage1 import get_coefficient_dataframe, analyze_results

In [12]:
# Post-process the stage-1 `results` with the two helpers imported from stage1.
# NOTE(review): presumably `df_coefs` holds the per-window coefficients as a
# DataFrame and `analysis` a summary of the run — confirm against stage1.
df_coefs = get_coefficient_dataframe(results)
analysis = analyze_results(results)

### We have now estimated the first stage under the assumption that the agents' PLM is estimated by LASSO.

The next step is now to use these forecasted returns to estimate the ALM.
The ALM in the 2nd stage is specified as: 

$$
r_{t+1} = \log(\varepsilon_{t+1}) 
+ \log(1 - \kappa e^{x'_t \beta}) 
- \log(1 - \kappa e^{x'_{t+1} \beta})
$$

$$
\kappa := \delta a^{-\gamma} \phi
$$

Here, $x'_t \beta$ and $x'_{t+1} \beta$ are the agent's return forecasts from the 1st stage for periods $t$ and $t+1$.

In [9]:

# Load previously saved first-stage output. This rebinds `results`, replacing
# whatever the rolling-LASSO cell above left in that name.
# SECURITY: pickle.load can execute arbitrary code; only load files you produced yourself.
with open(r"C:\Users\jonat\Lasso_paper\Empirical\scripts\lasso_11_2025\output\first_stage.pkl", "rb") as f:
    results = pickle.load(f)

In [10]:
# Put the stage-1 forecasts into a frame keyed by their prediction date.
stage2 = pd.DataFrame({
    "date": results["prediction_dates"],
    "predictions": results["predictions"],
})
stage2 = stage2.assign(date=pd.to_datetime(stage2["date"])).set_index("date")

# Make sure both indexes are datetime-typed before joining on them.
stage2.index = pd.to_datetime(stage2.index)
returns.index = pd.to_datetime(returns.index)

# Inner join: keep only the dates present in both the returns and the forecasts.
stage2 = returns.join(stage2, how='inner')

# epsilon is the realised 'vwretd' minus the stage-1 forecast.
stage2["epsilon"] = stage2['vwretd'].sub(stage2['predictions'])

In [11]:
from stage2 import estimate_kappa

In [None]:
import numpy as np
from scipy.optimize import minimize

def estimate_kappa(stage2):
    """
    Estimate kappa from the ALM by bounded nonlinear least squares:

        r_{t+1} = log(eps_{t+1}) + log(1 - kappa * exp(pred_t)) - log(1 - kappa * exp(pred_{t+1}))

    Parameters
    ----------
    stage2 : pandas.DataFrame
        Must contain the columns 'vwretd' (r), 'epsilon' (eps) and
        'predictions' (x'β). Consecutive rows are paired as (t, t+1), so the
        frame is assumed to be in time order.

    Returns
    -------
    float
        The kappa in [1e-6, kappa_max] that minimises the sum of squared
        differences between r_{t+1} and the right-hand side above, where
        kappa_max is the largest value keeping every log argument positive.

    Raises
    ------
    ValueError
        If `stage2` has fewer than two rows, or if 'predictions' contains
        NaN/inf (the feasible range for kappa would be undefined).

    Notes
    -----
    log(eps) is evaluated as log(|eps| + 1e-8) so that zero or negative
    residuals do not produce NaN/-inf.
    This in-notebook definition shadows the `estimate_kappa` imported from
    the `stage2` module in the preceding cell.
    """
    if len(stage2) < 2:
        raise ValueError("estimate_kappa needs at least two rows to form (t, t+1) pairs")

    # align data for t and t+1
    r = stage2['vwretd'].values[1:]                 # r_{t+1}
    eps = stage2['epsilon'].values[1:]              # ε_{t+1}
    pred_t = stage2['predictions'].values[:-1]      # x'_t β
    pred_t1 = stage2['predictions'].values[1:]      # x'_{t+1} β

    # A NaN/inf forecast would turn kappa_max (and thus the bounds) into NaN.
    if not (np.all(np.isfinite(np.asarray(pred_t, dtype=float)))
            and np.all(np.isfinite(np.asarray(pred_t1, dtype=float)))):
        raise ValueError("'predictions' contains non-finite values")

    # These terms do not depend on kappa, so compute them once rather than
    # on every objective evaluation.
    exp_t = np.exp(pred_t)
    exp_t1 = np.exp(pred_t1)
    log_eps = np.log(np.abs(eps) + 1e-8)

    def objective(kappa):
        # SciPy passes a length-1 array; calling float() on it directly is a
        # deprecated array-to-scalar conversion, so extract the element first.
        kappa = float(np.asarray(kappa).ravel()[0])
        arg_t = 1 - kappa * exp_t
        arg_t1 = 1 - kappa * exp_t1
        # Outside the feasible region the logs are undefined: return a large penalty.
        if np.any(arg_t <= 0) or np.any(arg_t1 <= 0):
            return 1e10
        r_hat = log_eps + np.log(arg_t) - np.log(arg_t1)
        val = np.sum((r - r_hat)**2)
        if not np.isfinite(val):
            val = 1e10
        return val

    # choose a safe upper bound for kappa so the log arguments stay positive:
    # 1 - kappa * exp(pred) > 0 for all pred  <=>  kappa < exp(-max(pred))
    kappa_min = 1e-6
    kappa_max = 1 / np.exp(max(pred_t.max(), pred_t1.max())) - 1e-8
    kappa_max = max(kappa_max, kappa_min)

    # Start at the midpoint of (0, kappa_max), clipped so it never lies
    # outside the bounds handed to the optimiser.
    x0 = min(max(kappa_max / 2, kappa_min), kappa_max)

    # minimize the objective
    res = minimize(objective, x0=x0, bounds=[(kappa_min, kappa_max)], method='L-BFGS-B')

    return res.x[0]



In [None]:
# Fit kappa on the stage-2 frame (returns joined with the stage-1 forecasts)
# and print the point estimate.
kappa_hat = estimate_kappa(stage2)
print("Estimated κ =", kappa_hat)

Estimated κ = 0.707716086735524


  kappa = float(kappa)
