# Reading in data

In [9]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime, timedelta

# Define constants
DATA_DIR = r'C:\Mini_Project\output\final_data\data'
STRIKES = [0.8, 0.85, 0.9, 0.95, 1.0, 1.05, 1.1, 1.15, 1.2]
MATURITIES = [30, 60, 90, 120, 150, 180, 360]
START_DATE = datetime(2015, 11, 2)
END_DATE = datetime(2025, 10, 31)

# Generate the list of dates in the range (closed interval)
all_dates = pd.date_range(START_DATE, END_DATE, freq='B')  # Use business days if needed, else use 'D'


# Helper function to load options data, concatenate, and filter relevant columns
def load_all_options_data(data_dir, start_date, end_date):
    """
    Load all yearly option files and concatenate into one DataFrame.
    """
    all_dfs = []
    for year in range(start_date.year, end_date.year + 1):
        yearly_folder = os.path.join(data_dir, str(year))
        csv_path = os.path.join(yearly_folder, f"^NDX_options_data_{year}.csv")
        if os.path.exists(csv_path):
            df = pd.read_csv(csv_path, parse_dates=['date'])
            all_dfs.append(df)
    if not all_dfs:
        raise FileNotFoundError("No options data files found in the requested range.")
    all_data = pd.concat(all_dfs, ignore_index=True)
    return all_data

# Load all data (it automatically parses dates)
options_data = load_all_options_data(DATA_DIR, START_DATE, END_DATE)

# Ensure the date column is datetime, and filter for requested date range
options_data['date'] = pd.to_datetime(options_data['date'])
options_data = options_data[(options_data['date'] >= START_DATE) & (options_data['date'] <= END_DATE)]

In [10]:
options_data.head()

Unnamed: 0,date,current_price,strike,relative_strike,maturity_days,maturity_years,option_price,implied_vol,volume,option_type
0,2015-11-02,4703.919922,3775.0,0.8,30,0.082192,2.2,0.313281,11.0,put
1,2015-11-02,4703.919922,3775.0,0.8,60,0.164384,4.5,0.311558,10.0,put
2,2015-11-02,4703.919922,3775.0,0.8,90,0.246575,9.4,0.310001,2.0,put
3,2015-11-02,4703.919922,3775.0,0.8,120,0.328767,28.5,0.3087,2.0,put
4,2015-11-02,4703.919922,3775.0,0.8,150,0.410959,28.5,0.307784,2.0,put


In [11]:
# computing the time series of log-returns for each (strike, maturity) combination
log_returns_dict = {}
for k in STRIKES:
    for t in MATURITIES:
        # Select rows for this strike/maturity
        mask = (options_data['relative_strike'] == k) & (options_data['maturity_days'] == t)
        df_slice = options_data[mask].copy()
        # Sort by date to ensure chronological order
        df_slice = df_slice.sort_values('date')
        # Pick the column (e.g. implied_vol or option_price) for returns
        # We'll use implied_vol as an example, but you may change to 'option_price' if needed
        price_series = df_slice.set_index('date')['implied_vol']
        price_series = price_series.loc[~price_series.index.duplicated(keep='first')]
        # Compute log returns
        log_ret = np.log(price_series).diff().dropna()
        # Store as a pandas Series in the dictionary
        log_returns_dict[(k, t)] = log_ret

In [None]:
import numpy as np
import math
from scipy.optimize import minimize
from scipy.stats import norm

# Merton's jump-diffusion log-likelihood for increments
def merton_loglike(params, returns):
    """
    params: [mu, sigma, lam, mu_j, sigma_j]
    returns: array of log returns
    """
    mu, sigma, lam, mu_j, sigma_j = params

    # Physical constraints to keep fit reasonable
    if sigma <= 0 or sigma_j <= 0 or lam < 0:
        return np.inf  # negative likelihood

    # For numerical stability, restrict lambda to small positive if close to zero
    lam = max(lam, 1e-12)

    # Discretization over possible number of jumps (0, 1, 2, ...)
    max_k = 5  # Most probability mass will be in 0-3 jumps per dt

    # dt = 1 for daily log returns; generalization: dt
    dt = 1.0
    prob = 0.0
    ll = 0.0
    for x in returns:
        px = 0.0
        for k in range(max_k+1):
            # Poisson probability of k jumps
            p_k = np.exp(-lam*dt) * (lam*dt)**k / math.factorial(k)
            mean = mu*dt + k*mu_j
            var = sigma**2*dt + k*sigma_j**2
            px += p_k * norm.pdf(x, loc=mean, scale=np.sqrt(var))
        # Protect against log(0)
        px = max(px, 1e-18)
        ll += np.log(px)
    return -ll  # negative log-likelihood for minimization

# Dictionary for estimated parameters
merton_params_dict = {}

import json

log_filename = "jump_process_params.log"

for key, returns in log_returns_dict.items():
    returns = returns.values
    # Initial params: [mu, sigma, lam, mu_j, sigma_j]
    mu0 = np.mean(returns)
    sigma0 = np.std(returns)
    lam0 = 0.1  # initial guess: 0.1 jumps per day
    mu_j0 = 0.0
    sigma_j0 = 0.2*sigma0 if sigma0 > 0 else 0.01
    bounds = [
        (None, None),         # mu
        (1e-6, None),         # sigma > 0
        (0, 2.0),             # lambda >= 0
        (None, None),         # mu_j
        (1e-6, None),         # sigma_j > 0
    ]
    x0 = [mu0, sigma0, lam0, mu_j0, sigma_j0]
    result = minimize(
        merton_loglike, x0, args=(returns,), method='L-BFGS-B', bounds=bounds,
        options={'disp': False, 'maxiter': 500}
    )
    opt_params = result.x
    merton_params_dict[key] = {
        'mu': opt_params[0],
        'sigma': opt_params[1],
        'lambda': opt_params[2],
        'mu_jump': opt_params[3],
        'sigma_jump': opt_params[4],
        'success': result.success,
        'negloglike': result.fun
    }
    print(f"Strike={key[0]:.2f}, Maturity={key[1]:d}d | "
          f"mu={opt_params[0]:.5f}, sigma={opt_params[1]:.5f}, "
          f"lambda={opt_params[2]:.5f}, mu_j={opt_params[3]:.5f}, sigma_j={opt_params[4]:.5f}, "
          f"success={result.success}")

    # Write params to file after each loop
    # Convert tuple keys to strings for JSON serialization
    json_dict = {f"{k[0]}_{k[1]}": v for k, v in merton_params_dict.items()}
    with open(log_filename, "w") as f:
        json.dump(json_dict, f, indent=4)


  result = minimize(


Strike=0.80, Maturity=30d | mu=0.00837, sigma=0.04965, lambda=0.40077, mu_j=-0.02103, sigma_j=0.19853, success=True


TypeError: keys must be str, int, float, bool or None, not tuple