In [None]:
import numpy as np
import pandas as pd
import yfinance as yf
from scipy.integrate import quad       #package for calculating numerical integrations.
from scipy.optimize import minimize    #package for minimization.
import datetime                   
from dateutil.tz import tzutc          #importing the UTC timezone class from the python-dateutil package.

from scipy.interpolate import griddata #a useful package that interpolates scattered data onto a structured grid. 
from scipy.optimize import brentq      #a useful package that solves roots for given functions.

# Below we set up the functions used to calibrate the model that captures the stock movement of Netflix.
# The parameters are calibrated via a least-squares optimization. More precisely, we minimize a loss function
# given by the mean square of the differences between the market option prices and the modeled option prices
# (the latter given by a semi-closed formula depending on the parameters), guaranteeing that the modeled prices are "close" to the market prices.



# Non-vectorized calculation of the European Heston call price (called in 'Heston_calibration' as the regression function).
def heston_call(S0, K, v0, r, t, kappa, theta, xi, rho, phi_max=200):
    """
    Price a European call under the Heston model.

    The price is assembled as S0 * P1 - K * exp(-r * t) * P2, where P1 and P2
    are each 0.5 plus (1 / pi) times a numerical integral of the real part of
    the corresponding characteristic function divided by (i * phi).

    Parameters
    ----------
    S0 : float
        Spot price of the underlying.
    K : float
        Strike price.
    v0 : float
        Initial variance.
    r : float
        Discount rate.
    t : float
        Time to expiration.
    kappa, theta, xi, rho : float
        Heston parameters; only the product kappa * theta, xi and rho * xi
        enter the formulas below. xi must be non-zero (it is divided by).
    phi_max : float, optional
        Upper limit of the integrals (increase for more accuracy).

    Returns
    -------
    float
        The model call price.
    """
    # The original code referred to an undefined name `i`; the imaginary unit
    # is spelled out explicitly here.
    i = 1j

    a = kappa * theta
    log_moneyness = np.log(S0 / K)

    # NOTE(review): this is the classic form of the characteristic function
    # (exp(+d * t) terms and a complex log). It is commonly reported to become
    # numerically delicate for long maturities -- confirm before using t well
    # above the 0.5-1 range used by the calibration.
    def integrand(phi, Pnum):
        u = 0.5 if Pnum == 1 else -0.5
        b = kappa - rho * xi if Pnum == 1 else kappa
        beta = b - rho * xi * phi * i
        # complex sqrt
        d = np.sqrt(beta ** 2 - (xi ** 2) * (2 * u * phi * i - phi ** 2))
        g = (beta + d) / (beta - d)
        growth = np.exp(d * t)

        # C and D as in Heston closed form
        C = r * phi * i * t + (a / (xi ** 2)) * (
            (beta + d) * t - 2 * np.log((1 - g * growth) / (1 - g))
        )
        D = ((beta + d) / (xi ** 2)) * ((1 - growth) / (1 - g * growth))
        f = np.exp(i * phi * log_moneyness) * np.exp(C + D * v0)
        # real part of f / (i * phi)
        return np.real(f / (i * phi))

    # Start slightly above zero because the integrand divides by phi.
    eps = 1e-8
    P1 = 0.5 + (1 / np.pi) * quad(lambda ph: integrand(ph, 1), eps, phi_max, limit=200)[0]
    P2 = 0.5 + (1 / np.pi) * quad(lambda ph: integrand(ph, 2), eps, phi_max, limit=200)[0]
    # Semi-closed form of the European Heston call price.
    return S0 * P1 - K * np.exp(-r * t) * P2




# Time-to-Expiration Finder (returning in years), based on the data from yfinance database.
def find_tte_yf_options(expiration_date, last_trade_date):
    """
    Compute the time to expiration, in years, of an option contract.

    expiration_date: expiration day as a 'YYYY-MM-DD' string; the contract is
        taken to expire at 21:30 UTC on that day.
    last_trade_date: timestamp of the last trade. When it carries no timezone
        it is interpreted as UTC.

    The elapsed seconds are divided by the length of a 365-day year.
    """
    seconds_per_year = 60 * 60 * 24 * 365

    # Expiration instant: the given day at 21:30, tagged as UTC.
    expiry = datetime.datetime.strptime(expiration_date, "%Y-%m-%d")
    expiry = expiry.replace(hour=21, minute=30, tzinfo=tzutc())

    # Aware and naive datetimes cannot be subtracted, so tag naive input as UTC.
    traded_at = (
        last_trade_date
        if last_trade_date.tzinfo is not None
        else last_trade_date.replace(tzinfo=tzutc())
    )

    return (expiry - traded_at).total_seconds() / seconds_per_year




# Spot Price Finder (returning the approximate stock price at the time of an option's last trade), based on the data from yfinance database.
def yf_find_approx_spot(stock_data, last_trade_date):
    """
    Look up the stock price in effect at `last_trade_date`.

    stock_data: Series or DataFrame indexed by timestamp. For a DataFrame the
        'Close' column is read when present, otherwise the first column.
    last_trade_date: timestamp of the trade; its seconds and microseconds are
        dropped before the lookup.

    The truncated timestamp is tried first, then the timestamps 1 minute later,
    1 minute earlier, 2 minutes later and 2 minutes earlier, in that order.
    The first lookup that succeeds is returned as a float; NaN is returned
    when every lookup fails.
    """
    minute = last_trade_date.replace(second=0, microsecond=0)

    def price_at(when):
        # Series: the value itself; DataFrame: 'Close' if available, else column 0.
        if not isinstance(stock_data, pd.DataFrame):
            return float(stock_data.loc[when])
        if 'Close' in stock_data.columns:
            return float(stock_data.loc[when, 'Close'])
        return float(stock_data.loc[when].iloc[0])

    def candidate_times():
        # Generated lazily so a shifted timestamp is only built once the
        # previous candidate has failed.
        yield minute
        for offset_sec in (60, -60, 120, -120):
            yield minute + pd.Timedelta(seconds=offset_sec)

    for when in candidate_times():
        try:
            return price_at(when)
        except Exception:
            # Best-effort lookup: any failure just moves on to the next candidate.
            continue
    return float('nan')



# Heston Model Calibration - minimize mean squared errors between the market and modeled prices.
def Heston_calibration(ticker_str, r):
    """
    Calibrate Heston parameters for a ticker from its listed call options.

    Downloads 1-minute price bars (period '1d') and every call option chain
    for `ticker_str` through yfinance, keeps the calls whose last trade is at
    or after the first price bar and whose time to expiration lies between
    0.5 and 1 year, and minimizes the mean squared difference between
    `heston_call` prices and last traded prices with L-BFGS-B.

    Parameters
    ----------
    ticker_str : str
        Ticker symbol passed to yfinance.
    r : float
        Discount rate passed through to `heston_call`.

    Returns
    -------
    numpy.ndarray
        The optimizer's final point, ordered [kappa, theta, xi, rho, v0].
        It is returned whether or not the optimizer reports convergence.

    Raises
    ------
    ValueError
        If no price bars are returned, or no call quote survives the filters.
    """
    ticker = yf.Ticker(ticker_str)
    stock_data = yf.download(ticker_str, period='1d', interval='1m')
    if stock_data.empty:
        raise ValueError(f"No intraday price data returned for {ticker_str!r}.")

    # Collect one calls table per listed expiration, tagged with its expiration
    # date and option type.
    call_frames = []
    for date in ticker.options:
        calls = ticker.option_chain(date).calls.copy()
        calls['expiration'] = date
        calls['option_type'] = 'call'
        call_frames.append(calls)
    if not call_frames:
        raise ValueError(f"No option expirations listed for {ticker_str!r}.")

    options_data = pd.concat(call_frames, ignore_index=True)

    # Drop the implied volatility column (the one given by yfinance is not reliable).
    options_data = options_data.drop(columns=['impliedVolatility'])

    # Keep only options last traded at or after the first available price bar,
    # so a spot price can be looked up for each trade. The copy makes the
    # column assignments below act on an independent frame, not on a slice.
    start_date = stock_data.index[0]
    options_data = options_data[options_data['lastTradeDate'] >= start_date].copy()
    if options_data.empty:
        raise ValueError(
            f"No {ticker_str!r} call option traded within the downloaded price window."
        )

    # Time to expiration in years, measured from the time of the last trade.
    options_data['time_to_expiration'] = [
        find_tte_yf_options(expiration_date=expiration, last_trade_date=traded_at)
        for expiration, traded_at in zip(options_data['expiration'],
                                         options_data['lastTradeDate'])
    ]

    # Spot price of the stock when the option trade occurred.
    close_prices = stock_data['Close']
    options_data['spot_price'] = [
        yf_find_approx_spot(close_prices, traded_at)
        for traded_at in options_data['lastTradeDate']
    ]

    # NOTE(review): this drops rows with a missing value in ANY column, not
    # only the columns used below -- kept as in the original; confirm intended.
    options_data = options_data.dropna()

    # Keep the relevant columns for call options, without undefined values.
    relevant_columns = ['strike', 'lastPrice', 'lastTradeDate',
                        'expiration', 'option_type', 'time_to_expiration', 'spot_price']
    options_data_calls = options_data.loc[
        options_data['option_type'] == 'call', relevant_columns
    ].dropna()

    # Manually keep 0.5 <= tte <= 1, which reduces the dataset size and
    # improves the computation time.
    in_window = options_data_calls['time_to_expiration'].between(0.5, 1)
    options_data_calls = options_data_calls[in_window]
    if options_data_calls.empty:
        raise ValueError(
            f"No {ticker_str!r} call quote with 0.5-1 year to expiration is left "
            "to calibrate against."
        )

    # Extract the inputs once: the objective is evaluated many times by the
    # optimizer and does not need to walk the DataFrame on every call.
    spots = options_data_calls['spot_price'].to_numpy(dtype=float)
    strikes = options_data_calls['strike'].to_numpy(dtype=float)
    maturities = options_data_calls['time_to_expiration'].to_numpy(dtype=float)
    market_prices = options_data_calls['lastPrice'].to_numpy(dtype=float)

    # Loss function: mean of the squared differences between modeled and market
    # prices. To calibrate a different model, change the parameter list and the
    # pricing call below.
    def objective(params):
        kappa, theta, xi, rho, v0 = params

        squared_error = 0.0
        for spot, strike, tte, market_price in zip(spots, strikes, maturities, market_prices):
            model_price = heston_call(
                S0=spot,
                K=strike,
                v0=v0,
                r=r,
                t=tte,
                kappa=kappa,
                theta=theta,
                xi=xi,
                rho=rho
            )
            squared_error += (model_price - market_price) ** 2

        return squared_error / len(market_prices)

    # Starting point and box constraints, ordered kappa, theta, xi, rho, v0.
    initial_guess = [1.0, 0.04, 0.3, -0.5, 0.04]
    bounds = [(1e-4, 10), (1e-4, 1), (1e-4, 2), (-0.99, 0.99), (1e-4, 1)]

    # Minimize the loss to identify the calibrated parameters.
    result = minimize(
        objective,
        initial_guess,
        bounds=bounds,
        method='L-BFGS-B',
        options={
            'disp': True,
            'maxiter': 20,
            'ftol': 1e-4,
            'gtol': 1e-4
        }
    )

    return result.x