In [None]:
import numpy as np
import pandas as pd
import random
from scipy.stats import norm

import plotly.express as px
import plotly.graph_objs as go

In [None]:
###https://github.com/johntfoster/bspline
!pip install bspline
import bspline
import bspline.splinelab as splinelab

Collecting bspline
  Downloading bspline-0.1.1.tar.gz (84 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/84.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m30.7/84.2 kB[0m [31m749.4 kB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.2/84.2 kB[0m [31m1.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: bspline
  Building wheel for bspline (setup.py) ... [?25l[?25hdone
  Created wheel for bspline: filename=bspline-0.1.1-py3-none-any.whl size=84483 sha256=ca2df91ef9b4ccae6b6ba4af71c8033678807eec8760579c35d795d4328f8541
  Stored in directory: /root/.cache/pip/wheels/3c/ab/0a/70927853a6d9166bc777922736063a6f99c43a327c802f9326
Successfully built bspline
Installing collected packages: bspline
Successfully installed bspline-0.1.1


In [None]:
def terminal_payoff(st, k, exercise_type='c'):
    '''
    Calculate terminal payoff for a call/put option
    Args:
        st: final stock price
        k: strike
        exercise_type: 'c' for call and 'p' for put
    Examples:
        st_eg = np.linspace(1, 100, 100)
        payoff_eg = terminal_payoff(st_eg, 50)
        fig = px.scatter(x=st_eg, y=payoff_eg, title='Terminal Payoff w/ Stock Price for Call Option')
        fig.show()
    '''
    return np.maximum(st - k, 0) if exercise_type == 'c' else np.maximum(k - st, 0)

In [None]:
def delta_calc(s, k, maturity, r, sigma, exercise_type='c'):
    '''
    Calculate delta of a call/put option based on BSM formula, specifically N(d1)
    Args:
        ercise_type: 'c' for call and 'p' for put
    Examples:
        s_eg = np.linspace(1, 100, 100)
        d_eg = delta_calc(s_eg, k=50, maturity=1.0, r=0.03, sigma=0.18)

        fig = px.scatter(x=s_eg, y=d_eg, title='Delta w/ Stock Price for Call Option')
        fig.show()
    '''
    d1 = (np.log(s / k) + (r + 0.5 * sigma ** 2) * maturity) / (sigma * np.sqrt(maturity))
    if exercise_type == 'c':
        delta_calc = norm.cdf(d1, loc=0, scale=1)
    else:
        delta_calc = -norm.cdf(-d1, loc=0, scale=1)
    return delta_calc

In [None]:
def black_scholes(s, k, maturity, r, sigma, exercise_type='c'):
    '''
    Calculate price of a call/put option based on BSM formula
    Args:
        ercise_type: 'c' for call and 'p' for put
    Examples:
        s_eg = np.linspace(1, 100, 100)
        p_eg = black_scholes(s_eg, k=50, maturity=1.0, r=0.03, sigma=0.18)

        fig = px.scatter(x=s_eg, y=p_eg, title='Price w/ Stock Price for Call Option')
        fig.show()
    '''
    d1 = (np.log(s / k) + (r + 0.5 * sigma ** 2) * maturity) / (sigma * np.sqrt(maturity))
    d2 = d1 - sigma * np.sqrt(maturity)

    if exercise_type == 'c':
        price = s * norm.cdf(d1, loc=0, scale=1) - k * np.exp(-r * maturity) * norm.cdf(d2, loc=0, scale=1)
    else:
        price = k * np.exp(-r * maturity) * norm.cdf(-d2, loc=0, scale=1) - s * norm.cdf(-d1, loc=0, scale=1)
    return price

In [None]:
def show_sel_path(input_df, sel_path, value_name='Value'):
    df = input_df.reset_index().rename(columns={'index':'path_id'})
    df = df.melt(id_vars=['path_id'], value_vars=input_df.columns, var_name='Time Steps', value_name=value_name)

    fig = px.line(df[df['path_id'].isin(sel_path)], x='Time Steps', y=value_name, color='path_id')
    return fig

In [None]:
np.random.seed(100)
num_of_path = 2**14-1 # number of paths

s0 = 100  # initial stock price
mu = 0.05  # drift
sigma = 0.15  # constant volatility
r = 0.03  # interest rate
expiry = 0.25  # expiry, 3-month
num_of_timestep = 63  # number of time steps expiry*1/252
delta_t = expiry / num_of_timestep  # time interval, daily 1/252
strike = s0  # assume at-the-money option
exercise_type = 'c' # 'c' for call option and 'p' for put option

gamma = np.exp(- r * delta_t)  # discount factor
risk_aversion = 0.1 # risk aversion  \kappa
risk_lambda = risk_aversion / 2.0 # \lambda

data_cutoff = int(num_of_path * 0.7) #split data into training and test

In [None]:
# standard normal random variable Z
rand_number = pd.DataFrame(np.random.randn(num_of_path, num_of_timestep), index=range(1, num_of_path + 1),
                           columns=range(1, num_of_timestep + 1))

# stock price, rows are MC paths, columns are time steps
spot_df = pd.DataFrame([], index=range(1, num_of_path + 1), columns=range(num_of_timestep + 1))
spot_df[0] = s0
for t in range(1, num_of_timestep + 1):
    spot_df[t] = spot_df[t - 1] * np.exp((mu - 0.5 * sigma**2) * delta_t + sigma * np.sqrt(delta_t) * rand_number[t])

In [None]:
spot_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,54,55,56,57,58,59,60,61,62,63
1,100,98.375346,98.709582,99.806265,99.583793,100.526946,101.03212,101.259063,100.25581,100.091846,...,95.685913,95.799122,96.274707,95.508201,96.657352,96.599521,95.805528,95.025183,95.056536,95.285059
2,100,100.028183,98.509348,97.55715,98.138996,98.839276,99.818379,98.491787,96.807971,97.158374,...,88.421481,89.016401,87.397989,87.30016,86.051728,87.745625,86.608276,85.728202,85.548203,84.413754
3,100,100.916275,100.913296,101.785597,102.534652,101.109334,99.990488,101.238322,102.1691,101.016219,...,101.42782,100.951508,100.761111,101.179621,100.15444,101.347464,101.142724,100.487192,99.942606,100.656507
4,100,100.428147,102.143414,102.655602,103.813282,105.88782,105.448844,106.113982,105.955168,106.994996,...,109.009628,108.743516,109.945883,111.174686,111.931996,110.599273,112.218935,112.045497,111.665135,112.169171
5,100,101.400335,101.647627,101.627081,102.444487,104.710755,103.801165,103.926881,106.467176,106.890415,...,117.573875,119.659444,119.29832,119.880303,118.683827,119.350951,119.46694,119.919407,121.460086,123.412995


In [None]:
delta_df = pd.DataFrame([], index=range(1, num_of_path + 1), columns=range(num_of_timestep + 1))
for t in range(num_of_timestep + 1):
    delta_df[t] = delta_calc(spot_df[t], strike, expiry - delta_t * t, r, sigma, exercise_type)

In [None]:
option_value_df = pd.DataFrame([], index=range(1, num_of_path + 1), columns=range(num_of_timestep + 1))
for t in range(num_of_timestep):
    option_value_df[t] = black_scholes(spot_df[t], strike, expiry - delta_t * t, r, sigma, exercise_type)
option_value_df.iloc[:,-1] = terminal_payoff(spot_df.iloc[:,-1], strike, exercise_type)
#black_scholes(s=s0, k=strike, maturity=expiry, r=r , sigma=sigma, exercise_type=exercise_type)

  option_value_df.iloc[:,-1] = terminal_payoff(spot_df.iloc[:,-1], strike, exercise_type)


In [None]:
sel_path = np.random.choice(num_of_path, 5)
display(show_sel_path(spot_df, sel_path, 'Spot'))
display(show_sel_path(delta_df, sel_path, 'Delta'))
display(show_sel_path(option_value_df, sel_path, 'Option Value'))

In [None]:
delta_s = np.exp(-r * delta_t) * spot_df.loc[:,1:num_of_timestep].values - spot_df.loc[:,0:num_of_timestep-1]
delta_c = np.exp(-r * delta_t) * option_value_df.loc[:,1:num_of_timestep].values - option_value_df.loc[:,0:num_of_timestep-1]

spot_df_train, spot_df_test = spot_df[:data_cutoff], spot_df[data_cutoff:]
delta_df_train, delta_df_test = delta_df[:data_cutoff], delta_df[data_cutoff:]
delta_s_train, delta_s_test = delta_s[:data_cutoff], delta_s[data_cutoff:]
delta_c_train, delta_c_test = delta_c[:data_cutoff], delta_c[data_cutoff:]

In [None]:
# delta_s_tilde = delta_s - delta_s.mean(axis=0)
# delta_c_tilde = delta_c - delta_c.mean(axis=0)

delta_s_tilde_train = delta_s_train - delta_s_train.mean(axis=0)
delta_s_tilde_test = delta_s_test - delta_s_test.mean(axis=0)

delta_c_tilde_train = delta_c_train - delta_c_train.mean(axis=0)
delta_c_tilde_test = delta_c_test - delta_c_test.mean(axis=0)

In [None]:
# display(show_sel_path(delta_s , sel_path, 'Change'))
# display(show_sel_path(delta_s_tilde , sel_path, 'Change'))

In [None]:
def gen_basis_matrix(input_df, order, num_func, mc_path):
    '''construct basis functions'''
    input_df = input_df.copy()
    x_min = np.min(input_df.values)
    x_max = np.max(input_df.values)

    tau = np.linspace(x_min, x_max, num_func)
    knots = splinelab.aptknt(tau, order)
    basis_spline = bspline.Bspline(knots, order)
    ##display(basis_spline.plot())

    ###calculate \Phi_n(X_t^k), it has 3 dimensions, n is index of the basis function, t is the time step, k is the index of the MC path
    phi_mat = np.zeros((num_of_timestep + 1, mc_path, num_func))
    for t in np.arange(num_of_timestep + 1):
        x_t = input_df[t].values
        phi_mat[t,:,:] = np.array([basis_spline(x_t_k) for x_t_k in x_t])

    return phi_mat

### matrices or vectors for calculating optimal coefficients
def gen_matrix_a(phi_x, t_idx, s_tilde, identity_mat_val=1e-5):
    '''
    Args:
        identity_mat_val: to avoid singular matrix for the inverse
    '''
    phi_x_t = phi_x[t_idx, :, :]
    s_tilde_squared = (s_tilde[t_idx] ** 2).values.reshape(-1, 1)
    mat_a = np.matmul(phi_x_t.T, phi_x_t * s_tilde_squared) + identity_mat_val * np.eye(phi_x_t.shape[1])
    return mat_a

def gen_vector_b(phi_x, t_idx, s_change, s_tilde, c_tilde, risk_lambda):
    phi_x_t = phi_x[t_idx, :, :]
    coef = - s_tilde[t_idx] * c_tilde[t_idx] + 0.5 / risk_lambda * s_change[t_idx]
    vec_b = np.dot(phi_x_t.T, coef)
    return vec_b

def gen_matrix_c(phi_x, t_idx, identity_mat_val=1e-5):
    '''
    Args:
        identity_mat_val: to avoid singular matrix for the inverse
    '''
    phi_x_t = phi_x[t_idx, :, :]
    mat_c = np.matmul(phi_x_t.T, phi_x_t) + identity_mat_val * np.eye(phi_x_t.shape[1])
    return mat_c

def gen_vector_d(phi_x, t_idx, q_df, r_df, gamma):
    phi_x_t = phi_x[t_idx, :, :]
    vec_d = np.dot(phi_x_t.T, r_df.loc[:,t] + gamma * q_df.loc[:, t+1])
    return vec_d

In [None]:
delta_s_test.index

RangeIndex(start=11469, stop=16384, step=1)

In [None]:
def calc_reward_df(opt_hedge, ds, dc, s_tilde, c_tilde, risk_lambda):

    reward_calc = pd.DataFrame([], index=ds.index, columns=range(num_of_timestep))
    for t in range(num_of_timestep):
        reward_calc.loc[:,t] = opt_hedge.loc[:,t] * ds.loc[:,t] + dc.loc[:,t] \
                                - risk_lambda*(opt_hedge.loc[:,t]**2 * s_tilde.loc[:,t]**2 \
                                + 2 *opt_hedge.loc[:,t]* s_tilde.loc[:,t]*c_tilde.loc[:,t]\
                                + c_tilde.loc[:,t]**2)

    total_reward = 0.0
    for t in range(num_of_timestep):
        total_reward = total_reward + np.exp(-r * delta_t * t) * reward_calc.loc[:,t].mean()
    return reward_calc, total_reward

In [None]:
# new variable X, no drift
state_x_df = - (mu - 0.5 * sigma**2) * np.arange(num_of_timestep+1) * delta_t + np.log(spot_df)
state_x_df_train, state_x_df_test = state_x_df[:data_cutoff], state_x_df[data_cutoff:]

spline_order = 3  # order of spline
num_basis_func = 10 # number of basis functions
phi_mat = gen_basis_matrix(state_x_df_train, order=spline_order, num_func=num_basis_func, mc_path=data_cutoff) #Phi(X)

In [None]:
###calculate coefficients phi = A^(-1) * B
opt_act = pd.DataFrame([], index=range(1, data_cutoff+1), columns=range(num_of_timestep))
##opt_act.iloc[:,-1] = 0
for t in range(num_of_timestep-1, -1, -1):
    mat_a = gen_matrix_a(phi_mat, t, s_tilde=delta_s_tilde_train)
    vec_b = gen_vector_b(phi_mat, t, s_change=delta_s_train, s_tilde=delta_s_tilde_train, c_tilde=delta_c_tilde_train, risk_lambda=risk_lambda)
    coef_phi = np.dot(np.linalg.inv(mat_a), vec_b)

    opt_act[t] = np.dot(phi_mat[t,:,:], coef_phi)

In [None]:
reward_df, qlbs_reward = calc_reward_df(opt_act.copy(), ds=delta_s_train, dc=delta_c_train,
                                      s_tilde=delta_s_tilde_train, c_tilde=delta_c_tilde_train,
                                      risk_lambda=risk_lambda)


In a future version, `df.iloc[:, i] = newvals` will attempt to set the values inplace instead of always setting a new array. To retain the old behavior, use either `df[df.columns[i]] = newvals` or, if columns are non-unique, `df.isetitem(i, newvals)`



In [None]:
###calculate coefficients omega = C^(-1) * D
q_fun_df = pd.DataFrame([], index=range(1, data_cutoff+1), columns=range(num_of_timestep+1))
q_fun_df.iloc[:,-1] = 0

for t in range(num_of_timestep-1, -1, -1):
    mat_c = gen_matrix_c(phi_mat, t)
    vec_d = gen_vector_d(phi_mat, t, q_fun_df, reward_df, gamma)
    coef_omega = np.dot(np.linalg.inv(mat_c), vec_d)

    q_fun_df.loc[:,t] = np.dot(phi_mat[t,:,:], coef_omega)


In a future version, `df.iloc[:, i] = newvals` will attempt to set the values inplace instead of always setting a new array. To retain the old behavior, use either `df[df.columns[i]] = newvals` or, if columns are non-unique, `df.isetitem(i, newvals)`


In a future version, `df.iloc[:, i] = newvals` will attempt to set the values inplace instead of always setting a new array. To retain the old behavior, use either `df[df.columns[i]] = newvals` or, if columns are non-unique, `df.isetitem(i, newvals)`



In [None]:
q_fun_df[0].unique(), opt_act[0].unique()

(array([0.26440094]), array([-0.25847892]))

In [None]:
### check if action/hedge yield higher rewards than delta hedge
delta_reward_calc,  delta_reward = calc_reward_df(-1 * delta_df_train.copy(),
                                                 ds=delta_s_train, dc=delta_c_train,
                                                 s_tilde=delta_s_tilde_train, c_tilde=delta_c_tilde_train,
                                                 risk_lambda=risk_lambda)
# ql_reward_calc, qlbs_reward = calc_reward_df(opt_act.copy(),
#                                            delta_s=delta_s, delta_c=delta_c,
#                                            s_tilde=delta_s_tilde, c_tilde=delta_c_tilde,
#                                            risk_lambda=risk_lambda)

qlbs_reward, delta_reward


In a future version, `df.iloc[:, i] = newvals` will attempt to set the values inplace instead of always setting a new array. To retain the old behavior, use either `df[df.columns[i]] = newvals` or, if columns are non-unique, `df.isetitem(i, newvals)`



(0.2647050314895559, -0.009307158845436822)

In [32]:
###compare the action/hedge with delta hedge
data=[
    go.Scatter(x=delta_df_train.columns[:num_of_timestep], y=-1*delta_df_train.mean(axis=0).values[:num_of_timestep], line=dict(dash='dash'), name='Delta hedge'),
    go.Scatter(x=opt_act.columns, y=opt_act.mean(axis=0).values, name='DP hedge'),
]
layout=go.Layout(
        title=go.layout.Title(text='Average hedge each timestep', x=0.5),
        width=800,
        height=400
    )
fig = go.Figure(data=data, layout=layout)
fig.update_layout(xaxis_title="Timestep", yaxis_title="Hedge")
fig.show()

data=[
    go.Scatter(x=delta_reward_calc.columns, y=delta_reward_calc.mean(axis=0).values, line=dict(dash='dash'), name='Delta hedge'),
    go.Scatter(x=reward_df.columns, y=reward_df.mean(axis=0).values, name='DP hedge'),
]
layout=go.Layout(
        title=go.layout.Title(text='', x=0.5), ##Average reward each timestep
        width=800,
        height=400
    )
fig = go.Figure(data=data, layout=layout)
fig.update_layout(xaxis_title="Timestep", yaxis_title="Reward")
fig.show()


In [None]:
###we check what is reward of next step with opt_act[0].unique() and delta hedge
def calc_one_step_reward(hedge_qty, t, ds, dc, s_tilde, c_tilde, risk_lambda):
  t = 0
  reward =  hedge_qty * ds.loc[:,t] + dc.loc[:,t] \
            - risk_lambda*(hedge_qty**2 * s_tilde.loc[:,t]**2 \
            + 2 *hedge_qty* s_tilde.loc[:,t]*c_tilde.loc[:,t]\
            + c_tilde.loc[:,t]**2)
  return reward

In [None]:
hedge_qty_qlbs = opt_act[0].unique()
hedge_qty_delta = -1*delta_df_test[0].unique()
r_qlbs = calc_one_step_reward(hedge_qty_qlbs, 0,
                    ds=delta_s_test, dc=delta_c_test,
                    s_tilde=delta_s_tilde_test, c_tilde=delta_c_tilde_test,
                    risk_lambda=risk_lambda)
r_delta = calc_one_step_reward(hedge_qty_delta, 0,
                    ds=delta_s_test, dc=delta_c_test,
                    s_tilde=delta_s_tilde_test, c_tilde=delta_c_tilde_test,
                    risk_lambda=risk_lambda)

In [None]:
r_qlbs.mean(), r_delta.mean()

(-0.004505928198377556, -0.00017957202743091065)