<a href="https://colab.research.google.com/github/anitamezzetti/ML_finance/blob/main/generate_dataset_multiple%20maturities.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [9]:
import numpy as np
import os
from time import time
import matplotlib.pyplot as plt
import pandas as pd

In [10]:
start = time()

# model parameters
sigma = 0.61
theta = 0.019
kappa = 6.21
rho = -0.5
T1 = 10

# initial conditions
S0 = [2, 2.5, 3.5]
V0 = [0.01, 0.05, 0.07]

In [11]:
# option parameters
K = [1.5, 2, 2.5]
int_rates = [0, 0.05]
time_maturity = [0.5, 1, 2, 5]

In [12]:
num_simulations = 2000
n = 500

Stock prices simulation:

In [13]:
def stock_price_generator (t_max, n ,m, S0, k, V0, sigma, theta, kappa, rho):
    dt = t_max / n
    
    # Brownian motions:
    dw_v = np.random.normal(size=(m, n)) * np.sqrt(dt)
    dw_i = np.random.normal(size=(m, n)) * np.sqrt(dt)

    dw_s = rho * dw_v + np.sqrt(1.0 - rho ** 2) * dw_i

    # Perform time evolution 
    s = np.empty((m, n + 1)) # initialisation stock prices vector
    s[:, 0] = S0

    v = np.ones(m) * V0

    for t in range(n):
        dv = kappa * (theta - v) * dt + sigma * np.sqrt(v) * dw_v[:, t]
        ds = r * s[:, t] * dt + np.sqrt(v) * s[:, t] * dw_s[:, t]

        v = np.clip(v + dv, a_min=0.0, a_max=None)
        s[:, t + 1] = s[:, t] + ds
      
        
    return s
    

In [14]:
def find_expected_payoff(stock_path, k, r, t_max):
    payoff = max(stock_path[-1] - k, 0) # one payoff for each simulation
    c = payoff * np.exp(-r * t_max)     # in case r=0, this step is useless
    
    return c

In [15]:
df = pd.DataFrame(columns=['price', 'stock', 'maturity', 'strike', 'initial_vol', 'interest_rates'])

In [16]:
# fill the dataset
for s0 in S0: 
  print(f"s0 {s0}")
  for k in K:
    print(f"k {k}")
    for v0 in V0:
      print(f"vo {v0}")
      for t in time_maturity:
        for r in int_rates:
            s = stock_price_generator (t, n, num_simulations, s0, k, v0, sigma, theta, kappa, rho)

            for stock_path in s:
                p = find_expected_payoff(stock_path, k, r, t)

                new_row = {'price':p, 'stock':stock_path[T1], 'maturity':t, 'strike':k, 'initial_vol': v0, 'interest_rates':r}
                #append row to the dataframe
                df = df.append(new_row, ignore_index=True)


s0 2
k 1.5
vo 0.01


KeyboardInterrupt: 

In [17]:
df.head()

Unnamed: 0,price,stock,maturity,strike,initial_vol,interest_rates
0,0.346709,2.000486,0.5,1.5,0.01,0.0
1,0.593942,2.019955,0.5,1.5,0.01,0.0
2,0.656051,2.004942,0.5,1.5,0.01,0.0
3,0.333379,1.968595,0.5,1.5,0.01,0.0
4,0.545773,2.008548,0.5,1.5,0.01,0.0


In [44]:
zero_values = sum(x == 0 for x in df.price)
non_zeros_values = sum(x != 0 for x in df.price)

print(f"This database contains {len(df)} options. \n{zero_values} have zero value.")

This database contains 1036800 options. 
223590 have zero value.


In [45]:
'''from google.colab import files

df.to_csv('df.csv')
files.download('df.csv')'''

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [3]:
from ipynb.fs.full.closed_form_solution import f, p, p1, p2, call_price
import pandas as pd

In [11]:
df = pd.read_csv('data_complete.csv')
df = df.drop('Unnamed: 0', axis=1)

In [12]:
df.head()

Unnamed: 0,price,stock,maturity,strike,initial_vol,interest_rates
0,0.610045,1.967429,0.5,1.5,0.01,0.0
1,0.438407,2.022543,0.5,1.5,0.01,0.0
2,0.662137,2.043572,0.5,1.5,0.01,0.0
3,0.478112,2.027417,0.5,1.5,0.01,0.0
4,0.491171,1.974101,0.5,1.5,0.01,0.0


In [None]:
df['price_sol'] = df.apply(lambda x: call_price(kappa, theta, sigma, rho, x['initial_vol'], x['interest_rates'] , x['maturity'], x['stock'], x['strike']), axis=1)

  If increasing the limit yields no improvement it is advised to analyze 
  the integrand in order to determine the difficulties.  If the position of a 
  local difficulty can be determined (singularity, discontinuity) one will 
  probably gain from splitting up the interval and calling the integrator 
  on the subranges.  Perhaps a special-purpose integrator should be used.
  "    d = cmath.sqrt((rho * sigma * phi * 1j - b)**2 - sigma**2 * (2 * u * phi * 1j - phi**2))\n",


In [None]:
df.head()

In [None]:
df.to