<a href="https://colab.research.google.com/github/anitamezzetti/ML_finance/blob/main/generate_dataset_multiple%20maturities.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import numpy as np
import os
from time import time
import matplotlib.pyplot as plt
import pandas as pd

In [3]:
start = time()

# Set the constants
# -----------------------------------------------------------------------------------
t_max = 1.0
sigma = 0.61
theta = 0.019
kappa = 6.21

T1 = 10

rho_choice = np.array([-0.5, -0.7, -0.9])
rho_probability = np.array([0.25, 0.5, 0.25])

Stock prices simulation:

In [4]:
def stock_price_generator (t_max, n ,m, r, S0, k, V0, sigma, theta, kappa, rho_choice, rho_probability):
    dt = t_max / n
    
    # Brownian motions:
    dw_v = np.random.normal(size=(m, n)) * np.sqrt(dt)
    dw_i = np.random.normal(size=(m, n)) * np.sqrt(dt)

    rho = np.random.choice(rho_choice, size=(m, 1), p = rho_probability)
    dw_s = rho * dw_v + np.sqrt(1.0 - rho ** 2) * dw_i

    # Perform time evolution 
    s = np.empty((m, n + 1)) # initialisation stock prices vector
    s[:, 0] = S0

    v = np.ones(m) * V0

    for t in range(n):
        dv = kappa * (theta - v) * dt + sigma * np.sqrt(v) * dw_v[:, t]
        ds = r * s[:, t] * dt + np.sqrt(v) * s[:, t] * dw_s[:, t]

        v = np.clip(v + dv, a_min=0.0, a_max=None)
        s[:, t + 1] = s[:, t] + ds
      
        
    return s
    

In [5]:
def find_expected_payoff(stock_path, k, r, t_max):
    payoff = max(stock_path[-1] - k, 0) # one payoff for each simulation
    c = payoff * np.exp(-r * t_max)     # in case r=0, this step is useless
    
    return c

In [6]:
K = [1.5, 2, 2.5]
S0 = [2, 2.5, 3.5]
V0 = [0.01, 0.05, 0.07]
int_rates = [0, 0.05]

In [7]:
time_maturity = [0.5, 1, 2, 5]
num_times = [100, 250, 500]
num_simulations = [100, 500, 1000]

In [8]:
df = pd.DataFrame(columns=['price', 'stock', 'maturity', 'strike', 'initial_vol', 'interest_rates'])

In [9]:
# fill the dataset
for s0 in S0: 
  print(f"s0 {s0}")
  for k in K:
    print(f"k {k}")
    for v0 in V0:
      print(f"vo {v0}")
      for t in time_maturity:
        print(f"t {t}")
        for r in int_rates:
            for n in num_times:
                for m in num_simulations:
                    s = stock_price_generator (t, n ,m, r, s0, k, v0, sigma, theta, kappa, rho_choice, rho_probability)

                    for stock_path in s:
                        p = find_expected_payoff(stock_path, k, r, t)

                        new_row = {'price':p, 'stock':stock_path[T1], 'maturity':t, 'strike':k, 'initial_vol': v0, 'interest_rates':r}
                        #append row to the dataframe
                        df = df.append(new_row, ignore_index=True)


s0 2
k 1.5
vo 0.01
t 0.5


KeyboardInterrupt: ignored

In [10]:
df.head()

Unnamed: 0,price,stock,maturity,strike,initial_vol,interest_rates
0,0.254059,1.922095,0.5,1.5,0.01,0.0
1,0.378549,2.03651,0.5,1.5,0.01,0.0
2,0.686526,1.973351,0.5,1.5,0.01,0.0
3,0.693271,2.000052,0.5,1.5,0.01,0.0
4,0.218283,2.037222,0.5,1.5,0.01,0.0


In [13]:
zero_values = sum(x == 0 for x in df.price)
non_zeros_values = sum(x != 0 for x in df.price)

print(f"This database contains {len(df)} options. \n{zero_values} have zero value.")

This database contains 2706 options. 
31 have zero value.


In [25]:
from google.colab import  drive

drive.mount('/cola_results')

df.to_csv('/My Drive/cola_results/data.csv')

Drive already mounted at /cola_results; to attempt to forcibly remount, call drive.mount("/cola_results", force_remount=True).


FileNotFoundError: ignored