<a href="https://colab.research.google.com/github/anitamezzetti/ML_finance/blob/main/generate_dataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import numpy as np
import os
from time import time
import pandas as pd

In [5]:
start = time()  # wall-clock timestamp taken when this cell runs

# Reproducibility
# -----------------------------------------------------------------------------------
# The path simulation draws from NumPy's global random state (np.random.normal),
# so the seed is fixed here: a fresh "Restart & Run All" then regenerates exactly
# the same dataset.
SEED = 42
np.random.seed(SEED)

# Set the constants
# -----------------------------------------------------------------------------------
r = 0          # rate: drift of the stock dynamics and discount rate of the payoff
k = 2          # strike subtracted from the final stock price in the payoff
V0 = 0.010     # initial value of the variance process
sigma = 0.61   # coefficient of the sqrt(v) * dW term in the variance dynamics
theta = 0.019  # level the variance is pulled towards
kappa = 6.21   # strength of the pull of the variance towards theta

T1 = 10        # index (time step) at which each path is sampled for the 'stock' column

rho = -0.5     # correlation between the stock and the variance Brownian increments

In [None]:
# Grid of candidate initial stock prices: from 1 (inclusive) to 4 (exclusive) in steps of 0.001.
# NOTE(review): this cell shows no execution count, and S0 is passed unchanged to
# stock_price_generator, which assigns it to the first column of an array with
# num_simulations (= 2000) rows. An array of roughly 3000 values cannot be broadcast
# there, while the recorded outputs (stock values close to 2) look like a scalar S0
# was used -- confirm the intended S0 before relying on a full re-run.
S0 = np.arange(1,4,0.001)

Stock price simulation (Heston stochastic-volatility model, Euler discretisation):

In [6]:
def stock_price_generator(t_max, n, m, r, S0, k, V0, sigma, theta, kappa, rho):
    """Simulate stock price paths under a stochastic-variance model.

    Both the variance and the stock price are advanced with an explicit
    Euler step; after every step the variance is floored at zero so that
    its square root stays defined.

    Parameters
    ----------
    t_max : float
        Time horizon covered by each path.
    n : int
        Number of time steps per path (step size is ``t_max / n``).
    m : int
        Number of simulated paths.
    r : float
        Drift rate of the stock price.
    S0 : float or array broadcastable to ``(m,)``
        Initial stock price(s), written to the first column of the result.
    k : float
        Not used by the simulation; kept so the signature stays unchanged.
    V0 : float
        Initial variance of every path.
    sigma, theta, kappa : float
        Variance dynamics: ``dv = kappa * (theta - v) * dt + sigma * sqrt(v) * dW_v``.
    rho : float
        Correlation between the stock and the variance Brownian increments.

    Returns
    -------
    numpy.ndarray of shape ``(m, n + 1)``
        One simulated path per row; column 0 holds ``S0``.
    """
    step = t_max / n
    sqrt_step = np.sqrt(step)

    # Brownian increments. The variance driver is drawn first and the
    # independent driver second, so the random stream is consumed in a fixed order.
    variance_shocks = np.random.normal(size=(m, n)) * sqrt_step
    independent_shocks = np.random.normal(size=(m, n)) * sqrt_step

    # Stock driver with correlation rho to the variance driver.
    orthogonal_weight = np.sqrt(1.0 - rho ** 2)
    price_shocks = rho * variance_shocks + orthogonal_weight * independent_shocks

    # One row per path, one column per time point (initial value included).
    paths = np.empty((m, n + 1))
    paths[:, 0] = S0

    variance = V0 * np.ones(m)

    for i in range(n):
        vol = np.sqrt(variance)
        current = paths[:, i]

        variance_step = kappa * (theta - variance) * step + sigma * vol * variance_shocks[:, i]
        price_step = r * current * step + vol * current * price_shocks[:, i]

        # Floor the variance at zero before it is used in the next step.
        variance = np.clip(variance + variance_step, a_min=0.0, a_max=None)
        paths[:, i + 1] = current + price_step

    return paths
    

In [7]:
def find_expected_payoff(stock_path, k, r, t_max):
    """Discounted call payoff of one simulated path, or of a batch of paths.

    Despite the name, no averaging happens here: each path yields its own
    payoff ``max(S_final - k, 0)``, discounted by ``exp(-r * t_max)``.

    Parameters
    ----------
    stock_path : array-like
        A single path (1-D) or a batch of paths with time along the last
        axis (e.g. shape ``(paths, steps + 1)``). Only the final value of
        each path is used.
    k : float
        Strike.
    r : float
        Rate used for discounting.
    t_max : float
        Time span the payoff is discounted over.

    Returns
    -------
    numpy scalar for a single path, or an array with one discounted payoff
    per path for a batch.
    """
    # The ellipsis index selects the last time point whatever the number of
    # leading (path) axes, so 1-D and batched inputs share one code path.
    final_price = np.asarray(stock_path)[..., -1]

    payoff = np.maximum(final_price - k, 0)  # one payoff for each simulation
    c = payoff * np.exp(-r * t_max)          # in case r=0, this step is useless

    return c

In [8]:
# Simulation grid used to build the dataset.
n = 500                  # time steps per simulated path
num_simulations = 2000   # simulated paths per maturity

# Maturities from 1 up to (but excluding) 5 in steps of 0.2.
time_maturity = np.arange(1, 5, 0.2)

In [9]:
# Dataset layout, one row per simulated path; the fill loop below populates df.
#   price    - discounted call payoff of the path
#   stock    - value of the path at step index T1
#   maturity - time horizon the path was simulated over
df = pd.DataFrame(columns=['price', 'stock', 'maturity'])

In [10]:
# fill the dataset
for t in time_maturity:
    print(f't {t}')
    s = stock_price_generator (t, n, num_simulations, r, S0, k, V0, sigma, theta, kappa, rho)

    for stock_path in s:
        p = find_expected_payoff(stock_path, k, r, t)

        new_row = {'price':p, 'stock':stock_path[T1], 'maturity':t}
        #append row to the dataframe
        df = df.append(new_row, ignore_index=True)


t 1.0
t 1.2
t 1.4
t 1.5999999999999999
t 1.7999999999999998
t 1.9999999999999998
t 2.1999999999999997
t 2.3999999999999995
t 2.5999999999999996
t 2.8
t 2.9999999999999996
t 3.1999999999999993
t 3.3999999999999995
t 3.5999999999999996
t 3.7999999999999994
t 3.999999999999999
t 4.199999999999999
t 4.3999999999999995
t 4.6
t 4.799999999999999


In [11]:
# Preview the first rows of the generated dataset as a quick sanity check.
df.head()

Unnamed: 0,price,stock,maturity
0,0.030212,1.94286,1.0
1,0.0,2.01573,1.0
2,0.0,1.973625,1.0
3,0.238004,1.969582,1.0
4,0.0,2.035461,1.0


In [12]:
# Persist the dataset to the working directory. No index argument is passed, so
# pandas' default applies and the row index is written as the first CSV column.
df.to_csv("dataset1.csv")

In [13]:
# Total number of rows: one per simulated path for every maturity.
len(df)

40000