<a href="https://colab.research.google.com/github/anitamezzetti/ML_finance/blob/main/generate_dataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import os
from time import time
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
start = time()  # wall-clock reference taken at the start of the run

# Seed NumPy's global generator so that "Restart & Run All" reproduces the
# same simulated dataset (the simulation draws from np.random).
SEED = 42
np.random.seed(SEED)

# Set the constants
# -----------------------------------------------------------------------------------
r = 0             # rate used for the drift and for discounting the payoff
S0 = k = 2.0      # initial stock price and strike (equal: at the money)
t_max = 1.0       # time horizon
V0 = 0.010201     # initial variance
sigma = 0.61      # scales the sqrt(v) * dW term of the variance process
theta = 0.019     # level the variance reverts to
kappa = 6.21      # mean-reversion speed of the variance

T1 = 10           # index of the time step at which the stock price is recorded

rho = -0.5        # correlation between the stock and variance Brownian motions

# Correlation values a path can be assigned, and the probability of each one.
# These names are required by the simulation call; a single value with
# probability 1 makes every path use `rho`. Extend both lists (probabilities
# must sum to 1) to sample the correlation per path.
rho_choice = [rho]
rho_probability = [1.0]

Stock price simulation (Heston stochastic-volatility model, Euler discretisation):

In [3]:
def stock_price_generator(t_max, n, m, r, S0, k, V0, sigma, theta, kappa, rho_choice, rho_probability, rng=None):
    """Simulate stock-price paths with stochastic variance using an Euler scheme.

    The variance follows dv = kappa * (theta - v) dt + sigma * sqrt(v) dW_v and
    the stock follows ds = r * s dt + sqrt(v) * s dW_s, where dW_s is built
    from dW_v and an independent Brownian increment so that the two are
    correlated with coefficient rho. The variance is floored at zero after
    every step.

    Parameters
    ----------
    t_max : float
        Time horizon of the simulation.
    n : int
        Number of time steps (dt = t_max / n).
    m : int
        Number of simulated paths.
    r : float
        Drift rate of the stock.
    S0 : float
        Initial stock price of every path.
    k : float
        Unused; kept so existing callers do not break.
    V0 : float
        Initial variance of every path.
    sigma, theta, kappa : float
        Parameters of the variance process (see the equations above).
    rho_choice : sequence of float
        Correlation values a path can be assigned.
    rho_probability : sequence of float
        Probability of each entry of ``rho_choice``.
    rng : optional
        Source of randomness exposing ``normal`` and ``choice`` (for example
        ``np.random.default_rng(seed)``). When omitted, NumPy's global state
        is used, so ``np.random.seed`` keeps working as before.

    Returns
    -------
    numpy.ndarray
        Array of shape (m, n + 1); row i is path i and column 0 equals S0.
    """
    if rng is None:
        rng = np.random  # legacy global generator, same draws as before

    dt = t_max / n
    sqrt_dt = np.sqrt(dt)

    # Brownian increments: one for the variance, one independent helper.
    dw_v = rng.normal(size=(m, n)) * sqrt_dt
    dw_i = rng.normal(size=(m, n)) * sqrt_dt

    # One correlation per path, kept as a column so it broadcasts over time.
    rho = rng.choice(rho_choice, size=(m, 1), p=rho_probability)
    dw_s = rho * dw_v + np.sqrt(1.0 - rho ** 2) * dw_i

    # Stock prices: column t holds the price of every path at step t.
    s = np.empty((m, n + 1))
    s[:, 0] = S0

    v = np.full(m, V0, dtype=float)

    for t in range(n):
        vol = np.sqrt(v)  # shared by both updates of this step
        dv = kappa * (theta - v) * dt + sigma * vol * dw_v[:, t]
        ds = r * s[:, t] * dt + vol * s[:, t] * dw_s[:, t]

        # Floor the variance at zero so the next square root is defined.
        v = np.maximum(v + dv, 0.0)
        s[:, t + 1] = s[:, t] + ds

    return s
    

In [4]:
def find_expected_payoff(stock_path, k, r, t_max):
    """Return the discounted call payoff max(S_T - k, 0) * exp(-r * t_max).

    Despite the name, no averaging is done here: the result is the payoff of
    each given path, and an expectation is obtained by averaging over paths.

    Parameters
    ----------
    stock_path : array-like
        Either a single path (1-D, last entry is the terminal price) or a
        batch of paths with time on the last axis.
    k : float
        Strike.
    r : float
        Rate used for discounting.
    t_max : float
        Time over which the payoff is discounted.

    Returns
    -------
    numpy.float64 or numpy.ndarray
        A scalar for a single path, otherwise one value per path.
    """
    # Index the last axis so a single path and a batch are handled alike.
    terminal_price = np.asarray(stock_path)[..., -1]
    payoff = np.maximum(terminal_price - k, 0)

    # With r = 0 the discount factor is 1 and this leaves the payoff as is.
    return payoff * np.exp(-r * t_max)

In [5]:
# Grid of simulation settings; the dataset loop visits every combination.
time_maturity = [0.5, 1, 2, 5]  # maturities, passed to the simulator as t_max
num_times = [100, 250, 500]  # number of time steps per path (n)
num_simulations = [100, 500, 1000]  # number of paths per simulation run (m)

In [6]:
# Empty frame carrying the dataset schema: option price, stock price at the
# recorded time step, and maturity.
df = pd.DataFrame(
    data=None,
    columns=["price", "stock", "maturity"],
)

In [7]:
# Fill the dataset: one row per simulated path, for every combination of
# maturity, number of time steps and number of paths.
# Rows are collected in a plain list and the frame is built once at the end:
# DataFrame.append was removed in pandas 2.0 and copied the whole frame on
# every call. Rebuilding df from scratch also makes this cell safe to re-run.
records = []
for t in time_maturity:
    for n in num_times:
        for m in num_simulations:
            s = stock_price_generator(t, n, m, r, S0, k, V0, sigma, theta, kappa, rho_choice, rho_probability)

            for stock_path in s:
                p = find_expected_payoff(stock_path, k, r, t)

                # 'stock' is the price at time step T1 of the same path.
                records.append({'price': p, 'stock': stock_path[T1], 'maturity': t})

df = pd.DataFrame(records, columns=['price', 'stock', 'maturity'])


In [8]:
# Preview the first five rows of the generated dataset.
df.head()

Unnamed: 0,price,stock,maturity
0,0.0,2.035873,0.5
1,0.133383,2.04962,0.5
2,0.091578,1.915976,0.5
3,0.317487,2.010661,0.5
4,0.142101,2.02723,0.5


In [9]:
# Row count: one row per simulated path over all grid combinations.
len(df)

19200

In [11]:
# Persist the dataset. With the default index=True the row index is written
# as an unnamed first column of data.csv.
df.to_csv("data.csv")