In [1]:
import math
import time
import pickle
import os
import glob
import numpy as np
from multiprocessing import Pool

# Backend selector: 'cpu' uses NumPy; any other value selects PyTorch and is
# handed to torch as the device on which the random draws are created.
device = 'cpu'
data_dir = 'data'

if device == 'cpu' :
    import numpy.random as random
    pool_n = 10  # size of the multiprocessing pool created in make_data

    # Thin wrappers so the simulation code is backend-agnostic.
    randn = lambda x : random.randn(x)     # x standard-normal draws
    relu = lambda x : np.maximum(x,0)      # element-wise max(x, 0)
    exp = lambda x : np.exp(x)
    mean = lambda x : np.mean(x)
    set_seed = lambda x : random.seed(x)   # seeds NumPy's global generator

else :
    import torch
    pool_n = 1

    # Create the draws on the selected device. Without the explicit `device`
    # argument torch.randn allocates on torch's default device, so the
    # configured device would never be used; every tensor derived from the
    # draws inherits its device from them.
    randn = lambda x : torch.randn(x, device=device)
    relu = lambda x : torch.relu(x)
    exp = lambda x : torch.exp(x)
    mean = lambda x : torch.mean(x)
    set_seed = lambda x : torch.manual_seed(x)

In [2]:
# Target number of pickle files per split; the generation loop stops at these counts.
train_file_num = test_file_num = 100

# Number of random draws per Monte Carlo estimate (passed through make_data to MC).
N = 100_000

In [3]:
def MC(vol,K,T,N) :
    """Monte Carlo estimate of a call payoff's excess over its intrinsic value.

    Draws N terminal values ``S = S0*exp(-0.5*vol**2*T + vol*W)`` with
    ``S0 = 1`` and ``W = sqrt(T) * Z`` (Z standard normal), averages
    ``max(S-K, 0)`` over the draws and subtracts ``max(S0-K, 0)``.

    Parameters
    ----------
    vol, K, T :
        Volatility, strike and time horizon. The visible caller passes one
        scalar element of a NumPy array for each.
    N : int
        Number of random draws.

    Returns
    -------
    The mean simulated payoff minus the intrinsic value. With the torch
    backend the result is moved to the CPU and detached before returning.
    """
    spot = 1.
    intrinsic = np.maximum(spot-K,0.)

    # Terminal values of the simulated paths.
    shocks = randn(N)*np.sqrt(T)
    drift = -0.5*vol**2*T
    terminal = spot*exp( drift + vol*shocks )

    time_value = mean(relu(terminal-K))-intrinsic

    if device == 'cpu' :
        return time_value
    return time_value.cpu().detach()

In [4]:
def make_data_impl(arg) :
    """Generate one worker's share of the data set.

    Parameters
    ----------
    arg : tuple
        ``(rank, data_num, N)``: worker index, number of samples to generate
        and number of Monte Carlo draws per sample. ``rank`` is accepted for
        interface compatibility; seeding no longer depends on it.

    Returns
    -------
    (x, y) : tuple of numpy.ndarray
        ``x`` has shape ``(data_num, 3)`` with columns ``[vol, K0, T]``;
        ``y`` has shape ``(data_num, 1)`` and holds the value returned by
        ``MC`` for each row.
    """
    rank,data_num,N = arg

    # Seed from OS entropy. The previous wall-clock seed
    # (int(time.time()) + 3*rank**2 + 5*rank) had one-second resolution, so a
    # pool started a few seconds later could reuse a seed already used by
    # another rank and emit a duplicate chunk.
    seed = int.from_bytes(os.urandom(4),'little')  # fits NumPy's 32-bit seed range
    set_seed(seed)        # generator of the active backend (used inside MC)
    # The parameter draws below always come from NumPy; seed it explicitly so
    # a worker does not replay a generator state inherited from its parent
    # when the backend is torch.
    np.random.seed(seed)

    vol = np.random.uniform(0.01,1.,data_num)
    T = np.random.uniform(0.01,1.,data_num)
    K0 = np.random.uniform(-2.,2.,data_num)
    # K0 is the log-strike in units of vol*sqrt(T); K is the strike itself.
    K = np.exp(K0*vol*np.sqrt(T))

    tv = [MC(vol_,K_,T_,N) for vol_,K_,T_ in zip(vol,K,T)]

    # Features store K0 (the normalised log-strike), not K.
    x = np.vstack([vol,K0,T]).T
    y = np.array(tv).reshape(-1,1)
    return (x,y)

def make_data(data_num,N) :
    """Generate ``data_num`` samples using a pool of ``pool_n`` worker processes.

    Parameters
    ----------
    data_num : int
        Total number of samples to generate.
    N : int
        Number of Monte Carlo draws per sample, forwarded to the workers.

    Returns
    -------
    (x, y) : tuple of numpy.ndarray
        ``x`` with shape ``(data_num, 3)`` and ``y`` with shape
        ``(data_num, 1)``, concatenated in rank order.
    """
    # Split the work so the chunk sizes sum to exactly data_num: the first
    # `extra` ranks take one additional sample. Plain floor division would
    # silently drop the remainder when data_num is not a multiple of pool_n.
    base,extra = divmod(data_num,pool_n)
    args = [(rank,base+(1 if rank < extra else 0),N) for rank in range(pool_n)]

    with Pool(pool_n) as p:
        results = p.map(make_data_impl,args)

    # concatenate (rather than np.array + reshape) tolerates unequal chunk sizes.
    x = np.concatenate([e[0] for e in results],axis=0).reshape(-1,3)
    y = np.concatenate([e[1] for e in results],axis=0).reshape(-1,1)
    return (x,y)

In [5]:
%%time

data_dir = 'data'

# Generate the pickle files for each split, resuming after the files that
# already exist on disk.
for mode,file_num in zip(['train','test'],[train_file_num,test_file_num]) :

    print(mode)
    # Creates data_dir and the split directory as needed; no-op when present.
    os.makedirs(os.path.join(data_dir,mode),exist_ok=True)

    # Resume point: count only the files written for the current N. The file
    # names embed the N tag, so files produced with a different N must not be
    # counted as already done.
    filelist = glob.glob(f'{data_dir}/{mode}/bs_{mode}_{N:.0e}_*.pkl')
    start = len(filelist)

    for i in range(start,file_num) :
        print(i,end=' ')
        x,y = make_data(10000,N)  # 10000 samples per file
        with open(f'{data_dir}/{mode}/bs_{mode}_{N:.0e}_{i:05d}.pkl','wb') as f: 
            pickle.dump([x,y],f)
    print('')
    print('-'*50)

train
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 
--------------------------------------------------
test
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 
--------------------------------------------------
CPU times: user 1.44 s, sys: 3.25 s, total: 4.69 s
Wall time: 8min 30s
