In [1]:
import matplotlib.pyplot as plt
import numpy as np
import random
import torch
import pandas as pd

# Generating synthetic data for OU

In [28]:
%run NCoinDP_functions.ipynb

In [29]:
# Name of the process to simulate; the data-generation cell below only handles "OU".
prcs = "OU" 

In [30]:
# Report the PyTorch build and whether CUDA is usable, then pick the compute device.
print(torch.__version__)
print(torch.cuda.is_available())

# Query the GPU name only when CUDA is present: torch.cuda.get_device_name(0)
# raises on a CPU-only machine, which would abort the cell before `device` is set.
if torch.cuda.is_available():
    print(torch.cuda.get_device_name(0))
    dev = "cuda:0"
else:
    dev = "cpu"
device = torch.device(dev)

2.1.2+cu121
True
NVIDIA A10


In [None]:
# Create every tensor below on the CPU by default
torch.set_default_device('cpu')

# Fail early for an unsupported process: the parameter ranges, simulator and
# output path below exist only for "OU", so any other value would otherwise
# reach torch.save with undefined (or stale) X / output / tmp.
if prcs != "OU":
    raise ValueError("Unsupported process for synthetic data generation: " + repr(prcs))

# Number of synthetic data (parameter draws, one simulated path each)
L = 1000000

# Time interval between consecutive observations
delta = 1/52

nums = [500, 1000, 3000]
for num in nums:
    # Number of data points
    n = num

    # Observation times 0, delta, ..., n * delta.
    # Same grid as the test-data cell; the previous form (k / n) * n * delta
    # is mathematically identical but could differ in the last floating-point bit.
    obtime = np.arange(0, n + 1) * delta

    # Range of parameters
    mu_range = [1, 5]
    theta_range = [1, 2.5]
    sigma2_range = [0.5, 2]

    # Training + validation + test parameters, drawn uniformly within each range.
    # The seed is reset for every n, so all sample sizes share the same parameter draws.
    torch.manual_seed(510)
    mu_ran     = torch.rand(L) * (mu_range[1] - mu_range[0]) + mu_range[0]
    theta_ran  = torch.rand(L) * (theta_range[1] - theta_range[0]) + theta_range[0]
    sigma2_ran = torch.rand(L) * (sigma2_range[1] - sigma2_range[0]) + sigma2_range[0]

    # Output: L x 3 matrix with columns (mu, theta, sigma2)
    output = torch.stack((mu_ran, theta_ran, sigma2_ran), dim = 1)

    # Synthetic data generating: the initial value is drawn from a normal with
    # mean theta and variance sigma2 / (2 * mu); the simulator receives sigma
    # (the square root of sigma2).
    torch.manual_seed(511)
    y0_tmp = torch.normal(theta_ran, torch.sqrt(sigma2_ran / (2 * mu_ran)))
    yL = OU_simul_sample(L, obtime, y0_tmp, mu_ran, theta_ran, sigma2_ran ** (1/2))

    # Summary statistics
    X = OU_summary(yL)

    # Save [summary statistics, parameters] for this sample size
    tmp = f"../depot_hyun/hyun/syn_data/OU_sim_n{num}.pt"
    torch.save([X, output], tmp)

# Generating Test Data Sets for OU

In [None]:
# Process for which the test scenarios below are defined.
prcs = "OU"

if prcs == "OU":
    # Every entry is a [mu, theta, sigma2] triple (the order in which the
    # simulation cells index it). Each scenario varies one parameter over
    # four values and keeps the other two fixed.
    S1 = [[mu, 2, 1] for mu in (1.5, 2.5, 3.5, 4.5)]
    S2 = [[3, theta, 1] for theta in (1.2, 1.5, 1.8, 2.1)]
    S3 = [[3, 2, sigma2] for sigma2 in (0.7, 1.0, 1.3, 1.6)]

# Labels used in output file names, in the same order as the scenario list.
scenarios_name = ["S1", "S2", "S3"]
scenarios = [S1, S2, S3]

In [None]:
# Sample sizes for the test data sets
nums = [500, 1000, 3000]

# Number of simulated paths per parameter set; the replicates are used to get
# the mean and standard deviation of the estimated parameters
sim = 3000

# Time interval between consecutive observations
delta = 1/52

for num in nums:
    n = num

    # Observation times 0, delta, ..., n * delta (depends only on n)
    obtime = np.arange(0, n + 1) * delta

    for scenario_name, scenario in zip(scenarios_name, scenarios):
        # Seed is reset before every (n, scenario) pair, so each pair starts
        # from the same random state.
        torch.manual_seed(401)

        if prcs == "OU":
            ones = torch.ones(sim)

            # Draw the initial values for ALL parameter sets first, then simulate
            # the paths. This two-pass order matches the original cell, so the
            # random-number stream (and therefore the saved data) is unchanged.
            # Each param is [mu, theta, sigma2]; the initial value is normal with
            # mean theta and standard deviation sqrt(sigma2 / (2 * mu)).
            y0_sims = [
                torch.normal(ones * param[1], ones * ((param[2] / (2 * param[0])) ** (1/2)))
                for param in scenario
            ]

            # The simulator receives sigma, i.e. the square root of sigma2.
            sim_datas = [
                OU_simul_sample(sim, obtime, y0, param[0] * ones, param[1] * ones, param[2] ** (1/2) * ones)
                for y0, param in zip(y0_sims, scenario)
            ]

            # Save for NCoin and MLE: [list of simulated data sets, true parameters]
            tmp = f"../depot_hyun/hyun/test_data/OU_test_n{num}_{scenario_name}.pt"
            torch.save([sim_datas, scenario], tmp)

            # Save for GMM: one tab-separated text file per parameter set
            for j, sim_data in enumerate(sim_datas):
                tmp2 = f"../depot_hyun/hyun/test_data/GMM/OU_test_n{num}_{scenario_name}_{j}.txt"
                pd.DataFrame(sim_data).to_csv(tmp2, sep='\t', index=False)