In [70]:
#jax_aux_file_jitted_25_01_2023 = import_file(os.path.join(Path(os.getcwd()).parent,'jax_aux_file_jitted_25_01_2023.py'))
def corr_np(s,envelope,envelope_params):
    """Autocorrelation Corr(X_t, X_{t+s}) of a trawl process at lag s (s > 0).

    The value returned is the overlap area of the ambit sets at times t and t+s;
    because the area of the ambit set is normalised to 1, that overlap area is
    the correlation itself.

    Parameters
    ----------
    s : lag at which the correlation is evaluated (scalar or numpy array).
    envelope : name of the envelope; one of 'gamma', 'exponential', 'ig'.
    envelope_params : parameters of the envelope
        'exponential' -> only the first entry (the rate u) is read,
        'gamma'       -> (H, delta),
        'ig'          -> (gamma, delta).

    Returns
    -------
    exp(-u*s), (1+s/delta)**(-H) or exp(delta*gamma*(1-sqrt(2*s/gamma**2+1))),
    depending on the envelope.

    Raises
    ------
    AssertionError if `envelope` is not one of the three supported names.
    """
    assert envelope in ['gamma','exponential','ig']

    if envelope == 'exponential':
        rate = envelope_params[0]
        return np.exp(-rate * s)

    if envelope == 'gamma':
        H, delta = envelope_params
        return (1 + s / delta) ** (-H)

    if envelope == 'ig':
        gamma, delta = envelope_params
        root_term = np.sqrt(2 * s / gamma**2 + 1)
        return np.exp(delta * gamma * (1 - root_term))

In [None]:
#generic imports
import numpy as np
import time

#ambit stochastics imports
from generate_trawls_with_gaussian_marginal import generate_gaussian_seed_trawls
#gmm fitting of envelope params + gmm / mle for levy seed params
from ambit_stochastics.helpers.marginal_distribution_functions import fit_trawl_marginal
from ambit_stochastics.helpers.acf_functions import fit_trawl_envelope_gmm
import pickle

#jax imports
from jax.config import config
config.update("jax_enable_x64", True)
from bfgs_for_cl_helper import do_modified_bfgs
from   jax import random
import jax.numpy as jnp
import jax

# Label for the floating-point type reported at start-up. This assignment does not
# configure JAX by itself; 64-bit mode is switched on by config.update above.
default_precision = jnp.float64
print('Default precision is: ',default_precision)


                         
if __name__ == "__main__":
    # Simulation study driver:
    #   1. simulate trawl paths with a Gaussian Levy seed,
    #   2. fit the marginal and envelope parameters by (G)MM,
    #   3. refine each fit by composite likelihood (CL) with a modified BFGS,
    #      started from the GMM estimate,
    #   4. pickle the GMM / CL results and the CL running time.
    # Progress messages are appended to text.txt.

    ##########################simulation study parameters#########################

    #trawl simulation parameters
    tau = 1
    nr_trawls = 2000
    nr_simulations = 2
    TRUE_GAUSSIAN_PARAMS = (4, 3)
    assert TRUE_GAUSSIAN_PARAMS[1] > 0
    envelope = 'gamma'
    jax_seed = 4564340345993
    key = random.PRNGKey(jax_seed)

    #trawl function parameters
    TRUE_ENVELOPE_PARAMS = (0.5,0.75)
    np.random.seed(seed = 36363)

    # a single numpy seed is drawn; it is the one handed to the trawl simulator below
    np_random_seeds = np.random.randint(low = 1, high = 2**31, size = 1)

    #inference params
    nr_mc_samples_per_batch =    10**3
    nr_batches =  10 #giving nr_mc_samples_per_batch * nr_batches total samples
    max_taylor_deg = 3  # degree of the taylor polynomial used as control variate

    #bfgs params
    max_iter_at_once_bfgs = 20
    max_batches_bfgs      = 1

    lags_list = ((1,3,5,10,15),)#(1,3,5,10),(1,3,5))#,(1,5,10),(1,5,10,15),(1,5,10,15,20)),(1,3,5,10,15),(1,3,5,10,20))
    n_values = (1500,) #(1500,1000,500,250)#(1000,500,250,150)#,1000, 2500, 5000)#,750,1000,1500)
    assert max(n_values) <= nr_trawls

    #results containers
    levy_seed_params_list = []
    d_gmm = dict()   # (lags, n) -> {'envelope_params': ..., 'levy_seed_params': ...}
    d_cl  = dict()   # (lags, n) -> list with one BFGS result (or np.nan) per simulation

    #simulate the trawl process
    #change the numpy seed and the jax key if doing a simulation study with more simulations
    trawl_instance =  generate_gaussian_seed_trawls(tau = tau,nr_simulations = nr_simulations,
                      nr_trawls = nr_trawls, envelope = envelope,envelope_params = TRUE_ENVELOPE_PARAMS,
                      gaussian_part_params = TRUE_GAUSSIAN_PARAMS,np_seed = np_random_seeds[0])

    # full simulated array; always slice it to the first n_to_use columns before fitting
    all_values_not_to_use_in_general = trawl_instance.values

    with open('values_par2.npy', 'wb') as fff:
        np.save(fff, all_values_not_to_use_in_general)

    #fit the gmm model and time it
    start_gmm = time.time()

    #marginal distribution gmm firstly
    for n_index, n_to_use in enumerate(n_values):
        values_to_use = all_values_not_to_use_in_general[:,:n_to_use]
        levy_seed_params = fit_trawl_marginal(simulations = values_to_use, levy_seed = 'gaussian', method='MM')
        levy_seed_params_list.append(levy_seed_params)

    #envelope gmm secondly
    for lags_index, lags_to_use in enumerate(lags_list):
        with open("text.txt","a") as file:
            file.write('lags are' +str(lags_to_use) + '\n')

        for n_index, n_to_use in enumerate(n_values):
            values_to_use = all_values_not_to_use_in_general[:,:n_to_use]

            # Warm-starting from the previous (lags, n) fit is disabled: no initial_guess
            # is passed to fit_trawl_envelope_gmm.
            envelope_params  = fit_trawl_envelope_gmm(s = tau,simulations = values_to_use, lags = lags_to_use,
                                                      envelope = envelope)

            d_gmm[(lags_to_use,n_to_use)] = {'envelope_params':envelope_params,'levy_seed_params': levy_seed_params_list[n_index]}

    end_gmm = time.time()
    with open("text.txt","a") as file:
        file.write('gmm fitting finished, time taken: ' + str((end_gmm - start_gmm)//60) + ' minutes \n')

    #fit the cl model
    for lags_index, lags_to_use in enumerate(lags_list):
        start_current_lag = time.time()

        for n_index, n_to_use in enumerate(n_values):
            with open("text.txt","a") as file:
                file.write('lags_index is: ' + str(lags_index) +'\n')
                # trailing newline added so the next log entry starts on its own line
                file.write('n_index is: ' + str(n_index) + '\n')

            results_list = []
            #gmm estimates used to initialise the optimiser
            gmm_fit = d_gmm[(lags_to_use,n_to_use)]

            for simulation_to_use in range(nr_simulations):
                with open("text.txt","a") as file:
                    file.write('simulation ' + str(simulation_to_use) + ' / ' + str(nr_simulations) +'\n')

                values_to_use = all_values_not_to_use_in_general[simulation_to_use,:n_to_use]

                # Starting point for the optimiser: first levy seed parameter as is,
                # second levy seed parameter and all envelope parameters on the log scale.
                levy_seed_gmm  = gmm_fit['levy_seed_params'][simulation_to_use]
                initial_tensor = np.concatenate([[levy_seed_gmm[0],
                                                 np.log(levy_seed_gmm[1])],
                                                np.log(gmm_fit['envelope_params'][simulation_to_use])])

                initial_transformed_tensor = jnp.array(initial_tensor.copy())

                # everything except the PRNG key is shared between the first attempt and the retry
                bfgs_kwargs = dict(trawl_path = values_to_use, envelope = envelope,
                                   tau = tau, nr_mc_samples_per_batch = nr_mc_samples_per_batch, nr_batches = nr_batches,
                                   max_taylor_deg = max_taylor_deg, lags_list = lags_to_use, x0 = initial_transformed_tensor,
                                   max_iter_at_once_bfgs = max_iter_at_once_bfgs, max_batches_bfgs = max_batches_bfgs)

                try:
                    resdd, key = do_modified_bfgs(key = key, **bfgs_kwargs)

                except ValueError:
                    # move the key forward by 100 splits, then retry once with the new key
                    for splitting_index in range(100):
                        key, subkey = jax.random.split(key)
                    try:
                        resdd, key = do_modified_bfgs(key = key, **bfgs_kwargs)

                    except ValueError:
                        with open("text.txt","a") as file:
                            file.write('simulation ' + str(simulation_to_use) +  ' is very problematic\n')
                        # placeholder keeps results_list aligned with the simulation index;
                        # code reading the results must cope with np.nan entries
                        resdd = np.nan

                results_list.append(resdd)

            d_cl[(lags_to_use,n_to_use)] = results_list

        end_current_lag = time.time()

        with open("text.txt","a") as file:
            file.write('current lags time was: ' + str((end_current_lag - start_current_lag)//60) + '\n')

    end_cl = time.time()
    with open("text.txt","a") as file:
        file.write('cl fitting finished, time taken: ' +  str((end_cl - end_gmm)//60) + ' minutes \n')
    with open("cl_dictionary.pickle", "wb") as output_file_cl:
        pickle.dump(d_cl, output_file_cl)
    with open("gmm_dictionary.pickle","wb") as output_file_gmm:
        pickle.dump(d_gmm, output_file_gmm)

    #write cl_time to disk (total CL time in seconds, same interval as logged above)
    cl_time = [end_cl - end_gmm]
    with open("cl_time.pickle", "wb") as output_cl_time:
        pickle.dump(cl_time, output_cl_time)



                


2023-06-19 14:07:06.800350: E external/org_tensorflow/tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:266] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
No GPU/TPU found, falling back to CPU. (Set TF_CPP_MIN_LOG_LEVEL=0 and rerun for more info.)


Default precision is:  <class 'jax.numpy.float64'>


In [2]:
#generic imports
import numpy as np
import time

#ambit stochastics imports
from generate_trawls_with_gaussian_marginal import generate_gaussian_seed_trawls
#gmm fitting of envelope params + gmm / mle for levy seed params
from ambit_stochastics.helpers.marginal_distribution_functions import fit_trawl_marginal
from ambit_stochastics.helpers.acf_functions import fit_trawl_envelope_gmm
import pickle

#jax imports
from jax.config import config
config.update("jax_enable_x64", True)
from bfgs_for_cl_helper import do_modified_bfgs
from   jax import random
import jax.numpy as jnp
import jax

# Label for the floating-point type reported at start-up. This assignment does not
# configure JAX by itself; 64-bit mode is switched on by config.update above.
default_precision = jnp.float64
print('Default precision is: ',default_precision)


2023-06-21 16:49:16.889726: W external/org_tensorflow/tensorflow/tsl/platform/default/dso_loader.cc:67] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2023-06-21 16:49:16.930203: W external/org_tensorflow/tensorflow/tsl/platform/default/dso_loader.cc:67] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2023-06-21 16:49:16.932194: W external/org_tensorflow/tensorflow/tsl/platform/default/dso_loader.cc:67] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory


Default precision is in jax_aux file is:  <class 'jax.numpy.float64'>
Default precision is:  <class 'jax.numpy.float64'>


In [377]:
import os
# Ground-truth parameters that the estimates loaded below are compared against.
# NOTE(review): presumably these must match the settings used to produce the pickles
# in 'lambda_0_1' - confirm, since they differ from the values set in the simulation cell.
TRUE_GAUSSIAN_PARAMS = (3, 5)
TRUE_ENVELOPE_PARAMS = (0.1,)
envelope = 'exponential'

In [430]:
# Position of the entry to read from the pickled result dictionaries.
# NOTE(review): despite the name, this indexes the dictionary's values (one per key),
# not a single simulated path - confirm what the keys of the 'lambda_0_1' pickles are.
sim_to_use = 4
# NOTE: pickle.load can execute arbitrary code; only open result files you produced yourself.
with open(os.path.join('lambda_0_1','gmm_dictionary.pickle'), 'rb') as f:
    x_gmm = pickle.load(f)
# GMM envelope and levy seed estimates stored under the selected entry
env_infer_gmm = list(x_gmm.values())[sim_to_use]['envelope_params']
levy_seed_infer_gmm = list(x_gmm.values())[sim_to_use]['levy_seed_params']
#env_infer_gmm, levy_seed_infer_gmm


In [431]:
# NOTE: pickle.load can execute arbitrary code; only open result files you produced yourself.
with open(os.path.join('lambda_0_1','cl_dictionary.pickle'), 'rb') as f:
    x_cl = pickle.load(f)
# x_cl is rebound from the full dictionary to the single selected entry
x_cl = list(x_cl.values())[sim_to_use]
# Map the optimiser output back to the original scale: x[0] is taken as is,
# x[1] and x[2:] are exponentiated.
# NOTE(review): an element without an `.x` attribute (e.g. an np.nan placeholder stored
# for a failed fit) would raise AttributeError here - confirm the pickle has none.
levy_seed_infer_cl = np.array([[i.x[0],np.exp(i.x[1])] for i in x_cl])
env_infer_cl = np.array([np.exp(i.x[2:]) for i in x_cl])
#env_infer_cl,
#levy_seed_infer_cl

In [432]:
import matplotlib.pyplot as plt

# Reload the full CL dictionary (x_cl was narrowed to a single entry in an earlier cell).
with open(os.path.join('lambda_0_1','cl_dictionary.pickle'), 'rb') as f:
    x_cl = pickle.load(f)
#x_cl
from scipy import stats

# Summary statistics of the percentage difference between the CL and GMM envelope
# estimates, taken relative to the GMM estimate.
stats.describe(100 *(env_infer_cl -env_infer_gmm) / env_infer_gmm)

DescribeResult(nobs=100, minmax=(array([-22.51754825]), array([33.83887847])), mean=array([-3.23419469]), variance=array([83.93208537]), skewness=array([0.75339526]), kurtosis=array([1.94692807]))

In [433]:
from scipy.integrate import quad
def evaluate_acf_loss(envelope,envelope_params,TRUE_ENVELOPE_PARAMS,k):
    """Weighted distance between fitted and true autocorrelation functions.

    For every parameter set in `envelope_params`, the difference between
    corr_np at the fitted parameters and corr_np at TRUE_ENVELOPE_PARAMS is
    integrated over s in (0, inf) with weight 1 / (k*s**2 + 1).

    Parameters
    ----------
    envelope : envelope name passed to corr_np.
    envelope_params : sequence of fitted envelope parameter sets.
    TRUE_ENVELOPE_PARAMS : true envelope parameters.
    k : coefficient of s**2 in the weight.

    Returns
    -------
    List with one [root_of_weighted_squared_error, weighted_absolute_error]
    pair per entry of `envelope_params`.
    """
    # quadrature settings shared by both integrals
    quad_options = dict(limit = 1000, maxp1 = 1000, limlst = 1000, epsabs = 1.49e-04, epsrel = 1.49e-04)

    def acf_gap(s, fitted_params):
        # fitted minus true correlation at lag s
        return corr_np(s,envelope,fitted_params) - corr_np(s,envelope,TRUE_ENVELOPE_PARAMS)

    losses = []
    for fitted in envelope_params:
        squared_integral  = quad(lambda s: acf_gap(s, fitted)**2 / (k*s**2+1), 0, np.inf, **quad_options)[0]
        absolute_integral = quad(lambda s: abs(acf_gap(s, fitted)) / (k*s**2+1), 0, np.inf, **quad_options)[0]
        losses.append([squared_integral**0.5, absolute_integral])
    return losses

# Weighted ACF losses of the GMM and of the CL envelope estimates (weight coefficient k = 0.00001).
r_gmm = evaluate_acf_loss(envelope,env_infer_gmm,TRUE_ENVELOPE_PARAMS,0.00001)
r_cl  = evaluate_acf_loss(envelope,env_infer_cl,TRUE_ENVELOPE_PARAMS,0.00001)

In [434]:
# CL-to-GMM ratio of the mean and of the median ACF loss, one value per loss column;
# a ratio below 1 means the CL loss is the smaller one.
np.mean(r_cl,axis=0) /np.mean(r_gmm,axis=0),np.median(r_cl,axis=0) /np.median(r_gmm,axis=0)


(array([0.95052541, 0.96389776]), array([0.9580828 , 0.95891937]))

In [435]:
# Reload the full CL dictionary into x_cl.
# NOTE: pickle.load can execute arbitrary code; only open result files you produced yourself.
with open(os.path.join('lambda_0_1','cl_dictionary.pickle'), 'rb') as f:
    x_cl = pickle.load(f)

In [436]:
# Envelope parameters: CL-to-GMM ratio of the mean squared error against the true values.
np.mean(np.abs(env_infer_cl - TRUE_ENVELOPE_PARAMS)**2,axis=0)/np.mean(np.abs(env_infer_gmm - TRUE_ENVELOPE_PARAMS)**2,axis=0)

array([0.67874262])

In [437]:
# Envelope parameters: CL-to-GMM ratio of the median absolute error, followed by the
# GMM median absolute error divided by the true parameter value.
np.median(np.abs(env_infer_cl - TRUE_ENVELOPE_PARAMS),axis=0)/np.median(np.abs(env_infer_gmm - TRUE_ENVELOPE_PARAMS),axis=0),np.median(np.abs(env_infer_gmm - TRUE_ENVELOPE_PARAMS)) /TRUE_ENVELOPE_PARAMS 

(array([0.9010087]), array([0.19194658]))

In [438]:
# Envelope parameters: CL-to-GMM ratio of the mean absolute error against the true values.
np.mean(np.abs(env_infer_cl - TRUE_ENVELOPE_PARAMS),axis=0)/np.mean(np.abs(env_infer_gmm - TRUE_ENVELOPE_PARAMS),axis=0)

array([0.89181823])

In [439]:
# Levy seed parameters: CL-to-GMM ratio of the mean squared error, one value per parameter.
np.mean(np.abs(levy_seed_infer_cl - TRUE_GAUSSIAN_PARAMS)**2,axis=0)/np.mean(np.abs(levy_seed_infer_gmm - TRUE_GAUSSIAN_PARAMS)**2,axis=0)

array([0.98895992, 0.99687871])

In [440]:
# Levy seed parameters: CL-to-GMM ratio of the mean absolute error, one value per parameter.
np.mean(np.abs(levy_seed_infer_cl - TRUE_GAUSSIAN_PARAMS),axis=0)/np.mean(np.abs(levy_seed_infer_gmm - TRUE_GAUSSIAN_PARAMS),axis=0)

array([0.9983537 , 0.99680582])

In [441]:
# Levy seed parameters: CL-to-GMM ratio of the median absolute error, one value per parameter.
np.median(np.abs(levy_seed_infer_cl - TRUE_GAUSSIAN_PARAMS),axis=0)/np.median(np.abs(levy_seed_infer_gmm - TRUE_GAUSSIAN_PARAMS),axis=0)

array([0.96443978, 1.03613012])

In [442]:
def kl_div(mu_1,sigma_1,mu_2,sigma_2):
    """Kullback-Leibler divergence KL(N(mu_1, sigma_1**2) || N(mu_2, sigma_2**2)).

    sigma_1 and sigma_2 enter the formula as standard deviations. The arguments
    may be scalars or numpy arrays; the usual broadcasting rules apply.

    Returns
    -------
    log(sigma_2/sigma_1) + (sigma_1**2 + (mu_1-mu_2)**2) / (2*sigma_2**2) - 1/2
    """
    log_scale_ratio = np.log(sigma_2 / sigma_1)
    quadratic_term  = (sigma_1**2 + (mu_1 - mu_2)**2) / (2 * sigma_2**2)
    return log_scale_ratio + quadratic_term - 1/2

# kl_div with the true Gaussian parameters as the first distribution and each fitted
# (column 0, column 1) pair as the second, for the CL and the GMM estimates.
# NOTE: r_cl and r_gmm are rebound here; they no longer hold the ACF losses computed earlier.
r_cl = kl_div(TRUE_GAUSSIAN_PARAMS[0],TRUE_GAUSSIAN_PARAMS[1],levy_seed_infer_cl[:,0],levy_seed_infer_cl[:,1])
r_gmm = kl_div(TRUE_GAUSSIAN_PARAMS[0],TRUE_GAUSSIAN_PARAMS[1],levy_seed_infer_gmm[:,0],levy_seed_infer_gmm[:,1])


In [443]:
# CL-to-GMM ratio of the summed and of the median KL divergence.
r_cl.sum()/ r_gmm.sum(), np.median(r_cl) / np.median(r_gmm)

(0.995356716905765, 1.0089398737396726)

In [370]:
#((r_cl**2).sum()/(r_gmm**2).sum())**0.5

1.123375530167354

1.1362208576196877