## Inference setup, model setup
In order to perform inference (Bayesian or Maximum Likelihood),
one needs to define or set up several auxiliary variables.
This is done by creating a ***setup_obj*** for the inference, which 
includes all information required for either
* Bayesian inference, i.e., a Gibbs sampling procedure with Gaussian process 
modelling of the background intensity (GP-ETAS, gpetas), or
* classical Maximum Likelihood estimation (MLE) using a kernel density estimator 
for the background intensity.

### Setup object for Bayesian inference (Gibbs sampler)

In [1]:
import gpetas
import numpy as np
import datetime
# strptime format used below to parse the time_origin / time_end strings
time_format = "%Y-%m-%d %H:%M:%S.%f"


# auto reload using developer version:
# IPython's autoreload extension in mode 2 re-imports modules before each
# cell is executed, so edits to the package source are picked up without
# restarting the kernel
%load_ext autoreload
%autoreload 2

#### Requires a *data_obj* which includes *domain_obj*
In this example we first specify a domain in California and 
then download the corresponding data from an online source (ComCat) using
pycsep functionality.

In [2]:
### generate domain obj
# time domain: absolute start and end of the observation window, given as
# strings in `time_format` and interpreted as UTC
time_origin = '2010-01-01 00:00:00.0'
time_end = '2022-01-01 00:00:00.0'
time_origin_obj = datetime.datetime.strptime(time_origin, time_format).replace(
    tzinfo=datetime.timezone.utc)
time_end_obj = datetime.datetime.strptime(time_end, time_format).replace(
    tzinfo=datetime.timezone.utc)

# length of the whole window in days; all T_borders are in days relative
# to time_origin
delta_Tall = (time_end_obj - time_origin_obj) / datetime.timedelta(days=1)
T_borders_all = np.array([0., delta_Tall])
T_borders_training = np.array([0., 3000.])

# spatial domain: row 0 = longitude (min, max), row 1 = latitude (min, max)
X_borders = np.array([[-120., -113.], [30., 37.]])

# mark domain: [m0,+inf)
m0 = 3.5

domain_obj = gpetas.utils.R00x_setup.region_class()
domain_obj.T_borders_all = T_borders_all
domain_obj.T_borders_training = T_borders_training
# testing window = remainder of the full window after the training window
domain_obj.T_borders_testing = np.array([T_borders_training[1], T_borders_all[1]])
domain_obj.time_origin = time_origin
domain_obj.X_borders = X_borders
domain_obj.m0 = m0
# case_name (optional)
# domain_obj.case_name = case_name

# A bare expression that is not the last statement of a cell is evaluated
# but never displayed, so print the attributes explicitly.
print(vars(domain_obj))

### generate data_obj with download data from online source
# get pycsep catalog object
import csep
from csep.utils import comcat

# the query window and magnitude threshold are taken from the domain definition
start_time, end_time = time_origin_obj, time_end_obj
min_magnitude = domain_obj.m0
# X_borders: row 0 holds the longitude limits, row 1 the latitude limits
min_longitude, max_longitude = domain_obj.X_borders[0]
min_latitude, max_latitude = domain_obj.X_borders[1]

catalog_obj = csep.query_comcat(
    start_time=start_time,
    end_time=end_time,
    min_magnitude=min_magnitude,
    min_latitude=min_latitude,
    max_latitude=max_latitude,
    min_longitude=min_longitude,
    max_longitude=max_longitude,
)

# cat2data_obj: convert the pycsep catalog into a gpetas data_obj on domain_obj
data_obj = gpetas.utils.get_data_pycsep.data_obj__from_catalog_obj(
    catalog_obj=catalog_obj,
    R_obj=domain_obj,
)

Fetched ComCat catalog in 42.589593172073364 seconds.

Downloaded catalog from ComCat with following parameters
Start Date: 2010-01-04 00:02:17.990000+00:00
End Date: 2021-12-19 10:27:35.704000+00:00
Min Latitude: 30.0063 and Max Latitude: 36.989
Min Longitude: -119.9986667 and Max Longitude: -113.1498
Min Magnitude: 3.5
Found 1688 events in the ComCat catalog.
jitter= 1.5046296084619826e-06
    Values at idx [695] and [696] (counting from 0 to n-1) are the same.
    Time: [968.88711481] [968.88711481]  days.
    NEW Times: [968.88711481] [968.88711632]  days.
NEW number of time duplicates= 0
----------------------------------------------------------------------
total number of events =  1688
time origin            =  2010-01-01 00:00:00+00:00
Starting time          = 0.0 time max= 4383.0
T_borders all          = [   0. 4383.]
|T|                    = 4383.0  days.
T_borders training     = [   0. 3000.]
min event time         = 3.0015971064814817
max event time         = 4370.435829907

## Generating a setup_obj for GP-ETAS (gpetas)

In [3]:
### load data_obj
case_name = 'Rxxx'
print(case_name)
output_dir = './output/inference_results'
fname = output_dir+'/data_obj_%s.all'%case_name
# NOTE(review): allow_pickle=True unpickles arbitrary Python objects; only
# load files that you generated yourself.
data_obj = np.load(fname,allow_pickle=True)


### variables of the Gibbs sampler

# sampler parameters
burnin = 50                                # number of discarded initial samples. default: 5000
Ksamples = 10                              # number of samples of the joint posterior. default: 500 (a few hundred)
thinning = 20                              # default: 10 (or 20); thinning of the obtained samples in order to avoid autocorrelation
num_iterations = Ksamples*thinning+1       # iterations needed to retain Ksamples samples after thinning
MH_proposals_offspring = 100               # number of MH proposals for offspring params
MH_cov_empirical_yes = None                # using empirical cov for proposal distribution
sigma_proposal_offspring_params = None     # uses default values: 0.01**2 # alternatives: 0.03**2
kth_sample_obj = None                      # starting sampling from kth sample
print('#iters',num_iterations)


# offspring
prior_theta_dist = 'gamma'                 # specifies prior distribution, either 'gamma' or 'uniform'
prior_theta_params = None
theta_start_Kcpadgq = None                 # uses default values
spatial_offspring = 'R'                    # alternatives: 'G' gaussian
stable_theta_sampling = 'yes'              # constraint on theta that only stable Hawkes processes are allowed


# bg:
cov_params = None                          # start values of hypers, uses default: silverman rule
mu_nu0 = None                              # mean of hyper prior on nu_0, uses default value



# bg: spatial resolution for plotting/evaluations
ngrid_per_dim = 50                         # default value: 50
X_grid = gpetas.some_fun.make_X_grid(data_obj.domain.X_borders, nbins=ngrid_per_dim)
                                           # generates spatial grid for plotting etc.

# general
time_origin = data_obj.domain.time_origin
case_name = data_obj.case_name             # from here on the case name stored in data_obj is used

# save results
outdir = output_dir




### (3.1) GS sampler set up
# Every tunable defined above is forwarded by name, so changing a value in
# the parameter section is enough to change the setup (mu_nu0 was previously
# hard-coded to None in this call, which made the variable above ineffective).
setup_obj = gpetas.setup_Gibbs_sampler.setup_sampler(data_obj=data_obj,
             utm_yes=None,
             spatial_offspring=spatial_offspring,
             theta_start_Kcpadgq=theta_start_Kcpadgq,
             sigma_proposal_offspring_params=sigma_proposal_offspring_params,
             ngrid_per_dim=ngrid_per_dim,
             cov_params=cov_params,
             mu_nu0=mu_nu0,
             X_grid=X_grid,
             outdir=outdir,
             prior_theta_dist=prior_theta_dist,
             prior_theta_params=prior_theta_params,
             stable_theta_sampling=stable_theta_sampling,
             time_origin=time_origin,
             case_name=case_name,
             burnin=burnin,
             Ksamples=Ksamples,
             num_iterations=num_iterations,
             thinning=thinning,
             MH_proposals_offspring=MH_proposals_offspring,
             MH_cov_empirical_yes=MH_cov_empirical_yes,
             kth_sample_obj=kth_sample_obj)


Rxxx
#iters 201
Output subdirectory exists
setup_obj has been created and saved: ./output/inference_results/setup_obj_Rxxx.all


In [4]:
# show every attribute stored on setup_obj as a dict (the output is long)
vars(setup_obj)

{'utm_yes': None,
 'N_training': 998,
 'mu0_start': 0.003394557823129252,
 'mu0_grid': None,
 'm0': 3.5,
 'm_beta': 2.3977128030175625,
 'theta_start_Kcpadgqbm0': array([2.5000000e-03, 1.0000000e-02, 1.2000000e+00, 1.8000000e+00,
        5.0000000e-02, 5.0000000e-01, 2.0000000e+00, 2.3977128e+00,
        3.5000000e+00]),
 'spatial_offspring': 'R',
 'sigma_proposal_offspring_params': 0.0001,
 'cov_params': [5.0, array([0.37814954, 0.37814954])],
 'cov_params_start': [5.0, array([0.37814954, 0.37814954])],
 'mu_upper_bound': None,
 'std_factor': 1.0,
 'mu_nu0': None,
 'mu_length_scale': None,
 'sigma_proposal_hypers': 0.05,
 'X_grid': array([[-120.        ,   30.        ],
        [-119.85714286,   30.        ],
        [-119.71428571,   30.        ],
        ...,
        [-113.28571429,   37.        ],
        [-113.14285714,   37.        ],
        [-113.        ,   37.        ]]),
 'X_grid_NN': array([[0.        , 0.        ],
        [0.14285714, 0.        ],
        [0.28571429, 0. 

***

***

### Setup object for classical MLE (KDE-ETAS)

#### Requires a *data_obj* which includes *domain_obj*
In this example we load a previously generated *data_obj* as well as
a previously generated *setup_obj* of the Gibbs sampler
in order to generate a *setup_obj_mle* that is comparable to the Gibbs sampler
setup.

In [14]:
### load data_obj
case_name = 'Rxxx'
print(case_name)
output_dir = './output/inference_results'

fname = output_dir+'/data_obj_%s.all'%case_name
data_obj = np.load(fname, allow_pickle=True)

### load setup_obj from the GP-ETAS sampler
fname = output_dir+'/setup_obj_%s.all'%case_name
setup_obj = np.load(fname, allow_pickle=True)

### variables of the MLE procedure (EM)

# bg KDE parameters
Nnearest = 15          # default value: 15
h_min_degree = 0.05    # default value: 0.05 in degrees
silverman = None       # default value: None; alternative 'yes': minimal bandwidth via Silverman rule
# reuse the evaluation grid of the Gibbs sampler setup; the number of bins
# per dimension is recovered from the total number of grid points
X_grid = setup_obj.X_grid
bins = int(np.sqrt(X_grid.shape[0]))   # default value: 50

# offspring: same start values and settings as in the Gibbs sampler setup
spatial_offspring = setup_obj.spatial_offspring
theta_start_Kcpadgqbm0 = setup_obj.theta_start_Kcpadgqbm0   # default values for a stable Hawkes process
stable_theta = setup_obj.stable_theta_sampling

# save setup_obj_mle: same output directory and case name as the sampler setup
outdir = setup_obj.outdir
case_name = setup_obj.case_name

# create setup_obj_mle
setup_obj_mle = gpetas.mle_KDE_ETAS.setup_mle(
    data_obj=data_obj,
    theta_start_Kcpadgqbm0=theta_start_Kcpadgqbm0,
    spatial_offspring=spatial_offspring,
    Nnearest=Nnearest,
    h_min_degree=h_min_degree,
    spatial_units='degree',
    utm_yes=None,
    bins=bins,
    X_grid=X_grid,
    outdir=outdir,
    stable_theta=stable_theta,
    case_name=case_name,
    silverman=silverman,
)

Rxxx
./output/inference_results subdirectory exists
mle_setup_obj has been created and saved: ./output/inference_results/setup_obj_default_Rxxx_mle.all


In [15]:
# show every attribute stored on setup_obj_mle as a dict
vars(setup_obj_mle)

{'data_obj': <gpetas.utils.some_fun.create_data_obj_from_cat_file at 0x7ff68b37d640>,
 'utm_yes': None,
 'spatial_units': 'degree',
 'N_training': 998,
 'absX_training': 49.0,
 'absT_training': 3000.0,
 'case_name': 'Rxxx',
 'theta_start_Kcpadgqbm0': array([2.5000000e-03, 1.0000000e-02, 1.2000000e+00, 1.8000000e+00,
        5.0000000e-02, 5.0000000e-01, 2.0000000e+00, 2.3977128e+00,
        3.5000000e+00]),
 'm0': 3.5,
 'm_beta': 2.3977128030175625,
 'spatial_offspring': 'R',
 'mu_start': 0.003394557823129252,
 'X_borders_NN': array([[0., 7.],
        [0., 7.]]),
 'X_grid': array([[-120.        ,   30.        ],
        [-119.85714286,   30.        ],
        [-119.71428571,   30.        ],
        ...,
        [-113.28571429,   37.        ],
        [-113.14285714,   37.        ],
        [-113.        ,   37.        ]]),
 'bins': 50,
 'Nnearest': 15,
 'h_min_degree': 0.05,
 'dLL': 10.0,
 'stable_theta': 'yes',
 'outdir': './output/inference_results'}