In [1]:
import numpyro

  from .autonotebook import tqdm as notebook_tqdm


In [1]:
# Basic packages
import json
import pandas as pd
# import matplotlib.pyplot as plt
# import matplotlib as mpl
import numpy as np
import os

# JAX-related
import jax
from jax import random, lax
import jax.numpy as jnp
from jax.scipy.stats import norm

# Numpyro-related
import numpyro
import numpyro.distributions as dist
from numpyro.infer import MCMC, NUTS, HMC

# Switch NumPyro/JAX to 64-bit (double) precision; must run before any JAX
# arrays are created so that all later computations use float64.
numpyro.enable_x64()

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Folder holding the input CSV files: '<current working dir>/data_twodim/'.
# The trailing empty element makes the joined string end with a separator,
# so file names can be appended to `path_data` directly.
path_data = os.sep.join([os.getcwd(), 'data_twodim', ''])

# Identifiers used to assemble the input file names
# ('stations_im<im_string>_<gmm_combo>.csv', 'survey_im<im_string>_<gmm_combo>.csv').
im_string = 'T0_300'
gmm_combo = 'GMM1'

In [3]:
# Load the station table and the building-survey table for the selected
# intensity measure / ground-motion-model combination.
dfstations = pd.read_csv(f'{path_data}stations_im{im_string}_{gmm_combo}.csv')
dfsurvey = pd.read_csv(f'{path_data}survey_im{im_string}_{gmm_combo}.csv')

In [4]:
from modules.spatialcorrelation import EspositoIervolino2012
from modules.utils import Sites

In [66]:
# Columns that are passed one-to-one (same name) from the CSV into `Sites`.
site_fields = ['mu_logIM', 'tau_logIM', 'phi_logIM']

# Station sites, together with the logIM values observed at the stations.
df = pd.read_csv(f'{path_data}stations_im{im_string}_{gmm_combo}.csv')
station_sites = Sites(
    coordinates=df[['Longitude', 'Latitude']].values,
    **{field: df[field].values for field in site_fields},
)
obs_logIM = df['obs_logIM'].values

# Survey sites built from the complete survey table.
# NOTE: `survey_sites` is reassigned further down from the sub-sample `dfsam`.
df = pd.read_csv(f'{path_data}survey_im{im_string}_{gmm_combo}.csv')
survey_sites = Sites(
    coordinates=df[['Longitude', 'Latitude']].values,
    **{field: df[field].values for field in site_fields},
)

In [6]:
# Spatial correlation model (class from modules.spatialcorrelation) set up for
# the chosen intensity-measure identifier; it is handed to GPR via `SCM=SCM`.
# NOTE(review): dataset='it' presumably selects the Italian variant of the
# model — confirm in modules.spatialcorrelation.
SCM = EspositoIervolino2012(im_string, dataset='it')

In [24]:
# Merge the building sub-classes into three parent classes, written to the
# column 'BuildingClass2'. Rows whose 'BuildingClass' is not listed below do
# not receive a value in this cell.
class_members = {
    'A': ['A-L', 'A-MH'],
    'B': ['B-L', 'B-MH'],
    'C': ['C1-L', 'C1-MH'],
}
for parent_class, members in class_members.items():
    is_member = dfsurvey['BuildingClass'].isin(members).to_numpy()
    idx = dfsurvey.index[is_member]
    dfsurvey.loc[idx, 'BuildingClass2'] = parent_class

In [56]:
# Stratified sub-sample of the survey: a fixed number of buildings per merged
# building class, drawn separately for PRO_COM == 66049 (800 per class) and
# for all other PRO_COM values (600 per class). Fixed seeds keep the draw
# reproducible.
in_pro_com_66049 = dfsurvey['PRO_COM'] == 66049

dfsam1 = (
    dfsurvey.loc[in_pro_com_66049]
    .groupby('BuildingClass2')
    .sample(n=800, random_state=0)
)
dfsam2 = (
    dfsurvey.loc[~in_pro_com_66049]
    .groupby('BuildingClass2')
    .sample(n=600, random_state=99)
)
dfsam = pd.concat([dfsam1, dfsam2])

In [67]:
# Rebuild `survey_sites` so that it only contains the sub-sampled buildings.
df = dfsam.copy()
site_fields = ['mu_logIM', 'tau_logIM', 'phi_logIM']
survey_sites = Sites(
    coordinates=df[['Longitude', 'Latitude']].values,
    **{field: df[field].values for field in site_fields},
)

In [68]:
from modules.shakemap import GPR

# Gaussian-process regression (class from modules.shakemap) using the spatial
# correlation model; fitted on the station sites and their observed logIM.
# NOTE(review): `jitter` is presumably a small diagonal term added for
# numerical stability — confirm in GPR.fit.
gpr = GPR(SCM=SCM)
gpr.fit(station_sites, obs_logIM, jitter=1e-4)

In [69]:
# Prediction at the survey sites given the station data. The two returned
# values are used below as mean vector and covariance matrix of a
# multivariate normal from which logIM is simulated.
mu_B_S, Sigma_BB_S = gpr.predict(survey_sites, full_cov=True)

In [90]:
# Draw one joint realisation of logIM at all survey sites from the
# multivariate normal defined by the GPR prediction (seeded for
# reproducibility).
rng = np.random.default_rng(seed=31)
sim_logIM = rng.multivariate_normal(mean=mu_B_S, cov=Sigma_BB_S)

In [91]:
# "True" fragility parameters used to generate the synthetic damage data.
#
# thetas_true[c, k]: threshold parameter of building class c (rows follow the
#   category order A, B, C) for exceeding damage state k+1. Since exceedance
#   is modelled as Phi((logIM - log(theta)) / beta), theta is the IM value at
#   which the exceedance probability equals 0.5.
# betas_true[c, 0]: dispersion parameter of building class c, shared by all
#   damage states of that class.
thetas_true = np.array([
    [0.25, 0.41, 0.66],
    [0.37, 0.65, 1.05],
    [0.52, 0.93, 1.49],
])
betas_true = np.array([
    [0.59],
    [0.75],
    [0.75],
])

In [92]:
# Ordered categorical of the merged building class; its integer `.codes`
# (A -> 0, B -> 1, C -> 2) are used to index rows of the parameter arrays.
building_class = pd.Categorical(
    dfsam['BuildingClass2'].copy(),
    categories=list('ABC'),
    ordered=True,
)

In [94]:
# NOTE(review): this import rebinds `norm`, which the import cell at the top
# bound to jax.scipy.stats.norm; from here on `norm` is the SciPy version.
from scipy.stats import norm

rng = np.random.default_rng(seed=91)

# Per-building parameters, looked up via the building-class code.
class_codes = building_class.codes
betas = betas_true[class_codes]
etas = 1/betas * np.log(thetas_true[class_codes])

# Exceedance probabilities P(DS >= k), k = 0..4, for every building:
#   k = 0      -> 1 (always reached)
#   k = 1..3   -> Phi(logIM / beta - eta_k), with eta_k = log(theta_k) / beta
#   k = 4      -> 0 (there is no damage state above 3)
scaled_logIM = sim_logIM/betas[:,0]
ccdf = (
    [np.ones_like(sim_logIM)]
    + [norm.cdf(scaled_logIM - etas[:,k]) for k in range(3)]
    + [np.zeros_like(sim_logIM)]
)
# Row k of `cdf` is 1 - P(DS >= k); row 0 is all zeros, row 4 all ones.
cdf = 1-np.stack(ccdf)

# Inverse-CDF sampling: a building is assigned damage state ds when its
# uniform draw lies in the interval (cdf[ds], cdf[ds+1]].
helpuni = rng.uniform(0,1,size=len(sim_logIM))
sam_ds = np.zeros_like(sim_logIM)
for ds in range(4):
    in_bin = (cdf[ds,:] < helpuni) & (helpuni <= cdf[ds+1,:])
    sam_ds += in_bin * ds

In [95]:
# Store the simulated damage states in the sample table. `sam_ds` was
# accumulated in a float array, hence the cast to integer.
dfsam['samDS'] = sam_ds.astype(int)

In [89]:
# Number of sampled buildings per (building class, simulated damage state).
# NOTE(review): this cell's execution count (In [89]) is lower than that of
# the simulation/sampling cells above (In [90]-In [95]), so the output stored
# with it may stem from an earlier run; the next cell repeats the same
# tabulation with a later execution count.
dfsam.groupby(['BuildingClass2', 'samDS']).size()

BuildingClass2  samDS
A               0        613
                1        244
                2        275
                3        268
B               0        796
                1        262
                2        153
                3        189
C               0        957
                1        226
                2        115
                3        102
dtype: int64

In [97]:
# Sanity check of the synthetic data: number of sampled buildings per
# (building class, simulated damage state).
dfsam.groupby(['BuildingClass2', 'samDS']).size()

BuildingClass2  samDS
A               0        569
                1        220
                2        269
                3        342
B               0        773
                1        264
                2        182
                3        181
C               0        934
                1        214
                2        148
                3        104
dtype: int64

In [100]:
# Give the sub-sampled buildings a consecutive identifier 0..n-1.
# NOTE(review): the displayed frame lists 'id' as the first column, so the
# column appears to exist already and is overwritten here — confirm that the
# original survey id is no longer needed.
n_sampled = len(dfsam)
dfsam['id'] = np.arange(n_sampled)
dfsam

Unnamed: 0,id,Longitude,Latitude,vs30,epiazimuth,mu_logIM,tau_logIM,phi_logIM,BuildingClass,DamageState,PRO_COM,BuildingClass2,samDS
30175,0,13.430660,42.302485,529.93370,44.626553,-0.565073,0.501964,0.66775,A-L,5,66049.0,A,0
8012,1,13.288765,42.396429,509.20395,164.165307,-0.677659,0.501964,0.66775,A-L,1,66049.0,A,3
32034,2,13.365190,42.365922,450.50436,169.837767,-0.565073,0.501964,0.66775,A-L,3,66049.0,A,3
14428,3,13.402773,42.348720,453.86163,177.774937,-0.565073,0.501964,0.66775,A-MH,2,66049.0,A,1
16642,4,13.400559,42.352410,455.24070,179.724174,-0.565073,0.501964,0.66775,A-MH,4,66049.0,A,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...
52275,4195,13.395423,41.972135,536.46490,44.474262,-2.146303,0.501964,0.66775,C1-MH,0,66020.0,C,0
54050,4196,13.390121,41.963362,622.23680,44.986776,-2.188677,0.501964,0.66775,C1-L,0,66020.0,C,0
27547,4197,13.567571,42.245930,425.86472,347.998133,-0.677665,0.501964,0.66775,C1-MH,0,66042.0,C,2
49232,4198,13.412729,42.059801,431.81610,43.196936,-1.681242,0.501964,0.66775,C1-L,0,66006.0,C,0


In [102]:
# Assemble the synthetic survey table: location/site columns of the
# sub-sample, the merged building class, and the simulated damage state
# (stored under the column names 'BuildingClass' and 'DamageState').
dfsurvey2 = dfsam[['id', 'Longitude', 'Latitude', 'vs30']].assign(
    BuildingClass=dfsam['BuildingClass2'].values,
    DamageState=dfsam['samDS'].values,
)

In [104]:
# Write the synthetic survey to the data folder, without the DataFrame index.
survey2_path = path_data + 'survey2.csv'
dfsurvey2.to_csv(survey2_path, index=False)