In [2]:
from obspy.core import Stream, read
import numpy as np
from pytensor.compile.ops import as_op
import pytensor
import pytensor.tensor as pt
import matplotlib.pyplot as plt
import pandas as pd
import os 
import os.path
import pymc as pm
import arviz as az

In [3]:
# What exactly are the columns here? Amplitude of waves at a specific time?
# My equivalent = peak strain at a specific time

# Build the target function, misfit to this is what is being minimized
@as_op(itypes=[pt.dvector,pt.dscalar,pt.dscalar,pt.dscalar,pt.dscalar,pt.dscalar], otypes=[pt.dvector]) # input variable types and output variable type(s)
def two_straight_lines(x,m1,m2,xinter,x0,y0):
    '''
    Continuous two-segment straight line, wrapped as a PyTensor op so it can be
    used as the mean of a PyMC likelihood.

    Parameters (types are fixed by the as_op decorator above)
    ----------
    x      : float64 vector, x coordinates at which to evaluate the model
    m1     : float64 scalar, slope of the first (left-hand) segment
    m2     : float64 scalar, slope of the second (right-hand) segment
    xinter : float64 scalar, x position where the slope changes
    x0, y0 : float64 scalars, anchor point the first segment passes through

    Returns
    -------
    float64 vector with the same length as x.

    Notes
    -----
    Neither intercept is a free parameter. The first one follows from the
    anchor point (b1 = y0 - m1*x0) and the second one is chosen so the two
    segments meet at xinter. Points with x exactly equal to xinter belong to
    the first segment (the comparison is strict).
    '''

    # Intercept of the first segment, fixed by the anchor point (x0, y0)
    b1 = y0 - m1*x0

    # Intercept of the second segment, chosen so both lines agree at xinter
    b2 = m1*xinter + b1 - m2*xinter

    # Second segment strictly after the intersection, first segment elsewhere
    return np.where(x > xinter, m2*x + b2, m1*x + b1)

# Build the target function, misfit to this is what is being minimized
def non_pytensor_two_straight_lines(x,m1,m2,xinter,x0,y0):
    '''
    Plain-NumPy version of the continuous two-segment straight line.

    Parameters
    ----------
    x      : array-like of x coordinates (converted to a float array)
    m1     : slope of the first (left-hand) segment
    m2     : slope of the second (right-hand) segment
    xinter : x position where the slope changes
    x0, y0 : anchor point the first segment passes through

    Returns
    -------
    Float ndarray with the same shape as x.

    Notes
    -----
    Neither intercept is a free parameter. The first one follows from the
    anchor point (b1 = y0 - m1*x0) and the second one is chosen so the two
    segments meet at xinter. Points with x exactly equal to xinter belong to
    the first segment (the comparison is strict).
    '''

    # Force a float array: with integer inputs the result used to be an integer
    # array, and writing the second segment into it silently truncated values.
    x = np.asarray(x, dtype=float)

    # Intercept of the first segment, fixed by the anchor point (x0, y0)
    b1 = y0 - m1*x0

    # Intercept of the second segment, chosen so both lines agree at xinter
    b2 = m1*xinter + b1 - m2*xinter

    # Second segment strictly after the intersection, first segment elsewhere
    return np.where(x > xinter, m2*x + b2, m1*x + b1)

In [4]:
# Root folder of the Taiwan strain data set. Set the TAIWAN_STRAIN_DATA_DIR
# environment variable to run the notebook on another machine; the original
# location is kept as the default.
# os.path.join(..., '') guarantees a trailing separator, which the rest of the
# notebook relies on because it builds file names with `path + '...'`.
path = os.path.join(
    os.environ.get('TAIWAN_STRAIN_DATA_DIR',
                   '/Users/sydneydybing/StrainProject/2024/taiwan_strain_data/'),
    '')

# Event table: one row per earthquake
events = pd.read_csv(path + 'events_csv.csv')

# Station table: one row per station, with per-event hypocentral distances and SNRs
snr_array = pd.read_csv(path + 'station_data_hypdists_snrs_uncaldata_newdetrend.csv')
snr_array

Unnamed: 0,Sta,Lat,Lon,Depth(m),2009_hyp_dist_km,2013_hyp_dist_km,2018_hyp_dist_km,snrs_2009,snrs_2013,snrs_2018
0,BCWB,24.2184,121.6891,-192,69.496535,74.182696,7.06664,-,-,-
1,TRKB,24.1614,121.6127,-192,61.088805,65.767547,9.697232,-,716.758113648,927.452502491
2,NTTB,24.1728,121.5259,-175,60.587926,65.18078,18.587874,236.768783247,-,-
3,SJNB,24.1073,121.6006,-193,54.988063,59.668808,12.2263,-,828.880188996,43.9678676759
4,HGSB,23.4903,121.4237,-266,15.739505,11.125139,79.319331,608.943857983,204.044314692,158.851608405
5,CHMB,23.4868,121.4703,-191,15.969342,11.924641,78.079695,-,157.973829385,26.1116313204
6,SSNB,23.4423,121.3981,-185,21.504342,16.833483,85.224271,-,302.72538086,45.4266213541
7,ZANB,23.3947,121.4585,-218,26.068219,21.740867,88.175343,519.134478753,1012.07605721,22.0643156866
8,FBRB,23.1406,121.2796,-200,56.994188,52.307853,120.756809,64.2673561839,133.041575329,79.1779564701
9,SSTB,23.134,121.3782,-199,55.447392,50.858196,118.191393,-,243.922102017,24.7749979216


In [5]:
# Display the event table read from events_csv.csv (bare last expression so the notebook renders it)
events

Unnamed: 0,evt_id,origin_time,lat,lon,depth
0,2009-10-03_M6.1,2009-10-03T17:36:06.120,23.63,121.452,28.0
1,2013-10-31_M6.3,2013-10-31T12:02:08.720,23.59,121.437,10.0
2,2018-02-04_M6.1,2018-02-04T13:56:41.950,24.157,121.708,12.0


In [6]:
# Fit a continuous two-segment line to log10(peak strain) versus time for every
# event/station pair that has data, using Metropolis MCMC. For each fit the
# cell saves the arrays, the ArviZ summary and two figures, and appends the
# posterior means to the summary lists below.
#
# NOTE(review): the outputs recorded in this notebook report r_hat well above
# 1.01 and bulk ESS near 5 for most stations, and the m1 posterior sits on its
# upper prior bound of 20 for several 2013 stations. The posterior means
# written here should not be treated as converged until the sampler settings
# or priors are revisited.

# Summary lists, one entry per successfully fitted (event, station) pair
events_list = []
mags_list = []
stas_list = []
hypdist_list = []
xhighs_list = []
m1s_list = []
m2s_list = []
xinters_list = []
sigmas_list = []
trans_time_list = []

# Start of every fitting window in seconds (the figure label places the P-wave at 10 s)
xlow = 10

# Per-event settings: magnitude label, hypocentral-distance column of snr_array,
# hand-picked end of the fitting window (s) per station (from the rainbow plots),
# and the message prefix printed when a station has no window for that event.
event_settings = {
    '2009-10-03_M6.1': {
        'mag': '6.1',
        'hypdist_col': '2009_hyp_dist_km',
        'xhigh': {'ZANB': 17, 'NTTB': 22, 'HGSB': 22, 'FBRB': 29},
        'typo_label': None,
    },
    '2013-10-31_M6.3': {
        'mag': '6.3',
        'hypdist_col': '2013_hyp_dist_km',
        'xhigh': {'FBRB': 30, 'SSTB': 30, 'DONB': 30,
                  'HGSB': 20, 'CHMB': 20, 'ZANB': 20,
                  'TRKB': 25, 'SJNB': 25,
                  'SSNB': 15},
        'typo_label': '2013 sta typo ',
    },
    '2018-02-04_M6.1': {
        'mag': '6.1',
        'hypdist_col': '2018_hyp_dist_km',
        'xhigh': {'TRKB': 13, 'SJNB': 13,
                  'HGSB': 35, 'CHMB': 35, 'SSNB': 35, 'ZANB': 35,
                  'FBRB': 55, 'SSTB': 55},
        'typo_label': '2018 sta typo ',
    },
}

# MCMC run parameters, these are good numbers for a "production" run. If you are
# fooling around these can be lower to iterate faster
Nburn = 5000 # burn in samples that get discarded
Nmcmc = 15000 # bump to at least 5-10k
Nchains = 4
Ncores = 1
random_seed = 42 # fixed seed so a rerun reproduces the same chains

# Bounds for the uniform prior distributions
m1_low = -1; m1_high = 20
m2_low = -1; m2_high = 3
# Location of the line slope change.
# NOTE(review): the upper bound is fixed at 25 s although the fitting window
# ends anywhere between 13 s and 55 s depending on the station - confirm this
# is intended.
xinter_low = 10 ; xinter_high = 25

for ii in range(len(events)):

    evt_id = events['evt_id'][ii]
    print(evt_id)

    settings = event_settings.get(evt_id)
    if settings is None:
        # Previously an unknown event silently reused the magnitude, distance
        # column and windows of the preceding event (or raised NameError).
        print('No fitting windows configured for ' + str(evt_id) + ', skipping event')
        continue

    mag = settings['mag']

    for idx in range(len(snr_array)):

        sta = snr_array['Sta'][idx]
        hypdist = snr_array[settings['hypdist_col']][idx]

        # End of the fitting window for this station. None means "not defined";
        # it is looked up fresh every iteration so a value can never leak over
        # from the previous station.
        xhigh = settings['xhigh'].get(sta)
        if xhigh is None and settings['typo_label'] is not None:
            print(settings['typo_label'] + str(sta))

        int_hypdist = int(hypdist)

        try:
            pst = read(path + 'newdetrend_filtered_abs_peak_post_p_uncal/' + evt_id + '/' + sta + '.mseed')
        except Exception:
            # Best effort: a missing or unreadable file just skips the station.
            # `except Exception` (not a bare except) so Ctrl-C still interrupts.
            print('Skipping ' + sta + ', no data')
            continue

        if xhigh is None:
            print('Skipping ' + sta + ', no fitting window defined for ' + evt_id)
            continue

        print('Processing ' + str(sta))

        times = pst[0].times()
        data = pst[0].data
        log10_data = np.log10(data)
        samp_rate = pst[0].stats.sampling_rate
        print(samp_rate)

        # Convert the window bounds from seconds to sample indices
        vec_start = int(xlow*samp_rate)
        vec_end = int(xhigh*samp_rate)

        # Split into x and y vectors; the first sample anchors the first segment
        xobserved = times[vec_start:vec_end]
        yobserved = log10_data[vec_start:vec_end]
        x0 = xobserved[0]
        y0 = yobserved[0]

        # In order to pass x and the anchor point into the target function they
        # need to be converted to PyTensor "shared" variables
        pt_xobserved = pytensor.shared(xobserved)
        pt_x0 = pytensor.shared(x0)
        pt_y0 = pytensor.shared(y0)

        # Define the Bayesian model
        with pm.Model() as model:

            # Uniform priors on both slopes and on the break location, half-Cauchy
            # on the noise level. The *_rv names keep the random variables
            # distinct from the posterior means computed further down.
            m1_rv = pm.Uniform('m1', lower = m1_low, upper = m1_high)
            m2_rv = pm.Uniform('m2', lower = m2_low, upper = m2_high)
            xinter_rv = pm.Uniform('xinter', lower = xinter_low, upper = xinter_high)
            sigma_rv = pm.HalfCauchy('sigma', beta = 10, initval = 1)

            # This is the model
            likelihood = pm.Normal('y', mu = two_straight_lines(pt_xobserved,m1_rv,m2_rv,xinter_rv,pt_x0,pt_y0),
                                    observed = yobserved, sigma = sigma_rv)

            # NUTS sampler (default) is gradient based and won't work, use metropolis
            step = pm.Metropolis(scaling = 0.1)

            # This runs the mcmc sampler
            mcmc = pm.sample(Nmcmc, tune = Nburn, chains = Nchains, cores = Ncores,
                             step = step, random_seed = random_seed)

        # Done, now is post-processing to get the data out of the sampler
        posterior = az.extract(mcmc)

        # Posterior samples (all chains stacked) and their means
        m1_array = posterior.m1.values
        m1 = np.mean(m1_array)

        m2_array = posterior.m2.values
        m2 = np.mean(m2_array)

        xinter_array = posterior.xinter.values
        xinter = np.mean(xinter_array)

        sigma_array = posterior.sigma.values
        sigma = np.mean(sigma_array)

        # Intercepts implied by the posterior-mean parameters
        b1 = y0 - m1*x0
        b2 = m1*xinter + b1 - m2*xinter

        # Posterior-mean model on a regular 0.1 s grid
        xpredicted = np.arange(xobserved.min(), xobserved.max()+0.1, 0.1)
        after_kink = xpredicted > xinter
        ypredicted = np.where(after_kink, m2*xpredicted + b2, m1*xpredicted + b1)

        # Spread of the forward models over all posterior samples
        N = len(m1_array)

        yfit = np.zeros((len(xpredicted), N))
        for k in range(N):
            yfit[:,k] = non_pytensor_two_straight_lines(xpredicted,m1_array[k],m2_array[k],xinter_array[k],x0,y0)
        mu = yfit.mean(1)
        sig = yfit.std(1) * 1.96 # two-sided 95% interval of a normal distribution
        mu_plus = mu + sig
        mu_minus = mu - sig

        # Least squares single-line fit; not used by the outputs written in this cell
        mls, bls = np.polyfit(xobserved, yobserved, 1)

        # Transition point: first grid point strictly after the posterior-mean
        # break. This is the index the former np.diff comparison returned, but it
        # no longer fails when the break falls in the first grid interval or
        # exactly on a grid point.
        kink_hits = np.flatnonzero(after_kink)
        if kink_hits.size:
            trans_idx = int(kink_hits[0])
        else:
            print('Warning: ' + str(sta) + ' break lies beyond the fitting window, using the last grid point')
            trans_idx = len(xpredicted) - 1

        # Get the transition point
        mcmc_trans_x = xpredicted[trans_idx]
        mcmc_trans_y = ypredicted[trans_idx]

        # Append things
        events_list.append(evt_id)
        mags_list.append(mag)
        stas_list.append(sta)
        hypdist_list.append(int_hypdist)
        xhighs_list.append(xhigh)
        m1s_list.append(m1)
        m2s_list.append(m2)
        xinters_list.append(xinter)
        sigmas_list.append(sigma)
        trans_time_list.append(mcmc_trans_x)

        # Folder for this event/station's arrays
        array_save_dir = path + 'mcmc_arrays_uncal_newdetrend/' + evt_id + '/' + sta + '/'
        os.makedirs(array_save_dir, exist_ok = True)

        np.save(array_save_dir + 'xobserved.npy', xobserved)
        np.save(array_save_dir + 'yobserved.npy', yobserved)
        np.save(array_save_dir + 'xpredicted.npy', xpredicted)
        np.save(array_save_dir + 'ypredicted.npy', ypredicted)
        np.save(array_save_dir + 'forward_mus.npy', mu)
        # File name spelling ('foward') kept as is so existing readers still find it
        np.save(array_save_dir + 'foward_sigs.npy', sig)

        summary = az.summary(mcmc, fmt = 'wide')
        print(summary)
        # NOTE(review): index = False drops the parameter names (m1, m2, xinter,
        # sigma) from the CSV; left unchanged in case downstream readers rely on
        # the current column layout.
        summary.to_csv(array_save_dir + 'mcmc_summary.csv', index = False)

        # Folder for figures
        fig_save_dir = path + 'mcmc_plots_uncal_newdetrend/' + evt_id + '/'
        os.makedirs(fig_save_dir, exist_ok = True)

        # Trace / posterior diagnostic figure
        az.plot_trace(mcmc)
        plt.savefig(fig_save_dir + sta + '_stats_plots.png', format = 'PNG')
        plt.close()

        # Observed data, posterior-mean model, transition point and 95% band
        fig = plt.figure(dpi = 100)
        ax = fig.add_subplot(111)
        ax.set_title(str(sta))

        ax.plot(xobserved, yobserved, color = 'blue', label = 'Observed (log peak strain)')
        ax.plot(xpredicted, ypredicted, color = 'red', label = 'Predicted (MCMC)')
        ax.scatter(xpredicted[trans_idx], ypredicted[trans_idx], marker = 'o', color = 'black', label = 'Transition at ' + str(round(xpredicted[trans_idx],2)) + ' sec')
        ax.fill_between(xpredicted, mu_plus, mu_minus, color = 'gray', alpha = 0.2, label = '95% confidence') # 95% confidence interval
        ax.set_xlabel('Time (s) - p-wave at 10s')
        ax.set_ylabel('log10 of peak strain')
        ax.legend()

        plt.savefig(fig_save_dir + sta + '.png', format = 'PNG')
        plt.close()

2009-10-03_M6.1
Skipping BCWB, no data
Skipping TRKB, no data
Processing NTTB
100.0


Sequential sampling (4 chains in 1 job)
CompoundStep
>Metropolis: [m1]
>Metropolis: [m2]
>Metropolis: [xinter]
>Metropolis: [sigma]


  "accept": np.mean(np.exp(self.accept_rate_iter)),


Sampling 4 chains for 5_000 tune and 15_000 draw iterations (20_000 + 60_000 draws total) took 107 seconds.
The rhat statistic is larger than 1.01 for some parameters. This indicates problems during sampling. See https://arxiv.org/abs/1903.08008 for details
The effective sample size per chain is smaller than 100 for some parameters.  A higher number is needed for reliable rhat and ess computation. See https://arxiv.org/abs/1903.08008 for details


          mean     sd  hdi_3%  hdi_97%  mcse_mean  mcse_sd  ess_bulk  \
m1       9.841  1.573   7.730   12.687      0.764    0.588       5.0   
m2       0.104  0.002   0.100    0.109      0.001    0.001       7.0   
xinter  10.360  0.057  10.268   10.449      0.027    0.020       5.0   
sigma    0.147  0.011   0.132    0.171      0.005    0.003       5.0   

        ess_tail  r_hat  
m1          28.0   2.46  
m2          31.0   1.60  
xinter      30.0   2.46  
sigma       28.0   2.03  
Skipping SJNB, no data
Processing HGSB
100.0


Sequential sampling (4 chains in 1 job)
CompoundStep
>Metropolis: [m1]
>Metropolis: [m2]
>Metropolis: [xinter]
>Metropolis: [sigma]


  "accept": np.mean(np.exp(self.accept_rate_iter)),


Sampling 4 chains for 5_000 tune and 15_000 draw iterations (20_000 + 60_000 draws total) took 111 seconds.
The rhat statistic is larger than 1.01 for some parameters. This indicates problems during sampling. See https://arxiv.org/abs/1903.08008 for details
The effective sample size per chain is smaller than 100 for some parameters.  A higher number is needed for reliable rhat and ess computation. See https://arxiv.org/abs/1903.08008 for details


          mean     sd  hdi_3%  hdi_97%  mcse_mean  mcse_sd  ess_bulk  \
m1       6.474  0.407   5.702    7.204      0.183    0.139       5.0   
m2       0.099  0.003   0.094    0.105      0.001    0.001      10.0   
xinter  10.667  0.046  10.589   10.755      0.021    0.015       5.0   
sigma    0.234  0.006   0.223    0.247      0.002    0.001      17.0   

        ess_tail  r_hat  
m1          26.0   2.37  
m2          54.0   1.31  
xinter      26.0   2.35  
sigma       37.0   1.15  
Skipping CHMB, no data
Skipping SSNB, no data
Processing ZANB
100.0


Sequential sampling (4 chains in 1 job)
CompoundStep
>Metropolis: [m1]
>Metropolis: [m2]
>Metropolis: [xinter]
>Metropolis: [sigma]


  "accept": np.mean(np.exp(self.accept_rate_iter)),


Sampling 4 chains for 5_000 tune and 15_000 draw iterations (20_000 + 60_000 draws total) took 109 seconds.
The rhat statistic is larger than 1.01 for some parameters. This indicates problems during sampling. See https://arxiv.org/abs/1903.08008 for details
The effective sample size per chain is smaller than 100 for some parameters.  A higher number is needed for reliable rhat and ess computation. See https://arxiv.org/abs/1903.08008 for details


          mean     sd  hdi_3%  hdi_97%  mcse_mean  mcse_sd  ess_bulk  \
m1      18.238  1.787  14.739   19.999      0.772    0.591       5.0   
m2       0.125  0.008   0.108    0.138      0.003    0.002      10.0   
xinter  10.247  0.029  10.218   10.305      0.012    0.009       5.0   
sigma    0.274  0.011   0.254    0.294      0.003    0.002      13.0   

        ess_tail  r_hat  
m1          30.0   2.00  
m2          54.0   1.33  
xinter      32.0   1.97  
sigma       36.0   1.20  
Processing FBRB
100.0


Sequential sampling (4 chains in 1 job)
CompoundStep
>Metropolis: [m1]
>Metropolis: [m2]
>Metropolis: [xinter]
>Metropolis: [sigma]


  "accept": np.mean(np.exp(self.accept_rate_iter)),


Sampling 4 chains for 5_000 tune and 15_000 draw iterations (20_000 + 60_000 draws total) took 120 seconds.
The rhat statistic is larger than 1.01 for some parameters. This indicates problems during sampling. See https://arxiv.org/abs/1903.08008 for details
The effective sample size per chain is smaller than 100 for some parameters.  A higher number is needed for reliable rhat and ess computation. See https://arxiv.org/abs/1903.08008 for details


          mean     sd  hdi_3%  hdi_97%  mcse_mean  mcse_sd  ess_bulk  \
m1       4.207  0.659   3.390    5.417      0.324    0.248       5.0   
m2       0.049  0.002   0.045    0.053      0.001    0.001       5.0   
xinter  10.852  0.130  10.633   11.038      0.063    0.048       5.0   
sigma    0.207  0.008   0.193    0.222      0.003    0.002       6.0   

        ess_tail  r_hat  
m1          24.0   2.47  
m2          32.0   1.98  
xinter      25.0   2.47  
sigma       29.0   1.86  
Skipping SSTB, no data
Skipping DONB, no data
2013-10-31_M6.3
2013 sta typo BCWB
Skipping BCWB, no data
Processing TRKB
100.0


Sequential sampling (4 chains in 1 job)
CompoundStep
>Metropolis: [m1]
>Metropolis: [m2]
>Metropolis: [xinter]
>Metropolis: [sigma]


  "accept": np.mean(np.exp(self.accept_rate_iter)),


Sampling 4 chains for 5_000 tune and 15_000 draw iterations (20_000 + 60_000 draws total) took 121 seconds.
The rhat statistic is larger than 1.01 for some parameters. This indicates problems during sampling. See https://arxiv.org/abs/1903.08008 for details
The effective sample size per chain is smaller than 100 for some parameters.  A higher number is needed for reliable rhat and ess computation. See https://arxiv.org/abs/1903.08008 for details


          mean     sd  hdi_3%  hdi_97%  mcse_mean  mcse_sd  ess_bulk  \
m1       7.162  1.144   5.581    9.374      0.553    0.424       5.0   
m2       0.081  0.004   0.075    0.089      0.002    0.001       6.0   
xinter  10.654  0.104  10.474   10.818      0.049    0.037       5.0   
sigma    0.317  0.009   0.301    0.334      0.003    0.002      10.0   

        ess_tail  r_hat  
m1          31.0   2.38  
m2          28.0   1.91  
xinter      30.0   2.38  
sigma       39.0   1.31  
2013 sta typo NTTB
Skipping NTTB, no data
Processing SJNB
100.0


Sequential sampling (4 chains in 1 job)
CompoundStep
>Metropolis: [m1]
>Metropolis: [m2]
>Metropolis: [xinter]
>Metropolis: [sigma]


  "accept": np.mean(np.exp(self.accept_rate_iter)),


Sampling 4 chains for 5_000 tune and 15_000 draw iterations (20_000 + 60_000 draws total) took 121 seconds.
The rhat statistic is larger than 1.01 for some parameters. This indicates problems during sampling. See https://arxiv.org/abs/1903.08008 for details
The effective sample size per chain is smaller than 100 for some parameters.  A higher number is needed for reliable rhat and ess computation. See https://arxiv.org/abs/1903.08008 for details


          mean     sd  hdi_3%  hdi_97%  mcse_mean  mcse_sd  ess_bulk  \
m1       5.668  0.660   4.640    6.904      0.312    0.239       5.0   
m2       0.093  0.003   0.087    0.099      0.001    0.001       6.0   
xinter  10.714  0.088  10.564   10.864      0.041    0.031       5.0   
sigma    0.246  0.007   0.233    0.261      0.002    0.002       9.0   

        ess_tail  r_hat  
m1          22.0   2.38  
m2          21.0   1.64  
xinter      21.0   2.38  
sigma       33.0   1.34  
Processing HGSB
100.0


Sequential sampling (4 chains in 1 job)
CompoundStep
>Metropolis: [m1]
>Metropolis: [m2]
>Metropolis: [xinter]
>Metropolis: [sigma]


  "accept": np.mean(np.exp(self.accept_rate_iter)),


Sampling 4 chains for 5_000 tune and 15_000 draw iterations (20_000 + 60_000 draws total) took 117 seconds.
The rhat statistic is larger than 1.01 for some parameters. This indicates problems during sampling. See https://arxiv.org/abs/1903.08008 for details
The effective sample size per chain is smaller than 100 for some parameters.  A higher number is needed for reliable rhat and ess computation. See https://arxiv.org/abs/1903.08008 for details


          mean     sd  hdi_3%  hdi_97%  mcse_mean  mcse_sd  ess_bulk  \
m1      19.639  0.933  17.557   20.000      0.243    0.177      17.0   
m2       0.087  0.003   0.080    0.092      0.001    0.001      21.0   
xinter  10.230  0.014  10.223   10.260      0.004    0.003      16.0   
sigma    0.219  0.011   0.205    0.244      0.002    0.002      32.0   

        ess_tail  r_hat  
m1          29.0   1.16  
m2          53.0   1.13  
xinter      28.0   1.17  
sigma       30.0   1.08  
Processing CHMB
100.0


Sequential sampling (4 chains in 1 job)
CompoundStep
>Metropolis: [m1]
>Metropolis: [m2]
>Metropolis: [xinter]
>Metropolis: [sigma]


  "accept": np.mean(np.exp(self.accept_rate_iter)),


Sampling 4 chains for 5_000 tune and 15_000 draw iterations (20_000 + 60_000 draws total) took 117 seconds.
The rhat statistic is larger than 1.01 for some parameters. This indicates problems during sampling. See https://arxiv.org/abs/1903.08008 for details
The effective sample size per chain is smaller than 100 for some parameters.  A higher number is needed for reliable rhat and ess computation. See https://arxiv.org/abs/1903.08008 for details


          mean     sd  hdi_3%  hdi_97%  mcse_mean  mcse_sd  ess_bulk  \
m1      19.980  0.022  19.940   20.000      0.002    0.002      36.0   
m2       0.081  0.003   0.075    0.087      0.000    0.000     252.0   
xinter  10.255  0.001  10.253   10.256      0.000    0.000     178.0   
sigma    0.318  0.007   0.305    0.331      0.000    0.000   10626.0   

        ess_tail  r_hat  
m1         124.0   1.09  
m2         504.0   1.02  
xinter     442.0   1.04  
sigma    11275.0   1.00  
Processing SSNB
100.0


Sequential sampling (4 chains in 1 job)
CompoundStep
>Metropolis: [m1]
>Metropolis: [m2]
>Metropolis: [xinter]
>Metropolis: [sigma]


  "accept": np.mean(np.exp(self.accept_rate_iter)),


Sampling 4 chains for 5_000 tune and 15_000 draw iterations (20_000 + 60_000 draws total) took 118 seconds.
The rhat statistic is larger than 1.01 for some parameters. This indicates problems during sampling. See https://arxiv.org/abs/1903.08008 for details
The effective sample size per chain is smaller than 100 for some parameters.  A higher number is needed for reliable rhat and ess computation. See https://arxiv.org/abs/1903.08008 for details


          mean     sd  hdi_3%  hdi_97%  mcse_mean  mcse_sd  ess_bulk  \
m1      19.962  0.038  19.891   20.000      0.003    0.002      94.0   
m2       0.230  0.006   0.219    0.240      0.000    0.000     270.0   
xinter  10.177  0.001  10.176   10.179      0.000    0.000     218.0   
sigma    0.172  0.006   0.162    0.182      0.000    0.000   11746.0   

        ess_tail  r_hat  
m1         155.0   1.03  
m2         380.0   1.01  
xinter     327.0   1.02  
sigma    12915.0   1.00  
Processing ZANB
100.0


Sequential sampling (4 chains in 1 job)
CompoundStep
>Metropolis: [m1]
>Metropolis: [m2]
>Metropolis: [xinter]
>Metropolis: [sigma]


  "accept": np.mean(np.exp(self.accept_rate_iter)),


Sampling 4 chains for 5_000 tune and 15_000 draw iterations (20_000 + 60_000 draws total) took 116 seconds.
The rhat statistic is larger than 1.01 for some parameters. This indicates problems during sampling. See https://arxiv.org/abs/1903.08008 for details
The effective sample size per chain is smaller than 100 for some parameters.  A higher number is needed for reliable rhat and ess computation. See https://arxiv.org/abs/1903.08008 for details


          mean     sd  hdi_3%  hdi_97%  mcse_mean  mcse_sd  ess_bulk  \
m1      19.917  0.307  19.896   20.000      0.052    0.037      62.0   
m2       0.065  0.003   0.059    0.070      0.000    0.000      91.0   
xinter  10.209  0.004  10.206   10.211      0.001    0.000      89.0   
sigma    0.199  0.006   0.189    0.208      0.001    0.000     218.0   

        ess_tail  r_hat  
m1          35.0   1.05  
m2          45.0   1.04  
xinter      35.0   1.03  
sigma       63.0   1.01  
Processing FBRB
100.0


Sequential sampling (4 chains in 1 job)
CompoundStep
>Metropolis: [m1]
>Metropolis: [m2]
>Metropolis: [xinter]
>Metropolis: [sigma]


  "accept": np.mean(np.exp(self.accept_rate_iter)),


Sampling 4 chains for 5_000 tune and 15_000 draw iterations (20_000 + 60_000 draws total) took 127 seconds.
The rhat statistic is larger than 1.01 for some parameters. This indicates problems during sampling. See https://arxiv.org/abs/1903.08008 for details
The effective sample size per chain is smaller than 100 for some parameters.  A higher number is needed for reliable rhat and ess computation. See https://arxiv.org/abs/1903.08008 for details


          mean     sd  hdi_3%  hdi_97%  mcse_mean  mcse_sd  ess_bulk  \
m1       7.991  3.990   4.499   16.134      1.965    1.501       5.0   
m2       0.081  0.003   0.076    0.087      0.001    0.001       5.0   
xinter  10.581  0.202  10.221   10.846      0.098    0.075       5.0   
sigma    0.229  0.024   0.188    0.267      0.012    0.009       5.0   

        ess_tail  r_hat  
m1          11.0   2.82  
m2          19.0   2.10  
xinter      11.0   2.82  
sigma       19.0   2.45  
Processing SSTB
100.0


Sequential sampling (4 chains in 1 job)
CompoundStep
>Metropolis: [m1]
>Metropolis: [m2]
>Metropolis: [xinter]
>Metropolis: [sigma]


  "accept": np.mean(np.exp(self.accept_rate_iter)),


Sampling 4 chains for 5_000 tune and 15_000 draw iterations (20_000 + 60_000 draws total) took 122 seconds.
The rhat statistic is larger than 1.01 for some parameters. This indicates problems during sampling. See https://arxiv.org/abs/1903.08008 for details
The effective sample size per chain is smaller than 100 for some parameters.  A higher number is needed for reliable rhat and ess computation. See https://arxiv.org/abs/1903.08008 for details


          mean     sd  hdi_3%  hdi_97%  mcse_mean  mcse_sd  ess_bulk  \
m1      12.222  4.589   6.871   19.990      2.259    1.737       5.0   
m2       0.074  0.001   0.072    0.077      0.000    0.000       7.0   
xinter  10.306  0.104  10.163   10.486      0.049    0.037       5.0   
sigma    0.165  0.039   0.106    0.233      0.019    0.014       5.0   

        ess_tail  r_hat  
m1          11.0   2.75  
m2          43.0   1.54  
xinter      16.0   2.69  
sigma       21.0   2.54  
Processing DONB
20.0


Sequential sampling (4 chains in 1 job)
CompoundStep
>Metropolis: [m1]
>Metropolis: [m2]
>Metropolis: [xinter]
>Metropolis: [sigma]


  "accept": np.mean(np.exp(self.accept_rate_iter)),


Sampling 4 chains for 5_000 tune and 15_000 draw iterations (20_000 + 60_000 draws total) took 109 seconds.
The rhat statistic is larger than 1.01 for some parameters. This indicates problems during sampling. See https://arxiv.org/abs/1903.08008 for details
The effective sample size per chain is smaller than 100 for some parameters.  A higher number is needed for reliable rhat and ess computation. See https://arxiv.org/abs/1903.08008 for details


          mean     sd  hdi_3%  hdi_97%  mcse_mean  mcse_sd  ess_bulk  \
m1       4.510  1.504   2.003    7.440      0.705    0.534       5.0   
m2       0.082  0.002   0.079    0.086      0.000    0.000      13.0   
xinter  10.435  0.157  10.195   10.707      0.072    0.055       5.0   
sigma    0.173  0.008   0.159    0.188      0.002    0.002      12.0   

        ess_tail  r_hat  
m1          11.0   2.65  
m2          65.0   1.22  
xinter      11.0   2.65  
sigma       45.0   1.22  
2018-02-04_M6.1
2018 sta typo BCWB
Skipping BCWB, no data
Processing TRKB
20.0


Sequential sampling (4 chains in 1 job)
CompoundStep
>Metropolis: [m1]
>Metropolis: [m2]
>Metropolis: [xinter]
>Metropolis: [sigma]


  "accept": np.mean(np.exp(self.accept_rate_iter)),


Sampling 4 chains for 5_000 tune and 15_000 draw iterations (20_000 + 60_000 draws total) took 105 seconds.
The rhat statistic is larger than 1.01 for some parameters. This indicates problems during sampling. See https://arxiv.org/abs/1903.08008 for details
The effective sample size per chain is smaller than 100 for some parameters.  A higher number is needed for reliable rhat and ess computation. See https://arxiv.org/abs/1903.08008 for details


          mean     sd  hdi_3%  hdi_97%  mcse_mean  mcse_sd  ess_bulk  \
m1       8.232  1.005   6.365   10.135      0.102    0.072      99.0   
m2       0.624  0.029   0.570    0.680      0.001    0.001     841.0   
xinter  10.185  0.026  10.138   10.235      0.003    0.002      95.0   
sigma    0.173  0.016   0.142    0.203      0.000    0.000    6276.0   

        ess_tail  r_hat  
m1         150.0   1.03  
m2        2710.0   1.01  
xinter     151.0   1.04  
sigma    11263.0   1.00  
2018 sta typo NTTB
Skipping NTTB, no data
Processing SJNB
20.0


Sequential sampling (4 chains in 1 job)
CompoundStep
>Metropolis: [m1]
>Metropolis: [m2]
>Metropolis: [xinter]
>Metropolis: [sigma]


  "accept": np.mean(np.exp(self.accept_rate_iter)),


Sampling 4 chains for 5_000 tune and 15_000 draw iterations (20_000 + 60_000 draws total) took 107 seconds.
The rhat statistic is larger than 1.01 for some parameters. This indicates problems during sampling. See https://arxiv.org/abs/1903.08008 for details
The effective sample size per chain is smaller than 100 for some parameters.  A higher number is needed for reliable rhat and ess computation. See https://arxiv.org/abs/1903.08008 for details


          mean     sd  hdi_3%  hdi_97%  mcse_mean  mcse_sd  ess_bulk  \
m1       3.246  1.371   2.153    5.897      0.592    0.444       8.0   
m2       0.449  0.049   0.375    0.544      0.019    0.015       8.0   
xinter  10.465  0.160  10.162   10.637      0.072    0.054       8.0   
sigma    0.131  0.013   0.108    0.155      0.000    0.000    4629.0   

        ess_tail  r_hat  
m1          28.0   1.49  
m2          34.0   1.45  
xinter      28.0   1.49  
sigma     9739.0   1.00  
Processing HGSB
20.0


Sequential sampling (4 chains in 1 job)
CompoundStep
>Metropolis: [m1]
>Metropolis: [m2]
>Metropolis: [xinter]
>Metropolis: [sigma]


  "accept": np.mean(np.exp(self.accept_rate_iter)),


Sampling 4 chains for 5_000 tune and 15_000 draw iterations (20_000 + 60_000 draws total) took 112 seconds.
The rhat statistic is larger than 1.01 for some parameters. This indicates problems during sampling. See https://arxiv.org/abs/1903.08008 for details
The effective sample size per chain is smaller than 100 for some parameters.  A higher number is needed for reliable rhat and ess computation. See https://arxiv.org/abs/1903.08008 for details


          mean     sd  hdi_3%  hdi_97%  mcse_mean  mcse_sd  ess_bulk  \
m1       4.354  0.428   3.354    5.022      0.193    0.147       5.0   
m2       0.045  0.002   0.042    0.048      0.000    0.000      18.0   
xinter  10.788  0.088  10.637   10.966      0.038    0.029       5.0   
sigma    0.212  0.008   0.199    0.227      0.001    0.001      39.0   

        ess_tail  r_hat  
m1          14.0   2.46  
m2          47.0   1.16  
xinter      14.0   2.48  
sigma       51.0   1.06  
Processing CHMB
20.0


Sequential sampling (4 chains in 1 job)
CompoundStep
>Metropolis: [m1]
>Metropolis: [m2]
>Metropolis: [xinter]
>Metropolis: [sigma]


  "accept": np.mean(np.exp(self.accept_rate_iter)),


Sampling 4 chains for 5_000 tune and 15_000 draw iterations (20_000 + 60_000 draws total) took 109 seconds.
The rhat statistic is larger than 1.01 for some parameters. This indicates problems during sampling. See https://arxiv.org/abs/1903.08008 for details
The effective sample size per chain is smaller than 100 for some parameters.  A higher number is needed for reliable rhat and ess computation. See https://arxiv.org/abs/1903.08008 for details


          mean     sd  hdi_3%  hdi_97%  mcse_mean  mcse_sd  ess_bulk  \
m1       2.692  0.372   2.100    3.355      0.178    0.136       5.0   
m2       0.054  0.001   0.051    0.057      0.001    0.000       8.0   
xinter  10.938  0.145  10.724   11.192      0.068    0.052       5.0   
sigma    0.150  0.007   0.139    0.164      0.002    0.001      11.0   

        ess_tail  r_hat  
m1          20.0   2.72  
m2          28.0   1.44  
xinter      20.0   2.73  
sigma       26.0   1.25  
Processing SSNB
20.0


Sequential sampling (4 chains in 1 job)
CompoundStep
>Metropolis: [m1]
>Metropolis: [m2]
>Metropolis: [xinter]
>Metropolis: [sigma]


  "accept": np.mean(np.exp(self.accept_rate_iter)),


Sampling 4 chains for 5_000 tune and 15_000 draw iterations (20_000 + 60_000 draws total) took 113 seconds.
The rhat statistic is larger than 1.01 for some parameters. This indicates problems during sampling. See https://arxiv.org/abs/1903.08008 for details
The effective sample size per chain is smaller than 100 for some parameters.  A higher number is needed for reliable rhat and ess computation. See https://arxiv.org/abs/1903.08008 for details


          mean     sd  hdi_3%  hdi_97%  mcse_mean  mcse_sd  ess_bulk  \
m1       2.249  0.181   1.867    2.540      0.078    0.059       5.0   
m2       0.056  0.001   0.054    0.058      0.000    0.000      16.0   
xinter  11.013  0.087  10.862   11.201      0.037    0.028       5.0   
sigma    0.163  0.005   0.154    0.174      0.000    0.000     698.0   

        ess_tail  r_hat  
m1          16.0   1.98  
m2          99.0   1.19  
xinter      16.0   2.02  
sigma     9431.0   1.01  
Processing ZANB
20.0


Sequential sampling (4 chains in 1 job)
CompoundStep
>Metropolis: [m1]
>Metropolis: [m2]
>Metropolis: [xinter]
>Metropolis: [sigma]


  "accept": np.mean(np.exp(self.accept_rate_iter)),


Sampling 4 chains for 5_000 tune and 15_000 draw iterations (20_000 + 60_000 draws total) took 116 seconds.
The rhat statistic is larger than 1.01 for some parameters. This indicates problems during sampling. See https://arxiv.org/abs/1903.08008 for details
The effective sample size per chain is smaller than 100 for some parameters.  A higher number is needed for reliable rhat and ess computation. See https://arxiv.org/abs/1903.08008 for details


          mean     sd  hdi_3%  hdi_97%  mcse_mean  mcse_sd  ess_bulk  \
m1       3.669  0.563   2.737    4.678      0.270    0.207       5.0   
m2       0.041  0.001   0.039    0.042      0.000    0.000      12.0   
xinter  10.632  0.102  10.470   10.820      0.048    0.036       5.0   
sigma    0.105  0.007   0.094    0.121      0.003    0.002       7.0   

        ess_tail  r_hat  
m1          25.0   2.65  
m2          35.0   1.28  
xinter      25.0   2.67  
sigma       25.0   1.62  
Processing FBRB
20.0


Sequential sampling (4 chains in 1 job)
CompoundStep
>Metropolis: [m1]
>Metropolis: [m2]
>Metropolis: [xinter]
>Metropolis: [sigma]


  "accept": np.mean(np.exp(self.accept_rate_iter)),


Sampling 4 chains for 5_000 tune and 15_000 draw iterations (20_000 + 60_000 draws total) took 119 seconds.
The rhat statistic is larger than 1.01 for some parameters. This indicates problems during sampling. See https://arxiv.org/abs/1903.08008 for details
The effective sample size per chain is smaller than 100 for some parameters.  A higher number is needed for reliable rhat and ess computation. See https://arxiv.org/abs/1903.08008 for details


          mean     sd  hdi_3%  hdi_97%  mcse_mean  mcse_sd  ess_bulk  \
m1       2.169  0.072   2.043    2.284      0.034    0.026       5.0   
m2       0.027  0.000   0.026    0.027      0.000    0.000      18.0   
xinter  11.391  0.050  11.316   11.482      0.023    0.018       5.0   
sigma    0.158  0.004   0.151    0.165      0.000    0.000   11141.0   

        ess_tail  r_hat  
m1          12.0   2.34  
m2          85.0   1.18  
xinter      12.0   2.29  
sigma    11748.0   1.00  
Processing SSTB
20.0


Sequential sampling (4 chains in 1 job)
CompoundStep
>Metropolis: [m1]
>Metropolis: [m2]
>Metropolis: [xinter]
>Metropolis: [sigma]


  "accept": np.mean(np.exp(self.accept_rate_iter)),


Sampling 4 chains for 5_000 tune and 15_000 draw iterations (20_000 + 60_000 draws total) took 110 seconds.
The rhat statistic is larger than 1.01 for some parameters. This indicates problems during sampling. See https://arxiv.org/abs/1903.08008 for details
The effective sample size per chain is smaller than 100 for some parameters.  A higher number is needed for reliable rhat and ess computation. See https://arxiv.org/abs/1903.08008 for details


          mean     sd  hdi_3%  hdi_97%  mcse_mean  mcse_sd  ess_bulk  \
m1       3.191  0.475   2.532    4.102      0.232    0.177       4.0   
m2       0.020  0.001   0.019    0.021      0.000    0.000      10.0   
xinter  11.073  0.157  10.810   11.330      0.076    0.058       4.0   
sigma    0.159  0.007   0.147    0.172      0.003    0.002       6.0   

        ess_tail  r_hat  
m1          11.0   3.10  
m2          10.0   1.38  
xinter      11.0   3.08  
sigma       12.0   1.69  
2018 sta typo DONB
Skipping DONB, no data


In [7]:
# Gather the per-station result lists into a single 2-D array: one row per
# processed event/station pair, one column per list (same column order as below).
# Note: the columns mix text and numbers, so NumPy stores every entry as a string
# (visible in the displayed output of this cell).
result_columns = (
    events_list, mags_list, stas_list, hypdist_list, xhighs_list,
    m1s_list, m2s_list, xinters_list, sigmas_list,
)
stacked_parts = [np.array(column) for column in result_columns]

# The transition times go in last, rounded to two decimals.
stacked_parts.append(np.round(np.array(trans_time_list), 2))
mcmc_array = np.column_stack(stacked_parts)

mcmc_array

array([['2009-10-03_M6.1', '6.1', 'NTTB', '60', '22',
        '9.840828840593925', '0.1043521451767861', '10.35993135046358',
        '0.14736688543045745', '10.4'],
       ['2009-10-03_M6.1', '6.1', 'HGSB', '15', '22', '6.47403996072491',
        '0.09935716784927456', '10.666897440611727',
        '0.2343774368462061', '10.7'],
       ['2009-10-03_M6.1', '6.1', 'ZANB', '26', '17',
        '18.238399881767933', '0.12456797186054946',
        '10.246524007172965', '0.2737835073028753', '10.3'],
       ['2009-10-03_M6.1', '6.1', 'FBRB', '56', '29',
        '4.2066592365708875', '0.04907482345029485', '10.85161841836865',
        '0.20710983608931577', '10.9'],
       ['2013-10-31_M6.3', '6.3', 'TRKB', '65', '25', '7.16166071846211',
        '0.08132421087918383', '10.654061684179563',
        '0.31678329910610137', '10.7'],
       ['2013-10-31_M6.3', '6.3', 'SJNB', '59', '25',
        '5.667812291065807', '0.09314884789992844', '10.714307323573994',
        '0.24557338540112436', '10.8'

In [8]:
# # Insert the re-run stations into the results array
# # NOTE(review): every line of this cell is commented out, so it does nothing when
# # the notebook runs. As written, it would load a previously saved results array,
# # drop the rows for three event/station pairs (2013 SSNB, 2013 SSTB, 2018 TRKB),
# # insert mcmc_array[0], [1] and [2] at those row positions, and save the result
# # back over the same file.

# old_mcmc_array = np.load(path + 'mcmc_arrays/mcmc_colstack_array.npy')
# print(old_mcmc_array.shape)

# ssnb13 = np.where((old_mcmc_array[:,0] == '2013-10-31_M6.3') & (old_mcmc_array[:,2] == 'SSNB'))[0]
# # print(old_mcmc_array[ssnb13])
# sstb13 = np.where((old_mcmc_array[:,0] == '2013-10-31_M6.3') & (old_mcmc_array[:,2] == 'SSTB'))[0]
# # print(old_mcmc_array[sstb13])
# trkb18 = np.where((old_mcmc_array[:,0] == '2018-02-04_M6.1') & (old_mcmc_array[:,2] == 'TRKB'))[0]
# # print(old_mcmc_array[trkb18])

# del_array = np.delete(old_mcmc_array, [ssnb13, sstb13, trkb18], axis = 0)
# print(del_array.shape)

# ins_array = np.insert(del_array, ssnb13, mcmc_array[0], axis = 0)
# ins_array = np.insert(ins_array, sstb13, mcmc_array[1], axis = 0)
# ins_array = np.insert(ins_array, trkb18, mcmc_array[2], axis = 0)
# print(ins_array.shape)

# # pd_old = pd.DataFrame(old_mcmc_array)
# # pd_ins = pd.DataFrame(ins_array)

# np.save(path + 'mcmc_arrays/mcmc_colstack_array.npy', ins_array)

In [9]:
# Assemble the results array: one row per processed event/station pair, one
# column per result list. Because text and numeric columns are stacked together,
# NumPy stores every entry as a string.
mcmc_array = np.column_stack((np.array(events_list), np.array(mags_list), np.array(stas_list), np.array(hypdist_list),
                             np.array(xhighs_list), np.array(m1s_list), np.array(m2s_list), np.array(xinters_list),
                             np.array(sigmas_list), np.round(np.array(trans_time_list),2)))

# np.save does not create missing folders and would raise FileNotFoundError on a
# fresh checkout, so make sure the output directory exists before writing.
mcmc_out_dir = path + 'mcmc_arrays_uncal_newdetrend/'
os.makedirs(mcmc_out_dir, exist_ok=True)

np.save(mcmc_out_dir + 'mcmc_colstack_array.npy', mcmc_array)

In [10]:
# Reload the saved results array from disk and show its dimensions.
#
# Column layout of the array:
#   0. Event name
#   1. Magnitude
#   2. Station name
#   3. Hypocentral distance (km)
#   4. xhigh
#   5. m1 slope
#   6. m2 slope
#   7. intersection point
#   8. sigma
#   9. transition time
taiwan_mcmc_array = np.load(
    path + 'mcmc_arrays_uncal_newdetrend/mcmc_colstack_array.npy'
)

np.shape(taiwan_mcmc_array)

(21, 10)

In [11]:
# Label the columns of the results array and write the table out as a CSV
# summary (the row index is not written).
summary_columns = [
    'eq_id', 'mag', 'sta', 'hypdist', 'xhigh',
    'm1', 'm2', 'xinter', 'sigma', 'trans_time',
]
pd_mcmc = pd.DataFrame(data=taiwan_mcmc_array, columns=summary_columns)

pd_mcmc.to_csv(
    path + 'mcmc_arrays_uncal_newdetrend/mcmc_results_summary.csv',
    index=False,
)

In [12]:
# Reload the saved summary table; it is the starting point for sorting the
# waveforms into "good" and "bad" performers using an L2 norm.
# NOTE(review): this cell only reads and displays the CSV -- the L2-norm step
# itself is not performed here.
mcmc_results = pd.read_csv(
    path + 'mcmc_arrays_uncal_newdetrend/mcmc_results_summary.csv'
)

mcmc_results

Unnamed: 0,eq_id,mag,sta,hypdist,xhigh,m1,m2,xinter,sigma,trans_time
0,2009-10-03_M6.1,6.1,NTTB,60,22,9.840829,0.104352,10.359931,0.147367,10.4
1,2009-10-03_M6.1,6.1,HGSB,15,22,6.47404,0.099357,10.666897,0.234377,10.7
2,2009-10-03_M6.1,6.1,ZANB,26,17,18.2384,0.124568,10.246524,0.273784,10.3
3,2009-10-03_M6.1,6.1,FBRB,56,29,4.206659,0.049075,10.851618,0.20711,10.9
4,2013-10-31_M6.3,6.3,TRKB,65,25,7.161661,0.081324,10.654062,0.316783,10.7
5,2013-10-31_M6.3,6.3,SJNB,59,25,5.667812,0.093149,10.714307,0.245573,10.8
6,2013-10-31_M6.3,6.3,HGSB,11,20,19.638991,0.086636,10.230387,0.219409,10.3
7,2013-10-31_M6.3,6.3,CHMB,11,20,19.979715,0.080786,10.254558,0.317673,10.3
8,2013-10-31_M6.3,6.3,SSNB,16,15,19.96242,0.229556,10.177394,0.171723,10.2
9,2013-10-31_M6.3,6.3,ZANB,21,20,19.916747,0.065183,10.209131,0.198727,10.3


In [13]:
# Print the mean of m1, m2 and xinter across all rows of the summary table,
# followed by the median of the same three columns (six unlabeled values, in
# that order).
for summarize in (np.mean, np.median):
    for column_name in ('m1', 'm2', 'xinter'):
        print(summarize(mcmc_results[column_name].values))

8.838606572224947
0.12224461845733507
10.579563980887823
6.47403996072491
0.0811422960146542
10.58054393444255
