In [None]:
#general libraries
import os
import sys
import glob
import pathlib
import warnings
import timeit
from tqdm import tqdm
#regular expressions
import re
#database libraries
from sqlite3 import connect
#arithmetic libraries
import numpy as np
import scipy
#statistics libraries
import pandas as pd
#ground motion models
import pygmm
#ipython
from IPython.display import display, clear_output

from mpi4py import MPI

#directory holding the SCEC sqlite databases
dir_db = '/resnick/groups/enceladus/glavrent/Scalable_GPs/Raw_files/scec/'
#lookup table: period label ('T<period>s', two decimals) -> sqlite filename in dir_db
#insertion order follows increasing period
fn_scec_dbs = dict([
    ('T2.00s',  'study_22_12_2.0sec.sqlite'),
    ('T2.20s',  'study_22_12_2.2sec.sqlite'),
    ('T2.40s',  'study_22_12_2.4sec.sqlite'),
    ('T2.60s',  'study_22_12_2.6sec.sqlite'),
    ('T2.80s',  'study_22_12_2.8sec.sqlite'),
    ('T3.00s',  'study_22_12_3.0sec.sqlite'),
    ('T3.50s',  'study_22_12_3.5sec.sqlite'),
    ('T4.00s',  'study_22_12_4.0sec.sqlite'),
    ('T4.40s',  'study_22_12_4.4sec.sqlite'),
    ('T5.00s',  'study_22_12_5.0sec.sqlite'),
    ('T5.50s',  'study_22_12_5.5sec.sqlite'),
    ('T6.00s',  'study_22_12_6.0sec.sqlite'),
    ('T6.50s',  'study_22_12_6.5sec.sqlite'),
    ('T7.50s',  'study_22_12_7.5sec.sqlite'),
    ('T8.50s',  'study_22_12_8.5sec.sqlite'),
    ('T10.00s', 'study_22_12_10.0sec.sqlite'),
])
# fn_scec_dbs = {'T2.00s':  'study_22_12_lf_6_periods.sqlite',
#                'T3.00s':  'study_22_12_lf_6_periods.sqlite',
#                'T4.00s':  'study_22_12_lf_6_periods.sqlite',
#                'T5.00s':  'study_22_12_lf_6_periods.sqlite',
#                'T7.50s':  'study_22_12_lf_6_periods.sqlite',
#                'T10.00s': 'study_22_12_lf_6_periods.sqlite'}

#periods to process
#each value is formatted as 'T%.2fs' to look up its database file in fn_scec_dbs,
#so only periods present in that table can be selected
#full set of alternatives (uncomment exactly one per2process assignment):
# per2process = [2.0,2.2,2.4,2.6,2.8,3.0,3.5,4.0,4.4,5.0,5.5,6.0,6.5,7.5,8.5,10.0]
# per2process = [2.0,3.0,4.0,5.0,7.5,10.0]
# per2process = [2.0]
#testing batches
per2process = [2.0, 2.2]      #batch 1
# per2process = [2.4,2.6]     #batch 2
#production batches
# per2process = [2.0,2.2,2.4,2.6,2.8] #batch 1
# per2process = [3.0,3.5,4.0,4.4]     #batch 2
# per2process = [5.0,5.5,6.0,6.5]     #batch 3
# per2process = [7.5,8.5,10.0]        #batch 4

#set to True to delete an existing ground motion database before processing
flag_reset = False

#ground motion models to evaluate (label -> pygmm model class)
gmm_dict = {
    'ASK14': pygmm.AbrahamsonSilvaKamai2014,
    'CY14':  pygmm.ChiouYoungs2014,
}
#gmm_dict = {'ASK14':pygmm.AbrahamsonSilvaKamai2014}

#output locations
dir_out = '/resnick/groups/enceladus/glavrent/Scalable_GPs/Data/preprocessing_jz/'
dir_fig = dir_out + 'figures/'

#filename of the ground motion database (located in dir_out)
fn_gm_db = 'gm_db.sqlite'
# fn_gm_db = 'gm_db_ASK14.sqlite'

#make sure the output directory exists (no-op when it is already there)
os.makedirs(dir_out, exist_ok=True)


#decide whether the metadata must be stored
#metadata is needed whenever the ground motion database starts out empty:
#  - the old file was just removed because flag_reset is set, or
#  - no database file exists yet (first run)
#an existing database that is kept (flag_reset is False) is left as is
# NOTE(review): if this cell is executed by several MPI ranks at once, confirm
# that only one rank acts on flag_metadata / removes the file
flag_metadata = True
if os.path.exists(dir_out+fn_gm_db):
    if flag_reset:
        os.remove(dir_out+fn_gm_db) #remove old database if exists
    else:
        flag_metadata = False       #keep existing database and its metadata

In [None]:
## Check that every SCEC sqlite database selected in per2process contains exactly
## one distinct IM_Period and that it equals the period its filename key suggests
#iterate tqdm over the list itself (not over enumerate) so the bar knows the total
for per in tqdm(per2process):
    #set up scec database connection
    db_scec_cnx = connect(dir_db+fn_scec_dbs['T%.2fs'%per])
    try:
        query = "SELECT DISTINCT IM_Period FROM IM_Data;"
        df_scec_periods = pd.read_sql_query(query, db_scec_cnx)
    finally:
        #sqlite3 connections are not closed automatically; close explicitly so
        #one handle per period is not leaked (also on query failure)
        db_scec_cnx.close()
    #shape check comes first, so the [0] access is only reached with exactly one row
    if (df_scec_periods.shape[0] != 1) or (df_scec_periods.IM_Period.values[0] != per):
        print('ERROR: SCEC database has more than one period or period does not match the one in the filename.')
        sys.exit(1)

   IM_Period
0        2.0
   IM_Period
0        2.2
