In [1]:
from obspy import UTCDateTime
from obspy.clients.fdsn import Client as FDSN_Client
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import pickle
import os
from progress.bar import Bar
from tqdm.notebook import tqdm
from multiprocessing import Pool
from glob import glob

In [3]:
def _normalise_columns(frame):
    """Give ``frame`` lower-case, underscore-separated column names.

    Surrounding whitespace is stripped, names are lower-cased, spaces become
    underscores and parentheses are removed. The frame is modified in place
    and returned so the call can wrap ``pd.read_csv`` directly.
    """
    # regex=False: '(' and ')' must be matched literally; relying on the
    # default would depend on the installed pandas version.
    frame.columns = (
        frame.columns.str.strip()
        .str.lower()
        .str.replace(' ', '_', regex=False)
        .str.replace('(', '', regex=False)
        .str.replace(')', '', regex=False)
    )
    return frame


# Pipe-separated text tables: the quake catalogue and the station metadata.
quakelist=_normalise_columns(pd.read_csv('data/quakelist1.txt',sep="|"))

stationlist=_normalise_columns(pd.read_csv('data/station_h_data.txt',sep="|"))

In [4]:
# Fixed seed so the shuffled order of quake row positions is reproducible.
np.random.seed(42)
# np.arange builds the index array directly (and stays an integer array even
# for an empty catalogue, unlike np.array(range(0))).
rand_inds=np.arange(len(quakelist))
np.random.shuffle(rand_inds)

In [6]:
# Two FDSN web-service clients: one addressed by the short name "GEONET",
# the other by an explicit service URL.
# NOTE(review): the "nrt" host is presumably the near-real-time service — confirm.
client = FDSN_Client(base_url="GEONET")
client_nrt = FDSN_Client(base_url="https://service-nrt.geonet.org.nz")

In [7]:
import warnings
# Install a blanket 'ignore' filter: no category, module or message restriction
# is given, so every warning raised after this cell runs is suppressed.
# NOTE(review): this also hides deprecation and data-quality warnings from the
# libraries used below — consider narrowing it to specific categories.
warnings.filterwarnings('ignore')

In [8]:
def close_to_quake(tt, wait_time=3600):
    """Report whether ``tt`` lies close in time to any quake in ``quakelist``.

    Parameters
    ----------
    tt : anything accepted by ``UTCDateTime``
        The time to test.
    wait_time : float, optional
        Half-width of the exclusion window in seconds. The default of 3600
        means "within an hour either side of a quake".

    Returns
    -------
    bool
        True if at least one entry of ``quakelist['time']`` is strictly less
        than ``wait_time`` seconds away from ``tt``, otherwise False (also
        False when the catalogue is empty).
    """
    reference=UTCDateTime(tt)  # parse the candidate once, not once per quake
    # Subtracting two UTCDateTime objects gives their difference in seconds.
    quaketime_diffs=np.array(
        [UTCDateTime(quaketime)-reference for quaketime in quakelist['time'].to_numpy()],
        dtype=float,
    )
    return bool(np.any(np.abs(quaketime_diffs)<wait_time))

In [9]:
def good_stream(st1, expected_npts=32000):
    """Decide whether the stream ``st1`` holds usable three-component data.

    Only the first three traces are inspected. The stream is accepted when

    * it contains at least three traces,
    * the first three traces are exactly one each of the channels
      "HHZ", "HHN" and "HHE" (in any order),
    * each of them has ``expected_npts`` or ``expected_npts + 1`` samples, and
    * none of them is constant (standard deviation of the data equal to 0).

    Parameters
    ----------
    st1 : sequence of traces
        Each trace must expose ``stats.channel``, ``stats.npts`` and ``data``.
    expected_npts : int, optional
        Number of samples each trace should have; one extra sample is
        tolerated. Defaults to 32000.

    Returns
    -------
    bool
        True if every check passes, False otherwise.
    """
    channel_slot={"HHZ":0,"HHN":1,"HHE":2}

    if len(st1)<3: # need at least the three components
        return False

    seen=[False,False,False]
    for tr_ind in range(3):
        trace=st1[tr_ind]

        slot=channel_slot.get(trace.stats.channel)
        if slot is None: # some channel other than HHZ/HHN/HHE
            return False
        seen[slot]=True

        if trace.stats.npts not in (expected_npts,expected_npts+1): # wrong length
            return False

        if np.std(trace.data)==0: # flat trace: the signal never changes in time
            return False

    # A repeated channel leaves one slot unseen, so all three must be set.
    return all(seen)

In [32]:
def find_rand_times(n_samples,wait_time=3600):
    """Draw random times that are not close to any quake in ``quakelist``.

    Candidates are drawn uniformly between the earliest and latest quake time
    (one ``np.random.uniform`` call per candidate) and kept only if every
    quake is more than ``wait_time`` seconds away.

    Parameters
    ----------
    n_samples : int
        Number of times to return.
    wait_time : float, optional
        Minimum distance in seconds from every quake (default 3600).

    Returns
    -------
    numpy.ndarray
        ``n_samples`` accepted times as floats, on the same scale as
        ``float(UTCDateTime(...))`` of the catalogue times.

    Raises
    ------
    ValueError
        If ``quakelist`` has no quake times, or if no time between the first
        and last quake is further than ``wait_time`` from every quake (the
        rejection loop could otherwise never finish).
    """
    quaketimes=np.array([float(UTCDateTime(quaketime)) for quaketime in quakelist['time'].to_numpy()])
    if quaketimes.size==0:
        raise ValueError('quakelist contains no quake times to sample between')
    max_time=quaketimes.max()
    min_time=quaketimes.min()

    # A candidate can only be accepted inside a gap between consecutive quakes
    # that is wider than 2*wait_time; without such a gap every draw is rejected.
    if n_samples>0 and wait_time>=0:
        gaps=np.diff(np.sort(quaketimes))
        if gaps.size==0 or gaps.max()<=2*wait_time:
            raise ValueError('no quake-free interval longer than 2*wait_time exists in quakelist')

    rand_times=np.zeros(n_samples)
    ii=0
    while ii<n_samples:
        t_temp=np.random.uniform(min_time,max_time)
        # The nearest quake decides: it must be more than wait_time away.
        if np.abs(quaketimes-t_temp).min()>wait_time:
            rand_times[ii]=t_temp
            ii+=1

    return rand_times

In [33]:
# Quick check of the sampler: draw 500 times. The returned array is only
# displayed as the cell output, it is not stored in a variable.
find_rand_times(500)

array([  1.57777859e+09,   1.52621734e+09,   1.51756405e+09,
         1.53019613e+09,   1.39663971e+09,   1.52489987e+09,
         1.40073684e+09,   1.43038844e+09,   1.45057505e+09,
         1.44268170e+09,   1.54025535e+09,   1.54469642e+09,
         1.50204834e+09,   1.52690872e+09,   1.43699366e+09,
         1.56393683e+09,   1.48479638e+09,   1.48850061e+09,
         1.43637152e+09,   1.46275204e+09,   1.51232103e+09,
         1.57432369e+09,   1.46048245e+09,   1.46631609e+09,
         1.54223950e+09,   1.40725247e+09,   1.52312774e+09,
         1.39290896e+09,   1.51778221e+09,   1.54389721e+09,
         1.36838084e+09,   1.54009283e+09,   1.57691524e+09,
         1.58248870e+09,   1.55546446e+09,   1.53643719e+09,
         1.42171344e+09,   1.41193245e+09,   1.46201959e+09,
         1.53679311e+09,   1.46256028e+09,   1.40153140e+09,
         1.58257195e+09,   1.44232538e+09,   1.52605587e+09,
         1.48417657e+09,   1.57843506e+09,   1.48537244e+09,
         1.49974566e+09,

In [None]:
n_samples=2000

# Draw the quake-free times with find_rand_times rather than a hand-rolled
# loop: the catalogue bounds (min_time / max_time) exist only inside that
# function, and it checks each candidate against pre-parsed quake times
# instead of re-parsing the whole catalogue for every draw. Its default
# wait_time of 3600 s is the same one-hour window close_to_quake uses.
rand_times=find_rand_times(n_samples)

In [None]:
# Directory holding the quake-free ("noquakes") data; note it is an absolute,
# machine-specific path.
data_dir = '/media/peter/data/earthquakenz/data/noquakes/'

# Station codes to download. A station's position in this list is the index
# used for it on the last axis of each saved block, so keep the order fixed.
station_list1 = [
    'APZ', 'BFZ', 'BKZ', 'CTZ', 'DSZ', 'EAZ', 'FOZ',
    'FWVZ', 'GLKZ', 'GRZ', 'GVZ', 'HAZ', 'HIZ', 'INZ', 'JCZ', 'KHEZ',
    'KHZ', 'KNZ', 'LBZ', 'LTZ', 'MLZ', 'MQZ',
    'MRZ', 'MSZ', 'MWZ', 'MXZ', 'NNZ', 'ODZ', 'OPRZ', 'OPZ', 'OTVZ',
    'OUZ', 'OXZ', 'PUZ', 'PXZ', 'PYZ', 'QRZ', 'RATZ', 'RIZ',
    'RTZ', 'SYZ', 'THZ', 'TLZ', 'TMVZ', 'TOZ', 'TRVZ', 'TSZ', 'TUZ',
    'VRZ', 'WAZ', 'WCZ', 'WEL', 'WHVZ', 'WHZ', 'WIZ', 'WKZ',
    'WSRZ', 'WVZ',
]

# Previously generated sample times, read back from disk.
rand_times = np.load(f'{data_dir}rand_times.npy')

# Shorter station list kept for reference (disabled):
# station_list1=['APZ', 'GRZ', 'GVZ', 'HAZ', 'HIZ', 'INZ', 'JCZ', 'KHEZ',
#        'KHZ', 'KNZ', 'WCZ', 'WEL', 'WHVZ', 'WHZ', 'WIZ', 'WKZ',
#        'WSRZ', 'WVZ']
def paralell_download_noquake(t_start,step=10):
    """Download one window for every station and save it as a single block.

    For each station in ``station_list1`` the "HH?" channels of network "NZ"
    are requested from ``client`` for the interval from 10 s before
    ``t_start`` to 10 s after ``t_start + 300 s``. The padding is cut off and
    every ``step``-th sample of the remaining 300 s is stored in
    ``blok[:, component, station]`` with the components ordered Z, N, E.

    A station is skipped (its slice stays zero) when none of its
    ``starttime`` entries in ``stationlist`` is earlier than the padded window
    start. If any other station cannot be downloaded after 4 attempts, or its
    stream fails ``good_stream``, the whole window is abandoned and nothing is
    saved.

    Parameters
    ----------
    t_start : float
        Start of the window; ``t_start`` plus/minus seconds is passed to
        ``UTCDateTime``. ``str(t_start)`` is used as the output file name.
    step : int, optional
        Decimation stride applied to the samples (default 10).

    Returns
    -------
    None
        The block is written to ``data_dir + 'bloks1/' + str(t_start) + '.npy'``.
    """
    t_duration=5*60   # seconds of data kept per window
    samplerate=100    # samples per second assumed when slicing the traces
    starttime=10      # seconds of padding requested either side of the window
    component_slot={"HHZ":0,"HHN":1,"HHE":2}

    blok=np.zeros((int(np.ceil(t_duration*samplerate/step)),3,len(station_list1)))
    start_times=np.zeros((3,len(station_list1)))
    timeid=str(t_start)

    # Derive the request interval and the slice bounds from the constants
    # above so the padding cannot drift out of sync between the two.
    window_from=UTCDateTime(t_start-starttime)
    window_to=UTCDateTime(t_start+t_duration+starttime)
    first_sample=starttime*samplerate
    last_sample=int(t_duration*samplerate)+first_sample

    prev_station_ind=None  # last station that actually received data
    print('Starting: '+ str((timeid)))
    for station_ind in tqdm(range(len(station_list1)),leave=False,desc=timeid):
        station=station_list1[station_ind]
        station_starts=stationlist[stationlist['station']==station]['starttime'].to_numpy()
        # np.any gives a well-defined answer whether the station has zero,
        # one or several rows in stationlist.
        if not np.any(station_starts<window_from):
            continue

        st=None
        for _attempt in range(4):
            try:
                st= client.get_waveforms("NZ",station ,"*", "HH?", window_from, window_to,attach_response=True)
                break
            except Exception:
                # Retry on request errors, but let KeyboardInterrupt /
                # SystemExit propagate so the download can be stopped.
                continue

        if st is None or not good_stream(st):
            return

        filled=[0,0,0]
        for tr_ind in range(3):
            trace=st[tr_ind]
            fill_ind=component_slot.get(trace.stats.channel)
            if fill_ind is None:
                print('SOMETHING WRONG WITH THE HH* INDS')
                continue
            blok[:,fill_ind,station_ind]=trace.data[first_sample:last_sample:step]
            filled[fill_ind]=1
            start_times[fill_ind,station_ind]=float(trace.stats.starttime)

        if not filled== [1,1,1]:
            print('NOT FILLED!')

        # Compare against the previous station that was filled; a skipped
        # station has all-zero start times and would always look "off".
        if prev_station_ind is not None:
            offsets=start_times[:,station_ind]-start_times[:,prev_station_ind]
            worst_offset=np.max(np.abs(offsets))
            if worst_offset>1.5/samplerate:
                print('Something off with start times around at ' + timeid+str(station_ind)+', '+station)
                print(offsets)
                print(worst_offset)
        prev_station_ind=station_ind

    out_path=data_dir+'bloks1/'+timeid+'.npy'
    last_error=None
    for _attempt in range(5):
        try:
            np.save(out_path,blok)
        except Exception as err:
            last_error=err
        else:
            print('Ending: '+ str((timeid)))
            break
    else:
        # Every attempt failed: report it instead of dropping the block silently.
        print('Could not save '+out_path+': '+repr(last_error))