In [1]:
import numpy as np
import pandas as pd
import datetime as dt
import matplotlib.pyplot as plt
import os
import creep_event_picker as cep
import obspy
import cmcrameri.cm as cmc
import h5py
import re
import matplotlib.dates as mdates
%matplotlib qt




In [2]:
def check_dir(path):
    isExist = os.path.exists(path)
    if not isExist:
        # Create a new directory because it does not exist 
        os.makedirs(path, exist_ok=True)

Import creepmeter database

In [5]:
Creepmeter_dataframe = pd.read_excel('../../Data/Creepmeter_list.xlsx')
Creepmeter_dataframe.drop(Creepmeter_dataframe[Creepmeter_dataframe['Creepmeter_abbrv']!='NS45'].index,inplace=True)

In [6]:
frequency_dict = {
        1/60: '1S',    # 1 second
        1/6: '10S',  # 10 seconds
        1/2: '30S',  # 30 seconds
        1: '1T',  # 1 minute
        2: '2T',  # 2 minutes
        5: '5T',  # 5 minutes
        10: '10T',# 10 minutes
        15: '15T', # 15 minutes
        30: '30T',# 30 minutes
        60: '60T'    # 60 minutes
    }

In [7]:
plt.close('all')
for i in range(len(Creepmeter_dataframe)):
    try:
        with h5py.File('../../Data/DATA_tidied/HDF5/{k}.h5'.format(k=Creepmeter_dataframe['Creepmeter_abbrv'].iloc[i]), 'r') as f:
            # Loop through each key in the file
            keys = list(f.keys())
            for key in f.keys():
                if key not in ['Temperature','Temperature_1T','Temperature_5T','Daily_measurements','Manual_measurements','Orthogonal']:
                    print("Key:", key)
                    print('extracting data')
                    data = f[key]  # Access the dataset
                    data_keys = list(data.keys())
                    print(data_keys)
                    slip = data[data_keys[0]][:]
                    time =  data[data_keys[1]][:]
                    decoded_time = [byte_str.decode('utf-8') for byte_str in time]
                    decoded_time = pd.to_datetime(decoded_time)





                    match = re.search(r'_(\d*\.?\d+)mins$', key)
                    if match:
                        freq_value = float(match.group(1))
                        new_suffix = frequency_dict.get(freq_value)
                        if new_suffix:
                            new_key = re.sub(r'_\d*\.?\d+mins$', f'_{new_suffix}', key)
                        else:
                            new_key = key  # or raise an error / warning
                    else:
                        new_key = key  # unchanged if pattern not matched

                    print(new_key)

                    df_picks = pd.read_csv("../../Data/DATA_tidied/Picks/{q}_picks.csv".format(q =new_key),index_col=0)
                    QC = []
                    df_end_final=pd.DataFrame(columns=['ET'])
                    df_start_final=pd.DataFrame(columns=['ST'])
                    if 'Quality_check_final' in df_picks.columns:
                        print("Alread Qc'ed")
                    else:
                        print('Identifying events')
                        df_picks.drop(df_picks[df_picks['Quality_check_new'] != 'CE'].index,inplace=True)
                        df_picks.sort_values(by='ST_new',inplace=True)
                        df_picks.reset_index(inplace=True, drop=True)

                        print('beginning QC for {k}'.format(k=new_key))
                        
                        for p in range(len(df_picks)):

                            fig = plt.figure(figsize=(15,5))
                            ax = plt.subplot(1,1,1)

                            fig.suptitle('events {a} of {c}'.format(a = p+1,c =len(df_picks) ))
                            boolarr_selecta = np.logical_and(np.array(decoded_time)>=pd.to_datetime(df_picks['ST_new'].iloc[p])-dt.timedelta(hours=72)
                                                                ,np.array(decoded_time)<=pd.to_datetime(df_picks['ET_new'].iloc[p])+dt.timedelta(hours = 72))
                            tm_selecta = decoded_time[boolarr_selecta]


                            creep_selecta = slip[boolarr_selecta]
                            creep_selecta = creep_selecta - creep_selecta[0]
                            plt.plot(tm_selecta,creep_selecta,color='blue',marker='o',markersize=6,markerfacecolor='none',)
                            left,right = ax.get_xlim()
                            top,bottom = ax.get_ylim()
                            plt.ylim(top,bottom)
                            plt.xlim(left,right)
                            

                            plt.vlines([df_picks['ST_new'].iloc[p],df_picks['ET_new'].iloc[p]],ymin=bottom,ymax=top,colors=['red','green'])
                            plt.hlines([0.02],xmin=left,xmax=right,colors=['orange'])
                            plt.ylabel('mm')
                        
                            scale = 1.1
                            zp = cep.ZoomPan()
                            figZoom = zp.zoom_factory(ax, base_scale = scale)
                            figPan = zp.pan_factory(ax)
                            s = 1
                            fig.set_size_inches(11,11)
                            fig.tight_layout()
                            plt.show()
                            ##### QC ####
                            print('{m}/{n}'.format(m=p+1,n=len(df_picks)))  
                            print('is the start pick good?\n esc=yes, click=no')
                            check_start = plt.ginput(1,timeout = -1)
                            if len(check_start) ==1:
                                for sublist in check_start:
                                    df = pd.DataFrame({'ST':[sublist[0]]})
                                    df_start_final = pd.concat([df_start_final,df])
                                print('new start picked\n {k}'.format(k=mdates.num2date(sublist[0])))
                            else:
                                print('start all good! :D')
                                df = pd.DataFrame({'ST':mdates.date2num([df_picks.ST.iloc[p]])})
                                df_start_final = pd.concat([df_start_final,df])
                                print('start picked\n {k}'.format(k=mdates.num2date(df.ST.iloc[0])))
                            print("now pick the end\n esc=keep current, click=pick new")
                            pts = plt.ginput(1,timeout = -1)
                            if len(pts)==1:
                                for sublist in pts:
                                    df = pd.DataFrame({'ET':[sublist[0]]})
                                    df_end_final = pd.concat([df_end_final,df])
                                    print('new end picked\n {k}'.format(k=mdates.num2date(sublist[0])))
                            else:
                                print('end all good! :D')
                                df = pd.DataFrame({'ET':mdates.date2num([df_picks.ET.iloc[p]])})
                                df_end_final = pd.concat([df_end_final,df])
                                print('end picked\n {k}'.format(k=mdates.num2date(df.ET.iloc[0])))
                            
                            
                            print('is this a creep event?\n esc=no, click=yes')
                            check_pts = plt.ginput(1,timeout = -1)
                            
                            if len(check_pts) == 1:
                                QC.append('CE')
                                print('YES!! :D')
                            else:
                                QC.append('NCE')
                                print('Nope :(')

                            plt.close()
                            ends_list=[]
                            for r in range(len(df_end_final)):
                                time_convert = mdates.num2date(float(df_end_final.ET.iloc[r]))
                                nearest_e = decoded_time[np.argmin(np.abs(np.array(decoded_time) - np.datetime64(time_convert)))]
                                ends_list.append(nearest_e)
                            start_list=[]
                            for r in range(len(df_end_final)):
                                time_convert = mdates.num2date(float(df_start_final.ST.iloc[r]))
                                nearest_s = decoded_time[np.argmin(np.abs(np.array(decoded_time) - np.datetime64(time_convert)))]
                                start_list.append(nearest_s)
                        
                        
                        df_picks['ST_final'] = start_list
                        df_picks['ET_final'] = ends_list
                        df_picks['Quality_check_final'] = QC
                        df_picks.to_csv("../../Data/DATA_tidied/Picks/{q}_picks.csv".format(q =new_key))
                        
                        print(df_picks)
                        print('QC completed')
    except FileNotFoundError:
        print('NO EVENTS')


Key: NS45_1.0mins
extracting data
['Slip_NS45_1.0mins', 'Time_NS45_1.0mins']
NS45_1T
Identifying events
beginning QC for NS45_1T


IndexError: index 0 is out of bounds for axis 0 with size 0

In [9]:
df_picks['ET_new'].iloc[p]

'2018-01-08 06:01:00'

In [10]:
plt.figure()
plt.plot(decoded_time,slip)
plt.show()

In [None]:
boolarr_selecta = np.logical_and(np.array(decoded_time)>=pd.to_datetime(df_picks['ST_new'].iloc[p])-dt.timedelta(hours=72)
                                                                ,np.array(decoded_time)<=pd.to_datetime(df_picks['ET_new'].iloc[p])+dt.timedelta(hours = 72))

In [None]:
boolar = decoded_time>=pd.to_datetime(df_picks['ST_new'].iloc[p])-dt.timedelta(hours=72)

In [None]:
plt.plot(decoded_time,slip)

[<matplotlib.lines.Line2D at 0x7f9153334070>]

In [None]:
pd.to_datetime(df_picks['ST_new'].iloc[p])

Timestamp('2010-04-04 22:45:00')

In [None]:
pd.to_datetime(df_picks['ET_new'].iloc[p])

Timestamp('2010-04-13 20:30:00')