In [2]:
import pandas_read_xml as pdx
import pandas as pd
import os
# import obspy UTCDateTime
from obspy.core import UTCDateTime

In [4]:
def get_station(x):
  return x['@stationCode']

def get_author(x):
    return x['author']

def get_channel(x):
    return x['@channelCode']

def get_loc(x):
    return x['@locationCode']

def open_dict_time(x):
    return x['value']

def load_df(xml_path, xml_type):
    '''Create a dict of pandas dataframes from xml seiscomp files

    Parameters
    ----------
    data_dir : str
        Path containing xml files
    xml_type : str
        Type of the xml, can be pick or event

    Returns
    -------
    dic
        Dictionary with pandas dataframes
    '''
    
    print(xml_path)
    df = pdx.read_xml(xml_path, ['seiscomp', 'EventParameters', xml_type])

    df['time'] = pd.to_datetime(df['time'].apply(open_dict_time))
    df['station'] = df['waveformID'].apply(get_station)
    df['author'] = df['creationInfo'].apply(get_author)
    df['channel'] = df['waveformID'].apply(get_channel)
    df['loc'] = df['waveformID'].apply(get_loc)

    return df

def load_and_merge(xml_dir, xml_type):
    df_list = []
    for file_ in os.listdir(xml_dir):
        if file_.endswith(".xml"):
            df = load_df(os.path.join(xml_dir, file_), xml_type)
            df_list.append(df)
    df = pd.concat(df_list)
    return df

def get_pick_times(xml_path):
    print(xml_path)
    df = pdx.read_xml(xml_path, ['seiscomp', 'EventParameters', 'pick'])
    print(df)
    df['time'] = pd.to_datetime(df['time'].apply(open_dict_time))
    
    t = df['time'].dt.tz_localize(None).astype('str').to_list()
    times = [UTCDateTime(i) for i in t]
    return times

In [5]:
get_pick_times('/home/sgc/my_repositories/sc3-autotuner/picks_xml/SGC2020tjcgip_picks.xml')

/home/sgc/my_repositories/sc3-autotuner/picks_xml/SGC2020tjcgip_picks.xml
                                                   0  \
0  {'@publicID': '20201001.013908.51-CM.BAR2.00.H...   

                                                   1  
0  {'@publicID': '20201001.013921.38-CM.BAR2.00.H...  


KeyError: 'time'

In [33]:
# extract the time values from the xml file who looks like this:
"""
<?xml version="1.0" encoding="UTF-8"?>
<seiscomp xmlns="http://geofon.gfz-potsdam.de/ns/seiscomp3-schema/0.11" version="0.11">
  <EventParameters>
    <pick publicID="20201001.013908.51-CM.BAR2.00.HHZ">
      <time>
        <value>2020-10-01T01:39:08.516548Z</value>
      </time>
    </pick>
  </EventParameters>
</seiscomp>
"""
import xml.etree.ElementTree as ET
tree = ET.parse('/home/sgc/my_repositories/sc3-autotuner/picks_xml/SGC2020tjcgip_picks.xml')
root = tree.getroot()
root.attrib


{'version': '0.11'}

In [35]:
import xml.etree.ElementTree as ET
ns = {'seiscomp': 'http://geofon.gfz-potsdam.de/ns/seiscomp3-schema/0.11'}
root = ET.parse('/home/sgc/my_repositories/sc3-autotuner/picks_xml/SGC2020tjcgip_picks.xml').getroot()
for pick in root.findall('seiscomp:EventParameters/seiscomp:pick', ns):
    print(pick.find('seiscomp:time/seiscomp:value', ns).text)

2020-10-01T01:39:08.516548Z
2020-10-01T01:39:21.386548Z


In [27]:
import xml.etree.ElementTree as ET
ns = {'seiscomp': 'http://geofon.gfz-potsdam.de/ns/seiscomp3-schema/0.11'}
root = ET.parse('/home/sgc/my_repositories/sc3-autotuner/picks_xml/SGC2020tjcgip_picks.xml').getroot()
picks = root.findall('seiscomp:EventParameters/seiscomp:pick', ns)
p = picks[0]
print(p.find('seiscomp:phaseHint', ns).text)

P


In [34]:
[elem.tag for elem in root.iter()]

['{http://geofon.gfz-potsdam.de/ns/seiscomp3-schema/0.11}seiscomp',
 '{http://geofon.gfz-potsdam.de/ns/seiscomp3-schema/0.11}EventParameters',
 '{http://geofon.gfz-potsdam.de/ns/seiscomp3-schema/0.11}pick',
 '{http://geofon.gfz-potsdam.de/ns/seiscomp3-schema/0.11}time',
 '{http://geofon.gfz-potsdam.de/ns/seiscomp3-schema/0.11}value',
 '{http://geofon.gfz-potsdam.de/ns/seiscomp3-schema/0.11}waveformID',
 '{http://geofon.gfz-potsdam.de/ns/seiscomp3-schema/0.11}filterID',
 '{http://geofon.gfz-potsdam.de/ns/seiscomp3-schema/0.11}methodID',
 '{http://geofon.gfz-potsdam.de/ns/seiscomp3-schema/0.11}phaseHint',
 '{http://geofon.gfz-potsdam.de/ns/seiscomp3-schema/0.11}evaluationMode',
 '{http://geofon.gfz-potsdam.de/ns/seiscomp3-schema/0.11}creationInfo',
 '{http://geofon.gfz-potsdam.de/ns/seiscomp3-schema/0.11}agencyID',
 '{http://geofon.gfz-potsdam.de/ns/seiscomp3-schema/0.11}author',
 '{http://geofon.gfz-potsdam.de/ns/seiscomp3-schema/0.11}creationTime',
 '{http://geofon.gfz-potsdam.de/ns/se

In [None]:
ns = {'seiscomp': 'http://geofon.gfz-potsdam.de/ns/seiscomp3-schema/0.11'}


In [None]:
df = load_and_merge('picks_xml', 'pick')

picks_xml/SGC2020tjbvjw_picks.xml
picks_xml/SGC2020tjcgip_picks.xml


In [None]:
df

Unnamed: 0,@publicID,time,waveformID,filterID,methodID,phaseHint,evaluationMode,creationInfo,station,author,channel,loc
0,20201001.012630.11-CM.BAR2.00.HHZ,2020-10-01 01:26:30.118393+00:00,"{'@networkCode': 'CM', '@stationCode': 'BAR2',...","RMHP(10)>>ITAPER(30)>>BW(4,2,8)>>STALTA(0.1,5.7)",Trigger,P,automatic,"{'agencyID': 'SGC', 'author': 'bdrsn', 'creati...",BAR2,bdrsn,HHZ,0
0,20201001.013908.65-CM.BAR2.00.HHZ,2020-10-01 01:39:08.658391+00:00,"{'@networkCode': 'CM', '@stationCode': 'BAR2',...","RMHP(10)>>ITAPER(30)>>BW(4,2,8)>>STALTA(0.1,5.7)",Trigger,P,automatic,"{'agencyID': 'SGC', 'author': 'bdrsn', 'creati...",BAR2,bdrsn,HHZ,0


In [None]:
t = df['time'].dt.tz_localize(None).astype('str').to_list()
print(t)

times = [UTCDateTime(i) for i in t]
print(times)

['2020-10-01 01:26:30.118393', '2020-10-01 01:39:08.658391']
[UTCDateTime(2020, 10, 1, 1, 26, 30, 118393), UTCDateTime(2020, 10, 1, 1, 39, 8, 658391)]


In [None]:
df = pdx.read_xml('SGC2020tjcgip_picks.xml', ['seiscomp', 'EventParameters', 'pick'])
df.head()

Unnamed: 0,@publicID,time,waveformID,filterID,methodID,phaseHint,evaluationMode,creationInfo
0,20201001.013908.65-CM.BAR2.00.HHZ,{'value': '2020-10-01T01:39:08.658391Z'},"{'@networkCode': 'CM', '@stationCode': 'BAR2',...","RMHP(10)>>ITAPER(30)>>BW(4,2,8)>>STALTA(0.1,5.7)",Trigger,P,automatic,"{'agencyID': 'SGC', 'author': 'bdrsn', 'creati..."


In [None]:
a = df[0]
a.to_list()

In [None]:
df = load_df('picks_xml/merged2.xml', 'pick')
df.info()

In [None]:
df

Unnamed: 0,@publicID,time,waveformID,filterID,methodID,phaseHint,evaluationMode,creationInfo,station,author,channel,loc
0,20201001.012630.11-CM.BAR2.00.HHZ,2020-10-01 01:26:30.118393+00:00,"{'@networkCode': 'CM', '@stationCode': 'BAR2',...","RMHP(10)>>ITAPER(30)>>BW(4,2,8)>>STALTA(0.1,5.7)",Trigger,P,automatic,"{'agencyID': 'SGC', 'author': 'bdrsn', 'creati...",BAR2,bdrsn,HHZ,0


In [None]:
df.loc[0,'waveformID']

OrderedDict([('@networkCode', 'CM'),
             ('@stationCode', 'BAR2'),
             ('@locationCode', '00'),
             ('@channelCode', 'HHZ')])

In [4]:
df = pd.read_csv('../results_P.csv')

In [5]:
df.head()

Unnamed: 0,net.sta,p_sta,p_sta_width,f_min,f_width,trig_on,best_loss
0,CM.BAR2,2.081039,6.069081,2,2,2.499587,0.725
1,CM.BAR2,0.596044,24.523313,10,4,3.618983,0.5975


In [18]:
# selecting the row with net.sta equal to CM.BAR2 and with the highest value of best_loss
df[df['net.sta'] == 'CM.BAR2'].sort_values(by='best_loss', ascending=False).iloc[0].to_dict()

{'net.sta': 'CM.BAR2',
 'p_sta': 2.081039399182276,
 'p_sta_width': 6.069080800141486,
 'f_min': 2,
 'f_width': 2,
 'trig_on': 2.499586813154526,
 'best_loss': 0.7250000000000001}

In [19]:
d = {'a':1, 'b':2}
for k in d:
    print(k)

a
b


In [21]:
not 'a'

False