In [1]:
from xml.etree import cElementTree as ElementTree
from urllib.request import urlopen

In [2]:
#
# Some generic utilities I use to parse the xml
#
#---------------------------------------------------------------------------------
# function to search an xml item for the value specified by the key
#   returns the value if the item is not found, the string 'None' is returned
#   if the value is not found.
#---------------------------------------------------------------------------------
def get_xitem_as_text(item,key):
    anItem = item.find(key,ns)
    if(anItem != None):
        return anItem.text
    else:
        return 'None'
#
#---------------------------------------------------------------------------------
#  same type of function as above, but this one also checks that the item
#     has a value provided.
#---------------------------------------------------------------------------------
def get_xitem_value_as_text(item,key,valuekey):
    anItem = item.find(key,ns)
    if(anItem == None):
        return 'None'
    else:
        value = anItem.find(valuekey,ns)
        if(value != None):
            return value.text
        else:
            return 'None'
#
#---------------------------------------------------------------------------------
def search_pdicts(key, value, list_of_dictionaries):
    return [element for element in list_of_dictionaries if element[key] == value]
#

Functions used to parse 

In [3]:
#
# To make outputting information simple, I insure that certain values are in each dictionary,  
#   whether they are defined in the xml or not. These dictionaries set up default values,
#   but as the xml is parsed, defined key value pairs are updated.
#
defaultPick = {'stationCode':'--','networkCode':'--','phase':'NA','time':'NA'}
#
defaultArrival = {'genericAmplitude':'NA','type':'NA','unit':'NA',
                  'period':'NA', 'evaluationMode':'NA','timeResidual':'NA',
                  'timeWeight':'NA'}
#
defaultAmplitude = {'pickID':'NA','genericAmplitude':'NA','period':'NA',
                  'unit':'NA'}  

#---------------------------------------------------------------------------------
# PICKS
def parse_picks(xev):
    xpicks = xev.findall('d:pick',ns)
    picks = []
    for pick in xpicks:
        pdict = defaultPick.copy()
        pdict.update(pick.attrib.copy())
        
        value = get_xitem_value_as_text(pick,'d:time','d:value')
        if(value!='None' or 'NA'):
            pdict.update({"time" :value})

        value = get_xitem_as_text(pick,'d:phaseHint')
        if(value!='None' or 'NA'):
            pdict.update({"phase" :value})
        pdict.update(pick.find('d:waveformID',ns).attrib)
        picks.append(pdict)
    return picks
#
#---------------------------------------------------------------------------------

# AMPLITUDES
def parse_amplitudes(xevent):
    xamplitudes = xevent.findall('d:amplitude',ns)
    amplitudes = []
    for xamp in xamplitudes:
        adict = xamp.attrib.copy()
        adict.update(defaultAmplitude)

        value = xamp.find('d:waveformID',ns)
        if(value != None):
            adict.update(value.attrib)
        
        value = get_xitem_value_as_text(xamp,'d:genericAmplitude','d:value')
        if(value!='None' or 'NA'):
            adict.update({"genericAmplitude" :value})

        value = get_xitem_as_text(xamp,'d:unit')
        if(value!='None' or 'NA'):
            adict.update({"unit" :value})
          
        amplitudes.append(adict)

    return amplitudes

#---------------------------------------------------------------------------------

# TYPE OF EVENT
def parse_types(xev):
    types = []
    xtypes = xev.findall('d:type',ns)  
    for type in xtypes:
        types.append(type.text)   
    return types

#---------------------------------------------------------------------------------

# DESCRIPTION OF EVENT
def parse_descriptions(xev):
    descriptions = []
    xdescriptions = xev.findall('d:description', ns)
    for description in xdescriptions:
        value = get_xitem_as_text(description,'d:text')      
        descriptions.append(value)
        
        
    return descriptions

In [4]:
# Namespaces
ns = {'q' : 'http://quakeml.org/xmlns/quakeml/1.2',
     'd': 'http://quakeml.org/xmlns/bed/1.2',
     'my_ns' : 'http://test.org/xmlns/0.1',
     'ns0' : 'http://some-page.de/xmlns/0.1',
     'ns1' : 'http://some-page.de/xmlns/1.0'}

def parse_usgs_xml(filepath):
    xtree = ElementTree.parse(filepath)
    xroot = xtree.getroot()
    #
    xeventParameters = xroot.findall('d:eventParameters',ns)
    #
    for ep in xeventParameters:
        xevents = ep.findall('d:event',ns)
        print(f'Found {(len(xevents))} events.' ) 
    #    
    events = []
    #
    i = 0
    for xev in xevents:
        # build an event dictionary 
        ev = {}
        ev['publicID'] = xev.attrib['publicID']
        #
        picks = parse_picks(xev)
        amplitudes = parse_amplitudes(xev)
        types = parse_types(xev)
        descriptions = parse_descriptions(xev)
        #   
        events.append({'eventInfo':ev, 'eventType':types, 'eventDescription': descriptions, 
                       'picks':picks,'amplitudes':amplitudes})
        #
        i += 1
        #
        print (f'parsed {i} events.')
        #
    return events

In [None]:
# Filepath to parse
events = parse_usgs_xml("LunarCatalog_Nakamura_1981_and_updates_v1.xml")

In [6]:
events

[{'eventInfo': {'publicID': 'smi:nakamura81/event/00001'},
  'eventType': ['earthquake'],
  'eventDescription': ['unclassified deep moonquake'],
  'picks': [{'stationCode': 'S11',
    'networkCode': 'XA',
    'phase': 'P',
    'time': '1969-07-27T23:48:00.000000Z',
    'publicID': 'smi:nakamura81/pick/S11/00001',
    'channelCode': '',
    'locationCode': ''}],
  'amplitudes': [{'publicID': 'smi:nakamura81/amplitude/S11/00001',
    'pickID': 'NA',
    'genericAmplitude': '3.1',
    'period': 'NA',
    'unit': 'other'}]},
 {'eventInfo': {'publicID': 'smi:nakamura81/event/00002'},
  'eventType': ['meteorite'],
  'eventDescription': ['meteoroid'],
  'picks': [{'stationCode': 'S11',
    'networkCode': 'XA',
    'phase': 'P',
    'time': '1969-07-28T11:46:00.000000Z',
    'publicID': 'smi:nakamura81/pick/S11/00002',
    'channelCode': '',
    'locationCode': ''}],
  'amplitudes': [{'publicID': 'smi:nakamura81/amplitude/S11/00002',
    'pickID': 'NA',
    'genericAmplitude': '1.8',
    'peri

In [7]:
import pandas as pd
df = pd.DataFrame(events)
df

Unnamed: 0,eventInfo,eventType,eventDescription,picks,amplitudes
0,{'publicID': 'smi:nakamura81/event/00001'},[earthquake],[unclassified deep moonquake],"[{'stationCode': 'S11', 'networkCode': 'XA', '...",[{'publicID': 'smi:nakamura81/amplitude/S11/00...
1,{'publicID': 'smi:nakamura81/event/00002'},[meteorite],[meteoroid],"[{'stationCode': 'S11', 'networkCode': 'XA', '...",[{'publicID': 'smi:nakamura81/amplitude/S11/00...
2,{'publicID': 'smi:nakamura81/event/00003'},[earthquake],[deep moonquake],"[{'stationCode': 'S11', 'networkCode': 'XA', '...",[{'publicID': 'smi:nakamura81/amplitude/S11/00...
3,{'publicID': 'smi:nakamura81/event/00004'},[meteorite],[meteoroid],"[{'stationCode': 'S11', 'networkCode': 'XA', '...",[{'publicID': 'smi:nakamura81/amplitude/S11/00...
4,{'publicID': 'smi:nakamura81/event/00005'},[meteorite],[meteoroid],"[{'stationCode': 'S11', 'networkCode': 'XA', '...",[{'publicID': 'smi:nakamura81/amplitude/S11/00...
...,...,...,...,...,...
13053,{'publicID': 'smi:nakamura81/event/13054'},[meteorite],[meteoroid],"[{'stationCode': 'S12', 'networkCode': 'XA', '...",[{'publicID': 'smi:nakamura81/amplitude/S12/13...
13054,{'publicID': 'smi:nakamura81/event/13055'},[meteorite],[meteoroid],"[{'stationCode': 'S12', 'networkCode': 'XA', '...",[{'publicID': 'smi:nakamura81/amplitude/S12/13...
13055,{'publicID': 'smi:nakamura81/event/13056'},[meteorite],[meteoroid],"[{'stationCode': 'S12', 'networkCode': 'XA', '...",[{'publicID': 'smi:nakamura81/amplitude/S12/13...
13056,{'publicID': 'smi:nakamura81/event/13057'},[not reported],[unclassified],"[{'stationCode': 'S14', 'networkCode': 'XA', '...",[{'publicID': 'smi:nakamura81/amplitude/S14/13...


In [10]:
for i in range(len(df['eventType'])):
    df['eventType'][i] = ' '.join(df['eventType'][i])
    df['eventDescription'][i] = ' '.join(df['eventDescription'][i])

In [11]:
# Save to CSV but does not retain lists in the columns
df.to_csv('events.csv')

In [12]:
# Pickle retains the lists properly
df.to_pickle('events.pkl')

In [22]:
df['picks'].to_excel("output.xlsx") 

In [76]:
# See which events have 0, 1, 2, 3 or 4 stations associated with them
none=0
one=0
two=0
three=0
four=0
for x in df['picks']:
    if len(x) == 0:
        none+=1
    
    if len(x) == 1:
        one+=1
        
    if len(x) == 2:
        two+=1
        
    if len(x) == 3:
        three+=1
        
    if len(x) == 4:
        four+=1

print(f'None:{none}, Unique:{one}, Two times:{two}, Three times:{three}, Four times:{four}')
print('Check that sum is 13085: ')
print(none+one+two+three+four)

None:14, Unique:5717, Two times:3746, Three times:2157, Four times:1424
Check that sum is 13085: 
13058


In [77]:
# See which events have 0,1,2,3 or 4 amplitudes associated with them
anone=0
aone=0
atwo=0
athree=0
afour=0
for x in df['amplitudes']:
    
    if len(x) == 0:
        anone+=1
    
    if len(x) == 1:
        aone+=1
        
    if len(x) == 2:
        atwo+=1
        
    if len(x) == 3:
        athree+=1
        
    if len(x) == 4:
        afour+=1
print(f'None:{anone}, Unique:{aone}, Two times:{atwo}, Three times:{athree}, Four times:{afour}')
print('Check that sum is 13085: ')
print(anone+aone+atwo+athree+afour)

None:14, Unique:5806, Two times:3760, Three times:2232, Four times:1246
Check that sum is 13085: 
13058


In [178]:
df.to_excel('events.xlsx')