In [1]:
import pandas as pd
import numpy as np
from datetime import datetime
from matplotlib import pyplot as plt
%matplotlib inline
plt.rcParams['figure.figsize'] = (14, 8)

In [2]:
from IPython import display
display.set_matplotlib_formats('retina')

###### Columns: 
###### 1-14   Source Time (GMT)  YYYYDDDHHMMSSS (year, day of year, hour, minute, second, tenths of a second) 
###### 16-17   Number of recording hydrophones 
###### 18-32   IDs of recording hydrophones (in order received) 
###### 34-41   Epicenter Latitude  (Degrees, N=+, S=-) 
###### 42-50   Epicenter Longitude (Degrees, E=+, W=-) 
###### 51-57   Estimated error in latitude (degrees) 
###### 58-64   Estimated error in longitude (degrees) 
###### 65-71   Estimated error in source time (seconds) 
###### 72-79   Source Magnitude (dB re 1 micro-Pa @ 1m) 

In [3]:
# Labels given to the columns of the comma-separated catalogue when it is
# loaded with pandas, listed in file order.
col_names = [
    'source_time',  # event time stamp, kept as text until it is parsed
    'h_count',      # number of recording hydrophones
    'h_id',         # IDs of the recording hydrophones
    'ep_lat',       # epicenter latitude
    'ep_lon',       # epicenter longitude
    'er_lat',       # estimated error in latitude
    'er_lon',       # estimated error in longitude
    'er_time',      # estimated error in source time
    'mdb',          # source magnitude
    'mdb2',         # second magnitude value (not in the column listing above)
    'event',        # short event label, e.g. 'IQ', 'EQ', 'IDK'
    'notes',        # free-text remarks
]

In [None]:
#!ls /home/jovyan/data/bravoseis_data/noaa_hydroacoustic/seasick_output

In [None]:
#!head /home/jovyan/data/bravoseis_data/noaa_hydroacoustic/seasick_output/data_2019090.txt

In [None]:
# Concatenate every file in seasick_output whose name starts with "data" into
# newDays.txt. The redirect overwrites newDays.txt on each run.
!cat /home/jovyan/data/bravoseis_data/noaa_hydroacoustic/seasick_output/data* > /home/jovyan/data/bravoseis_data/noaa_hydroacoustic/seasick_output/newDays.txt

In [None]:
# Show the last lines of the concatenated file as a quick sanity check.
!tail /home/jovyan/data/bravoseis_data/noaa_hydroacoustic/seasick_output/newDays.txt

In [None]:
!sed 's/,/ /g' /home/jovyan/data/bravoseis_data/noaa_hydroacoustic/seasick_output/newDays.txt  > /home/jovyan/data/bravoseis_data/noaa_hydroacoustic/seasick_output/df_allDays.txt
!sed -i -e "s/ /,/g" /home/jovyan/data/bravoseis_data/noaa_hydroacoustic/seasick_output/df_allDays.txt
!sed -i -e "s/,,,,,,,,,,,/,/g" /home/jovyan/data/bravoseis_data/noaa_hydroacoustic/seasick_output/df_allDays.txt
#!sed -i -e "s/,,,,,,,,,,/,/g" /home/jovyan/data/bravoseis_data/noaa_hydroacoustic/seasick_output/df_allDays.txt
!sed -i -e "s/,,,/,/g" /home/jovyan/data/bravoseis_data/noaa_hydroacoustic/seasick_output/df_allDays.txt
!sed -i -e "s/,,/,/g" /home/jovyan/data/bravoseis_data/noaa_hydroacoustic/seasick_output/df_allDays.txt
!sed -i -e 's/,/ /12g' /home/jovyan/data/bravoseis_data/noaa_hydroacoustic/seasick_output/df_allDays.txt
#!sed -i -e 's/Q/Q,/g' /home/jovyan/data/bravoseis_data/noaa_hydroacoustic/seasick_output/df_allDays.txt
#!sed -i -e 's/K/K,/g' /home/jovyan/data/bravoseis_data/noaa_hydroacoustic/seasick_output/df_allDays.txt
#!sed -i -e 's/-/ /g' /home/jovyan/data/bravoseis_data/noaa_hydroacoustic/seasick_output/df_allDays.txt
#!sed -i -e 's/_//g' /home/jovyan/data/bravoseis_data/noaa_hydroacoustic/seasick_output/df_allDays.txt
#!sed -i -e "s/, /,/g" /home/jovyan/data/bravoseis_data/noaa_hydroacoustic/seasick_output/df_allDays.txt
#!sed -i -e "s/,,/,/g" /home/jovyan/data/bravoseis_data/noaa_hydroacoustic/seasick_output/df_allDays.txt
#!sed -i -e "s/,  /, no notes/g" /home/jovyan/data/bravoseis_data/noaa_hydroacoustic/seasick_output/df_allDays.txt
#!sed -i 's/^\(.\{4\}\)/\1,/' /home/jovyan/data/bravoseis_data/noaa_hydroacoustic/seasick_output/df_allDays.txt
#!sed -i 's/^\(.\{8\}\)/\1,/' /home/jovyan/data/bravoseis_data/noaa_hydroacoustic/seasick_output/dax_2020012.txt
#!sed -i 's/^\(.\{11\}\)/\1,/' /home/jovyan/data/bravoseis_data/noaa_hydroacoustic/seasick_output/dax_2020012.txt
!head /home/jovyan/data/bravoseis_data/noaa_hydroacoustic/seasick_output/df_allDays.txt

In [None]:
# Show the last lines of the converted, comma-separated file.
!tail /home/jovyan/data/bravoseis_data/noaa_hydroacoustic/seasick_output/df_allDays.txt

In [None]:
!cat /home/jovyan/data/bravoseis_data/noaa_hydroacoustic/seasick_output/df_allDays* > /home/jovyan/data/bravoseis_data/noaa_hydroacoustic/seasick_output/df_allDaysCat.txt

In [4]:
path = '/home/jovyan/data/bravoseis_data/noaa_hydroacoustic/seasick_output/df_allDaysCat.txt' # use your path

# Load the comma-separated catalogue, labelling the columns with col_names.
#
# * The dtype keys must be spelled exactly like the entries of col_names;
#   a key that matches no column has no effect on the parsed types.
# * The builtin str replaces np.str, which newer NumPy releases no longer
#   provide.
# * mdb2 is left to type inference: the describe() summary of this frame omits
#   it, so it does not load as a purely numeric column and forcing float64
#   would make the read fail.
# * NOTE(review): header=0 tells pandas to treat the first line of the file as
#   a header and replace it with col_names. If the file has no header line the
#   first event is discarded -- confirm against the file.
eqs = pd.read_csv(path, sep=',', header=0, names=col_names,
                  dtype={'source_time': str, 'h_count': np.float64,
                         'ep_lat': np.float64, 'ep_lon': np.float64,
                         'er_lat': np.float64, 'er_lon': np.float64,
                         'er_time': np.float64, 'mdb': np.float64,
                         'notes': str})

# Keep only the first row for each source_time; the original row labels are kept.
eqs = eqs.drop_duplicates(subset='source_time', keep='first')
eqs.head(10)

Unnamed: 0,source_time,h_count,h_id,ep_lat,ep_lon,er_lat,er_lon,er_time,mdb,mdb2,event,notes
0,20192070244133,3.0,654.0,-62.344,-57.047,0.062,0.212,6.484,187.82,188.1,IQ,in network NE corner
1,20192070306482,4.0,2314.0,-63.124,-58.867,0.012,0.011,0.781,195.79,196.1,IQ,out of network mid basin
2,20192070322144,3.0,324.0,-62.786,-58.856,0.02,0.034,1.039,190.89,191.1,IQ,in network middle
3,20192070327068,3.0,654.0,-62.45,-56.766,0.062,0.283,9.892,190.51,190.7,IQ,out of network
4,20192070334099,4.0,4325.0,-63.001,-58.121,0.015,0.022,1.073,191.83,192.0,IQ,out of network mid basin
5,20192070338509,3.0,123.0,-62.919,-60.567,0.19,1.056,38.614,190.1,190.3,IQ,in network SW corner
6,20192070353466,4.0,2134.0,-62.866,-59.53,0.1,0.079,2.519,190.92,191.2,IQ,in network SE network
7,20192070415502,4.0,4563.0,-63.218,-57.205,1.144,1.109,90.671,202.84,203.1,IQ,LT
8,20192070419346,3.0,123.0,-62.443,-60.253,0.056,0.11,4.894,188.25,188.5,IDK,LF signals M1
9,20192070524482,3.0,432.0,-62.811,-58.07,0.048,0.088,3.596,192.28,192.2,IQ,in network middle


In [5]:
# Parse source_time (year, day of year, HHMMSS and a trailing fraction digit)
# into timestamps, make them the index, and drop the raw text column.
# verify_integrity makes set_index raise if two rows share a timestamp.
eqs = (
    eqs.assign(time=lambda frame: pd.to_datetime(frame['source_time'],
                                                 format='%Y%j%H%M%S%f'))
       .set_index('time', verify_integrity=True)
       .drop(columns='source_time')
)
eqs.head()

Unnamed: 0_level_0,h_count,h_id,ep_lat,ep_lon,er_lat,er_lon,er_time,mdb,mdb2,event,notes
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2019-07-26 02:44:13.300,3.0,654.0,-62.344,-57.047,0.062,0.212,6.484,187.82,188.1,IQ,in network NE corner
2019-07-26 03:06:48.200,4.0,2314.0,-63.124,-58.867,0.012,0.011,0.781,195.79,196.1,IQ,out of network mid basin
2019-07-26 03:22:14.400,3.0,324.0,-62.786,-58.856,0.02,0.034,1.039,190.89,191.1,IQ,in network middle
2019-07-26 03:27:06.800,3.0,654.0,-62.45,-56.766,0.062,0.283,9.892,190.51,190.7,IQ,out of network
2019-07-26 03:34:09.900,4.0,4325.0,-63.001,-58.121,0.015,0.022,1.073,191.83,192.0,IQ,out of network mid basin


In [6]:
# Summary statistics of the numeric columns of eqs.
# NOTE(review): in the recorded output h_count, h_id and ep_lat all reach
# 654321.0, which looks like a hydrophone-ID value sitting in the wrong column --
# presumably rows that were mis-split during the text conversion; confirm
# before relying on these statistics.
eqs.describe()

Unnamed: 0,h_count,h_id,ep_lat,ep_lon,er_lat,er_lon,er_time,mdb
count,5164.0,5059.0,5164.0,5164.0,5164.0,5164.0,5164.0,5164.0
mean,131.420023,141540.419252,7660.242725,46.716655,-0.974998,0.40885,10.465888,191.629577
std,9105.307513,204637.032661,58548.961999,34.826621,8.361849,2.101616,20.128944,27.328312
min,3.0,61.994,-68.975,-77.488,-63.568,0.0,0.0,0.505
25%,4.0,2134.0,62.258,57.109,0.025,0.03,1.238,189.92
50%,5.0,23456.0,62.6345,58.064,0.053,0.052,3.007,194.86
75%,6.0,213456.0,62.96,59.01525,0.143,0.181,8.94025,199.7625
max,654321.0,654321.0,654321.0,79.366,68.835,61.273,207.31,230.46


In [8]:
# Root directory of the data set; the catalogue path below is built from it.
data_dir = '/home/jovyan/data/bravoseis_data'

In [9]:
# Load the saved event catalogue with the 'time' column as the index.
# parse_dates=True makes pandas parse that index into timestamps; without it
# the index is plain text and cannot be aligned with a datetime-indexed frame
# or sliced by date.
# NOTE(review): eqs.csv is written by a cell further down this notebook, and
# the recorded output of this cell has day/month/year columns that the eqs
# frame built above does not show -- confirm which version of the file is
# expected here.
df_allEvents = pd.read_csv(data_dir + '/noaa_hydroacoustic/seasick_output/eqs.csv',
                           sep=',', index_col='time', parse_dates=True)

# Reverse the sign of both epicenter coordinates on every row.
# NOTE(review): the recorded output shows the first events going from -62.344
# to 62.344 while the quartiles become negative, so the file appears to mix
# sign conventions -- confirm that flipping every row is intended.
df_allEvents['ep_lat'] = -df_allEvents['ep_lat']
df_allEvents['ep_lon'] = -df_allEvents['ep_lon']
df_allEvents.head()

Unnamed: 0_level_0,h_count,h_id,ep_lat,ep_lon,er_lat,er_lon,er_time,mdb,mdb2,event,notes,day,month,year
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2019-07-26 02:44:13.300,3.0,654.0,62.344,57.047,0.062,0.212,6.484,187.82,188.1,IQ,in network NE corner,26,7,2019
2019-07-26 03:06:48.200,4.0,2314.0,63.124,58.867,0.012,0.011,0.781,195.79,196.1,IQ,out of network mid basin,26,7,2019
2019-07-26 03:22:14.400,3.0,324.0,62.786,58.856,0.02,0.034,1.039,190.89,191.1,IQ,in network middle,26,7,2019
2019-07-26 03:27:06.800,3.0,654.0,62.45,56.766,0.062,0.283,9.892,190.51,190.7,IQ,out of network,26,7,2019
2019-07-26 03:34:09.900,4.0,4325.0,63.001,58.121,0.015,0.022,1.073,191.83,192.0,IQ,out of network mid basin,26,7,2019


In [10]:
# Summary statistics of the numeric columns after the coordinate sign flip.
# NOTE(review): the recorded output still contains +/-654321.0 in h_count, h_id
# and ep_lat, so the suspect rows are present in this frame as well.
df_allEvents.describe()

Unnamed: 0,h_count,h_id,ep_lat,ep_lon,er_lat,er_lon,er_time,mdb,day,month,year
count,5164.0,5059.0,5164.0,5164.0,5164.0,5164.0,5164.0,5164.0,5164.0,5164.0,5164.0
mean,131.420023,141540.419252,-7660.242725,-46.716655,-0.974998,0.40885,10.465888,191.629577,15.942874,6.821069,2019.10244
std,9105.307513,204637.032661,58548.961999,34.826621,8.361849,2.101616,20.128944,27.328312,9.703648,2.638261,0.303255
min,3.0,61.994,-654321.0,-79.366,-63.568,0.0,0.0,0.505,1.0,1.0,2019.0
25%,4.0,2134.0,-62.96,-59.01525,0.025,0.03,1.238,189.92,7.0,6.0,2019.0
50%,5.0,23456.0,-62.6345,-58.064,0.053,0.052,3.007,194.86,15.0,7.0,2019.0
75%,6.0,213456.0,-62.258,-57.109,0.143,0.181,8.94025,199.7625,25.0,8.0,2019.0
max,654321.0,654321.0,68.975,77.488,68.835,61.273,207.31,230.46,31.0,12.0,2020.0


In [None]:
# eqs['time'].iloc[0].to_julian_date()

# jul_days = []
# for row in eqs.iterrows():
#     jul_days.append(row[1]['time'].to_julian_date())

# eqs['julian_date'] =  jul_days

In [13]:
# Rows of df_allEvents labelled 'EQ'; .copy() yields an independent frame.
df_eqs1 = df_allEvents.loc[df_allEvents['event'] == 'EQ'].copy()

# Strict limits of the latitude/longitude box (values on an edge are excluded).
cond1 = df_eqs1["ep_lat"].lt(-62)
cond2 = df_eqs1["ep_lat"].gt(-63.5)
cond3 = df_eqs1["ep_lon"].gt(-61)
cond4 = df_eqs1["ep_lon"].lt(-56.5)

# Keep only the rows that satisfy all four limits.
df_eqs1 = df_eqs1.loc[cond1 & cond2 & cond3 & cond4].copy()
df_eqs1.describe()

Unnamed: 0,h_count,h_id,ep_lat,ep_lon,er_lat,er_lon,er_time,mdb,day,month,year
count,306.0,306.0,306.0,306.0,306.0,306.0,306.0,306.0,306.0,306.0,306.0
mean,4.72549,190190.464052,-62.490363,-58.050382,0.098281,0.155709,7.182255,199.180229,14.189542,5.382353,2019.323529
std,1.213235,250761.489771,0.331559,1.033208,0.138406,0.278647,11.329078,7.404021,9.210295,3.446198,0.468589
min,3.0,123.0,-63.451,-60.945,0.0,0.0,0.015,178.91,1.0,1.0,2019.0
25%,4.0,2134.0,-62.7105,-58.732,0.025,0.032,1.121,194.67,5.0,1.0,2019.0
50%,5.0,45632.0,-62.381,-57.758,0.0475,0.055,2.846,198.915,12.0,6.0,2019.0
75%,6.0,344541.0,-62.22625,-57.1315,0.128,0.163,8.558,203.88,22.0,8.0,2020.0
max,6.0,654321.0,-62.027,-56.607,1.426,3.233,99.999,225.31,31.0,12.0,2020.0


In [14]:
# Rows of eqs labelled 'EQ'. The mask is built from eqs itself: a boolean
# Series taken from another frame has to be aligned on the index first, which
# fails or selects the wrong rows when the two indexes differ.
# .copy() yields an independent frame.
df_eqs2 = eqs.loc[eqs['event'] == 'EQ'].copy()

# Same strict latitude/longitude box as used for df_eqs1.
# No sign flip is applied to eqs in this notebook, so the box only matches
# rows stored with negative latitude and longitude.
cond1 = df_eqs2["ep_lat"] < -62
cond2 = df_eqs2["ep_lat"] > -63.5
cond3 = df_eqs2["ep_lon"] > -61
cond4 = df_eqs2["ep_lon"] < -56.5

df_eqs2 = df_eqs2[cond1 & cond2 & cond3 & cond4].copy()
df_eqs2.describe()

Unnamed: 0,h_count,h_id,ep_lat,ep_lon,er_lat,er_lon,er_time,mdb
count,8.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0
mean,4.25,25006.125,-62.409125,-57.50075,0.19425,0.654125,23.885375,203.09
std,0.707107,28278.811313,0.326553,1.167403,0.144381,0.666874,22.866354,4.851536
min,3.0,123.0,-62.973,-60.023,0.022,0.053,1.321,198.81
25%,4.0,6421.0,-62.45875,-57.6845,0.09275,0.24225,9.77125,199.49
50%,4.0,6482.0,-62.298,-57.0555,0.154,0.391,13.1135,200.635
75%,5.0,48800.0,-62.253,-56.78475,0.28825,0.94125,35.436,206.7725
max,5.0,65432.0,-62.035,-56.575,0.408,2.028,68.606,211.91


In [None]:
# Save the eqs frame as a pickle in the seasick_output directory.
# Note that this rebinds the notebook-level name `path`.
path = '/home/jovyan/data/bravoseis_data/noaa_hydroacoustic/seasick_output/eqs.pkl'
eqs.to_pickle(path)

In [None]:
path = '/home/jovyan/data/bravoseis_data/noaa_hydroacoustic/seasick_output/eqs.csv'

# Write eqs, including its index, as comma-separated text with a header row.
# Only the options that matter are spelled out; everything else stays at the
# pandas default. In particular the line_terminator keyword is not passed,
# because pandas renamed it to lineterminator and newer releases reject the
# old spelling.
eqs.to_csv(path, sep=',', header=True, index=True)

In [None]:
# List the output directory to confirm which files are present.
!ls /home/jovyan/data/bravoseis_data/noaa_hydroacoustic/seasick_output/