# Example Analysis

In [26]:
import xarray as xr
import pandas as pd
import dask.dataframe as dd
import hvplot.pandas  # noqa
import hvplot.dask  # noqa
import numpy as np
from scipy import stats

In [27]:
import warnings
warnings.filterwarnings('ignore')

# Build Dask Cluster
1. Use gui interface to create a new cluster with ~10 workers
2. Use < > to insert an "import Client" statement. This is critical because it is how your script knows to use yoru cluster. 
3. Execute client cell
4. Execute your Dask Cell. Note that once you have a cluster running you do not need to re-import the client. 
5. When finished, always shut down your cluster. 

## Read in RS03ECAL-MJ03E-06-BOTPTA302 Data

In [29]:
#!head /home/jovyan/data/botpt/RS03ECAL-MJ03E-06-BOTPTA302/deployment0001_RS03ECAL-MJ03E-06-BOTPTA302-streamed-botpt_nano_sample_20140904T000000-20141128T060000.100000.nc\#fillmisma_resampled.nc

In [30]:
# ds =xr.open_dataset('/home/jovyan/data/botpt/RS03ECAL-MJ03E-06-BOTPTA302/deployment0001_RS03ECAL-MJ03E-06-BOTPTA302-streamed-botpt_nano_sample_20191012T060000-20191014T235959.950000_resampled.nc')
# ds

In [31]:
ds1 = xr.open_mfdataset('/home/jovyan/data/botpt/RS03ECAL-MJ03E-06-BOTPTA302/*_resampled.nc', parallel=True).chunk(10080)
ds1['bottom_pressure_eastern'] = ds1['bottom_pressure']
del ds1['bottom_pressure']
ds1

<xarray.Dataset>
Dimensions:                  (index: 2368081)
Coordinates:
  * index                    (index) datetime64[ns] 2015-02-09T05:59:00 ... 2019-10-14T23:59:00
Data variables:
    bottom_pressure_eastern  (index) float32 dask.array<chunksize=(10080,), meta=np.ndarray>

## Read in Central Caldera

In [32]:
# ds =xr.open_dataset('/home/jovyan/data/botpt/RS03CCAL-MJ03F-05-BOTPTA301/deployment0001_RS03CCAL-MJ03F-05-BOTPTA301-streamed-botpt_nano_sample_20140904T000000-20141128T115959.950000_resampled.nc')
# ds

In [33]:
ds2 = xr.open_mfdataset('/home/jovyan/data/botpt/RS03CCAL-MJ03F-05-BOTPTA301/*_resampled.nc', parallel=True).chunk(10080)
ds2['bottom_pressure_central'] = ds2['bottom_pressure']
del ds2['bottom_pressure']
ds2

<xarray.Dataset>
Dimensions:                  (index: 2606374)
Coordinates:
  * index                    (index) datetime64[ns] 2014-09-04 ... 2019-10-14T23:59:00
Data variables:
    bottom_pressure_central  (index) float32 dask.array<chunksize=(10080,), meta=np.ndarray>

## Create DataFrame

In [34]:
#df = ds1.to_dask_dataframe()
df1 = ds1.to_dataframe()
df2= ds2.to_dataframe()
del ds1
del ds2

In [35]:
df1['Datetime']=np.datetime_as_string(df1.index)
df2['Datetime']=np.datetime_as_string(df2.index)

In [36]:
df1['Datetime']=df1['Datetime'].str[:19]
df2['Datetime']=df2['Datetime'].str[:19]

In [37]:
df_botpt=pd.merge(df1,df2, on = 'Datetime')
del df1
del df2
df_botpt.tail()

Unnamed: 0,bottom_pressure_eastern,Datetime,bottom_pressure_central
2362766,2240.960938,2019-10-14T23:55:00,2254.007324
2362767,2240.949463,2019-10-14T23:56:00,2253.995605
2362768,2240.938232,2019-10-14T23:57:00,2253.984375
2362769,2240.925781,2019-10-14T23:58:00,2253.970215
2362770,2240.910156,2019-10-14T23:59:00,2253.955811


## trimming data frame

In [38]:
df_botpt= df_botpt.set_index(pd.to_datetime(df_botpt['Datetime']))
del df_botpt['Datetime']
df_botpt['depth_central']=df_botpt['bottom_pressure_central'].astype(float) * 0.670
df_botpt['depth_eastern']=df_botpt['bottom_pressure_eastern'].astype(float) * 0.670
df_botpt['depthDiff']= df_botpt['depth_eastern'] - df_botpt['depth_central']
df_botpt =df_botpt.loc['2015-05-01 00:00:00':'2019-10-14 00:00:00']
df_botpt['spikes']=(df_botpt['depthDiff'] - df_botpt['depthDiff'].rolling(1000).median()).abs() < 0.1
df_botpt['cleanDiff']=df_botpt['depthDiff'].where(df_botpt.spikes)
#df_botpt['bottom_pressure_eastern']= df_botpt.bottom_pressure_eastern.round(3)
#df_botpt['bottom_pressure_central']= df_botpt.bottom_pressure_central.round(3)
#df_botpt.sample(100)
df_botpt.head()

Unnamed: 0_level_0,bottom_pressure_eastern,bottom_pressure_central,depth_central,depth_eastern,depthDiff,spikes,cleanDiff
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2015-05-01 00:00:00,2241.314697,2256.084717,1511.57676,1501.680847,-9.895913,False,
2015-05-01 00:01:00,2241.314941,2256.085938,1511.577578,1501.681011,-9.896567,False,
2015-05-01 00:02:00,2241.314209,2256.085938,1511.577578,1501.68052,-9.897058,False,
2015-05-01 00:03:00,2241.310303,2256.081543,1511.574634,1501.677903,-9.896731,False,
2015-05-01 00:04:00,2241.307129,2256.079834,1511.573489,1501.675776,-9.897712,False,


## Read in downsampled tide predictions from Eastern Caldera

In [39]:
# file = '/home/jovyan/data/botpt/tidepredictions/tidesEasternCaldera.csv'
# df = pd.read_csv(file)
# df.columns = ['Datetime','bottom_pressure']
# df= df.set_index(pd.to_datetime(df['Datetime']))
# del df['Datetime']
# df.head()

In [40]:
# type(df['EastTide'])

In [41]:
# df_botpt=pd.merge(df_botpt,df, on = 'Datetime')
# df_botpt['ECal_tideReduced']= df_botpt['bottom_pressure_eastern'] - df_botpt['EastTide']
# df_botpt.head()

## Create Dask Data frame

## Plot RS03ECAL-MJ03E-06-BOTPTA302 using Dask

In [42]:
df_botpt.hvplot( y='depth_eastern', 
                datashade =True,
                height=300,
                flip_yaxis=True,
               title= 'BOTPT Eastern Caldera',
               ylabel= 'Depth (m)',
               xlabel = 'Time (s)' )

In [43]:
df_botpt.hvplot( y='depth_central', 
                datashade =True,
                height=300,
                flip_yaxis=True,
               title= 'BOTPT Central Caldera',
               ylabel= 'Depth (m)',
               xlabel = 'Time (s)' )

## Plot RS03CCAL-MJ03F-05-BOTPTA301 using Dask

In [44]:
#df_botpt.hvplot(y='cleanDiff', datashade =True, height=400,
                       flip_yaxis=False)

IndentationError: unexpected indent (<ipython-input-44-fdb63f86d44f>, line 2)

### Read downsampled seismic data 

In [62]:
file = '/home/jovyan/data/botpt/axial_seismic/axial_eq_daily.csv'
df = pd.read_csv(file)
df= df.set_index(pd.to_datetime(df['date']))
df.rename(columns = {'date':'Datetime'}, inplace = True)
df['Datetime'] = pd.to_datetime(df['Datetime']);
df.head()

Unnamed: 0_level_0,Datetime,Depth,MW,RMS,count
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2015-05-01,2015-05-01,1.194603,0.196984,0.045397,63.0
2015-05-02,2015-05-02,1.24619,0.240714,0.049524,42.0
2015-05-03,2015-05-03,0.995306,0.16551,0.04,49.0
2015-05-04,2015-05-04,1.247429,0.177143,0.047714,35.0
2015-05-05,2015-05-05,1.202955,0.201364,0.057273,44.0


### Resample BOTPT to daily mean

In [45]:
df_botpt_day = df_botpt.resample('d').mean()
df_botpt_day.head()

Unnamed: 0_level_0,bottom_pressure_eastern,bottom_pressure_central,depth_central,depth_eastern,depthDiff,spikes,cleanDiff
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2015-05-01,2242.32373,2257.087158,1512.24839,1502.356836,-9.891554,0.30625,-9.891172
2015-05-02,2242.330078,2257.091553,1512.25137,1502.361225,-9.890145,1.0,-9.890145
2015-05-03,2242.337402,2257.100586,1512.257444,1502.366116,-9.891329,1.0,-9.891329
2015-05-04,2242.327393,2257.088623,1512.249352,1502.359372,-9.88998,1.0,-9.88998
2015-05-05,2242.318604,2257.08252,1512.245328,1502.353533,-9.891795,1.0,-9.891795


In [49]:
df_botpt_day.hvplot(y='depth_eastern',
                datashade =False,
                height=300,
                flip_yaxis=True,
               title= 'BOTPT Eastern Caldera',
               ylabel= 'Depth (m)',
               xlabel = 'Time (d)')

In [60]:
df_botpt_day.hvplot(y='cleanDiff',
                datashade =False,
                height=300,
                flip_yaxis= False,
                title= 'BOTPT Depth Difference',
                ylabel= 'Depth Diff (m)',
                xlabel = 'Time (d)',
                ylim = (-10.0, -8.5))

In [61]:
df.hvplot(y='count',
                datashade =False,
                height=300,
                flip_yaxis= False,
                title= 'Earthquakes per day',
                ylabel= 'EQs per day',
                xlabel = 'Time (d)',
                ylim = (0, 300))

In [66]:
df.hvplot.scatter(y = 'Depth',c='RMS',
                datashade =False,
                height=300,
                flip_yaxis= False,
                title= 'Focal Depths',
                ylabel= 'Depth (km)',
                xlabel = 'Time (d)')

In [131]:
df.hvplot.scatter(x= 'Depth', y= 'count')

In [93]:
df_botptM=pd.merge(df,df_botpt_day, on = 'Datetime')
df_botptM.head()

Unnamed: 0,Datetime,Depth,MW,RMS,count,bottom_pressure_eastern,bottom_pressure_central,depthDiff,spikes,cleanDiff
0,2015-05-01,1.194603,0.196984,0.045397,63.0,2242.32373,2257.087158,-14.763514,0.30625,-14.762942
1,2015-05-02,1.24619,0.240714,0.049524,42.0,2242.330078,2257.091553,-14.761411,1.0,-14.761411
2,2015-05-03,0.995306,0.16551,0.04,49.0,2242.337402,2257.100586,-14.763177,1.0,-14.763177
3,2015-05-04,1.247429,0.177143,0.047714,35.0,2242.327393,2257.088623,-14.761164,1.0,-14.761164
4,2015-05-05,1.202955,0.201364,0.057273,44.0,2242.318604,2257.08252,-14.763873,1.0,-14.763873
