# Example Analysis

In [1]:
import xarray as xr
import pandas as pd
import dask.dataframe as dd
import hvplot.pandas  # noqa
import hvplot.dask  # noqa
import numpy as np
from scipy import stats

In [2]:
import warnings
# Silence every Python warning for the rest of the kernel session. This also
# hides warnings raised by pandas/xarray/dask in later cells, so comment this
# line out while debugging.
warnings.filterwarnings('ignore')

# Build Dask Cluster
1. Use the GUI to create a new cluster with ~10 workers.
2. Use < > to insert an "import Client" statement. This is critical because it is how your script knows to use your cluster.
3. Execute the client cell.
4. Execute your Dask cell. Note that once you have a cluster running you do not need to re-import the client.
5. When finished, always shut down your cluster.

In [None]:
import os

from dask.distributed import Client

# Connect this kernel to an already-running Dask scheduler.
# The literal address below belongs to one specific cluster session, so it can
# be overridden through the DASK_SCHEDULER_ADDRESS environment variable instead
# of editing the notebook; the original address is kept as the fallback.
scheduler_address = os.environ.get("DASK_SCHEDULER_ADDRESS", "tcp://10.0.129.212:43493")
client = Client(scheduler_address)
client

## Read in RS03ECAL-MJ03E-06-BOTPTA302 Data

In [35]:
#!head /home/jovyan/data/botpt/RS03ECAL-MJ03E-06-BOTPTA302/deployment0001_RS03ECAL-MJ03E-06-BOTPTA302-streamed-botpt_nano_sample_20140904T000000-20141128T060000.100000.nc\#fillmisma_resampled.nc

In [36]:
# ds =xr.open_dataset('/home/jovyan/data/botpt/RS03ECAL-MJ03E-06-BOTPTA302/deployment0001_RS03ECAL-MJ03E-06-BOTPTA302-streamed-botpt_nano_sample_20191012T060000-20191014T235959.950000_resampled.nc')
# ds

In [37]:
# Open every resampled RS03ECAL-MJ03E-06-BOTPTA302 file as one dataset,
# rechunk it to 10080 samples per chunk, and give the pressure variable a
# station-specific name so it can sit next to the other station's data later.
ds1 = (
    xr.open_mfdataset(
        '/home/jovyan/data/botpt/RS03ECAL-MJ03E-06-BOTPTA302/*_resampled.nc',
        parallel=True,
    )
    .chunk(10080)
    .rename({'bottom_pressure': 'bottom_pressure_eastern'})
)
ds1

<xarray.Dataset>
Dimensions:                  (index: 2368081)
Coordinates:
  * index                    (index) datetime64[ns] 2015-02-09T05:59:00 ... 2019-10-14T23:59:00
Data variables:
    bottom_pressure_eastern  (index) float32 dask.array<chunksize=(10080,), meta=np.ndarray>

## Read in Central Caldera

In [38]:
# ds =xr.open_dataset('/home/jovyan/data/botpt/RS03CCAL-MJ03F-05-BOTPTA301/deployment0001_RS03CCAL-MJ03F-05-BOTPTA301-streamed-botpt_nano_sample_20140904T000000-20141128T115959.950000_resampled.nc')
# ds

In [39]:
# Open every resampled RS03CCAL-MJ03F-05-BOTPTA301 file as one dataset,
# rechunk it to 10080 samples per chunk, and give the pressure variable a
# station-specific name so it can sit next to the other station's data later.
ds2 = (
    xr.open_mfdataset(
        '/home/jovyan/data/botpt/RS03CCAL-MJ03F-05-BOTPTA301/*_resampled.nc',
        parallel=True,
    )
    .chunk(10080)
    .rename({'bottom_pressure': 'bottom_pressure_central'})
)
ds2

<xarray.Dataset>
Dimensions:                  (index: 2606374)
Coordinates:
  * index                    (index) datetime64[ns] 2014-09-04 ... 2019-10-14T23:59:00
Data variables:
    bottom_pressure_central  (index) float32 dask.array<chunksize=(10080,), meta=np.ndarray>

## Create DataFrame

In [40]:
# Convert both datasets to pandas DataFrames (one per station, indexed by the
# datasets' `index` coordinate), then drop the xarray objects.
df1, df2 = ds1.to_dataframe(), ds2.to_dataframe()
del ds1, ds2

In [41]:
# Add a `Datetime` column holding the text form of each index timestamp;
# the merge cell further down joins the two stations on this column.
df1 = df1.assign(Datetime=np.datetime_as_string(df1.index))
df2 = df2.assign(Datetime=np.datetime_as_string(df2.index))

In [42]:
# Keep only the first 19 characters ('YYYY-MM-DDTHH:MM:SS'), discarding
# anything finer than whole seconds from the timestamp strings.
df1['Datetime'] = df1['Datetime'].str.slice(stop=19)
df2['Datetime'] = df2['Datetime'].str.slice(stop=19)

In [43]:
# Join the two stations on the shared `Datetime` string (default inner join:
# only timestamps present in both frames survive), then free the inputs.
df_botpt = df1.merge(df2, on='Datetime')
del df1, df2
df_botpt.tail()

Unnamed: 0,bottom_pressure_eastern,Datetime,bottom_pressure_central
2362766,2240.960938,2019-10-14T23:55:00,2254.007324
2362767,2240.949463,2019-10-14T23:56:00,2253.995605
2362768,2240.938232,2019-10-14T23:57:00,2253.984375
2362769,2240.925781,2019-10-14T23:58:00,2253.970215
2362770,2240.910156,2019-10-14T23:59:00,2253.955811


## trimming data frame

In [44]:
# Despiking parameters (previously inline magic numbers).
SPIKE_WINDOW = 1000     # number of rows in the rolling-median window
SPIKE_TOLERANCE = 0.1   # largest |depthDiff - rolling median| that is still kept

# Re-index by real timestamps parsed from the `Datetime` strings.
df_botpt = df_botpt.set_index(pd.to_datetime(df_botpt['Datetime']))
del df_botpt['Datetime']

# Eastern minus central bottom pressure.
df_botpt['depthDiff'] = df_botpt['bottom_pressure_eastern'] - df_botpt['bottom_pressure_central']

# Restrict to the analysis window. The explicit .copy() makes the slice an
# independent frame, so the column assignments below are not chained
# assignments on a view of the untrimmed frame.
df_botpt = df_botpt.loc['2015-05-01 00:00:00':'2019-10-14 00:00:00'].copy()

# NOTE: despite its name, `spikes` is True for samples that are *kept*
# (within SPIKE_TOLERANCE of the rolling median) and False for rejected ones.
# The rolling median is NaN until the window is full, so the first
# SPIKE_WINDOW - 1 rows compare as False and end up NaN in `cleanDiff`.
rolling_median = df_botpt['depthDiff'].rolling(SPIKE_WINDOW).median()
df_botpt['spikes'] = (df_botpt['depthDiff'] - rolling_median).abs() < SPIKE_TOLERANCE
df_botpt['cleanDiff'] = df_botpt['depthDiff'].where(df_botpt['spikes'])
df_botpt.head()

Unnamed: 0_level_0,bottom_pressure_eastern,bottom_pressure_central,depthDiff,spikes,cleanDiff
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2015-05-01 00:00:00,2241.314697,2256.084717,-14.77002,False,
2015-05-01 00:01:00,2241.314941,2256.085938,-14.770996,False,
2015-05-01 00:02:00,2241.314209,2256.085938,-14.771729,False,
2015-05-01 00:03:00,2241.310303,2256.081543,-14.77124,False,
2015-05-01 00:04:00,2241.307129,2256.079834,-14.772705,False,


## Read in downsampled tide predictions from Eastern Caldera

In [45]:
# Load the Eastern Caldera tide predictions and index them by timestamp.
file = '/home/jovyan/data/botpt/tidepredictions/tidesEasternCaldera.csv'
df = pd.read_csv(file)
df.columns = ['Datetime', 'bottom_pressure']
df = df.set_index(pd.to_datetime(df['Datetime']))
del df['Datetime']

# The cells below read df['EastTide'], and this cell's saved output shows that
# column, but nothing here created it, so a fresh run failed downstream.
# Reconstructed from the saved output, where EastTide equals
# 0.067 * bottom_pressure in every displayed row.
# TODO(review): confirm the factor and what it represents (unit conversion?).
EAST_TIDE_FACTOR = 0.067
df['EastTide'] = df['bottom_pressure'] * EAST_TIDE_FACTOR
df.head()

Unnamed: 0_level_0,bottom_pressure,EastTide
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1
2015-05-01 00:00:00,-0.672125,-0.045032
2015-05-01 00:01:00,-0.673025,-0.045093
2015-05-01 00:02:00,-0.6739,-0.045151
2015-05-01 00:03:00,-0.6747,-0.045205
2015-05-01 00:04:00,-0.675425,-0.045253


In [46]:
# Sanity check: confirm the tide table has an `EastTide` column and that
# selecting it yields a pandas Series.
type(df['EastTide'])

pandas.core.series.Series

In [47]:
# Attach the tide predictions to the pressure table, matching rows on
# `Datetime` (the index name of both frames), then subtract the predicted
# tide from the eastern bottom pressure.
df_botpt = df_botpt.merge(df, on='Datetime')
df_botpt['ECal_tideReduced'] = df_botpt['bottom_pressure_eastern'].sub(df_botpt['EastTide'])
df_botpt.head()

Unnamed: 0_level_0,bottom_pressure_eastern,bottom_pressure_central,depthDiff,spikes,cleanDiff,bottom_pressure,EastTide,ECal_tideReduced
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2015-05-01 00:00:00,2241.314697,2256.084717,-14.77002,False,,-0.672125,-0.045032,2241.35973
2015-05-01 00:01:00,2241.314941,2256.085938,-14.770996,False,,-0.673025,-0.045093,2241.360034
2015-05-01 00:02:00,2241.314209,2256.085938,-14.771729,False,,-0.6739,-0.045151,2241.35936
2015-05-01 00:03:00,2241.310303,2256.081543,-14.77124,False,,-0.6747,-0.045205,2241.355508
2015-05-01 00:04:00,2241.307129,2256.079834,-14.772705,False,,-0.675425,-0.045253,2241.352382


## Create Dask Data frame

## Plot RS03ECAL-MJ03E-06-BOTPTA302 and RS03CCAL-MJ03F-05-BOTPTA301 bottom pressure

In [None]:
# Eastern and central bottom pressure against time: one datashaded panel per
# series, stacked in a single column, each with its own inverted y-axis.
df_botpt.hvplot(
    x='Datetime',
    y=['bottom_pressure_eastern', 'bottom_pressure_central'],
    datashade=True,
    height=200,
    flip_yaxis=True,
    subplots=True,
    shared_axes=False,
).cols(1)

## Plot the despiked pressure difference and the tide-reduced eastern pressure

In [48]:
# Despiked eastern-minus-central pressure difference over the frame's index.
df_botpt.hvplot(
    y='cleanDiff',
    datashade=True,
    height=400,
    flip_yaxis=False,
)

In [49]:
# Eastern bottom pressure with the predicted tide subtracted, over the
# frame's index.
df_botpt.hvplot(
    y='ECal_tideReduced',
    datashade=True,
    height=400,
    flip_yaxis=False,
)

### Read in raw gravimeter data (bravoseis_data/SADO, jan_2019)

In [None]:
# Read every `.proc` file in the gravimeter directory into one Dask DataFrame,
# parsing the `fecha` column as dates and forcing the listed numeric columns
# to float64.
# NOTE(review): `dateparse` is not defined in any cell visible above — confirm
# it is defined before this cell runs, otherwise this raises NameError.
# NOTE(review): `fecha` is declared as `object` in `dtype` while also listed in
# `parse_dates`; verify the parsed dtype is what the index below expects.
# NOTE(review): recent pandas releases deprecate `date_parser` in favour of
# `date_format` — confirm against the installed pandas version.
df_grav=dd.read_csv('/home/jovyan/data/bravoseis_data/SADO/jan_2019/gravimetro_bruto.proc/*.proc', 
               parse_dates=['fecha'], date_parser=dateparse, 
                    dtype = {'fecha': object,'status': np.float64,
                                'gravimetria_bruta': np.float64, 'spring_tension': np.float64,
                                'longitud': np.float64, 'latitud': np.float64,
                                'velocidad': np.float64,'rumbo': np.float64 })
#df.partitions[5].compute()
# Index the table by the `fecha` column.
df_grav=df_grav.set_index("fecha")
# Drop the columns that are not used further: the `fecha_telegrama`, `rumbo`,
# `velocidad`, `spring_tension` and `status` columns.
del df_grav['fecha_telegrama']
del df_grav['rumbo']
del df_grav['velocidad']
del df_grav['spring_tension']
del df_grav['status']
# Preview the first rows of the remaining columns.
df_grav.head()