# Data Validation Report: Comparing Pressure Measurements from BOTPT and BPR
#### Evaluators: Jazlyn Natalie, Dax Soule

In this report, we analyze seafloor pressure measurements taken at Axial Seamount's Central Caldera (RS03CCAL) by two co-located instruments: the BOTPT (Bottom Pressure and Tilt Meter) and the BPR (Bottom Pressure Recorder). The study period runs from December 30, 2014 to February 1, 2015. The original plan was to look at data from January 1 to February 1, 2015, but an issue with the January 1 data corrupted the file associated with that day. Moving the start time to December 30, 2014 resolved the problem.

In [None]:
# Display the NOAA/PMEL map of Axial caldera; the red circles mark the
# locations of the BOTPT instruments.
from IPython.display import Image

Image(
    url="https://www.pmel.noaa.gov/eoi/rsn/Axial-2017-OOI-caldera-ed-sm.png",
)

In [None]:
# Display the zoomed-in NOAA/PMEL map of Axial caldera.
from IPython.display import Image

Image(
    url="https://www.pmel.noaa.gov/eoi/rsn/Axial-2017-OOI-zoom-ed-sm.png",
)

### API Information Setup

In [None]:
# USERNAME = 'OOIAPI-BI8MMX3Y14S4FL'
# TOKEN =  'TEMP-TOKEN-MK0PEMRS0CFA8Z'

### Import Python Libraries 

In [None]:
# First, we need to add some more Python libraries
import requests
import datetime
import time
import pandas as pd
import numpy as np
import datetime
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()
import matplotlib.dates as dates
import pickle as pk

In [None]:
import xarray as xr
import re
import requests
import os
import gc

### Downloading Data and Metadata

In [None]:
# API Login Information
# The credentials can be supplied through the OOI_API_USERNAME / OOI_API_TOKEN
# environment variables so they do not have to live in the notebook. The
# literals below are only a fallback that keeps the notebook runnable as-is.
# NOTE(review): credentials committed to a shared notebook are readable by
# everyone with access to it; consider rotating them and dropping the fallback.
username = os.environ.get('OOI_API_USERNAME', 'OOIAPI-BI8MMX3Y14S4FL')
token = os.environ.get('OOI_API_TOKEN', 'TEMP-TOKEN-MK0PEMRS0CFA8Z')

# Sensor Base URL
base_url = 'https://ooinet.oceanobservatories.org/api/m2m/12576/sensor/inv/'

# Instrument Vocabulary: the pieces of the request path, in the order they
# are joined onto base_url when the request URL is built.
subsite = 'RS03CCAL'
node = 'MJ03F'
sensor = '05-BOTPTA301'
method = 'streamed'
stream = 'botpt_nano_sample'

# Study window sent with the data request (ISO 8601 strings with a Z suffix).
beginDT = '2014-12-30T01:01:01.000Z'
endDT = '2015-02-01T01:01:01.000Z'

In [None]:
# Create the request URL.
# base_url already ends with '/', so strip it before joining; otherwise the
# result contains an empty path segment ("inv//RS03CCAL").
data_request_url = '/'.join(
    (base_url.rstrip('/'), subsite, node, sensor, method, stream)
)

# All of the following are optional
params = {
    'beginDT': beginDT,
    'endDT': endDT,
}

In [None]:
# NOTE(review): `data` is only assigned in later cells (the annotation request
# and the data request), so running the notebook top to bottom raises a
# NameError here — confirm the intended cell order.
print(data)

## Annotations

In [None]:
# Credentials for the annotation request. They can be supplied through the
# OOI_API_USERNAME / OOI_API_TOKEN environment variables; the literals are a
# fallback that keeps the notebook runnable as-is.
USERNAME = os.environ.get('OOI_API_USERNAME', 'OOIAPI-BI8MMX3Y14S4FL')
TOKEN = os.environ.get('OOI_API_TOKEN', 'TEMP-TOKEN-MK0PEMRS0CFA8Z')

# Specify a reference designator
# NOTE(review): this designator (RS03ASHS-MJ03B-09-BOTPTA304) is not the
# instrument requested in the data download cell (RS03CCAL / MJ03F /
# 05-BOTPTA301) — confirm which instrument's annotations are wanted.
refdes = 'RS03ASHS-MJ03B-09-BOTPTA304'

# Time range of interest, as milliseconds since the Unix epoch.
# The dates match the study window used for the data request:
# beginDT = '2014-12-30T01:01:01.000Z'
# endDT = '2015-02-01T01:01:01.000Z'
# strftime('%s') is a platform-specific extension (unavailable on Windows) and
# interprets the date in the machine's local timezone, so the epoch values are
# computed explicitly at midnight UTC instead.
begin = int(
    datetime.datetime(2014, 12, 30, tzinfo=datetime.timezone.utc).timestamp()
) * 1000
end = int(
    datetime.datetime(2015, 2, 1, tzinfo=datetime.timezone.utc).timestamp()
) * 1000

In [None]:
ANNO_API = 'https://ooinet.oceanobservatories.org/api/m2m/12580/anno/find'

# Use a dedicated name for the annotation query so the global `params`
# (the time window of the data request) is not overwritten by this cell.
anno_params = {
    'beginDT': begin,
    'endDT': end,
    'refdes': refdes,
}

# Send the request
r = requests.get(ANNO_API, params=anno_params, auth=(USERNAME, TOKEN))
# Fail loudly on an HTTP error instead of parsing an error payload as data.
r.raise_for_status()
data = r.json()

In [None]:
# convert timestamps
def convert_time(ms):
    """Convert milliseconds since the Unix epoch to a naive UTC datetime.

    Parameters
    ----------
    ms : int, float or None
        Milliseconds since 1970-01-01T00:00:00 UTC.

    Returns
    -------
    datetime.datetime or None
        The corresponding timezone-naive datetime expressed in UTC, or None
        when ``ms`` is None.
    """
    if ms is None:
        return None
    # Epoch + timedelta avoids the deprecated datetime.utcfromtimestamp() and
    # also works for negative offsets on every platform.
    return datetime.datetime(1970, 1, 1) + datetime.timedelta(milliseconds=ms)

In [None]:
# Convert the JSON annotation response to a DataFrame for easier visualization.
# An annotation without a stream applies to the whole instrument.
annotation_columns = [
    'annotation_id', 'subsite', 'node', 'sensor', 'stream', 'method',
    'start', 'stop', 'qcFlag', 'annotation',
]

# Build every row first and construct the frame once: DataFrame.append was
# removed in pandas 2.0 and copied the whole frame on each iteration.
annotation_rows = [
    {
        'annotation_id': d['id'],
        'subsite': d['subsite'],
        'node': d['node'],
        'sensor': d['sensor'],
        'stream': d['stream'],
        'method': d['method'],
        'start': convert_time(d['beginDT']),
        'stop': convert_time(d['endDT']),
        'qcFlag': d['qcFlag'],
        'annotation': d['annotation'],
    }
    for d in data
]
# Passing the columns explicitly keeps the schema even when no annotations
# are returned.
df = pd.DataFrame(annotation_rows, columns=annotation_columns)

# Show the full annotation text (None means "no limit"; -1 is deprecated).
pd.set_option('display.max_colwidth', None)
df

WARNING:
Data request lines are commented out to prevent accidental resubmission when running through the entire notebook quickly.

## Data Request

In [None]:
# Data Request Line
# Pass the time window explicitly: the global `params` is reassigned by the
# annotation cell (epoch-millisecond bounds plus a refdes), which is not what
# the data request should send.
r = requests.get(
    data_request_url,
    params={'beginDT': beginDT, 'endDT': endDT},
    auth=(username, token),
)
# Fail loudly on an HTTP error instead of parsing an error payload as data.
r.raise_for_status()
data = r.json()

In [None]:
# print(data['allURLs'][0])

In [None]:
# %%time
# check_complete = data['allURLs'][1] + '/status.txt'
# for i in range(1800): 
#     r = requests.get(check_complete)
#     if r.status_code == requests.codes.ok:
#         print('request completed')
#         break
#     else:
#         time.sleep(1)

In [None]:
# Scrape the THREDDS catalog page for the NetCDF files of this request and
# build the OPeNDAP URL of each one.
url = 'https://opendap.oceanobservatories.org/thredds/catalog/ooi/jazlynnatalie12@gmail.com/20180718T210007-RS03CCAL-MJ03F-05-BOTPTA301-streamed-botpt_nano_sample/catalog.html'
tds_url = 'https://opendap.oceanobservatories.org/thredds/dodsC'

catalog_response = requests.get(url)
# Stop here if the catalog is unavailable rather than scraping an error page.
catalog_response.raise_for_status()
catalog_html = catalog_response.text

urls = re.findall(r'href=[\'"]?([^\'" >]+)', catalog_html)

# Keep only paths that end in "<digit>.nc". The filter is a comprehension
# because removing items from a list while iterating over it skips the
# element that follows each removal. The dot before "nc" is escaped so only a
# literal ".nc" extension matches.
x = [
    nc_path
    for nc_path in re.findall(r'(ooi/.*?\.nc)', catalog_html)
    if re.search(r'\d\.nc$', nc_path)
]

# URLs always use '/', so join explicitly instead of with os.path.join, which
# would insert backslashes on Windows.
datasets = ['/'.join((tds_url, nc_path)) for nc_path in x]

In [None]:
# List the OPeNDAP URLs built from the catalog page in the previous cell.
print(datasets)

### Create Output Directory

In [None]:
# Make the output directory for the downsampled files.
# exist_ok=True makes this a no-op when the directory already exists, with no
# check-then-create race. Unlike the previous hand-rolled check, it raises if
# a regular file occupies the path instead of passing silently and failing
# later when the files are written.
new_dir = '15s_mean_data/'
os.makedirs(new_dir, exist_ok=True)

### Downsample data

In [None]:
# Read the data directly off THREDDS and write each file out as a subsampled,
# pickled pandas DataFrame (15-second means of bottom_pressure).
# NOTE: It takes about one hour to subsample 69499.81 Mbytes of data and write it out to a dataframe.
for num, i in enumerate(datasets, start=1):
    print('Downsampling file {} of {}'.format(num, len(datasets)))

    # The context manager closes the remote dataset once the values have been
    # pulled into pandas, so handles do not accumulate across files.
    with xr.open_dataset(i) as ds:
        by_time = ds.swap_dims({'obs': 'time'})
        # '15s' is the current pandas alias for seconds; the uppercase 'S'
        # spelling is deprecated.
        mean_pressure = by_time['bottom_pressure'].to_pandas().resample('15s').mean()

    botpt = mean_pressure.to_frame(name='bottom_pressure')
    # Clear the index name by assignment; `del botpt.index.name` is not
    # supported by current pandas.
    botpt.index.name = None

    # Bins with no samples come out of the resample as NaN; drop them.
    botpt = botpt.dropna()

    # <source file name without ".nc">_resampled.pd inside the output directory
    source_name = os.path.splitext(i.split('/')[-1])[0]
    out = '15s_mean_data/' + source_name + '_resampled' + '.pd'

    with open(out, 'wb') as fh:
        pk.dump(botpt, fh)

    gc.collect()
print('Complete!')

### Aggregate downsampled data

In [None]:
# Create a single file with all the pickled, downsampled data.
resampled_frames = []
for path, subdirs, files in os.walk('15s_mean_data/'):
    for name in sorted(files):
        # Only read the files written by the downsampling step; stray files
        # in the directory would otherwise be unpickled too.
        if not name.endswith('_resampled.pd'):
            continue
        file_name = os.path.join(path, name)
        # NOTE: pickle can execute arbitrary code on load; only read files
        # this notebook produced.
        with open(file_name, 'rb') as f:
            resampled_frames.append(pk.load(f))

# Concatenate once: DataFrame.append was removed in pandas 2.0 and re-copied
# the accumulated frame on every call. pd.concat rejects an empty list, so
# fall back to an empty frame when nothing was found.
botpt = pd.concat(resampled_frames) if resampled_frames else pd.DataFrame()

botpt.index.name = 'time'
# The timestamps are the index, so order the rows by it.
botpt = botpt.sort_index()

with open('botpt.pd', 'wb') as f:
    pk.dump(botpt, f)

In [None]:
# Open the first file in the list of OPeNDAP URLs and print the dataset
# summary that xarray produces for it.
# NOTE: this rebinds `x`, which earlier held the list of catalog paths.
x = xr.open_dataset(datasets[0])
print(x)

In [None]:
# Obtain more information on a data variable by reading its attributes
# (for example .units or .comment); here, the units of bottom_pressure.
x['bottom_pressure'].units

**psia** is the abbreviation for pounds-force per square inch absolute. This pressure is referenced to a vacuum, so it includes the pressure exerted by both the water column and the atmosphere.

### Missing data 
The January 16, 2015 BOTPT data is missing. 

### Timeseries Plot of BOTPT

In [None]:
# Time series of the downsampled BOTPT bottom pressure.
fig, ax = plt.subplots(figsize=(10, 5))
# pandas selects the target axes through `ax=`; `axes=` is not a plot()
# parameter and only worked because `ax` happened to be the current axes.
botpt['bottom_pressure'].plot(ax=ax)
ax.set_ylabel('P (psia)', fontsize=15, labelpad=12)
ax.set_xlabel('time', fontsize=15, labelpad=12)
ax.set_title('BOTPT', fontsize=15)

# savefig does not create missing directories, so make sure the target exists.
os.makedirs('data_eval_plots', exist_ok=True)
fig.savefig('data_eval_plots/botpt.svg', format='svg', dpi=1200)

### Converting pressure measurements from BOTPT to depth

In [None]:
# Scale the BOTPT pressure by 0.67 to express it as depth.
# NOTE(review): 0.67 is presumably an approximate psi-to-metres-of-seawater
# factor — confirm its source.
botpt_corrected = 0.67 * botpt['bottom_pressure']

# Mean depth over the record, and the departure of every sample from it.
botpt_mean = botpt_corrected.mean()
botpt_diff = botpt_corrected.sub(botpt_mean)

print(botpt_corrected)
botpt.tail()

In [None]:
# Time series of the BOTPT record after conversion to depth.
fig, ax = plt.subplots(figsize=(10, 5))
# pandas selects the target axes through `ax=`; `axes=` is not a plot()
# parameter and only worked because `ax` happened to be the current axes.
botpt_corrected.plot(ax=ax)
ax.set_ylabel('depth (m)', fontsize=15, labelpad=12)
ax.set_xlabel('time', fontsize=15, labelpad=12)
ax.set_title('BOTPT', fontsize=15)
# fig.savefig('data_eval_plots/depbotpt.svg', format='svg', dpi=1200)

### Importing BPR datasets 

In [None]:
# Load the BPR dataset from the local NetCDF file and make `time` the
# dimension coordinate in place of the generic `index` dimension.
bpr = xr.open_dataset('test1.nc').swap_dims({'index': 'time'})

### BPR measurements (already) in depth

In [None]:
# Time series of the BPR record (variable DriftCorrRawDep).
fig, ax = plt.subplots(figsize=(10, 5))
bpr['DriftCorrRawDep'].plot(ax=ax)
ax.set_ylabel('depth (m)', fontsize=15, labelpad=12)
ax.set_xlabel('time', fontsize=15, labelpad=12)
ax.set_title('BPR', fontsize=15)

# savefig does not create missing directories, so make sure the target exists.
os.makedirs('data_eval_plots', exist_ok=True)
fig.savefig('data_eval_plots/bpr.svg', format='svg', dpi=1200)

### Compare BOTPT and BPR measurements

In [None]:
# Overlay the BOTPT depth series and the BPR series on one set of axes.
fig, ax = plt.subplots(figsize=(10, 5))

botpt_corrected.plot(ax=ax)
bpr['DriftCorrRawDep'].plot(ax=ax)

ax.set_xlabel('time', fontsize=15, labelpad=12)
ax.set_ylabel('depth', fontsize=15, labelpad=12)
# Legend entries follow the order in which the lines were drawn.
ax.legend(['BOTPT', 'BPR'])
ax.set_title('BOTPT vs BPR', fontsize=15)
# fig.savefig('data_eval_plots/botptvsbpr.svg', format='svg', dpi=1200)

### Remove the mean from each time series

In [None]:
# Plot both records with their own means subtracted, so the variations can be
# compared directly regardless of any constant offset between instruments.
fig, ax = plt.subplots(figsize=(10, 5))

bpr_depth = bpr['DriftCorrRawDep']
x = ax.plot(botpt_corrected - botpt_corrected.mean(), label='BOTPT')
y = ax.plot(bpr['time'], bpr_depth - bpr_depth.mean())

ax.set_xlabel('time', fontsize=15, labelpad=12)
ax.set_ylabel('depth', fontsize=15, labelpad=12)
ax.set_title('Reduced Mean BOTPT vs BPR', fontsize=15)
# Legend entries follow the order in which the lines were drawn.
ax.legend(['BOTPT', 'BPR'])
# fig.savefig('data_eval_plots/meanofbotp.svg', format='svg', dpi=1200)

The BPR data we obtained start on January 1, 2015, which is why the graph shows no BPR data before that date.

In [None]:
# Inspect the container type of the depth-converted BOTPT data (it is derived
# from a single DataFrame column, so a pandas Series is expected).
type(botpt_corrected)

Identify start and end time for each plotted vector

In [None]:
# botpt_bpr['time'][0]

### Zooming in on the graph

In [None]:
# BPR depth record and its time axis.
z = bpr['DriftCorrRawDep']
t = bpr['time']

# Mean of the BPR record and each sample's departure from that mean.
zm = z.mean()
zdiff = z - zm

In [None]:
# Put both records in one table, keeping only timestamps present in both
# (inner join on the time index).
a = bpr.to_dataframe()
b = botpt_corrected.to_frame()
c = a.merge(b, how='inner', left_index=True, right_index=True)

In [None]:
# Zoom in on one hour of the merged, mean-removed records.
fig, ax = plt.subplots(1, 1, figsize=(14, 8))

# Means over the merged (common-timestamp) table.
m1 = c['DriftCorrRawDep'].mean()
m2 = c['bottom_pressure'].mean()

# NOTE(review): linestyle=None means "use the default line style", so the
# points are still connected; use linestyle='None' if markers only were
# intended.
point_style = dict(marker='.', markersize=1, linestyle=None)
ax.plot(c.index, c['DriftCorrRawDep'] - m1, label='bpr', **point_style)
ax.plot(c.index, c['bottom_pressure'] - m2, label='botpt', **point_style)
ax.legend()

# Restrict the view to 01:00-02:00 on January 8, 2015.
window_start = datetime.datetime(2015, 1, 8, 1, 0, 0)
window_end = datetime.datetime(2015, 1, 8, 2, 0, 0)
ax.set_xlim(window_start, window_end)

### Calculating Difference


In [None]:
# Difference between the two mean-removed records at the common timestamps.
bpr_anomaly = c['DriftCorrRawDep'] - m1
botpt_anomaly = c['bottom_pressure'] - m2
plt.plot(c.index, bpr_anomaly - botpt_anomaly);

In [None]:
# Print the raw array of downsampled BOTPT bottom-pressure values.
print (botpt['bottom_pressure'].values)

## Conclusions: 
1. After removing the mean from each series, the BOTPT and BPR records match almost perfectly.   
2. When the two graphs are overlaid, the BPR measurements fill the gap in the BOTPT record (January 16, 2015).

#### Next step: Do analysis on longer timeseries. 