# Compare barometric pressure recorded by analog barometers and vibrating wire sensors
Here we use the 4-hourly summary data.
* Analog barometers are temperature-corrected, so can be considered a direct measure of true barometric pressure. However, they still need correcting to a height of 0 feet.
* Vibrating wire sensors need to be corrected with the corresponding temperature data, and height corrected to 0 feet.

## 1. Set up and Load transducers metadata

In [None]:
# Notebook set-up: third-party imports, project path configuration, and the
# transducer metadata table used by every later cell.
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
#import datetime as dt
import header
# `paths` is a mapping of project directories; the 'new_data' entry is read below.
paths = header.setup_environment()
import platform
# On macOS the merged well data is read from a hard-coded Dropbox folder;
# on every other platform it is read from the project's new-data tree.
if platform.system()=='Darwin':
    INPUTDIR = '/Users/thompsong/Dropbox/PROFESSIONAL/RESEARCH/3_Project_Documents/NASAprojects/201602_Rocket_Seismology/DATA/2022_DATA/WellData/MERGED'
else:
    INPUTDIR = os.path.join(paths['new_data'], '03_merge_inventories')
# NOTE(review): libWellData is imported only after header.setup_environment()
# has run; presumably that call makes the module importable - confirm before
# moving this import to the top of the cell.
import libWellData as LLE
# Transducer metadata table; passed to the LLE correction helpers in later cells.
transducersDF = LLE.get_transducers_dataframe(paths)
# `display` is not imported here; presumably it is supplied by the
# IPython/Jupyter runtime.
display(transducersDF)


## 2. Subset the summary of all files dataframe by Baro, 20 Hz, and 100 Hz subdirectories
Display the columns of each, after dropping empty columns.
* Baro only contains data columns for AirPressureShallow and AirPressureDeep.
* 20 Hz contains data columns for 1226423 and 2151692, plus corresponding temperature and stdev data
* 100 Hz contains data columns for '1226421', '1226419', '1226420', '2149882','2151691', and '1226429', plus corresponding temperature and stdev data

We plot:
- the barometric data
- the 100 Hz temperature data

We plot these first because neither needs to be corrected (although later we will adjust the barometric data to a height of 0 feet).

In [None]:
# Load the summary of all files - raw 4-hourly data
dfall2 = pd.read_csv(os.path.join(INPUTDIR, 'all2.csv'))


def _subset_by_subdir(df, subdir):
    """Return the rows of `df` recorded under one logger subdirectory.

    The result is an independent frame (the rows are filtered first and only
    the selection is copied) with:
      * a 'datetime' column parsed from 'TIMESTAMP',
      * columns that are entirely empty for this subdirectory removed,
      * columns whose name contains 'unnamed' (any case) removed.

    Parameters
    ----------
    df : pandas.DataFrame
        Summary table; must contain 'subdir' and 'TIMESTAMP' columns.
    subdir : str
        Value of the 'subdir' column to keep, e.g. 'Baro', '20hz', '100hz'.

    Returns
    -------
    pandas.DataFrame
        The cleaned subset; `df` itself is left untouched.
    """
    subset = df[df['subdir'] == subdir].copy()
    subset['datetime'] = pd.to_datetime(subset['TIMESTAMP'])
    # Each logger fills only its own data columns, so drop the all-NaN ones.
    subset = subset.dropna(how='all', axis=1)
    unnamed = subset.columns[subset.columns.str.contains('unnamed', case=False)]
    return subset.drop(columns=unnamed)


# Split into baro, 20 Hz, 100 Hz
dfbaro = _subset_by_subdir(dfall2, 'Baro')
df20hz = _subset_by_subdir(dfall2, '20hz')
df100hz = _subset_by_subdir(dfall2, '100hz')




## 3. Examine thermometer data 
For each vibrating wire pressure transducer, there is a corresponding thermal record.
Find these columns and rename them to match the transducer serial number + "_temp".

In [None]:
# Keep only the rows in which the highest-numbered thermistor channel holds a
# value: 'Therm(6)' for the 100 Hz frame, 'Therm(2)' for the 20 Hz frame.
# (df20hz_thermal is prepared here but not used further in this cell.)
df100hz_thermal = df100hz.dropna(subset=['Therm(6)']).copy()
df20hz_thermal = df20hz.dropna(subset=['Therm(2)']).copy()

# Transducer data columns are named by serial number, which starts '12' or '21'.
serial_columns = [
    name for name in df100hz_thermal.columns
    if isinstance(name, str) and name[:2] in ('12', '21')
]

# The n-th serial-number column (in column order) is paired with 'Therm(n)',
# which is renamed to '<serial>_temp'. For each sensor we also record its set
# depth and the standard deviation of its temperature record.
depth_vs_tstd = []
for channel, serial in enumerate(serial_columns, start=1):
    therm_name = f'Therm({channel})'
    temp_name = f'{serial}_temp'
    print(therm_name, '->', temp_name)
    df100hz_thermal.rename(columns={therm_name: temp_name}, inplace=True)
    metadata = LLE.get_transducer_metadata(serial, transducersDF)
    depth_vs_tstd.append({
        'depth': metadata['set_depth_ft'],
        'Tstd': df100hz_thermal[temp_name].std(),
    })

# Temperature time series for every renamed thermistor column.
thermal_columns = [name for name in df100hz_thermal.columns if name.endswith('_temp')]
temperature_axes = df100hz_thermal.plot(
    x='datetime', y=thermal_columns, kind='line', ylabel='Temperature (C)'
)
temperature_axes.legend(bbox_to_anchor=(1.0, 1.0), fontsize='small')

# Temperature variability as a function of sensor set depth.
dftemp = pd.DataFrame(depth_vs_tstd)
dftemp.plot(x='depth', y='Tstd', style='o', ylabel='stdev(Temperature), C ')

## 4. Plot raw barometric data


In [None]:
# Uncorrected analog barometer readings.
raw_analog_columns = ['AirPressureShallow', 'AirPressureDeep']
dfbaro.plot(kind='line', x='datetime', y=raw_analog_columns)

# Uncorrected readings of the two vibrating wire sensors that are later
# treated as air-pressure sensors.
raw_digital_air_columns = ['1226420', '1226429']
df100hz.plot(kind='line', x='datetime', y=raw_digital_air_columns)

## 5. Correct all barometers and vibrating wire sensor data
- barometers are corrected for height only
- vibrating wire sensors in air are corrected for height also
- all vibrating wire sensors are corrected for sensitivity, and temperature
- vibrating wire sensors in water are corrected for barometric pressure too, from vibrating wire sensor in air

In [None]:
analog_air_columns = ['AirPressureShallow', 'AirPressureDeep']
digital_air_columns = ['1226420', '1226429']

# Analog barometers: height correction only, no temperature correction.
dfbaro_elevationRemoved_PSI = LLE.correctBarometricData(
    dfbaro,
    analog_air_columns,
    transducersDF,
    temperatureCorrect=False,
    heightCorrect=True,
)
dfbaro_elevationRemoved_PSI.plot(x='datetime', y=analog_air_columns, kind='line')

# Vibrating wire sensors in air: temperature and height correction, using the
# frame that carries the renamed '<serial>_temp' columns.
df100hz_elevationRemoved_aircolumnonly = LLE.correctBarometricData(
    df100hz_thermal,
    digital_air_columns,
    transducersDF,
    temperatureCorrect=True,
    heightCorrect=True,
)

# Convert the whole 100 Hz frame to PSI, correcting for air pressure but not depth.
# NOTE(review): temperatureCorrect=True is passed both here and in the call
# above - confirm in libWellData that the air columns are not corrected twice.
df100hz_elevationRemoved_PSI = LLE.rawdf2psidf(
    df100hz_elevationRemoved_aircolumnonly,
    transducersDF,
    temperatureCorrect=True,
    airpressureCorrect=True,
    depthCorrect=False,
)
df100hz_elevationRemoved_PSI.plot(x='datetime', y=digital_air_columns, kind='line')

## 6. Merge the dataframes after rounding timestamps to nearest minute
All time series from all2.csv are 4-hourly, since we have only 1 file per 4 hours. So we can round to the nearest minute to align timestamps that are generally within 1 s of each other.

Drop columns we do not need

In [None]:
# Two analog barometers followed by the two vibrating wire sensors in air.
aircolumns = ['AirPressureShallow', 'AirPressureDeep'] + ['1226420', '1226429']

# Round both frames' timestamps to the minute. The return value is unused, so
# round_datetime presumably adds the 'nearestminute' column in place.
for frame in (df100hz_elevationRemoved_PSI, dfbaro_elevationRemoved_PSI):
    LLE.round_datetime(frame, freq='min')

# Join the digital and analog records on the rounded timestamp.
dfmerged1 = LLE.merge_and_drop(
    df100hz_elevationRemoved_PSI,
    dfbaro_elevationRemoved_PSI,
    on='nearestminute',
)
display(dfmerged1)
dfmerged1.plot(kind='line', x='nearestminute', y=aircolumns)


## 7. Correlate the air pressure columns - and compare their means too


In [None]:
# Compare the four air-pressure columns of the merged frame against each other.
# Two tables come back: xcorrdf1 is searched for its largest value by
# apply_dcshifts below, and dcshiftdf1 supplies the per-column offsets that
# function subtracts. Presumably these are pairwise cross-correlations and
# pairwise DC (mean-level) differences - confirm in libWellData.xcorr_columns.
xcorrdf1, dcshiftdf1 = LLE.xcorr_columns(dfmerged1, aircolumns)

def apply_dcshifts(df, xcorrdf, dcshiftdf):
    """Subtract per-column DC offsets chosen from the best-correlated reference.

    Only the first two rows of `xcorrdf` paired with its third and later
    columns are searched. The row label of the largest value in that
    sub-table selects one column of `dcshiftdf`; that column, as a
    ``{label: offset}`` dict, gives the offset subtracted from each
    same-named column of a copy of `df`.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to shift; must contain a column for every index label of `dcshiftdf`.
    xcorrdf : pandas.DataFrame
        Table of correlation values between columns.
    dcshiftdf : pandas.DataFrame
        Table of offsets, with a column for each candidate reference.

    Returns
    -------
    (pandas.DataFrame, dict)
        The shifted copy of `df` and the offsets that were subtracted.
    """
    candidate_pairs = xcorrdf.iloc[:2, 2:].stack()
    best_pair = candidate_pairs.idxmax()  # (row label, column label)
    reference_label = best_pair[0]

    dcshifts = dcshiftdf[reference_label].to_dict()

    dfshifted = df.copy()
    for column, offset in dcshifts.items():
        print(f'Shifting {column} by {-offset} PSI')
        dfshifted[column] -= offset
    return dfshifted, dcshifts

# Remove the DC offsets from the temperature-corrected merge and plot the
# four air-pressure columns after shifting.
dfshifted1, dcshifts1 = apply_dcshifts(
    dfmerged1,
    xcorrdf1,
    dcshiftdf1,
)
dfshifted1.plot(kind='line', x='nearestminute', y=aircolumns, ylabel='PSI')



## 8. Repeat the analysis, but don't make a temperature correction
Improves correlation between 1226420 and analog barometers by 2%, but degrades 1226429 against same by almost 1%
Improves correlation between 1226420 and 1226429 by 4% 
However, if we stick to using 1226429, this loss in performance is fine.
Best shifts are now:
* +14.42556 PSI to align 1226420 with AirPressureShallow
* +14.55743 PSI to align 1226429 with AirPressureShallow

In [None]:
# Height-correct the two vibrating wire air sensors again, this time without
# the temperature correction.
df100hz_elevationRemoved_aircolumnonly_noT = LLE.correctBarometricData(
    df100hz_thermal,
    ['1226420', '1226429'],
    transducersDF,
    temperatureCorrect=False,
    heightCorrect=True,
)

# Only the new frame needs rounding: dfbaro_elevationRemoved_PSI is reused
# as-is and was already passed through round_datetime in the earlier merge cell.
LLE.round_datetime(df100hz_elevationRemoved_aircolumnonly_noT, freq='min')
dfmerged2 = LLE.merge_and_drop(
    df100hz_elevationRemoved_aircolumnonly_noT,
    dfbaro_elevationRemoved_PSI,
    on='nearestminute',
)
display(dfmerged2)

# Same correlation / DC-shift analysis as for the temperature-corrected merge.
xcorrdf2, dcshiftdf2 = LLE.xcorr_columns(dfmerged2, aircolumns)
dfshifted2, dcshifts2 = apply_dcshifts(dfmerged2, xcorrdf2, dcshiftdf2)
dfshifted2.plot(kind='line', x='nearestminute', y=aircolumns, ylabel='PSI')

# Element-wise change (no temperature correction minus temperature-corrected)
# in the correlation table and in the DC-level table.
dfxcorrdiff = xcorrdf2 - xcorrdf1
dfshiftdiff = dcshiftdf2 - dcshiftdf1
dfxcorrdiff_styled = dfxcorrdiff.style.set_caption('Difference in cross-correlation')
dfshiftdiff_styled = dfshiftdiff.style.set_caption('Difference in DC levels')

for styled_table in (dfxcorrdiff_styled, dfshiftdiff_styled):
    display(styled_table)


## 9. Full workflow for 100 Hz data


In [None]:
# 10.1: Start from fresh copies of the raw 100 Hz and barometer frames.
# NOTE(review): the original description of this step mentions removing
# spikes, but no spike removal is performed here.
df100hz_full = df100hz.copy()
dfbaro_full = dfbaro.copy()

# 10.2: Correct the analog (first two) and digital (last two) air columns for
# height and apply the DC shifts found without temperature correction.
analog_air, digital_air = aircolumns[:2], aircolumns[2:]
barometric_options = dict(
    temperatureCorrect=False,
    heightCorrect=True,
    dcshifts=dcshifts2,
)
dfbaro_dcshifted = LLE.correctBarometricData(
    dfbaro_full, analog_air, transducersDF, **barometric_options
)
df100hz_dcshifted = LLE.correctBarometricData(
    df100hz_full, digital_air, transducersDF, **barometric_options
)

# 10.3: Round timestamps to the minute, merge on them, and plot the air columns.
for frame in (df100hz_dcshifted, dfbaro_dcshifted):
    LLE.round_datetime(frame, freq='min')
dfmerged_dcshifted = LLE.merge_and_drop(
    dfbaro_dcshifted,
    df100hz_dcshifted,
    on='nearestminute',
    drop=False,
)
display(dfmerged_dcshifted)
dfmerged_dcshifted.plot(x='nearestminute', y=aircolumns, ylabel='PSI', style='.')

# 10.4: Convert the merged frame to PSI with air-pressure correction
# (no temperature or depth correction) and show the water-column sensors.
correctedAllSensorsPSI = LLE.rawdf2psidf(
    dfmerged_dcshifted,
    transducersDF,
    temperatureCorrect=False,
    airpressureCorrect=True,
    depthCorrect=False,
)
watercolumns = ['1226419', '1226421', '2151691', '2149882']
display(correctedAllSensorsPSI[watercolumns])

# 10.5: Plot air and water columns together, in PSI.
all_pressure_columns = aircolumns + watercolumns
correctedAllSensorsPSI.plot(x='nearestminute', y=all_pressure_columns, ylabel='PSI', style='.')

# 10.6: convert to water levels in meters
def psi2meters(df, watercolumns):
    """Return a copy of `df` with the selected pressure columns converted to meters.

    Each column of `df` whose name is in `watercolumns` is negated and
    multiplied by 0.703070 (meters per PSI). Columns not listed, and listed
    names that are absent from `df`, are left alone. `df` is not modified.
    """
    meters_per_psi = 0.703070
    converted = df.copy()
    selected = [name for name in df.columns if name in watercolumns]
    for name in selected:
        converted[name] = -converted[name] * meters_per_psi
    return converted

# Convert the barometrically corrected PSI frame to water levels in meters.
# The frame produced by LLE.rawdf2psidf in this cell is named
# correctedAllSensorsPSI; the previous reference to `correctedAllSensors`
# named a variable that is never defined and raised NameError.
correctedAllSensorsMeters = psi2meters(correctedAllSensorsPSI, watercolumns)
correctedAllSensorsMeters.plot(x='nearestminute', y=watercolumns, style='.', ylabel='Meters')


# 10.7: convert to water levels in meters relative to the set depth measured by Steve Krupa
def relative_to_set_depth(df, transducersDF, watercolumns):
    """Return a copy of `df` with each water column offset by its sensor's set depth.

    For every column of `df` named in `watercolumns`, the transducer metadata
    is looked up (and printed), its 'set_depth_ft' value is converted from
    feet to meters (x 0.3048), and that depth is subtracted from the column.
    Other columns are unchanged and `df` itself is not modified.
    """
    feet_to_meters = 0.3048
    shifted = df.copy()
    targets = [name for name in df.columns if name in watercolumns]
    for name in targets:
        metadata = LLE.get_transducer_metadata(name, transducersDF)
        print(metadata)
        set_depth_m = metadata['set_depth_ft'] * feet_to_meters
        shifted[name] = shifted[name] - set_depth_m
    return shifted

# Water levels in meters, offset by each sensor's recorded set depth.
relativeAllSensorsMeters = relative_to_set_depth(
    correctedAllSensorsMeters,
    transducersDF,
    watercolumns,
)
relativeAllSensorsMeters.plot(x='nearestminute', y=watercolumns, ylabel='Meters', style='.')


# 10.8: estimate correct set depths from median of each, and shift by this amount

def estimate_sensor_depths(df, watercolumns):
    """Estimate each sensor's set depth as its column median and remove it.

    The median of every column in `watercolumns` is computed from `df`
    itself (previously a notebook-level global was read instead, so the `df`
    argument was ignored for the estimate), reported in meters and feet, and
    subtracted from that column.

    Parameters
    ----------
    df : pandas.DataFrame
        Water levels in meters; must contain every name in `watercolumns`.
    watercolumns : list of str
        Names of the columns to centre on their median.

    Returns
    -------
    pandas.DataFrame
        A copy of `df` with the listed columns shifted; `df` is not modified.
    """
    medians = df[watercolumns].median()
    df2 = df.copy()
    for k, v in medians.items():
        # 0.3048 m per foot: report the estimate in both units.
        print(f'the estimated set_depth for sensor {k} is {v:.2f} m or {v/0.3048:.2f} ft')
        df2[k] = df[k] - v
    return df2

# Water levels in meters after removing each sensor's median level.
estimatedAllSensorsMeters = estimate_sensor_depths(
    correctedAllSensorsMeters,
    watercolumns,
)
estimatedAllSensorsMeters.plot(x='nearestminute', y=watercolumns, ylabel='Meters', style='.')

