In [None]:
#Import Packages
import pandas as pd
import numpy as np
import os
import itertools
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import re
import matplotlib.pyplot as plt
from PIL import Image
import datetime
import matplotlib
import matplotlib.dates as mdates
from sklearn.linear_model import LinearRegression
import string
import statsmodels.api as sm
from herbie import Herbie
import pickle
import cartopy.crs as ccrs
import cartopy.io.img_tiles as cimgt
import pysolar.solar as solar
from geographiclib.geodesic import Geodesic
import xarray as xr
import pytz
import simplekml
from pylr2 import regress2
import cartopy
import sklearn
import sys
sys.path.append('..')
import funcs.ac_funcs as ac

pd.options.mode.chained_assignment = None

%load_ext autoreload
%autoreload 2

In [None]:
# Define functions needed for quantile method
def get_excess(df,col_list,quantile):
    """Return a copy of ``df`` with quantile-excess columns appended.

    The input frame is copied before anything is added, so the caller's
    ``df`` is not modified. The copy is handed to ``subtract_quantile``,
    which adds one new column per entry of ``col_list``.

    Args:
        df: DataFrame containing the columns named in ``col_list``.
        col_list: Iterable of column labels to process.
        quantile: Quantile value forwarded unchanged to ``subtract_quantile``.

    Returns:
        The copied DataFrame, as returned by ``subtract_quantile``.
    """
    return subtract_quantile(df.copy(), col_list, quantile)

def subtract_quantile(df,col_list,quantile):
    """Add one quantile-excess column to ``df`` for every listed column.

    For each column in ``col_list`` the column's own ``quantile`` value
    (from ``get_col_quantile``) is subtracted from every row and the result
    is stored under the label produced by ``quant_col_label``.

    Note that ``df`` is modified in place; the same object is returned.

    Args:
        df: DataFrame to extend.
        col_list: Iterable of column labels present in ``df``.
        quantile: Quantile passed to both helper functions.

    Returns:
        ``df`` itself, now carrying the additional excess columns.
    """
    for source_col in col_list:
        baseline = get_col_quantile(df, source_col, quantile)
        excess_label = quant_col_label(source_col, quantile)
        df[excess_label] = df[source_col] - baseline
    return df

def quant_col_label(col,quantile):
    """Build the column label used for a quantile-excess column.

    The label has the form ``'<col>_ex<P>q'`` where ``P`` is the quantile
    expressed as a whole-number percentage (e.g. ``0.01`` -> ``'..._ex1q'``).

    Args:
        col: Label of the source column.
        quantile: Quantile as a fraction (e.g. ``0.01`` for the 1st percentile).

    Returns:
        The label string for the excess column.
    """
    # quantile*100 can land just below the intended integer in binary floating
    # point (0.29*100 == 28.999999999999996), which plain int() would truncate
    # to the wrong percentage. Rounding away that noise first keeps the
    # original truncating behaviour for genuinely fractional percentages.
    percent = int(round(quantile * 100, 9))
    return f'{col}_ex{percent}q'

def get_col_quantile(df,col,quantile):
    """Return the requested quantile of a single DataFrame column.

    Args:
        df: DataFrame holding the column.
        col: Label of the column to summarise.
        quantile: Quantile passed straight to the column's ``quantile`` method.

    Returns:
        Whatever ``df[col].quantile(quantile)`` yields for the column.
    """
    column_values = df[col]
    return column_values.quantile(quantile)

def rmv_prep(str):
    """Drop everything up to and including the first underscore.

    Args:
        str: Input string (the parameter name shadows the builtin and is kept
            only so existing keyword callers continue to work).

    Returns:
        The text after the first ``'_'``; an empty string when the input
        contains no underscore.
    """
    _prefix, _sep, remainder = str.partition('_')
    return remainder

def get_ratio(df,idx,min_beforafter,type):
    """Regress one excess species against another in a time window around ``idx``.

    Rows of ``df`` whose index lies within ``min_beforafter`` minutes before or
    after ``idx`` (both ends inclusive) are selected and handed to
    ``ac.lin_regress_2`` together with the pair of ``*_ex1q`` columns that
    belongs to ``type``.

    Args:
        df: DataFrame with a datetime-like index and the ``*_ex1q`` excess
            columns for xco2(ppm), xch4(ppm) and xco(ppb).
        idx: Centre of the window; must be comparable with ``df.index``.
        min_beforafter: Half-width of the window in minutes.
        type: One of ``'ch4_co2'``, ``'ch4_co'`` or ``'co_co2'`` (the name
            shadows the builtin and is kept for caller compatibility).

    Returns:
        Tuple ``(slope, r2, nobs)`` where ``slope`` and ``r2`` are read from the
        regression result and ``nobs`` is the number of window rows with no
        missing values in any column. ``(nan, nan, nan)`` is returned when
        there are no such rows or when the regression raises an ordinary error.

    Raises:
        ValueError: If ``type`` is not one of the supported ratio names
            (previously this silently returned ``None``).
    """
    # Column pair forwarded to ac.lin_regress_2 as its 2nd and 3rd arguments
    # (presumably x then y -- confirm against ac.lin_regress_2).
    regression_cols = {
        'ch4_co2': ('xco2(ppm)_ex1q','xch4(ppm)_ex1q'),
        'ch4_co': ('xco(ppb)_ex1q','xch4(ppm)_ex1q'),
        'co_co2': ('xco2(ppm)_ex1q','xco(ppb)_ex1q'),
    }
    if type not in regression_cols:
        raise ValueError(f'Unknown ratio type {type!r}; expected one of {sorted(regression_cols)}')

    half_window = datetime.timedelta(minutes=min_beforafter)
    dt1 = idx-half_window
    dt2 = idx+half_window
    minidf = df.loc[(df.index>=dt1)&(df.index<=dt2)]

    # Count complete rows once; it is both the emptiness check and the nobs value.
    n_complete = len(minidf.dropna())
    if n_complete == 0:
        return np.nan,np.nan,np.nan

    first_col, second_col = regression_cols[type]
    try:
        linregress = ac.lin_regress_2(minidf,first_col,second_col)
        return linregress['slope'],linregress['r2'],n_complete
    except Exception:
        # Best effort: a failed regression yields NaNs for this window. Catching
        # Exception (not a bare except) lets KeyboardInterrupt/SystemExit through
        # so a long-running loop over many windows can still be interrupted.
        return np.nan,np.nan,np.nan

In [None]:
# Root directory of the project; the data and output paths built in this notebook are joined onto it.
base_project_dir = '/uufs/chpc.utah.edu/common/home/u0890904/LAIR_1'

In [None]:
#Load one instrument's EM27 oof data, compute daily quantile-excess columns and windowed
#species ratios, then merge the result with the WBB met data
inst_details = {'ha':os.path.join(base_project_dir,'Data/EM27_oof/SLC_EM27_ha_2022_2023_oof_v2_nasrin_correct'),
                'ua':os.path.join(base_project_dir,'Data/EM27_oof/summer_2023/elaine_retrievals/ua')}
filter_flag_0 = True #set to True if we want to filter bad spectra
resample = '5T' #this will resample to that level -- needed to put oof and met on a common time grid
timezone = 'US/Mountain'  #timezone within which to load the dataframes
specs = ['xch4(ppm)','xco2(ppm)','xco(ppb)'] #these are the species we compute excess values and ratios for
quantile = 0.01 #NOTE: get_ratio reads the hard-coded '*_ex1q' columns, so it only works with quantile = 0.01
min_beforeafter = 30 #half-width (minutes) of the window used for each ratio regression
inst_id = 'ha'

dt1 = ac.dtstr_to_dttz('2022-05-01 00:00:00',timezone) #get the start and end of the range
dt2 = ac.dtstr_to_dttz('2024-01-01 00:00:00',timezone)

#Load oof
data_folder = inst_details[inst_id] #the data folder is defined in the instrument details dictionary
my_oof_manager = ac.oof_manager(data_folder,timezone) #create the oof manager for that instrument
oof_df = my_oof_manager.load_oof_df_inrange(dt1,dt2,filter_flag_0=filter_flag_0,cols_to_load=specs) #load the data in the datetime range
if resample is not None: #if there is a resample value
    oof_df = oof_df.resample(resample).mean(numeric_only = True) #resample to that value by mean
daily_dfs = [part for _, part in oof_df.dropna(how='all').groupby(pd.Grouper(freq='1D')) if not part.empty] #parse into a list of daily dataframes

#Compute the excess over each day's own quantile, then stitch the days back together.
#All daily frames are concatenated in one call rather than growing a DataFrame inside a loop.
excess_frames = [get_excess(day_df[specs],specs,quantile) for day_df in daily_dfs]
oof_with_ex = pd.concat(excess_frames) if excess_frames else pd.DataFrame()
if resample is not None: #if there is a resample value
    oof_with_ex = oof_with_ex.resample(resample).mean(numeric_only = True) #resample to that value by mean

#Add ratios
#Every result is collected first and the columns are attached only after the loop, so that
#get_ratio always sees the same set of columns (its dropna() looks at every column present).
ratio_types = ['ch4_co2','ch4_co','co_co2']
ratio_results = {ratio_type: [] for ratio_type in ratio_types}
n_rows = len(oof_with_ex)
for iloc, idx in enumerate(oof_with_ex.index):
    if iloc % 10000 == 0: #occasional progress report instead of one printed line per row
        print(f'{iloc}/{n_rows}')
    for ratio_type in ratio_types:
        ratio_results[ratio_type].append(get_ratio(oof_with_ex,idx,min_beforeafter,ratio_type))

#Each get_ratio result is (slope, r2, nobs); every column is built from its own ratio's results
#(previously co_co2_r2 and co_co2_nobs were filled from the ch4_co2 lists by mistake).
for ratio_type in ratio_types:
    for position, suffix in enumerate(['slope','r2','nobs']):
        oof_with_ex[f'{ratio_type}_{suffix}'] = [result[position] for result in ratio_results[ratio_type]]
oof_with_ex = oof_with_ex.dropna(how='all')

#Load all of the met data
mlg = ac.met_loader_ggg(os.path.join(base_project_dir,'Data/met/wbb/daily_txt_gggformat/')) #same path as before, now built from base_project_dir
wbb_met_df = mlg.load_data_inrange(dt1,dt2)
wbb_met_df.index = wbb_met_df.index.tz_convert(timezone)
wbb_met_df = wbb_met_df[['pres','temp','rh','u','v']].resample(resample).mean(numeric_only = True)
wbb_met_df['ws'],wbb_met_df['wd'] = np.vectorize(ac.uv_to_wdws)(wbb_met_df['u'],wbb_met_df['v'])

#Merge met and oof
merged_oof_met = pd.concat([wbb_met_df[['ws','wd','u','v']],oof_with_ex],axis = 1)
merged_oof_met.index.name = 'dt'

In [None]:
# Export the merged met/oof table for use in R: values rounded to 5 decimals, the 'dt' index
# turned into a regular column, and only rows without any missing value kept.
fname = f'{inst_id}_202205_202311_{resample}_ratios.csv'
csv_path = os.path.join(base_project_dir,'Data/csv_for_r/',fname)
complete_rows = merged_oof_met.round(5).reset_index().dropna()
complete_rows.to_csv(csv_path,index=False)

In [None]:
# Stacked marker plots (shared x axis) of the complete rows after 2023-07-01:
# CH4 excess, CH4:CO2 slope, wind speed and wind direction, one panel each.
plot_df = merged_oof_met.loc[merged_oof_met.index>'2023-07-01'].dropna()
panel_cols = ['xch4(ppm)_ex1q','ch4_co2_slope','ws','wd']

fig = make_subplots(rows = len(panel_cols),cols = 1,shared_xaxes=True)

for panel_row, panel_col in enumerate(panel_cols, start=1):
    panel_trace = go.Scatter(
        x=plot_df.index,
        y = plot_df[panel_col],
        mode = 'markers',
        marker_size = 3
    )
    fig.add_trace(panel_trace,row = panel_row,col = 1)
    # The y-axis title of each panel is simply the plotted column's name.
    fig.update_yaxes(title_text=panel_col, row=panel_row, col=1)

fig.update_layout(height = 700)
fig.show()