In [1]:
#Import Packages
import pandas as pd
import numpy as np
import os
import itertools
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import re
import matplotlib.pyplot as plt
from PIL import Image
import datetime
import matplotlib
import matplotlib.dates as mdates
from sklearn.linear_model import LinearRegression
import string
import statsmodels.api as sm
from herbie import Herbie
import pickle
import cartopy.crs as ccrs
import cartopy.io.img_tiles as cimgt
import pysolar.solar as solar
from geographiclib.geodesic import Geodesic
import xarray as xr
import pytz
import simplekml
from pylr2 import regress2
import cartopy
import sklearn
import sys
sys.path.append('..')
import funcs.ac_funcs as ac
import funcs.plotting as plotting

# Silence pandas' chained-assignment (SettingWithCopy) warning for the whole notebook.
pd.options.mode.chained_assignment = None

# Re-import edited modules automatically before each cell runs, so changes to
# the local funcs.* helpers are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Aaron Vs Elaine 
UA Summer 2023 Retrievals.\
Aaron retrievals using EGI on UoU CHPC, no airmass correction\
Elaine retrievals at Harvard, with airmass correction

In [None]:
#Define parameters
# NOTE(review): none of these names are read by the cells visible in this notebook
# section; the meanings noted below are inferred from the names -- confirm before relying on them.
base_project_dir = '/uufs/chpc.utah.edu/common/home/u0890904/LAIR_1/'  # same prefix as the hard-coded data paths used below
inst_ids = ['ha','ua']  # presumably instrument IDs ('ha' and 'ua' also appear in the data paths below)
flag = 0  # presumably the quality flag to keep (flag=0 is described as "good" data later on)
resample = 'T'  # presumably a pandas offset alias (minute); 'T' is deprecated in pandas >= 2.2 in favour of 'min'
timezone = 'US/Mountain' 
specs = ['xch4(ppm)','xco2(ppm)','xco(ppb)']  # column names of the retrieved species

In [None]:
#Load the EM27 Data
# Both retrieval sets are read over the same time window so that they can be
# compared point-for-point in the figure that follows.
tz = 'UTC'
dt1_str, dt2_str = '2023-07-08 11:00:00', '2023-07-11 23:59:59'

# NOTE(review): the third positional argument of load_oof_df_inrange is True here;
# its meaning is not visible from this notebook -- confirm against funcs.ac_funcs.

# Aaron's retrievals
aaron_folder = "/uufs/chpc.utah.edu/common/home/u0890904/LAIR_1/Data/EM27_oof/summer_2023/aaron_retrievals/ha/"
aaron_oof_manage = ac.oof_manager(aaron_folder, tz)
aaron_em27_df = aaron_oof_manage.load_oof_df_inrange(dt1_str, dt2_str, True)

# Elaine's retrievals
elaine_folder = "/uufs/chpc.utah.edu/common/home/u0890904/LAIR_1/Data/EM27_oof/summer_2023/elaine_retrievals/ha/"
elaine_oof_manage = ac.oof_manager(elaine_folder, tz)
elaine_em27_df = elaine_oof_manage.load_oof_df_inrange(dt1_str, dt2_str, True)


In [None]:
# One subplot row per species, with Aaron's points in grey and Elaine's in red.
species = ['xch4(ppm)','xco2(ppm)','xco(ppb)']
fig = make_subplots(rows=3, cols=1)

# (legend name, marker colour, source frame), added in this order on every row.
retrieval_traces = (
    ('aaron', 'grey', aaron_em27_df),
    ('elaine', 'red', elaine_em27_df),
)

for row, spec in enumerate(species, start=1):
    # Only the first row contributes legend entries, so each name is listed once.
    showlegend = (row == 1)
    for trace_name, trace_color, trace_df in retrieval_traces:
        scatter = go.Scatter(
            x=trace_df.index,
            y=trace_df[spec],
            mode='markers',
            marker={'color' : trace_color},
            name=trace_name,
            showlegend=showlegend,
        )
        fig.add_trace(scatter, row=row, col=1)
    fig.update_yaxes(title_text=f'{spec}', row=row, col=1)

# Last expression of the cell: update_layout returns the figure, which renders it.
fig.update_layout(height=600, margin=go.layout.Margin(t=1, b=1))

# Airmass Correction Comparison
Both retrieved by Elaine at Harvard\
One with Nasrin's airmass correction\
One without

In [None]:
#Loading the full datasets can take a while
tz = 'UTC'
dt1_str, dt2_str = '2022-05-23 00:00:00', '2022-12-01 00:00:00'

# NOTE(review): the third positional argument of load_oof_df_inrange is False here;
# its meaning is not visible from this notebook -- confirm against funcs.ac_funcs.

# V1 dataset
em27_data_folder_v1 = "/uufs/chpc.utah.edu/common/home/u0890904/LAIR_1/Data/EM27_oof/SLC_EM27_ha_2022_2023_oof_v1/"
oof_manage_v1 = ac.oof_manager(em27_data_folder_v1, tz)
em27_df_v1 = oof_manage_v1.load_oof_df_inrange(dt1_str, dt2_str, False)

# V2 dataset (folder name indicates Nasrin's correction)
em27_data_folder_v2 = "/uufs/chpc.utah.edu/common/home/u0890904/LAIR_1/Data/EM27_oof/SLC_EM27_ha_2022_2023_oof_v2_nasrin_correct/"
oof_manage_v2 = ac.oof_manager(em27_data_folder_v2, tz)
em27_df_v2 = oof_manage_v2.load_oof_df_inrange(dt1_str, dt2_str, False)

## Flag Differences

We examine both V1 and V2 datasets for differences between flags. V2 has many more rows of flag=0 (good) data. All of the rows that are flag=0 in V2 and are NOT flag=0 in V1 are flag=99 in V1. 

In [None]:
# Rows with flag == 0 in each dataset version.
flag0df_v1 = em27_df_v1.loc[em27_df_v1['flag']==0]
flag0df_v2 = em27_df_v2.loc[em27_df_v2['flag']==0]

# Timestamps that are flag=0 in V2 but are not flag=0 in V1.
diff_flag_idxs = flag0df_v2.index.difference(flag0df_v1.index)

# Those timestamps come from V2, so they are guaranteed to exist in em27_df_v2
# but not necessarily in em27_df_v1. Restrict the V1 lookup to labels V1
# actually has; a plain .loc with a missing label raises KeyError.
diff_flags_df_v1 = em27_df_v1.loc[diff_flag_idxs.intersection(em27_df_v1.index)]
diff_flags_df_v2 = em27_df_v2.loc[diff_flag_idxs]

print(f"V1 has {len(flag0df_v1)} rows of flag=0 data")
print(f"V2 has {len(flag0df_v2)} rows of flag=0 data")
# This is the size of the set difference, not len(V2) - len(V1); the two only
# coincide when every V1 flag=0 timestamp is also flag=0 in V2.
print(f"V2 has {len(diff_flag_idxs)} flag=0 timestamps that are not flag=0 in V1")

In [None]:
# Group the rows that are flag=0 in V2 but not in V1 by their V1 flag value.
# count() reports the number of non-null entries per column within each flag group.
diff_flags_df_v1.groupby('flag').count()

In [None]:
# Element-wise V1 - V2 over the flag=0 rows, with the 'spectrum' column removed first.
# The frames are aligned on index and columns, so rows present in only one of
# them come out as NaN.
subtract_diff = flag0df_v1.drop(columns=['spectrum']) - flag0df_v2.drop(columns=['spectrum'])

In [None]:
labsize = 18

# Same window (after 2022-07-20, before 2022-07-23) from both dataset versions.
plotdf_v1 = em27_df_v1.loc[(em27_df_v1.index>'2022-07-20')&
                           (em27_df_v1.index<'2022-07-23')]
plotdf_v2 = em27_df_v2.loc[(em27_df_v2.index>'2022-07-20')&
                           (em27_df_v2.index<'2022-07-23')]

# The window can cover more than one calendar day, so label the axis with the
# first and last date actually present instead of only the first one.
window_start = plotdf_v2.index.min().strftime('%b %d, %Y')
window_end = plotdf_v2.index.max().strftime('%b %d, %Y')
window_label = window_start if window_start == window_end else f'{window_start} - {window_end}'

fig,ax = plt.subplots(figsize = (20,10))
# [10:] drops the first ten rows of each window before plotting.
ax.scatter(plotdf_v1.index[10:],plotdf_v1['xh2o(ppm)'][10:],s=3,c='k',label='V1')
ax.scatter(plotdf_v2.index[10:],plotdf_v2['xh2o(ppm)'][10:],s=3,c='r',label='V2')
ax.set_ylabel('XH2O (ppm)',size = labsize)
ax.tick_params(labelsize = labsize)
ax.legend(fontsize=labsize)
# Ticks carry the day as well as the time: a time-only label is ambiguous when
# the axis spans several days. Times are rendered in the index's own timezone.
ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %d %H:%M %Z', tz = plotdf_v2.index.tz))
# Single x-label call; the original set 'Datetime (UTC)' and then immediately
# overwrote it with the first date.
ax.set_xlabel(window_label,size = labsize)
fig.autofmt_xdate()
plt.show()

In [None]:
labsize = 18

# Scatter of the V1 - V2 XH2O difference over the full flag=0 record.
fig,ax = plt.subplots(figsize = (20,10))
ax.scatter(subtract_diff.index,subtract_diff['xh2o(ppm)'],s=3,c='k')
ax.set_ylabel('V1 - V2 XH2O(ppm) Difference',size = labsize)
ax.tick_params(labelsize = labsize)
# Take the timezone from the frame being plotted rather than from plotdf_v2,
# which belongs to a different cell and may be undefined or stale here.
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d', tz = subtract_diff.index.tz))
# Single x-label call; the original set the label twice and only the second
# value ('Datetime UTC') was ever shown.
ax.set_xlabel('Datetime UTC',size = labsize)
fig.autofmt_xdate()
plt.show()

In [None]:
# Summary statistics of the V1 - V2 differences, without the count and quartile rows.
summary_df = subtract_diff.describe().drop(index=['count','25%','50%','75%'])
#summary_df.to_csv('/uufs/chpc.utah.edu/common/home/u0890904/LAIR_1/Data/v1_v2_subtraction_stats.csv')
summary_df

# Retrievals after NaN fix
Comparing retrievals before and after Jacob's fix for the NaNs in wco2_6073 and CH4_5938.

In [47]:
# HA, 2023-07-10: read the same daily .oof file from three different result folders.
tz = 'US/Mountain'
ha_oof_name = 'ha20230710.vav.ada.aia.oof'
oof_read_opts = {'fullformat': True, 'filter_flag_0': True}

# Test run under gfit_static_cflags
nanfix_static_folder = '/uufs/chpc.utah.edu/common/home/lin-group15/agm/em27/ha/test/gfit_static_cflags/original_met/daily/20230710'
nanfix_static_oofmanage = ac.oof_manager(nanfix_static_folder, tz)
nanfix_static_df = nanfix_static_oofmanage.df_from_oof(ha_oof_name, **oof_read_opts)

# Test run under gfit_new_cflags
nanfix_new_folder = '/uufs/chpc.utah.edu/common/home/lin-group15/agm/em27/ha/test/gfit_new_cflags/original_met/daily/20230710'
nanfix_new_oofmanage = ac.oof_manager(nanfix_new_folder, tz)
nanfix_new_df = nanfix_new_oofmanage.df_from_oof(ha_oof_name, **oof_read_opts)

# Existing results folder (plotted as 'with_nans' in the next cell)
nans_folder = '/uufs/chpc.utah.edu/common/home/lin-group15/agm/em27/ha/results/daily/20230710'
nans_oofmanage = ac.oof_manager(nans_folder, tz)
nans_df = nans_oofmanage.df_from_oof(ha_oof_name, **oof_read_opts)

In [52]:
# HA: overlay the 'no_nans' and 'with_nans' retrievals, one subplot row per species.
species = ['xch4(ppm)','xco2(ppm)']
fig = make_subplots(rows=len(species), cols=1, shared_xaxes=True)

# (legend name, source frame, marker style), added in this order on every row.
nanfix_traces = [
    ('no_nans', nanfix_static_df, {'color' : 'grey','size':8}),
    # Disabled trace for the gfit_new_cflags run:
    # ('nanfix_static', nanfix_new_df, {'color' : 'blue','size':4}),
    ('with_nans', nans_df, {'color' : 'red','size':3}),
]

for row, spec in enumerate(species, start=1):
    # Only the first row contributes legend entries, so each name is listed once.
    showlegend = (row == 1)
    for trace_name, trace_df, trace_marker in nanfix_traces:
        scatter = go.Scatter(
            x=trace_df.index,
            y=trace_df[spec],
            mode='markers',
            marker=trace_marker,
            name=trace_name,
            showlegend=showlegend,
        )
        fig.add_trace(scatter, row=row, col=1)
    fig.update_yaxes(title_text=f'{spec}', row=row, col=1)

# Last expression of the cell: update_layout returns the figure, which renders it.
fig.update_layout(
    height=500,
    #margin = go.layout.Margin(t=1,b=1),
    title=dict(text="HA", y=0.85, x=0.5, xanchor='center', yanchor='top'),
)


In [51]:
# HA: difference between the NaN-fixed run and the original results
# (nanfix_static_df - nans_df), aligned on index and columns, 'spectrum' excluded.
# NOTE: this reuses the names subtract_diff / summary_df from the V1-vs-V2
# section; re-running those earlier plot cells after this one would use this data.
subtract_diff = nanfix_static_df.drop(['spectrum'],axis = 1).subtract(nans_df.drop(['spectrum'],axis=1))

# Keep only mean/std/min/max, then only the columns where at least one of those
# statistics is non-zero, i.e. the columns that actually changed.
summary_df = subtract_diff.describe().drop(['count','25%','50%','75%'])
changed_cols = (summary_df != 0.0).any(axis=0)

# add_suffix labels every column in one step. The previous per-column rename
# loop would rename a column twice if a freshly renamed label matched another
# existing column name.
sdf = summary_df.loc[:, changed_cols].add_suffix('_diff')
sdf

Unnamed: 0,xch4(ppm)_diff,xch4(ppm)_error_diff,xwco2(ppm)_diff,xwco2(ppm)_error_diff
mean,0.004275,-0.000169,-1.095246,-0.438409
std,0.001068,4.6e-05,1.091877,0.116736
min,0.0001,-0.0002,-5.83,-0.62
max,0.0082,-0.0001,4.19,-0.18


In [53]:
# UA, 2023-07-25: read the same daily .oof file from the test and the existing results folders.
tz = 'US/Mountain'
ua_oof_name = 'ua20230725.vav.ada.aia.oof'
oof_read_opts = {'fullformat': True, 'filter_flag_0': True}

# Test results folder (plotted as 'no_nans' in the next cell)
nanfix_static_folder = '/uufs/chpc.utah.edu/common/home/lin-group15/agm/em27/ua/test/test_results/daily/20230725'
nanfix_static_oofmanage = ac.oof_manager(nanfix_static_folder, tz)
nanfix_static_df = nanfix_static_oofmanage.df_from_oof(ua_oof_name, **oof_read_opts)

# Existing results folder (plotted as 'with_nans' in the next cell)
nans_folder = '/uufs/chpc.utah.edu/common/home/lin-group15/agm/em27/ua/results/daily/20230725'
nans_oofmanage = ac.oof_manager(nans_folder, tz)
nans_df = nans_oofmanage.df_from_oof(ua_oof_name, **oof_read_opts)

In [57]:
# UA: overlay the 'no_nans' and 'with_nans' retrievals, one subplot row per species.
species = ['xch4(ppm)','xco2(ppm)']
fig = make_subplots(rows=len(species), cols=1, shared_xaxes=True)

# (legend name, source frame, marker style), added in this order on every row.
nanfix_traces = [
    ('no_nans', nanfix_static_df, {'color' : 'grey','size':8}),
    # Disabled trace. NOTE: the UA load cell does not define nanfix_new_df, so
    # enabling this as-is would plot whatever an earlier cell left in that name.
    # ('nanfix_static', nanfix_new_df, {'color' : 'blue','size':4}),
    ('with_nans', nans_df, {'color' : 'red','size':3}),
]

for row, spec in enumerate(species, start=1):
    # Only the first row contributes legend entries, so each name is listed once.
    showlegend = (row == 1)
    for trace_name, trace_df, trace_marker in nanfix_traces:
        scatter = go.Scatter(
            x=trace_df.index,
            y=trace_df[spec],
            mode='markers',
            marker=trace_marker,
            name=trace_name,
            showlegend=showlegend,
        )
        fig.add_trace(scatter, row=row, col=1)
    fig.update_yaxes(title_text=f'{spec}', row=row, col=1)

# Last expression of the cell: update_layout returns the figure, which renders it.
fig.update_layout(
    height=500,
    #margin = go.layout.Margin(t=1,b=1),
    title=dict(text="UA", y=0.85, x=0.5, xanchor='center', yanchor='top'),
)


In [45]:
# UA: nanfix_static_df - nans_df, aligned on index and columns, 'spectrum' excluded.
subtract_diff = nanfix_static_df.drop(columns=['spectrum']) - nans_df.drop(columns=['spectrum'])

# Keep mean/std/min/max only, then show just the columns where any of them is non-zero.
summary_df = subtract_diff.describe().drop(index=['count','25%','50%','75%'])
has_nonzero_stat = (summary_df != 0.0).any(axis=0)
summary_df.loc[:, has_nonzero_stat]


Unnamed: 0,xch4(ppm),xch4(ppm)_error,xwco2(ppm),xwco2(ppm)_error
mean,0.005088,-0.000109,-0.484688,-0.201489
std,0.000772,4.3e-05,0.5501,0.021216
min,0.0024,-0.0002,-4.72,-0.52
max,0.0105,0.0,6.0,-0.16
