In [None]:
import sys
import glob
import os
import pandas as pd
import numpy as np
import scipy.stats as st
import xarray as xr
import cartopy.crs as ccrs
import cartopy

import io
import cmdstanpy
import arviz as az
from IPython.display import Image

import bokeh
from bokeh.plotting import figure, output_file, show
from bokeh.layouts import gridplot, row, column
import bokeh.io
import bokeh.plotting
from bokeh import palettes
from bokeh.models import Legend
from scipy import odr
import matplotlib.pyplot as plt
from matplotlib import colors


from bokeh.palettes import Spectral6
from bokeh.models import ColorBar, ColumnDataSource
from bokeh.transform import linear_cmap

from IPython.display import Image
from datetime import datetime, timedelta

bokeh.io.output_notebook()
import holoviews as hv
import bebi103
hv.extension('bokeh')

In [None]:
# Flask table that accompanies the STILT runs (per the file name: NOAA, summer 2017 to summer 2020).
noaa_csv_path = '/Users/arianatribby/git/oklahoma_propane/data/windrose/noaa_summer2017to2020summer_forstilt_withflasks.csv'
noaa = pd.read_csv(noaa_csv_path)

In [None]:
# Emissions dataset that is later multiplied against the footprints.
emissions_nc_path = '/Users/arianatribby/git/oklahoma_propane/data/processing/wellhead_emiss_for_footprint.nc4'
emissions_1degby1deg = xr.open_dataset(emissions_nc_path)

Load the footprints. The STILT runs are currently skipping some days, so those days are dropped from the analysis for now.

In [None]:
# Disabled scratch code: opens two individual footprint files and concatenates them along 'time'.
# footprints_alldays1 = xr.open_mfdataset('/Users/arianatribby/git/oklahoma_propane/data/stilt_hpc_downloads/noaa_flasknair2017_2020/201706081920_-97.489_36.607_60_foot.nc')

# footprints_alldays2 = xr.open_mfdataset('/Users/arianatribby/git/oklahoma_propane/data/stilt_hpc_downloads/noaa_flasknair2017_2020/201706151921_-97.489_36.607_60_foot.nc')

# xr.concat([footprints_alldays1,footprints_alldays2],dim='time')

I use `xr.concat` to combine all of the datasets instead of `open_mfdataset`, because `open_mfdataset` (at least in the newer xarray version installed in this environment) uses both `xr.merge` and `xr.concat`. The merge step does not allow non-monotonic sequences, which is the case here for the time coordinate. `xr.concat` simply appends each dataset along the chosen dimension, so it works, and it also keeps the lon/lat dimensions.

In [None]:
# Open every footprint file and stack them along 'time'.
# glob.glob() returns paths in a filesystem-dependent order, but later cells select entries by
# position (indlist) in step with the rows of `noaa`, so the paths are sorted to make the
# concatenation order deterministic across machines and re-runs.
# NOTE(review): confirm that the positions in `indlist` were chosen against this sorted order.
footprint_paths = sorted(glob.glob('/Users/arianatribby/git/oklahoma_propane/data/stilt_hpc_downloads/noaa_flasknair2017_2020/*.nc'))
footprints_alldays = xr.concat([xr.open_dataset(path) for path in footprint_paths], dim='time')


In [None]:
# Positions to keep: 0-30, 38-71 and 74-138 (positions 31-37 and 72-73 are left out).
indlist = [*range(0, 31), *range(38, 72), *range(74, 139)]

In [None]:
# Apply the same positional selection to both objects so that entry i of the
# footprint stack and row i of the flask table are picked together.
subset_footprints = footprints_alldays.isel({'time': indlist})
noaa_subset = noaa.iloc[indlist]

In [None]:
def timeforsel(timestr, hours2subtract, timestr_format):
    """Parse a timestamp string and move it back by a number of hours.

    Parameters
    ----------
    timestr : str
        Timestamp text to parse.
    hours2subtract : int or float
        Number of hours to subtract from the parsed time.
    timestr_format : str
        ``datetime.strptime`` format string describing ``timestr``.

    Returns
    -------
    numpy.datetime64
        The shifted time, built from the ``'YYYY-MM-DD HH:MM:SS'``-style text
        of the shifted datetime.
    """
    parsed = datetime.strptime(timestr, timestr_format)
    shifted = parsed - timedelta(hours=hours2subtract)
    # isoformat(sep=' ') yields the same text as str() on a datetime.
    return np.datetime64(shifted.isoformat(sep=' '))

In [None]:
# One list per look-back offset (12 h to 72 h in 6-hour steps); each list holds every
# noaa_subset.dt_time entry shifted back by that many hours.
_flask_time_format = '%Y-%m-%d %H:%M:%S'


def _flask_times_minus(hours):
    """Return each ``noaa_subset.dt_time`` entry moved back by ``hours`` hours."""
    return [timeforsel(stamp, hours, _flask_time_format) for stamp in noaa_subset.dt_time]


twelvehour = _flask_times_minus(12)
eighteenhour = _flask_times_minus(18)
twentyfourhour = _flask_times_minus(24)
thirtynhour = _flask_times_minus(30)  # name kept as-is in case other cells reference it
thirtysixhour = _flask_times_minus(36)
fortytwohour = _flask_times_minus(42)
fortyeighthour = _flask_times_minus(48)
fiftyfourhour = _flask_times_minus(54)
sixtyhour = _flask_times_minus(60)
sixtysixhour = _flask_times_minus(66)
seventytwohour = _flask_times_minus(72)

In [None]:
# Map of the footprint summed over the "sample time minus 12 hours" time stamps.
plotdf = footprints_alldays.sel(time=twelvehour).sum(dim='time')

# Cells that are exactly zero cannot be placed on a log colour scale; blank them with NaN.
mask = plotdf.foot.values == 0.
signal4log = xr.where(mask, np.nan, plotdf.foot.values)
log_norm = colors.LogNorm(vmin=np.nanmin(signal4log), vmax=np.nanmax(signal4log))

f, ax = plt.subplots(subplot_kw={'projection': ccrs.PlateCarree()})
f.set_size_inches(7, 7)

p = ax.pcolormesh(plotdf.lon, plotdf.lat, signal4log, norm=log_norm)

plt.title('NOAA flask all summer days time - 12 hours')
plt.colorbar(p)
ax.add_feature(cartopy.feature.STATES)
plt.show()

In [None]:
# Map of the footprint summed over every entry along 'time'.
plotdf = footprints_alldays.sum(dim='time')

# Cells that are exactly zero cannot be placed on a log colour scale; blank them with NaN.
mask = plotdf.foot.values == 0.
signal4log = xr.where(mask, np.nan, plotdf.foot.values)
log_norm = colors.LogNorm(vmin=np.nanmin(signal4log), vmax=np.nanmax(signal4log))

f, ax = plt.subplots(subplot_kw={'projection': ccrs.PlateCarree()})
f.set_size_inches(7, 7)

p = ax.pcolormesh(plotdf.lon, plotdf.lat, signal4log, norm=log_norm)

plt.title('NOAA flask all summer days time : 1-72 hours')
plt.colorbar(p)
ax.add_feature(cartopy.feature.STATES)
plt.show()

# Prior Visualization

Zhang et al. (the study our emissions are derived from) is based on TROPOMI observations from May 2018 to March 2019, so I am assuming the emissions are fairly accurate, i.e. the prior on the scaling factor is centered at 1.

In [None]:
# Prior on the scaling factor: normal density with mean 1 and standard deviation 0.3,
# evaluated on the default np.linspace grid over [0, 2].
fh = 300
fw = 300
scale_grid = np.linspace(0, 2)
prior_pdf = st.norm.pdf(scale_grid, 1, .3)

p = bokeh.plotting.figure(frame_height=fh, frame_width=fw, title='')
p.line(scale_grid, prior_pdf, line_width=2, color='black')

p.xaxis.axis_label = "x (scaling factor, unitless)"
p.yaxis.axis_label = "PDF"

# Both axes share the same text and tick styling, so set it once through p.axis.
p.axis.axis_label_text_font_size = "16pt"
p.axis.major_label_text_font_size = "15pt"
p.axis.major_tick_line_width = 3
p.axis.axis_label_text_font_style = 'bold'

bokeh.io.show(p)

The uncertainty in mole fraction is currently estimated to be 50%. This parameter combines the TCCON/flask observation uncertainty with the STILT ppb uncertainty (which depends on the meteorological uncertainty due to winds).

In [None]:
# Largest CH4 anomaly in the flask table. Series.max() skips missing values, whereas the
# builtin max() gives an order-dependent result when the column contains NaN.
noaa['ch4_anomaly'].max()

In [None]:
# Per-species conversion factors: (molar mass * 10**8) / 1000.
# 10**9 would be the ppb conversion factor; because these are applied to anomalies the
# original analysis lowers it by a factor of 10, hence 10**8.
# NOTE(review): the factor-of-10 reduction is carried over from the original comment, and the
# division by 1000 is presumably g -> kg -- confirm both.
_anomaly_ppb_scale = 10**8
_mass_divisor = 1000

fluxperkg_toC3ppb = (44.097*_anomaly_ppb_scale)/_mass_divisor  # propane, C3H8
fluxperkg_toC2ppb = (30.07*_anomaly_ppb_scale)/_mass_divisor   # ethane, C2H6
fluxperkg_toC1ppb = (16.04*_anomaly_ppb_scale)/_mass_divisor   # methane, CH4

In [None]:
# Inspect the footprint dataset collapsed over the 'time' dimension.
footprints_alldays.sum('time')

In [None]:
# Inspect the 'ch4_kgs' variable of the emissions dataset.
emissions_1degby1deg['ch4_kgs']

In [None]:
# Inspect the time-summed footprint multiplied by the 'ch4_kgs' emissions variable.
footprint_time_total = footprints_alldays.sum(dim='time')
footprint_time_total * emissions_1degby1deg['ch4_kgs']

In [None]:
# Map of the time-summed footprint multiplied by the CH4 emissions and the C1 conversion factor.
f, ax = plt.subplots(subplot_kw=dict(projection=ccrs.PlateCarree()))
f.set_figwidth(7)
f.set_figheight(7)
plotdf = footprints_alldays.sum(dim='time')*emissions_1degby1deg['ch4_kgs']*fluxperkg_toC1ppb

# Cells that are exactly zero cannot be placed on a log colour scale; blank them with NaN.
mask = plotdf.foot.values == 0.
signal4log = xr.where(mask, np.nan, plotdf.foot.values)

p = ax.pcolormesh(plotdf.lon, plotdf.lat, signal4log,
                  norm=colors.LogNorm(vmin=np.nanmin(signal4log), vmax=np.nanmax(signal4log)))

# The previous title was copied from the footprint-only map; this figure shows the
# footprint weighted by emissions, so the title now says so.
plt.title('NOAA flask all summer days: footprint x CH4 emissions, 1-72 hours')
plt.colorbar(p)
ax.add_feature(cartopy.feature.STATES)
plt.show()

In [None]:
# NOTE(review): despite the name, no 50% factor is applied here -- this is simply the
# 'ch4_kgs' variable of the emissions dataset. Confirm whether a scaling is still to be added.
the_fifty_percent_error_est = emissions_1degby1deg['ch4_kgs']

In [None]:
# Line plot of plane_df['sigma_likelihood'] against plane_df.tropht.
# NOTE(review): `plane_df` is not defined in any cell shown above this one, so this cell
# fails on a fresh kernel unless it is created elsewhere -- confirm where it comes from.
# NOTE(review): the x data is `tropht` while the axis label describes sigma in ppb -- confirm
# that the intended column is plotted.
fh = 300
fw = 300
p = bokeh.plotting.figure(frame_height=fh, frame_width=fw, title='')
# `size` is a marker property and is rejected by the line glyph; line thickness is set with
# `line_width` (2 matches the prior plot earlier in this notebook).
p.line(plane_df.tropht.values, plane_df['sigma_likelihood'].values, line_width=2, color='black')
p.xaxis.axis_label = "σ (Variability in ppb)"
p.yaxis.axis_label = "PDF"
p.xaxis.axis_label_text_font_size = "16pt"
p.yaxis.axis_label_text_font_size = "16pt"
p.xaxis.major_label_text_font_size = "15pt"
p.yaxis.major_label_text_font_size = "15pt"
p.xaxis.major_tick_line_width = 3
p.yaxis.major_tick_line_width = 3
p.axis.axis_label_text_font_style = 'bold'
bokeh.io.show(p)

# Model with summing all .1x.1 as a single grid cell
## Over entire 3 days 
Multiply the emissions (in kg/s) by the length of the time window the inversion covers (72 hours, expressed in seconds).

In [None]:
# Domain-summed emissions (and their uncertainties) multiplied by the 72-hour window length
# in seconds. The multiplication order (value * hours * seconds-per-hour) is kept as in the
# original so the floating-point results are unchanged.
# NOTE(review): after multiplying a kg/s quantity by seconds the results would be in kg,
# although the variable names still end in `_kgs` -- confirm the intended units.
hour_totalsecs = 60*60
inversion_hours = 72


def _window_total(varname):
    """Sum ``emissions_1degby1deg[varname]`` over all cells and scale by the window length."""
    return emissions_1degby1deg[varname].sum().values*inversion_hours*hour_totalsecs


ch4_E_kgs = _window_total('ch4_kgs')
ch4_E_kgs_unc = _window_total('ch4_kgs_unc')

c3h8_E_kgs = _window_total('c3h8_kgs')
c3h8_E_kgs_unc = _window_total('c3h8_kgs_unc')

c2h6_E_kgs = _window_total('c2h6_kgs')
c2h6_E_kgs_unc = _window_total('c2h6_kgs_unc')