In [None]:

import os
import yaml
import datetime
import numpy as np
import pandas as pd
import xarray as xr
import colorcet as cc
import cartopy.crs as ccrs
import matplotlib.pyplot as plt
import matplotlib.dates as mdates


from period1.utils import plot_field, load_obj, sum_total_emissions, count_obs_in_mask

In [None]:

def to_datetime_list(date_str_list):
    """Convert an iterable of ``YYYYMMDD`` dates into ``datetime`` objects.

    Each item is passed through ``str()`` before parsing, so integer dates
    such as ``20230101`` are accepted as well as strings.

    Args:
        date_str_list: iterable of dates written as ``YYYYMMDD``.

    Returns:
        list of ``datetime.datetime`` objects, in the same order as the input.

    Raises:
        ValueError: if an item does not match the ``%Y%m%d`` format.
    """
    parsed_dates = []
    for raw_date in date_str_list:
        parsed_dates.append(datetime.datetime.strptime(str(raw_date), "%Y%m%d"))
    return parsed_dates

In [None]:
# Read the configuration file. The path below is the one used on AWS;
# update it when running anywhere else.
config_path = "/home/ubuntu/integrated_methane_inversion/config.yml"

# Open the file in a context manager so the handle is closed once parsing is done.
# NOTE(review): FullLoader can build more than plain scalars/lists/dicts; consider
# yaml.safe_load if config.yml only contains plain data.
with open(config_path) as config_file:
    config = yaml.load(config_file, Loader=yaml.FullLoader)

In [None]:
# Load the state vector file into memory and pull out the element labels
state_vector_filepath = './../StateVector.nc'
state_vector = xr.load_dataset(state_vector_filepath)
state_vector_labels = state_vector['StateVector']

# The region of interest ends nBufferClusters labels below the largest
# (NaN-ignoring) label found in the state vector
last_ROI_element = int(
    np.nanmax(state_vector_labels.values) - config['nBufferClusters']
)

# Boolean mask selecting the cells whose label lies inside the region of interest
mask = state_vector_labels <= last_ROI_element

In [None]:
# Paths to prior emissions, inversion results, GEOS/satellite data, posterior simulation
# Get the current directory
cwd = os.getcwd()

# Extract the start and end dates of each period (first and second CSV columns)
periods_df = pd.read_csv('./../periods.csv')
start_dates = periods_df.iloc[:, 0].tolist()
end_dates = periods_df.iloc[:, 1].tolist()
num_periods = len(start_dates)

# One HEMCO diagnostics file per period, named after the period's start date
prior_prefix = f'./../jacobian_runs/{config["RunName"]}_0000/OutputDir/HEMCO_diagnostics.'
prior_paths = [f'{prior_prefix}{date}0000.nc' for date in start_dates]


def _period_dir_sort_key(dir_name):
    """Sort key: the integer formed by the digits in ``dir_name``, then the name itself."""
    digits = ''.join(ch for ch in dir_name if ch.isdecimal())
    return (int(digits) if digits else -1, dir_name)


# Result directories are the sub-directories of the working directory.
# os.listdir() returns entries in arbitrary order and also reports hidden and
# cache folders (e.g. .ipynb_checkpoints, __pycache__), so those are skipped and
# the rest are ordered by the number embedded in their name (e.g. period2 before
# period10). Later cells pair these results with the rows of periods.csv by index.
# NOTE(review): assumes the directory numbering follows the row order of
# periods.csv -- confirm.
results_dir_names = sorted(
    (
        name
        for name in os.listdir(cwd)
        if os.path.isdir(os.path.join(cwd, name)) and not name.startswith(('.', '__'))
    ),
    key=_period_dir_sort_key,
)
results_prefixes = [f"./{name}/" for name in results_dir_names]

# Per-period files and folders inside each result directory
results_paths = [prefix + "gridded_posterior.nc" for prefix in results_prefixes]
satdat_dirs = [prefix + "data_converted" for prefix in results_prefixes]
inversion_result_paths = [prefix + "inversion_result.nc" for prefix in results_prefixes]
posterior_dirs = [prefix + "data_converted_posterior" for prefix in results_prefixes]
visualization_dirs = [prefix + "data_visualization" for prefix in results_prefixes]
posterior_viz_dirs = [prefix + "data_visualization_posterior" for prefix in results_prefixes]

In [None]:
# Set latitude/longitude bounds for plots

# Trim 1-2.5 degrees (four grid cells on each side) to remove GEOS-Chem buffer zone.
# config['Res'] is written as '<lat step>x<lon step>'.
if config['Res'] == '0.25x0.3125':
    degx = 4 * 0.3125
    degy = 4 * 0.25
elif config['Res'] == '0.5x0.625':
    degx = 4 * 0.625
    degy = 4 * 0.5
else:
    # Fail with a clear message instead of a NameError (or stale values from an
    # earlier run of this cell) when the resolution is not one handled above.
    raise ValueError(
        f"Unsupported resolution {config['Res']!r}: expected '0.25x0.3125' or '0.5x0.625'"
    )

# Shrink the state vector extent by the trim amount on every side
lon_bounds = [np.min(state_vector.lon.values)+degx, np.max(state_vector.lon.values)-degx]
lat_bounds = [np.min(state_vector.lat.values)+degy, np.max(state_vector.lat.values)-degy]

In [None]:
# Use a 16 pt font for text created from here on (rcParams is global state)
plt.rcParams['font.size'] = 16

# Single 8x8 inch map panel in a PlateCarree projection
fig, ax = plt.subplots(
    1, 1,
    figsize=(8, 8),
    subplot_kw={'projection': ccrs.PlateCarree()},
)

# Draw the state vector labels, one glasbey colour per element id
plot_field(
    ax,
    state_vector_labels,
    cmap=cc.cm.glasbey,
    title='State vector elements',
    cbar_label='Element Id',
)

In [None]:
# Prior emissions: 'EmisCH4_Total' at the first time index of each period's file
priors = [
    xr.load_dataset(path)['EmisCH4_Total'].isel(time=0)
    for path in prior_paths
]

# Optimized scale factors, one field per gridded posterior file
scales = [
    xr.load_dataset(path)['ScaleFactor']
    for path in results_paths
]

# Posterior emissions: each period's prior multiplied by the scale factors
# stored at the same list index
posteriors = [
    priors[period] * scales[period]
    for period in range(num_periods)
]

In [None]:

# Grid-cell areas, read from the same files as the prior emissions
areas = [xr.load_dataset(path)['AREA'] for path in prior_paths]

# Total prior and posterior emissions per interval inside the region of interest
total_prior_emissions_per_period = [
    sum_total_emissions(priors[k], areas[k], mask)
    for k in range(num_periods)
]
total_posterior_emissions_per_period = [
    sum_total_emissions(posteriors[k], areas[k], mask)
    for k in range(num_periods)
]

# Moving average spans 4 intervals at a time
smoothing_window = 4
smoothing_num_days = config["UpdateFreqDays"] * smoothing_window

# One row per period, dated by the period's end date, plus a centred rolling
# mean that falls back to fewer points at the edges (min_periods=1)
posterior_df = pd.DataFrame(
    {
        'Date': to_datetime_list(end_dates),
        'Emissions': total_posterior_emissions_per_period,
    }
).assign(
    MovingAverage=lambda frame: frame['Emissions']
    .rolling(window=smoothing_window, min_periods=1, center=True)
    .mean()
)


In [None]:
# Posterior emissions time series together with its moving average.
# The figure and axes are created explicitly; a leading plt.clf() on a fresh
# kernel would only create (and display) an extra empty figure.
ts_fig, ts_ax = plt.subplots(figsize=(15, 6))
ts_ax.plot(posterior_df['Date'], posterior_df['Emissions'], label="Weekly Emission")
ts_ax.plot(
    posterior_df['Date'],
    posterior_df['MovingAverage'],
    label=f'{smoothing_num_days}-day Moving Average',
)

# Weekly date ticks, written as YYYY-MM-DD and rotated so they do not overlap
ts_ax.xaxis.set_major_locator(mdates.WeekdayLocator())
ts_ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
ts_ax.tick_params(axis='x', labelrotation=45)

# Scientific notation on the emissions axis
ts_ax.ticklabel_format(style='sci', axis='y', scilimits=(0, 0))

ts_ax.set_xlabel('Date')
ts_ax.set_ylabel('Methane Emissions Tg/y')
ts_ax.set_title('Posterior Emissions Time Series')
ts_ax.legend()

# Render the figure without echoing the Legend object's repr as cell output
plt.show()