# Gridded EPA Methane Inventory
## Extension - GHGI 2022

***
#### Authors: 
Erin E. McDuffie
#### Date Last Updated: 
see Step 0
#### Notebook Purpose: 
This Notebook extends and reports annual gridded (0.1°x0.1°) methane emission fluxes (molec./cm2/s) from enteric fermentation sources for the years 2012-2020, using updated inventory values from the 2022 National GHGI.  
#### Summary & Notes:
EPA annual national methane emissions are read in for the 2022 GHGI (either from the GHGI workbooks or public data). National emissions are then scaled down to CONUS emissions using the relative fraction of CONUS/total emissions from the v2 data (for each year, held constant after 2018). Remaining CONUS data are then allocated to proxy groups using the relevant proxy mapping files and allocated to the grid using the relative mass of emissions in each grid cell from each group from version 2 (for each year, held constant after 2018). Annual emission fluxes (molec./cm2/s) for 2012-2020 are then written to final netCDFs in the ‘/code/Final_Gridded_Data/Extension/v2_ext_final’ folder.
***

-------
## Step 0. Set-Up Notebook Modules, Functions, and Local Parameters and Constants
-------

In [None]:
# Report when this notebook file was last modified and which directory it runs from
import os
import time

modtime = os.path.getmtime('./3A_Livestock_Enteric_extension.ipynb')
# Format the modification timestamp in local time
modificationTime = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(modtime))
print("This file was last modified on: ", modificationTime)
print('')
print(f"The directory we are working in is {os.getcwd()}")

In [None]:
## Include plots within notebook
%matplotlib inline

In [None]:
# Import base modules
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import re
import datetime
from copy import copy

# Import additional modules
# Load plotting package Basemap 
from mpl_toolkits.basemap import Basemap

# Load netCDF (for manipulating netCDF file types)
from netCDF4 import Dataset

# Set up ticker
import matplotlib.ticker as ticker

#add path for the global function module (file)
import sys
module_path = os.path.abspath(os.path.join('../Global_Functions/'))
if module_path not in sys.path:
    sys.path.append(module_path)

# Load Tabula (for reading tables from PDFs)
import tabula as tb   
    
# Load user-defined global functions (modules)
import data_load_functions as data_load_fn
import data_functions as data_fn
import data_IO_functions as data_IO_fn
import data_plot_functions as data_plot_fn

In [None]:
# INPUT FILES
# Shared input file names come from the global helper module; the first seven
# entries are unpacked below in the order the helper returns them.
global_filenames = data_load_fn.load_global_file_names()
(State_ANSI_inputfile,
 County_ANSI_inputfile,
 pop_map_inputfile,
 Grid_area01_inputfile,
 Grid_area001_inputfile,
 Grid_state001_ansi_inputfile,
 Grid_county001_ansi_inputfile) = [global_filenames[i] for i in range(7)]
# First 20 characters of the first global file name (printed for reference)
globalinputlocation = State_ANSI_inputfile[0:20]
print(globalinputlocation)

# EPA GHGI data (read in Step 3)
EPA_AGR_inputfile = "../Global_InputData/GHGI/Ch5_Agriculture/Table 5-2_2022.csv"

# Proxy mapping workbook (read in Step 1)
Livestock_Mapping_inputfile = "./InputData/Livestock_Enteric_ProxyMapping.xlsx"

# OUTPUT FILES
gridded_outputfile = '../Final_Gridded_Data/Extension/v2_ext_final/EXT_EPA_v2_3A_Enteric_Fermentation.nc'
netCDF_description = 'EXTENSION to the Gridded EPA Inventory - Enteric Fermentation Emissions - IPCC Source Category 3A'
title_str = "EPA methane emissions from enteric fermentation"
title_diff_str = "Emissions from enteric fermentation difference: 2020-2012"

# Gridded group emissions file (opened read-only in Step 2)
grid_emi_inputfile = '../Final_Gridded_Data/Extension/v2_input_data/Livestock_Enteric_Grid_Emi.nc'


In [None]:
# Define local variables
start_year = 2012  #First year in emission timeseries
end_year = 2018    #Last year in emission timeseries
ext_year = 2020    #last year in extended dataset
# Position of end_year along the year axis. Derived from the settings above
# (rather than hard-coded as 2018-2012) so it stays correct if they change.
end_year_idx = end_year - start_year
year_range = [*range(start_year, ext_year+1,1)] #List of emission years
year_range_str=[str(i) for i in year_range]
num_years = len(year_range)

# Define constants
Avogadro   = 6.02214129 * 10**(23)  #molecules/mol
Molarch4   = 16.04                  #g/mol
Res01      = 0.1                    # degrees
Res_01     = 0.01
tg_scale   = 0.001                  #Tg scale number [New file allows for the exclusion of the territories] 

# Continental US Lat/Lon Limits (for netCDF files)
Lon_left = -130       #deg
Lon_right = -60       #deg
Lat_low  = 20         #deg
Lat_up  = 55          #deg
loc_dimensions = [Lat_low, Lat_up, Lon_left, Lon_right]

# Index bounds of the CONUS window within the 0.1 degree lat/lon arrays
ilat_start = int((90+Lat_low)/Res01) #1100:1450 (continental US range)
ilat_end = int((90+Lat_up)/Res01)
ilon_start = abs(int((-180-Lon_left)/Res01)) #500:1200 (continental US range)
ilon_end = abs(int((-180-Lon_right)/Res01))

# Number of days in each month
month_day_leap  = [  31,  29,  31,  30,  31,  30,  31,  31,  30,  31,  30,  31]
month_day_nonleap = [  31,  28,  31,  30,  31,  30,  31,  31,  30,  31,  30,  31]

# Month arrays
month_range_str = ['January','February','March','April','May','June','July','August','September','October','November','December']
num_months = len(month_range_str)

# Load the grid-cell area maps and their lat/lon coordinate arrays
area_map, lat001, lon001 = data_load_fn.load_area_map_001(Grid_area001_inputfile)
area_map01, Lat01, Lon01 = data_load_fn.load_area_map_01(Grid_area01_inputfile)[0:3]
#Select relevant Continental 0.1 x0.1 domain
Lat_01 = Lat01[ilat_start:ilat_end]
Lon_01 = Lon01[ilon_start:ilon_end]
# Regrid the 0.01 degree area map onto the selected 0.1 degree domain
area_matrix_01 = data_fn.regrid001_to_01(area_map, Lat_01, Lon_01)
area_matrix_01 *= 10000  #convert from m2 to cm2


In [None]:
%%javascript
// Set the notebook output-area auto-scroll threshold to 9999
// (presumably so long cell outputs are shown in full rather than in a scroll box).
IPython.OutputArea.auto_scroll_threshold = 9999;

In [None]:
# Track run time
# Record the start time; `it` (start timestamp in seconds) is used by the final
# cell of the notebook to report the total elapsed run time.
ct = datetime.datetime.now() 
it = ct.timestamp() 
print("current time:", ct) 

## Step 1. Read in Gridding Groups

In [None]:
# Load the GHGI source -> emission group mapping.
# The sheet is read twice: once to capture the header names, then again with
# those names applied explicitly to the two columns.
names = pd.read_excel(Livestock_Mapping_inputfile, sheet_name = "GHGI Map - Livestock", usecols = "A:B",skiprows = 1, header = 0)
colnames = names.columns.values
ghgi_livestock_map = pd.read_excel(Livestock_Mapping_inputfile, sheet_name = "GHGI Map - Livestock", usecols = "A:B", skiprows = 1, names = colnames)

# Drop rows that are not assigned to an emission group ('na' text or empty cells)
group_col = ghgi_livestock_map['GHGI_Emi_Group']
ghgi_livestock_map = ghgi_livestock_map[(group_col != 'na') & group_col.notna()].copy()

# Strip parentheses from the source names (they are later joined into a regex
# pattern in Step 3.2). regex=True is passed explicitly because the pandas
# default changed to regex=False in pandas 2.0, which would leave the
# parentheses in place when the pattern is written as an escaped regex.
ghgi_livestock_map['GHGI_Source'] = ghgi_livestock_map['GHGI_Source'].str.replace(r"[()]", "", regex=True)
ghgi_livestock_map.reset_index(inplace=True, drop=True)
display(ghgi_livestock_map)

# Load the emission group -> proxy mapping (same two-pass read as above)
names = pd.read_excel(Livestock_Mapping_inputfile, sheet_name = "Proxy Map - Livestock", usecols = "A:G",skiprows = 1, header = 0)
colnames = names.columns.values
proxy_livestock_map = pd.read_excel(Livestock_Mapping_inputfile, sheet_name = "Proxy Map - Livestock", usecols = "A:G", skiprows = 1, names = colnames)
display((proxy_livestock_map))

# Unique emission group names found on the GHGI mapping tab
emi_group_names = np.unique(ghgi_livestock_map['GHGI_Emi_Group'])

# QA/QC: only the number of unique groups is compared, not the names themselves
print('QA/QC: Is the number of emission groups the same for the proxy and emissions tabs?')
if (len(emi_group_names) == len(np.unique(proxy_livestock_map['GHGI_Emi_Group']))):
    print('PASS')
else:
    print('FAIL')

-----------
## Step 2. Read in v2 Grid Group Emissions
----------

In [None]:
# Read the v2 gridded group emissions.
# Each group is stored in a notebook-level variable named 'Proxy_' + GHGI_Emi_Group
# (because the original proxy mapping is not 1:1 with the GHGI groups).
# All arrays are lat x lon x year on the 0.1x0.1 grid.
# Years after end_year (2019 and 2020) are assigned the end_year (2018) values.

Emissions_nongrid = np.zeros([num_years])

# Open the input file in a context manager so the handle is always closed,
# including when a variable is missing or a read fails.
with Dataset(grid_emi_inputfile, 'r', format='NETCDF4') as nc_in:
    for igroup in np.arange(0,len(proxy_livestock_map)):
        group_name = proxy_livestock_map.loc[igroup,'GHGI_Emi_Group']
        temp = nc_in['Ext_'+group_name][:,:,:]
        proxy_grid = np.zeros([len(Lat_01),len(Lon_01),num_years])
        for iyear in np.arange(0,num_years):
            # Use the year's own slice up to end_year, then hold the end_year slice constant
            src_year = iyear if year_range[iyear] <= end_year else end_year_idx
            proxy_grid[:,:,iyear] = temp[:,:,src_year]
        vars()['Proxy_'+group_name] = proxy_grid

    # Non-gridded emissions: same rule, end_year values are reused for later years
    for iyear in np.arange(0,num_years):
        src_year = iyear if year_range[iyear] <= end_year else end_year_idx
        Emissions_nongrid[iyear] = nc_in['Emissions_nongrid'][src_year]

# Per-year ratio of non-gridded emissions to the sum of all gridded group emissions.
# NOTE(review): despite its name, CONUS_frac holds non-gridded / gridded, and
# Step 4.1 scales national totals by (1 - CONUS_frac). Confirm whether the
# denominator is meant to include the non-gridded emissions.
CONUS_frac = np.zeros([num_years])

for iyear in np.arange(0, num_years):
    sum_emi = 0
    for igroup in np.arange(0,len(proxy_livestock_map)):
        sum_emi += np.sum( vars()['Proxy_'+proxy_livestock_map.loc[igroup,'GHGI_Emi_Group']][:,:,iyear])
    CONUS_frac[iyear] = Emissions_nongrid[iyear]/sum_emi

print(CONUS_frac)

-----------
## Step 3. Read in and Format 2022 US EPA GHGI Emissions
----------

In [None]:
# Read the national agriculture CH4 emissions from public report Table 5-2 (in kt)
EPA_emi_agr_CH4 = pd.read_csv(EPA_AGR_inputfile, thousands=',', header=2, nrows=10)
EPA_emi_agr_CH4 = EPA_emi_agr_CH4.drop(columns=['Unnamed: 0'])
# The first remaining column holds the source names
EPA_emi_agr_CH4 = EPA_emi_agr_CH4.rename(columns={EPA_emi_agr_CH4.columns[0]: 'Source'})
# Discard the years before the start of the emission timeseries
pre_series_years = [str(yr) for yr in range(1990, start_year)]
EPA_emi_agr_CH4 = EPA_emi_agr_CH4.drop(columns=pre_series_years)

# Extract the enteric fermentation and manure management rows
is_enteric = EPA_emi_agr_CH4['Source'] == "Enteric Fermentation"
is_manure = EPA_emi_agr_CH4['Source'] == "Manure Management"
EPA_emi_ent_CH4 = EPA_emi_agr_CH4.loc[is_enteric].reset_index(drop=True)
EPA_emi_man_CH4 = EPA_emi_agr_CH4.loc[is_manure].reset_index(drop=True)

print('EPA GHGI National Enteric CH4 Emissions (kt):')
display(EPA_emi_ent_CH4)
print('EPA GHGI National Manure CH4 Emissions (kt):')
display(EPA_emi_man_CH4)


#### 3.2. Split Emissions into Gridding Groups

In [None]:
# Split the national GHGI emissions into gridding groups, based on the
# livestock proxy mapping file ('GHGI Map - Livestock' sheet, loaded in Step 1).
# Each group's annual emissions are stored in a notebook-level variable named
# after the group (via vars()), as an array of length num_years.

DEBUG =1
# Column position of the first inventory year in the GHGI table
start_year_idx = EPA_emi_ent_CH4.columns.get_loc(str(start_year))
# NOTE: end_year_idx is deliberately NOT reassigned here. It is the year-axis
# index of end_year used when reading the gridded data in Step 2, and
# overwriting it with a column position would break a re-run of that cell.
ghgi_livestock_groups = ghgi_livestock_map['GHGI_Emi_Group'].unique()
sum_emi = np.zeros([num_years])

# Loop over the emission groups (not over the rows of the GHGI table), summing
# the emissions of every GHGI source that belongs to each group, for each year.
for group_name in ghgi_livestock_groups:
    vars()[group_name] = np.zeros([num_years])
    source_temp = ghgi_livestock_map.loc[ghgi_livestock_map['GHGI_Emi_Group'] == group_name, 'GHGI_Source']
    # Regex alternation matching any of this group's source names
    pattern_temp  = '|'.join(source_temp)
    emi_temp = EPA_emi_ent_CH4[EPA_emi_ent_CH4['Source'].str.contains(pattern_temp)]
    # Column-wise sum over the matching rows, from the first inventory year onward
    vars()[group_name][:] = emi_temp.iloc[:,start_year_idx:].sum()


#Check against total summary emissions
print('QA/QC #1: Check Processing Emission Sum against GHGI Summary Emissions')
for iyear in np.arange(0,num_years):
    for group_name in ghgi_livestock_groups:
        sum_emi[iyear] += vars()[group_name][iyear]

    # Reported total for this year (row 0 is the enteric fermentation row)
    summary_emi = EPA_emi_ent_CH4.iloc[0,start_year_idx+iyear]
    #Check 1 - make sure that the sum over all groups equals the total reported
    diff1 = abs(sum_emi[iyear] - summary_emi)/((sum_emi[iyear] + summary_emi)/2)
    if DEBUG==1:
        print(summary_emi)
        print(sum_emi[iyear])
    if diff1 < 0.0001:
        print('Year ', year_range[iyear],': PASS, difference < 0.01%')
    else:
        # diff1 is a fraction; convert to percent so the printed unit is correct
        print('Year ', year_range[iyear],': FAIL (check Production & summary tabs): ', diff1*100,'%')

--------------
## Step 4. Grid Data
-------------

##### Step 4.1 Allocate emissions to the CONUS region (0.1x0.1)

In [None]:
# Distribute the national GHGI emissions onto the 0.1x0.1 grid, using each
# group's gridded 'Proxy_<group>' array as the spatial weighting.

DEBUG =1
# Accumulators: gridded emissions (lat x lon x year) and non-gridded emissions (per year)
Emissions_array_01 = np.zeros([len(Lat_01),len(Lon_01),num_years])
Emissions_nongrid = np.zeros([num_years])

# For each group listed in the proxy mapping and each year, split the national
# value into a gridded share (spread by the proxy weights) and a non-gridded remainder

print('**QA/QC Check: Sum of national gridded emissions vs. GHGI national emissions')

for igroup in range(len(proxy_livestock_map)):
    group_name = proxy_livestock_map.loc[igroup,'GHGI_Emi_Group']
    proxy_temp = vars()['Proxy_'+group_name][:,:,:]
    national_emi = vars()[group_name]

    for iyear in range(num_years):
        year_proxy = proxy_temp[:,:,iyear]
        # Fraction of the group's gridded total that falls in each grid cell
        proxy_frac = data_fn.safe_div(year_proxy, np.sum(year_proxy))
        # Portion of the national value that is placed on the grid
        ghgi_temp = national_emi[iyear] * (1-CONUS_frac[iyear])
        Emissions_array_01[:,:,iyear] += ghgi_temp * proxy_frac[:,:]
        # The remainder is tracked as non-gridded emissions
        Emissions_nongrid[iyear] += national_emi[iyear] - ghgi_temp


# Compare gridded + non-gridded totals with the reported national total for each year
for iyear, year_label in enumerate(year_range_str):
    calc_emi = np.sum(Emissions_array_01[:,:,iyear])+ np.sum(Emissions_nongrid[iyear])
    summary_emi = EPA_emi_ent_CH4.iloc[0,iyear+1]
    emi_diff = abs(summary_emi-calc_emi)/((summary_emi+calc_emi)/2)
    if DEBUG==1:
        print(calc_emi)
        print(summary_emi)
    if abs(emi_diff) < 0.0001:
        print('Year '+ year_label+': Difference < 0.01%: PASS')
    else:
        print('Year '+ year_label+': Difference > 0.01%: FAIL, diff: '+str(emi_diff))

ct = datetime.datetime.now()
print("current time:", ct)

#### 4.2 Calculate Gridded Emission Fluxes (molec./cm2/s) (0.1x0.1)

In [None]:
# Convert gridded annual emissions to emission fluxes
# conversion: kt emissions to molec/cm2/s flux
import calendar  # stdlib; imported in this cell so the leap-year test is self-contained

Flux_array_01_annual = np.zeros([len(Lat_01),len(Lon_01),num_years])
print('**QA/QC Check: Sum of national gridded emissions vs. GHGI national emissions')

for iyear in np.arange(0,num_years):
    # Number of days in this year. Leap years are detected with calendar.isleap
    # rather than a hard-coded list, so 2020 (and any later leap year added to
    # year_range) correctly uses 366 days.
    if calendar.isleap(year_range[iyear]):
        year_days = np.sum(month_day_leap)
    else:
        year_days = np.sum(month_day_nonleap)

    # 10**9 g per kt; Avogadro/Molarch4 converts g to molecules;
    # year_days*24*60*60 is seconds per year; area_matrix_01 is the grid-cell area in cm2
    conversion_factor_01 = 10**9 * Avogadro / float(Molarch4 *year_days * 24 * 60 *60) / area_matrix_01
    Flux_array_01_annual[:,:,iyear] = Emissions_array_01[:,:,iyear]*conversion_factor_01
    #convert back to mass to check (the annual factor is the same one applied above)
    conversion_factor_annual = conversion_factor_01
    calc_emi = np.sum(Flux_array_01_annual[:,:,iyear]/conversion_factor_annual)+np.sum(Emissions_nongrid[iyear])
    summary_emi = EPA_emi_ent_CH4.iloc[0,iyear+1] 
    emi_diff = abs(summary_emi-calc_emi)/((summary_emi+calc_emi)/2)
    if DEBUG==1:
        print(calc_emi)
        print(summary_emi)
    if abs(emi_diff) < 0.0001:
        print('Year '+ year_range_str[iyear]+': Difference < 0.01%: PASS')
    else: 
        print('Year '+ year_range_str[iyear]+': Difference > 0.01%: FAIL, diff: '+str(emi_diff))
        
# Name used by the netCDF-writing and plotting cells (same array object, not a copy)
Flux_Emissions_Total_annual = Flux_array_01_annual

-------------
## Step 5. Write netCDF
------------

In [None]:
# yearly data
# Initialize the output file via the project helper
data_IO_fn.initialize_netCDF(gridded_outputfile, netCDF_description, 0, year_range, loc_dimensions, Lat_01, Lon_01)

# Write data to netCDF. The context manager closes the file even if the
# assignment raises (e.g. on a shape mismatch), so no open handle is left behind.
with Dataset(gridded_outputfile, 'r+', format='NETCDF4') as nc_out:
    nc_out.variables['emi_ch4'][:,:,:] = Flux_Emissions_Total_annual

#Confirm file location
print('** SUCCESS **')
# Resolve the relative output path against the working directory so the printed
# location is a valid absolute path
print("Gridded emissions written to file: {}" .format(os.path.abspath(gridded_outputfile)))

----------
## Step 6. Plot Gridded Data
---------

#### Step 6.1. Plot Annual Emission Fluxes

In [None]:
# Map the annual emission flux for every year in the timeseries.
# scale_max, save_flag and save_outfile are passed straight through to the
# plotting helper.
scale_max, save_flag, save_outfile = 10, 0, ''
data_plot_fn.plot_annual_emission_flux_map(
    Flux_Emissions_Total_annual,
    Lat_01,
    Lon_01,
    year_range,
    title_str,
    scale_max,
    save_flag,
    save_outfile,
)

#### Step 6.2 Plot Difference between first and last inventory year

In [None]:
# Map the change in emission flux between the first and last year.
# save_flag and save_outfile are passed straight through to the plotting helper.
save_flag, save_outfile = 0, ''
data_plot_fn.plot_diff_emission_flux_map(
    Flux_Emissions_Total_annual,
    Lat_01,
    Lon_01,
    year_range,
    title_diff_str,
    save_flag,
    save_outfile,
)

In [None]:
# Report the total run time, measured from the start timestamp `it` recorded
# near the top of the notebook.
ct = datetime.datetime.now()
ft = ct.timestamp()
time_elapsed = (ft - it) / 3600  # seconds -> hours
print(f'Time to run: {time_elapsed} hours')
print('** EXTENSION_GEPA_3A_Livestock_Enteric: COMPLETE **')