# Gridded EPA Methane Inventory
## Category: 3F Field Burning of Agricultural Residues

***
#### Authors: 
Joannes D. Maasakkers, Erin E. McDuffie
#### Date Last Updated: 
see Step 0
#### Notebook Purpose: 
This notebook calculates gridded (0.1°x0.1°) annual and monthly emission fluxes of methane (molecules CH4/cm2/s) from agricultural field burning activities in the CONUS region for the years 2012 - 2018. Emission fluxes are reported at both annual and monthly time resolution.  
#### Summary & Notes:
The national EPA GHGI emissions from field burning of agricultural residues are read in from the EPA GHG Inventory workbook. Emissions are available as national totals (for entire time series) and state-level allocations (until 2014). National emissions are allocated to the state level using relative state-level emissions data. State-level emissions are allocated to the 0.01°x0.01° grid using county and gridded (annual and monthly) data of crop burning emissions from 2003-2007 from McCarty, 2011. Data are then re-gridded to 0.1°x0.1° and converted to fluxes (molecules CH4/cm2/s). Annual and monthly emission fluxes (molecules CH4/cm2/s) are written to final netCDFs in the ‘/code/Final_Gridded_Data/’ folder.  
***

-------
## Step 0. Set-Up Notebook Modules, Functions, and Local Parameters and Constants
-------

In [None]:
# Report when this notebook file was last saved and where it is being run from
import os
import time

# Last-modified time of the notebook file, rendered in local time
modtime = os.path.getmtime('./3F_Field_Burning.ipynb')
local_mod_struct = time.localtime(modtime)
modificationTime = time.strftime('%Y-%m-%d %H:%M:%S', local_mod_struct)
print("This file was last modified on: ", modificationTime)
print('')

# Current working directory of the kernel
working_dir = os.getcwd()
print("The directory we are working in is {}".format(working_dir))

In [None]:
# Import base modules
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import re
import datetime
from copy import copy

# Import additional modules
# Load plotting package Basemap 
from mpl_toolkits.basemap import Basemap

# Load netCDF (for manipulating netCDF file types)
from netCDF4 import Dataset

# Set up ticker
import matplotlib.ticker as ticker

#add path for the global function module (file)
import sys
module_path = os.path.abspath(os.path.join('../Global_Functions/'))
if module_path not in sys.path:
    sys.path.append(module_path)

# Load Tabula (for reading tables from PDFs)
import tabula as tb   
    
# Load user-defined global functions (modules)
import data_load_functions as data_load_fn
import data_functions as data_fn
import data_IO_functions as data_IO_fn
import data_plot_functions as data_plot_fn

In [None]:
#INPUT Files
# Assign global file names
# The shared loader returns a sequence of file names; entries 0-6 are unpacked
# by position below (state/county ANSI tables, population map, grid-cell area
# maps at 0.1 and 0.01 degrees, and gridded state/county ANSI maps, going by
# the variable names they are bound to).
global_filenames = data_load_fn.load_global_file_names()
State_ANSI_inputfile = global_filenames[0]
County_ANSI_inputfile = global_filenames[1]
pop_map_inputfile = global_filenames[2]
Grid_area01_inputfile = global_filenames[3]
Grid_area001_inputfile = global_filenames[4]
Grid_state001_ansi_inputfile = global_filenames[5]
Grid_county001_ansi_inputfile = global_filenames[6]

# Specify names of inputs files used in this notebook
#EPA Data
# GHGI table (csv) and the GHGI field-burning workbook (xlsx); the workbook is
# the one read for state and national emissions in this notebook.
EPA_burning_inputfile_csv = '../Global_InputData/GHGI/Ch5_Agriculture/Table 5-30.csv'
EPA_burning_inputfile = '../Global_InputData/GHGI/Ch5_Agriculture/FBAR_1990-2018_PR_FINAL_2March2020.xlsx'

#Proxy Data file
# Workbook holding the 'GHGI Map - Burning' and 'Proxy Map - Burning' sheets
Burning_Mapping_inputfile = "./InputData/FieldBurning_ProxyMapping.xlsx"

#Gridded Crop Data
# One residue-burning csv per year, 2003-2007 (order matters: files are later
# addressed by their position in this list)
Grid_crop_files = ["./InputData/2003_ResidueBurning_AllCrops.csv","./InputData/2004_ResidueBurning_AllCrops.csv",
                 "./InputData/2005_ResidueBurning_AllCrops.csv","./InputData/2006_ResidueBurning_AllCrops.csv",
                 "./InputData/2007_ResidueBurning_AllCrops.csv"]

#intermediate output file
# NOTE(review): this file name says '4F' while every other name in this
# notebook uses '3F' -- confirm whether that is intentional.
burning_int_out = './IntermediateOutputs/Intermediate_EPA_v2_4F_Field_burning.nc'

#OUTPUT FILES
# Final annual and monthly netCDF files, their description strings, and plot titles
gridded_outputfile = '../Final_Gridded_Data/EPA_v2_3F_Field_Burning.nc'
netCDF_description = 'Gridded EPA Inventory - Field Burning of Agricultural Residues Emissions - IPCC Source Category 3F'
netCDF_description_m = 'Gridded EPA Inventory - Monthly Field Burning of Agricultural Residues Emissions - IPCC Source Category 3F'
gridded_month_outputfile = '../Final_Gridded_Data/EPA_v2_3F_Field_Burning_Monthly.nc'
title_str = "EPA methane emissions from the burning of agricultural residues"
title_diff_str = "Emissions from field burning difference: 2018-2012"

#output gridded proxy data
# Per-group gridded emissions (kt) saved for the inventory extension
grid_emi_outputfile = '../Final_Gridded_Data/Extension/v2_input_data/Field_Burning_Grid_Emi.nc'

In [None]:
# Emission time series covered by this notebook (both end points inclusive)
start_year = 2012
end_year = 2018
year_range = list(range(start_year, end_year + 1))
year_range_str = list(map(str, year_range))
num_years = len(year_range)

# Physical constants and grid resolutions
Avogadro = 6.02214129 * 10**(23)    # molecules/mol
Molarch4 = 16.04                    # g/mol
Res01 = 0.1                         # degrees
Res_01 = 0.01                       # degrees
tg_scale = 0.001                    # Tg scale number [New file allows for the exclusion of the territories]

# Continental US latitude/longitude limits in degrees (for netCDF files)
Lat_low, Lat_up = 20, 55
Lon_left, Lon_right = -130, -60
loc_dimensions = [Lat_low, Lat_up, Lon_left, Lon_right]

# Index range of the CONUS window expressed at Res01 resolution:
# latitude 1100:1450, longitude 500:1200
ilat_start = int((90 + Lat_low) / Res01)
ilat_end = int((90 + Lat_up) / Res01)
ilon_start = abs(int((-180 - Lon_left) / Res01))
ilon_end = abs(int((-180 - Lon_right) / Res01))

# Days per month; the leap-year list differs from the regular one in February only
month_day_nonleap = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
month_day_leap = list(month_day_nonleap)
month_day_leap[1] = 29

# Month names, and a lookup from three-letter abbreviation to zero-based month index
month_range_str = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December',
]
num_months = len(month_range_str)
month_dict = {month_name[:3]: month_idx for month_idx, month_name in enumerate(month_range_str)}

In [None]:
%%javascript
IPython.OutputArea.auto_scroll_threshold = 9999;

In [None]:
# Record the current time, both as a datetime and as a POSIX timestamp
ct = datetime.datetime.now()
it = datetime.datetime.timestamp(ct)
print("current time:", ct)

#### Notebook Options

In [None]:
# Notebook option: scale gridded emissions so they match state-level crop totals.
# NOTE(review): 1 presumably means "enabled"; the flag is not referenced in the
# visible part of this notebook -- confirm where (or whether) it is consumed.
Scale_To_State = 1

____
## Step 1. Load in State ANSI data and Area Maps
_____

In [None]:
# --- State-level ANSI data ---
# Only the first two items returned by the loader are kept
State_ANSI, name_dict = data_load_fn.load_state_ansi(State_ANSI_inputfile)[0:2]
# QA: report how many "state" records were read
print(f'Read input file: {State_ANSI_inputfile}')
print(f'Total "States" found: {len(State_ANSI):.0f}')
print(' ')

# --- County ANSI data ---
# Includes State ANSI number, county ANSI number, county name, and county area (square miles)
County_ANSI = pd.read_csv(County_ANSI_inputfile, encoding='latin-1')

# --- 0.01 x 0.01 degree data ---
# State ANSI ID map (cast to int32) and grid cell area (m2) map with its coordinates.
# The county ANSI map is intentionally not loaded in this notebook.
state_ANSI_map = data_load_fn.load_state_ansi_map(Grid_state001_ansi_inputfile).astype('int32')
area_map, lat001, lon001 = data_load_fn.load_area_map_001(Grid_area001_inputfile)

# --- 0.1 x 0.1 degree data ---
# Grid cell area map with coordinates; only the first three returned items are kept
area_map01, Lat01, Lon01 = data_load_fn.load_area_map_01(Grid_area01_inputfile)[0:3]
# Restrict the coordinates to the continental US window
Lat_01 = Lat01[ilat_start:ilat_end]
Lon_01 = Lon01[ilon_start:ilon_end]
# Regrid the 0.01 degree area map onto the 0.1 degree window, then convert m2 -> cm2
area_matrix_01 = data_fn.regrid001_to_01(area_map, Lat_01, Lon_01)
area_matrix_01 *= 10000

# Same regridding applied to the state ANSI map
state_ANSI_map_01 = data_fn.regrid001_to_01(state_ANSI_map, Lat_01, Lon_01)

# Print time
ct = datetime.datetime.now()
print("current time:", ct)

-------------
## Step 2: Read-in and Format Proxy Data
-------------

#### Step 2.1 Read In Proxy Mapping File & Make Proxy Arrays

In [None]:
# Load the GHGI source -> emission group mapping (columns A:B of the mapping sheet)
names = pd.read_excel(Burning_Mapping_inputfile, sheet_name = "GHGI Map - Burning", usecols = "A:B",skiprows = 1, header = 0)
colnames = names.columns.values
ghgi_burning_map = pd.read_excel(Burning_Mapping_inputfile, sheet_name = "GHGI Map - Burning", usecols = "A:B", skiprows = 1, names = colnames)

# Drop rows with no emission group (either the text 'na' or an empty cell).
# Copy the filtered frame so the column edit below acts on an independent object
# rather than on a slice of the original.
has_group = (ghgi_burning_map['GHGI_Emi_Group'] != 'na') & ghgi_burning_map['GHGI_Emi_Group'].notna()
ghgi_burning_map = ghgi_burning_map[has_group].copy()

# Strip parentheses from the source names. regex=True is passed explicitly:
# the pandas default for `regex` changed to False in pandas 2.0, which would
# make the pattern a literal backslash-parenthesis and leave the names untouched.
# The names are later joined into a regular expression, so stray parentheses
# would be interpreted as regex groups there.
ghgi_burning_map['GHGI_Source'] = ghgi_burning_map['GHGI_Source'].str.replace(r"[()]", "", regex=True)
ghgi_burning_map.reset_index(inplace=True, drop=True)
display(ghgi_burning_map)

# Load the emission group -> proxy mapping (columns A:E of the proxy sheet)
names = pd.read_excel(Burning_Mapping_inputfile, sheet_name = "Proxy Map - Burning", usecols = "A:E",skiprows = 1, header = 0)
colnames = names.columns.values
proxy_burning_map = pd.read_excel(Burning_Mapping_inputfile, sheet_name = "Proxy Map - Burning", usecols = "A:E", skiprows = 1, names = colnames)
display((proxy_burning_map))

# Create zero-filled placeholder arrays, as notebook-level variables named after
# the mapping-file entries. Proxies flagged as monthly get an extra month axis.
grid_dims = [len(Lat_01), len(Lon_01), num_years]
for igroup in np.arange(0, len(proxy_burning_map)):
    proxy_name = proxy_burning_map.loc[igroup, 'Proxy_Group']
    emi_group = proxy_burning_map.loc[igroup, 'GHGI_Emi_Group']
    state_proxy_name = proxy_burning_map.loc[igroup, 'State_Proxy_Group']

    # Gridded proxy and its non-gridded counterpart
    if proxy_burning_map.loc[igroup, 'Grid_Month_Flag'] == 0:
        vars()[proxy_name] = np.zeros(grid_dims)
        vars()[proxy_name + '_nongrid'] = np.zeros([num_years])
    else:
        vars()[proxy_name] = np.zeros(grid_dims + [num_months])
        vars()[proxy_name + '_nongrid'] = np.zeros([num_years, num_months])

    # National emissions per year for this emission group
    vars()[emi_group] = np.zeros([num_years])

    # State proxy, only when the mapping file assigns one ('-' means none)
    if state_proxy_name != '-':
        if proxy_burning_map.loc[igroup, 'State_Month_Flag'] == 0:
            vars()[state_proxy_name] = np.zeros([len(State_ANSI), num_years])
        else:
            vars()[state_proxy_name] = np.zeros([len(State_ANSI), num_years, num_months])

emi_group_names = np.unique(ghgi_burning_map['GHGI_Emi_Group'])

print('QA/QC: Is the number of emission groups the same for the proxy and emissions tabs?')
if (len(emi_group_names) == len(np.unique(proxy_burning_map['GHGI_Emi_Group']))):
    print('PASS')
else:
    print('FAIL')
    

#### Step 2.2. Read in the State Emissions Data

In [None]:
# State emissions data from the GHGI workbook are used as the state-level proxy here.
# EPA methane emissions in units of kt.

# Initialize one (state x year) proxy array per crop
state_wheat = np.zeros([len(State_ANSI),num_years])
state_maize = np.zeros([len(State_ANSI),num_years])
state_rice = np.zeros([len(State_ANSI),num_years])
state_soybeans = np.zeros([len(State_ANSI),num_years])
state_cotton = np.zeros([len(State_ANSI),num_years])
state_sorghum = np.zeros([len(State_ANSI),num_years])
state_peanuts = np.zeros([len(State_ANSI),num_years])
state_other = np.zeros([len(State_ANSI),num_years])
state_leg = np.zeros([len(State_ANSI),num_years])
state_bar = np.zeros([len(State_ANSI),num_years])
state_oats = np.zeros([len(State_ANSI),num_years])
state_grass = np.zeros([len(State_ANSI),num_years])
state_veg = np.zeros([len(State_ANSI),num_years])
state_tob = np.zeros([len(State_ANSI),num_years])
state_sun = np.zeros([len(State_ANSI),num_years])
state_pot = np.zeros([len(State_ANSI),num_years])
state_pea = np.zeros([len(State_ANSI),num_years])
state_drybean = np.zeros([len(State_ANSI),num_years])
state_beets = np.zeros([len(State_ANSI),num_years])
state_lentils = np.zeros([len(State_ANSI),num_years])
state_chickpeas = np.zeros([len(State_ANSI),num_years])

# Workbook 'Source' label -> array that receives that crop's emissions.
# Labels are matched exactly as they appear in the workbook.
state_crop_arrays = {
    'Cereal/Wheat': state_wheat,
    'Cereal/Maize': state_maize,
    'Cereal/Other/Rice': state_rice,
    'Pulses/Other/Soybeans': state_soybeans,
    'Other/Cotton': state_cotton,
    'Cereals/Other/Sorghum': state_sorghum,
    'Pulses/Other/Peanuts': state_peanuts,
    'Cereals/Other/Other Small Grains': state_other,
    'Other/Legume Hay': state_leg,
    'Cereals/Barley': state_bar,
    'Cereals/Oats': state_oats,
    'Other/Grass Hay': state_grass,
    'Other/Vegetables': state_veg,
    'Other/Tobacco': state_tob,
    'Other/Sunflower': state_sun,
    'Tubers And Roots/Other/Potatoes': state_pot,
    'Other/Pulses/Peas': state_pea,
    'Pulses/Other/Dry Beans': state_drybean,
    'Tubers And Roots/Other/Sugarbeets': state_beets,
    'Pulses/Other/Lentils': state_lentils,
    'Pulses/Other/Chickpeas': state_chickpeas,
}

# Years after 2014 reuse the 2014 column of the state sheets
idx_2014 = year_range.index(2014)

# Open the workbook once and reuse the handle for every state sheet instead of
# re-opening the file on each iteration.
# NOTE(review): the handle is left open, as before; close it once no later cell needs `xl`.
xl = pd.ExcelFile(EPA_burning_inputfile)
state_list = xl.sheet_names
state_sheets = set(state_list)

# Read in emissions for every state that has a sheet in the workbook
for istate in np.arange(0, len(State_ANSI)):
    state_name = State_ANSI['name'][istate]
    if state_name not in state_sheets:
        continue
    print(state_name)

    epa_state_temp = pd.read_excel(xl, skiprows=78, nrows=22, sheet_name=state_name)
    epa_state_temp.dropna(axis=0, inplace=True)
    # One year header is stored as text in the workbook; make it numeric like the others
    epa_state_temp.rename(columns={'1999': 1999}, inplace=True)
    # Drop the years before the start of the time series
    epa_state_temp = epa_state_temp.drop(columns=[*range(1990, start_year, 1)])
    epa_state_temp.rename(columns={epa_state_temp.columns[0]: 'Source'}, inplace=True)
    epa_state_temp.reset_index(inplace=True, drop=True)

    for iyear in np.arange(0, num_years):
        # Column 0 is 'Source', so the data for year index k sits in column k+1
        if year_range[iyear] > 2014:
            year_idx = idx_2014
        else:
            year_idx = iyear
        for icrop in np.arange(0, len(epa_state_temp)):
            source_label = epa_state_temp.loc[icrop, 'Source']
            crop_array = state_crop_arrays.get(source_label)
            if crop_array is None:
                # Unrecognised row label: report it instead of silently dropping it
                print(istate, icrop, source_label)
            else:
                crop_array[istate, iyear] = epa_state_temp.iloc[icrop, year_idx + 1]

#### Step 2.3. Read McCarty burning data (2003-2007)

In [None]:
num_files = 5

# Monthly running-sum grids at the lat001 x lon001 resolution, one per crop class
grid_maize_mon = np.zeros([len(lat001),len(lon001),num_months])
grid_cotton_mon = np.zeros([len(lat001),len(lon001),num_months])
grid_rice_mon = np.zeros([len(lat001),len(lon001),num_months])
grid_soybean_mon = np.zeros([len(lat001),len(lon001),num_months])
grid_wheat_mon = np.zeros([len(lat001),len(lon001),num_months])
grid_other_mon = np.zeros([len(lat001),len(lon001),num_months])

# 'Crop_Type' value -> grid that accumulates it; crop types not listed are ignored
crop_to_grid = {
    'corn': grid_maize_mon,
    'rice': grid_rice_mon,
    'soybean': grid_soybean_mon,
    'cotton': grid_cotton_mon,
    'wheat': grid_wheat_mon,
    'Kentucky bluegrass': grid_other_mon,
    'other crop/fallow': grid_other_mon,
}

for ifile in np.arange(0, num_files):
    McC_temp = pd.read_csv(Grid_crop_files[ifile])
    # Remove observations without date
    McC_temp = McC_temp[McC_temp['Burn_Date'] != ' ']
    McC_temp.reset_index(inplace=True, drop=True)
    print(ifile)

    # The month abbreviation is the last three characters of 'Burn_Date' in the
    # fourth file (index 3) and the first three characters in all other files
    month_chars = slice(-3, None) if ifile == 3 else slice(0, 3)

    for irow in np.arange(len(McC_temp)):
        # Decide month based on burn date (done before the domain test)
        imonth = month_dict[McC_temp['Burn_Date'][irow][month_chars]]

        # Skip observations on or outside the domain limits
        in_domain = (
            McC_temp['Longitude'][irow] > Lon_left
            and McC_temp['Longitude'][irow] < Lon_right
            and McC_temp['Latitude'][irow] > Lat_low
            and McC_temp['Latitude'][irow] < Lat_up
        )
        if not in_domain:
            continue

        # Grid indices counted from the lower-left corner of the domain
        ilat = int((McC_temp['Latitude'][irow] - Lat_low)/Res_01)
        ilon = int((McC_temp['Longitude'][irow] - Lon_left)/Res_01)

        # Add this observation's CH4 emissions to the running sum of its crop class
        crop_grid = crop_to_grid.get(McC_temp['Crop_Type'][irow])
        if crop_grid is not None:
            crop_grid[ilat, ilon, imonth] += McC_temp['EMCH4_Gg'][irow]

-----------
## Step 3. Read in and Format US EPA GHGI Emissions
----------

In [None]:
# Read the national emissions table from the GHGI workbook (in units of kt):
# drop incomplete rows, make the text '1999' header numeric, and remove the
# years before the start of the time series
EPA_emi_burning_CH4 = (
    pd.read_excel(EPA_burning_inputfile, sheet_name='National Emissions', skiprows=78, nrows=23)
    .dropna(axis=0)
    .rename(columns={'1999': 1999})
    .drop(columns=[*range(1990, start_year, 1)])
)
# The first column holds the source labels
first_column = EPA_emi_burning_CH4.columns[0]
EPA_emi_burning_CH4 = EPA_emi_burning_CH4.rename(columns={first_column: 'Source'})
display(EPA_emi_burning_CH4)

#### 3.2. Split Emissions into Gridding Groups (each Group will have the same proxy applied during the state allocation/gridding)

In [None]:
# Split the national emissions into gridding groups: for every GHGI emission
# group, sum the emissions of the GHGI sources mapped to it, per year.

DEBUG =1

# Column positions of the first and (one past) the last emission year
start_year_idx = EPA_emi_burning_CH4.columns.get_loc((start_year))
end_year_idx = EPA_emi_burning_CH4.columns.get_loc((end_year))+1
ghgi_burning_groups = ghgi_burning_map['GHGI_Emi_Group'].unique()
sum_emi = np.zeros([num_years])

for igroup in np.arange(0,len(ghgi_burning_groups)):
    group_name = ghgi_burning_groups[igroup]
    # Sources mapped to this group, joined into a single "a|b|c" regular expression
    source_temp = ghgi_burning_map.loc[ghgi_burning_map['GHGI_Emi_Group'] == group_name, 'GHGI_Source']
    pattern_temp  = '|'.join(source_temp)
    emi_temp = EPA_emi_burning_CH4[EPA_emi_burning_CH4['Source'].str.contains(pattern_temp)]
    # Sum only the start_year..end_year columns so that any later columns in the
    # table cannot leak into (or break) the num_years-long group array
    vars()[group_name] = np.zeros([num_years])
    vars()[group_name][:] = emi_temp.iloc[:,start_year_idx:end_year_idx].sum()


# Check the sum over all groups against the last row of the national table
print('QA/QC #1: Check Processing Emission Sum against GHGI Summary Emissions')
for iyear in np.arange(0,num_years):
    for igroup in np.arange(0,len(ghgi_burning_groups)):
        sum_emi[iyear] += vars()[ghgi_burning_groups[igroup]][iyear]

    summary_emi = EPA_emi_burning_CH4.iloc[-1,start_year_idx+iyear]
    # Relative difference (as a fraction) with respect to the mean of both values
    diff1 = abs(sum_emi[iyear] - summary_emi)/((sum_emi[iyear] + summary_emi)/2)
    if DEBUG ==1:
        print(summary_emi)
        print(sum_emi[iyear])
    if diff1 < 0.0001:
        print('Year ', year_range[iyear],': PASS, difference < 0.01%')
    else:
        # diff1 is a fraction; convert to percent to match the '%' label
        print('Year ', year_range[iyear],': FAIL (check Production & summary tabs): ', diff1*100,'%')

--------------
## Step 4. Grid Data
-------------

#### Step 4.1. Allocate emissions

##### Step 4.1.1 Assign the Appropriate Proxy Variable Names (state & grid)

In [None]:
# Bind the arrays computed in Step 2 to the proxy names used by the allocation steps.
# Left-hand names must match the 'State_Proxy_Group' names in the
# 'FieldBurning_ProxyMapping' workbook (these are initialized in Step 2);
# right-hand names are the variables calculated by the code in Step 2.

# national --> state proxies (state x year [x month])
State_wheat, State_maize, State_rice = state_wheat, state_maize, state_rice
State_soybeans, State_cotton, State_sorghum = state_soybeans, state_cotton, state_sorghum
State_peanuts, State_other_grains, State_leghay = state_peanuts, state_other, state_leg
State_barley, State_oats, State_grasshay = state_bar, state_oats, state_grass
State_vegetables, State_tobacco, State_sunflower = state_veg, state_tob, state_sun
State_potatoes, State_peas, State_drybeans = state_pot, state_pea, state_drybean
State_sugarbeets, State_lentils, State_chickpeas = state_beets, state_lentils, state_chickpeas

# county --> grid proxies (0.01x0.01 [x month])
Map_wheat, Map_maize, Map_rice = grid_wheat_mon, grid_maize_mon, grid_rice_mon
Map_soybeans, Map_cotton, Map_other = grid_soybean_mon, grid_cotton_mon, grid_other_mon

# Drop the original names to clear space for larger arrays
# (the arrays themselves stay alive through the Map_* names)
del grid_wheat_mon, grid_maize_mon, grid_rice_mon, grid_soybean_mon, grid_cotton_mon, grid_other_mon

# Total over all crop maps, accumulated in the same left-to-right order
Map_total = Map_wheat + Map_maize
for extra_map in (Map_rice, Map_soybeans, Map_cotton, Map_other):
    Map_total += extra_map
del extra_map

##### Step 4.1.2 Allocate National EPA Emissions to the State-Level

In [None]:
# Allocate national GHGI emissions (kt) to the states:
#   state emissions = national emissions * state proxy / national proxy total
# Groups without a state proxy ('-' in the mapping file) and the 'Emi_not_mapped'
# group keep their national emissions in a 'NonState_' array instead.

DEBUG = 1

# Create the placeholder arrays for every group and remember, per mapping row,
# the emission group name together with its state proxy name
group_pairs = []
for igroup in np.arange(0, len(proxy_burning_map)):
    emi_group = proxy_burning_map.loc[igroup, 'GHGI_Emi_Group']
    group_pairs.append((emi_group, proxy_burning_map.loc[igroup, 'State_Proxy_Group']))
    vars()['State_' + emi_group] = np.zeros([len(State_ANSI), num_years])
    vars()['NonState_' + emi_group] = np.zeros([num_years])

# Fill the arrays year by year and state by state
for iyear in np.arange(num_years):
    for istate in np.arange(len(State_ANSI)):
        for emi_group, state_proxy_name in group_pairs:
            if state_proxy_name == '-' or emi_group == 'Emi_not_mapped':
                # No state allocation: carry the national value through unchanged
                vars()['NonState_' + emi_group][iyear] = vars()[emi_group][iyear]
                continue
            state_proxy = vars()[state_proxy_name]
            state_share = data_fn.safe_div(state_proxy[istate, iyear], np.sum(state_proxy[:, iyear]))
            vars()['State_' + emi_group][istate, iyear] = vars()[emi_group][iyear] * state_share

# Check the state-allocated plus non-state emissions against the last row of the national table
print('QA/QC #1: Check weighted emissions against GHGI')
for iyear in np.arange(0, num_years):
    summary_emi = EPA_emi_burning_CH4.iloc[-1, iyear+1]
    calc_emi = 0
    for emi_group, state_proxy_name in group_pairs:
        state_total = np.sum(vars()['State_' + emi_group][:, iyear])
        calc_emi += state_total + vars()['NonState_' + emi_group][iyear]
    if DEBUG == 1:
        print(summary_emi)
        print(calc_emi)
    # Relative difference with respect to the mean of both values
    diff = abs(summary_emi - calc_emi) / ((summary_emi + calc_emi) / 2)
    if diff < 0.0001:
        print('Year ', year_range[iyear], ': PASS, difference < 0.01%')
    else:
        print('Year ', year_range[iyear], ': FAIL -- Difference = ', diff*100,'%')

##### 4.1.3 Allocate state emissions to the CONUS region (0.1x0.1)

In [None]:
# Create the intermediate netCDF file at `burning_int_out`: yearly emissions will
# need to be saved as intermediate output and read back in due to memory limits.
# NOTE(review): the meaning of the positional argument `1` is defined by
# data_IO_fn.initialize_netCDF001 -- confirm there.
data_IO_fn.initialize_netCDF001(burning_int_out, netCDF_description, 1, year_range, loc_dimensions, lat001, lon001)

In [None]:
# Allocate state-level emissions (kt) onto the 0.1x0.1 grid using the gridded 'Proxy_Groups'.
#
# For every gridding group and every state:
#   * a 0/1 mask selects the 0.01x0.01 cells whose ANSI value matches the state;
#   * the state's emissions are spread over those cells in proportion to the
#     group's monthly proxy (the weights of one state sum to 1 over cells and months);
#   * if the group's proxy has no signal in the state, the total burning proxy
#     (Map_total) is used instead;
#   * if that is empty too, the emissions are recorded in Emissions_nongrid.
# AK, HI and every record with index >= 51 are not gridded; their emissions go to
# Emissions_nongrid. Groups without a state proxy are not gridded in this cell.

DEBUG =1
# Define emission arrays
Emissions_array_01 = np.zeros([len(Lat_01),len(Lon_01),num_years,num_months])
Emissions_nongrid = np.zeros([num_years])
running_sum = np.zeros([len(proxy_burning_map),num_years])

print('**QA/QC Check: Sum of national gridded emissions vs. GHGI national emissions')

# Annual (month-summed) total burning proxy; computed on first use and then
# reused for all groups/states instead of being recomputed every time
map_total_annual = None

#1. Step through each gridding group
for igroup in np.arange(0,len(proxy_burning_map)):
    print(igroup, 'of', len(proxy_burning_map))
    emi_group = proxy_burning_map.loc[igroup,'GHGI_Emi_Group']
    state_proxy_name = proxy_burning_map.loc[igroup,'State_Proxy_Group']

    # Per-group gridded annual emissions, kept for the extension output
    vars()['Ext_'+emi_group] = np.zeros([len(Lat_01),len(Lon_01),num_years])
    ext_array = vars()['Ext_'+emi_group]

    proxy_temp = vars()[proxy_burning_map.loc[igroup,'Proxy_Group']] #lat x lon x month
    # Month-summed proxy of this group; computed on first use, reused for all states
    proxy_annual = None

    #2a. Step through each state (if group was previously allocated to state level)
    if state_proxy_name != '-' and state_proxy_name != 'state_not_mapped':
        state_emi_all = vars()['State_'+emi_group]
        for istate in np.arange(0,len(State_ANSI)):
            # Records outside the gridded domain go straight to the non-gridded total
            if State_ANSI['abbr'][istate] in {'AK','HI'} or istate >= 51:
                Emissions_nongrid += state_emi_all[istate,:]
                continue

            state_temp = state_emi_all[istate,:]
            # Nothing to allocate (written as "not > 0" so NaN sums are skipped too);
            # checked before the expensive mask is built
            if not np.sum(state_temp[:]) > 0:
                continue

            # 1 where the cell belongs to this state, 0 elsewhere
            mask_state = np.ma.ones(np.shape(state_ANSI_map))
            mask_state = np.ma.masked_where(state_ANSI_map != State_ANSI['ansi'][istate], mask_state)
            mask_state = np.ma.filled(mask_state,0)

            if proxy_annual is None:
                proxy_annual = np.sum(proxy_temp[:,:,:],axis=2)
            proxy_state_total = np.sum(mask_state*proxy_annual)

            if proxy_state_total > 0:
                # Weight by this group's own proxy
                weight_source, weight_total = proxy_temp, proxy_state_total
            elif proxy_state_total == 0:
                # No burning data for this crop in the state: fall back to the
                # relative total burning emissions
                if map_total_annual is None:
                    map_total_annual = np.sum(Map_total[:,:,:],axis=2)
                map_state_total = np.sum(mask_state*map_total_annual)
                if map_state_total == 0:
                    # No crop burning data in that state at all
                    Emissions_nongrid += state_temp
                    continue
                weight_source, weight_total = Map_total, map_state_total
            else:
                # Negative or NaN proxy total: cannot be used as a weight
                Emissions_nongrid += state_temp
                continue

            for imonth in np.arange(0, num_months):
                weighted_array = data_fn.safe_div(mask_state*weight_source[:,:,imonth], weight_total)
                weighted_array_01 = data_fn.regrid001_to_01(weighted_array[:,:], Lat_01, Lon_01)
                for iyear in np.arange(0, num_years):
                    contribution = state_temp[iyear]*weighted_array_01
                    Emissions_array_01[:,:,iyear,imonth] += contribution
                    running_sum[igroup,iyear] += np.sum(contribution)
                    # running sum of month emissions for that state
                    ext_array[:,:,iyear] += contribution

        print(igroup)
        print(running_sum[igroup,0])
        print(np.sum(Emissions_array_01[:,:,0,:]))
        print(np.sum(ext_array[:,:,0]))


# Check gridded + non-gridded emissions against the last row of the national table
for iyear in np.arange(0, num_years):
    calc_emi2 = 0
    calc_emi = np.sum(Emissions_array_01[:,:,iyear,:]) + np.sum(Emissions_nongrid[iyear])
    for igroup in np.arange(0, len(proxy_burning_map)):
        calc_emi2 += np.sum(vars()['Ext_'+proxy_burning_map.loc[igroup,'GHGI_Emi_Group']][:,:,iyear])
    calc_emi2 += np.sum(Emissions_nongrid[iyear])
    summary_emi = EPA_emi_burning_CH4.iloc[-1,iyear+1]
    if DEBUG ==1:
        print(summary_emi)
        print(calc_emi)
        print(calc_emi2)
    # Relative difference with respect to the mean of both values
    diff = emi_diff = abs(summary_emi-calc_emi)/((summary_emi+calc_emi)/2)
    if diff < 0.0001:
        print('Year ', year_range[iyear], ': PASS, difference < 0.01%')
    else:
        print('Year ', year_range[iyear], ': FAIL -- Difference = ', diff*100,'%')

In [None]:
# QA/QC: compare gridded + non-gridded emissions with the last row of the
# national table, once via the combined array and once via the per-group arrays
for iyear in np.arange(0, num_years):
    nongrid_year = np.sum(Emissions_nongrid[iyear])

    # Total from the combined (lat, lon, year, month) array
    calc_emi = np.sum(Emissions_array_01[:,:,iyear,:]) + nongrid_year

    # Total from the per-group 'Ext_' arrays
    calc_emi2 = 0
    for igroup in np.arange(0, len(proxy_burning_map)):
        group_array = vars()['Ext_'+proxy_burning_map.loc[igroup,'GHGI_Emi_Group']]
        calc_emi2 += np.sum(group_array[:,:,iyear])
    calc_emi2 += nongrid_year

    summary_emi = EPA_emi_burning_CH4.iloc[-1,iyear+1]
    if DEBUG == 1:
        for value in (summary_emi, calc_emi, calc_emi2):
            print(value)

    # Relative difference with respect to the mean of both values
    emi_diff = abs(summary_emi-calc_emi)/((summary_emi+calc_emi)/2)
    diff = emi_diff
    if not diff < 0.0001:
        print('Year ', year_range[iyear], ': FAIL -- Difference = ', diff*100,'%')
    else:
        print('Year ', year_range[iyear], ': PASS, difference < 0.01%')

#### Step 4.1.4 Save gridded emissions (kt)

In [None]:
#save gridded emissions for each gridding group - for extension

#Initialize file
data_IO_fn.initialize_netCDF(grid_emi_outputfile, netCDF_description, 0, year_range, loc_dimensions, Lat_01, Lon_01)

# Groups to write: every distinct GHGI_Emi_Group except the 'Emi_not_mapped' placeholder
unique_groups = np.unique(proxy_burning_map['GHGI_Emi_Group'])
unique_groups = list(unique_groups[unique_groups != 'Emi_not_mapped'])
print(unique_groups)

# Non-gridded emissions, one value per year. The first axis of Emissions_nongrid
# is the year index (it is indexed as Emissions_nongrid[iyear] elsewhere); any
# further axes are summed. Summing over *all* axes would store the same
# all-years total in every year slot of the 'year'-dimensioned variable.
nongrid_by_year = np.asarray(Emissions_nongrid)
nongrid_by_year = nongrid_by_year.reshape(nongrid_by_year.shape[0], -1).sum(axis=1)

# The context manager closes the file even if one of the writes raises.
with Dataset(grid_emi_outputfile, 'r+', format='NETCDF4') as nc_out:
    #nc_out.createDimension('state', len(State_ANSI))

    for igroup in np.arange(0,len(unique_groups)):
        print('Ext_'+unique_groups[igroup])
        if len(np.shape(vars()['Ext_'+unique_groups[igroup]])) ==4:
            ghgi_temp = np.sum(vars()['Ext_'+unique_groups[igroup]],axis=3) #sum month data if data is monthly
        else:
            ghgi_temp = vars()['Ext_'+unique_groups[igroup]]

        # Write data to netCDF
        data_out = nc_out.createVariable('Ext_'+unique_groups[igroup], 'f8', ('lat', 'lon','year'), zlib=True)
        data_out[:,:,:] = ghgi_temp[:,:,:]

    #save nongrid data to calculate non-grid fraction extension
    data_out = nc_out.createVariable('Emissions_nongrid', 'f8', ('year'), zlib=True)  
    data_out[:] = nongrid_by_year

#Confirm file location
print('** SUCCESS **')
print("Gridded emissions (kt) written to file: {}" .format(os.getcwd())+grid_emi_outputfile)
print(' ')

del data_out, ghgi_temp, nc_out, nongrid_by_year


#### Step 4.3. Calculate Gridded Emission Fluxes (molec./cm2/s) (0.1x0.1)

In [None]:
#Convert emissions to emission flux
# conversion: kt emissions to molec/cm2/s flux
import calendar  # local import: leap years are derived from the year instead of hard-coded

DEBUG = 1

# Monthly fluxes [lat, lon, year, month] and annual fluxes [lat, lon, year]
Flux_array_01 = np.zeros([len(Lat_01),len(Lon_01),num_years,num_months])
Flux_array_01_annual = np.zeros([len(Lat_01),len(Lon_01),num_years])
# Mass totals recovered from the fluxes, used only for the QA/QC check below
check_sum = np.zeros([num_years])
check_sum_annual = np.zeros([num_years])

print('**QA/QC Check: Sum of national gridded emissions vs. GHGI national emissions')
  
for iyear in np.arange(0,num_years):
    # Select the days-per-month table for this year. Using calendar.isleap keeps
    # this correct for any year in year_range (previously only 2012 and 2016
    # were treated as leap years).
    if calendar.isleap(int(year_range[iyear])):
        month_days = month_day_leap
    else:
        month_days = month_day_nonleap
    year_days = np.sum(month_days)
        
    # calculate fluxes for each emissions group and national sum  (=kt * grams/kt *molec/mol *mol/g *s^-1 * cm^-2)
    conversion_factor_annual = 10**9 * Avogadro / float(Molarch4 * year_days * 24 * 60 *60) / area_matrix_01
    
    #if proxy_livestock_map.loc[igroup, 'Month_Flag'] == 1:
    for imonth in np.arange(0,num_months):
        conversion_factor_month = 10**9 * Avogadro / float(Molarch4 * month_days[imonth] * 24 * 60 *60) / area_matrix_01
        # Fraction of the year covered by this month; weights the monthly flux
        # so that the annual flux is a time-weighted mean of the monthly fluxes.
        conv_factor2 = month_days[imonth]/year_days
        Flux_array_01[:,:,iyear,imonth] = Emissions_array_01[:,:,iyear,imonth] * conversion_factor_month
        Flux_array_01_annual[:,:,iyear] += Flux_array_01[:,:,iyear,imonth]*conv_factor2        
        #calculate the monthly running flux totals and convert from flux back to mass (also calc annual sum)    
        check_sum[iyear] += np.sum(Flux_array_01[:,:,iyear,imonth]/conversion_factor_month)
    check_sum_annual[iyear] += np.sum(Flux_array_01_annual[:,:,iyear]/conversion_factor_annual)
        
    #convert back to mass to check
    calc_emi = check_sum_annual[iyear] +np.sum(Emissions_nongrid[iyear]) 
    calc_emi2 = check_sum[iyear] +np.sum(Emissions_nongrid[iyear]) 
    
    summary_emi = EPA_emi_burning_CH4.iloc[-1,iyear+1] 
    # Symmetric relative difference between the GHGI value and the annual-flux total
    emi_diff = abs(summary_emi-calc_emi)/((summary_emi+calc_emi)/2)
    if DEBUG ==1:
        print(calc_emi)
        print(calc_emi2)
        print(summary_emi)
    # NOTE(review): the tolerance applied here is 0.00015 (0.015%), while the
    # printed messages quote 0.01% -- confirm which value is intended.
    if abs(emi_diff) < 0.00015:
        print('Year '+ year_range_str[iyear]+': Difference < 0.01%: PASS')
    else: 
        print('Year '+ year_range_str[iyear]+': Difference > 0.01%: FAIL, diff: '+str(emi_diff))
        
# Alias used by the netCDF-writing and plotting cells
Flux_Emissions_Total_annual = Flux_array_01_annual


-------------
## Step 5. Write netCDF
------------

In [None]:
# monthly data
#Initialize file
data_IO_fn.initialize_netCDF(gridded_month_outputfile, netCDF_description_m, 1, year_range, loc_dimensions, Lat_01, Lon_01)

# Write data to netCDF
# (the context manager closes the file even if the assignment raises,
#  e.g. on a shape mismatch, so no handle is left open)
with Dataset(gridded_month_outputfile, 'r+', format='NETCDF4') as nc_out:
    nc_out.variables['emi_ch4'][:,:,:,:] = Flux_array_01
#Confirm file location
print('** SUCCESS **')
print("Gridded fluxes written to file: {}" .format(os.getcwd())+gridded_month_outputfile)

# yearly data
#Initialize file
data_IO_fn.initialize_netCDF(gridded_outputfile, netCDF_description, 0, year_range, loc_dimensions, Lat_01, Lon_01)

# Write data to netCDF
with Dataset(gridded_outputfile, 'r+', format='NETCDF4') as nc_out:
    nc_out.variables['emi_ch4'][:,:,:] = Flux_Emissions_Total_annual
#Confirm file location
print('** SUCCESS **')
print("Gridded field burning fluxes written to file: {}" .format(os.getcwd())+gridded_outputfile)

----------
## Step 6. Plot Gridded Data
---------

#### Step 6.1. Plot Annual Emission Fluxes

In [None]:
# Plot the annual emission fluxes for each year in year_range.
# scale_max, save_flag and save_outfile are passed straight through to the
# plotting helper; presumably they set the color-scale maximum and control
# whether/where the figure is saved -- confirm against data_plot_fn.
scale_max, save_flag, save_outfile = 0.10, 0, ''
data_plot_fn.plot_annual_emission_flux_map(
    Flux_Emissions_Total_annual,
    Lat_01,
    Lon_01,
    year_range,
    title_str,
    scale_max,
    save_flag,
    save_outfile,
)

#### Step 6.2 Plot Difference between first and last inventory year

In [None]:
# Map the change in annual flux between the last and the first inventory year.
# save_flag / save_outfile are handed to the plotting helper unchanged
# (presumably 0 / '' means "do not save" -- confirm against data_plot_fn).
save_flag, save_outfile = 0, ''
data_plot_fn.plot_diff_emission_flux_map(
    Flux_Emissions_Total_annual,
    Lat_01,
    Lon_01,
    year_range,
    title_diff_str,
    save_flag,
    save_outfile,
)

In [None]:
# Report the elapsed run time. `it` is a start timestamp set earlier in the
# notebook (assumed to be in seconds, like datetime.timestamp()).
ct = datetime.datetime.now()
ft = ct.timestamp()
seconds_per_hour = 60 * 60
time_elapsed = (ft - it) / seconds_per_hour
print('Time to run: {} hours'.format(str(time_elapsed)))
print('** GEPA_3F_Field_Burning_of_Agricultural_Residues: COMPLETE **')