# Gridded EPA Methane Inventory
## Category: 3C Rice Cultivation

***
#### Authors: 
Joannes D. Maasakkers, Candice F. Z. Chen, Erin E. McDuffie
#### Date Last Updated: 
see Step 0
#### Notebook Purpose: 
This notebook calculates gridded (0.1⁰x0.1⁰) annual and monthly emission fluxes of methane (molecules CH4/cm2/s) from rice cultivation activities in the CONUS region for the years 2012 - 2018. Emission fluxes are reported at both annual and monthly time resolution.  
#### Summary & Notes:
The national EPA GHGI emissions from rice cultivation are read in from the EPA GHG Inventory Rice workbook. Emissions are available as national totals (for the entire time series) and state-level allocations (until 2015). National emissions are allocated to the state level using relative state-level emissions data. State-level emissions are allocated to the county level and then onto a 0.01⁰x0.01⁰ grid using county and gridded data of rice harvested areas from USDA. Data are then re-gridded to 0.1⁰x0.1⁰ and converted to fluxes (molecules CH4/cm2/s). Lastly, a monthly scaling factor is applied to the annual flux data (constant for all years). Annual and monthly emission fluxes (molecules CH4/cm2/s) are written to final netCDFs in the ‘/code/Final_Gridded_Data/’ folder.  
***

-------
## Step 0. Set-Up Notebook Modules, Functions, and Local Parameters and Constants
-------

In [None]:
# Report when this notebook file was last saved and which directory it runs from
import os
import time

modtime = os.path.getmtime('./3C_Rice.ipynb')
notebook_saved_at = time.localtime(modtime)
modificationTime = time.strftime('%Y-%m-%d %H:%M:%S', notebook_saved_at)

print(f"This file was last modified on:  {modificationTime}")
print()
print(f"The directory we are working in is {os.getcwd()}")

In [None]:
# IPython magic: render matplotlib figures inline in the notebook output
%matplotlib inline

In [None]:
# Import base modules
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import re
import datetime
from copy import copy

# Import additional modules
# Load plotting package Basemap 
from mpl_toolkits.basemap import Basemap

# Load netCDF (for manipulating netCDF file types)
from netCDF4 import Dataset

# Set up ticker
import matplotlib.ticker as ticker

#add path for the global function module (file)
import sys
module_path = os.path.abspath(os.path.join('../Global_Functions/'))
if module_path not in sys.path:
    sys.path.append(module_path)

# Load Tabula (for reading tables from PDFs)
import tabula as tb   
    
# Load user-defined global functions (modules)
import data_load_functions as data_load_fn
import data_functions as data_fn
import data_IO_functions as data_IO_fn
import data_plot_functions as data_plot_fn

In [None]:
# ----------------------------- INPUT files -----------------------------------
# Shared (global) input files: entries 0-6 of the list returned by the
# global-functions module, in the order unpacked below.
global_filenames = data_load_fn.load_global_file_names()
(
    State_ANSI_inputfile,
    County_ANSI_inputfile,
    pop_map_inputfile,
    Grid_area01_inputfile,
    Grid_area001_inputfile,
    Grid_state001_ansi_inputfile,
    Grid_county001_ansi_inputfile,
) = [global_filenames[position] for position in range(7)]

# Notebook-specific inputs
# EPA GHGI rice workbook
EPA_rice_inputfile = "../Global_InputData/GHGI/Ch5_Agriculture/Rice_1990-2018_PR_FINAL.xlsx"

# Proxy mapping workbook
Rice_Mapping_inputfile = './InputData/Rice_ProxyMapping.xlsx'

# Activity data: USDA Census rice acreage (state and county, 2012 and 2017)
State_2012rice_file = './InputData/Census_2012_RiceAcres_State.csv'
State_2017rice_file = './InputData/Census_2017_RiceAcres_State.csv'
County_2012rice_file = './InputData/Census_2012_RiceAcres_County.csv'
County_2017rice_file = './InputData/Census_2017_RiceAcres_County.csv'

# USDA gridded rice crop acreage (file-name prefix; year and suffix are appended later)
Rice_list = './InputData/Rice_'

# Monthly scaling factors
Bloom_month_factors_file = "./InputData/Rice_Emissions_Scenario_D_MAY16.nc"

# ----------------------------- OUTPUT files ----------------------------------
# Annual product
gridded_outputfile = "../Final_Gridded_Data/EPA_v2_3C_Rice_Cultivation.nc"
netCDF_description = "Gridded EPA Inventory - Rice Cultivation Emissions - IPCC Source Category 3C"

# Monthly product
netCDF_description_m = "Gridded EPA Inventory - Monthly Rice Cultivation Emissions - IPCC Source Category 3C"
gridded_month_outputfile = "../Final_Gridded_Data/EPA_v2_3C_Rice_Cultivation_Monthly.nc"

# Plot titles
title_str = 'EPA methane emissions from rice cultivation'
title_diff_str = 'Emissions from rice cultivation difference: 2018-2012'

# Gridded proxy/emission data written for the extension product
grid_emi_outputfile = "../Final_Gridded_Data/Extension/v2_input_data/Rice_Grid_Emi.nc"

In [None]:
# Define local variables
start_year = 2012  # First year in emission timeseries
end_year = 2018    # Last year in emission timeseries
year_range = list(range(start_year, end_year + 1))  # List of emission years
year_range_str = [str(year) for year in year_range]
num_years = len(year_range)

# Define constants
Avogadro   = 6.02214129 * 10**(23)  # molecules/mol
Molarch4   = 16.04                  # g/mol
Res01      = 0.1                    # degrees (output grid resolution)
Res_01     = 0.01                   # degrees (high-resolution working grid)
tg_scale   = 0.001                  # Tg scale number [New file allows for the exclusion of the territories]

# Continental US Lat/Lon Limits (for netCDF files)
Lon_left = -130       # deg
Lon_right = -60       # deg
Lat_low  = 20         # deg
Lat_up  = 55          # deg
loc_dimensions = [Lat_low, Lat_up, Lon_left, Lon_right]

# Index bounds of the CONUS window on the 0.1 degree grid.
# The quotients are rounded before conversion to int: a bare int() truncates,
# so a quotient that lands a hair below a whole number (floating-point
# division by 0.1) would silently shift the window by one cell.
ilat_start = int(round((90 + Lat_low) / Res01))          # 1100:1450 (continental US range)
ilat_end = int(round((90 + Lat_up) / Res01))
ilon_start = abs(int(round((-180 - Lon_left) / Res01)))  # 500:1200 (continental US range)
ilon_end = abs(int(round((-180 - Lon_right) / Res01)))

# Number of days in each month
month_day_leap    = [31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
month_day_nonleap = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]

# Month arrays
month_range_str = ['January','February','March','April','May','June','July','August','September','October','November','December']
num_months = len(month_range_str)

In [None]:
%%javascript
// Raise the notebook's output auto-scroll threshold (presumably so long cell
// outputs are not collapsed into a scrolling box - confirm for your Jupyter version)
IPython.OutputArea.auto_scroll_threshold = 9999;

In [None]:
# Record the start of the run, both as a datetime (ct) and as a POSIX timestamp (it)
ct = datetime.datetime.now()
it = datetime.datetime.timestamp(ct)
print(f"current time: {ct}")

____
## Step 1. Load in State ANSI data and Area Maps
_____

In [None]:
# ---- State-level ANSI data ----
# Only the first two objects returned by the loader are used in this notebook
State_ANSI, name_dict = data_load_fn.load_state_ansi(State_ANSI_inputfile)[0:2]
# QA: number of states
print(f"Read input file: {State_ANSI_inputfile}")
print(f'Total "States" found: {len(State_ANSI):.0f}')
print(' ')

# ---- County ANSI data ----
# Includes State ANSI number, county ANSI number, county name, and county area (square miles)
County_ANSI = pd.read_csv(County_ANSI_inputfile, encoding='latin-1')

# QA: number of counties
print(f"Read input file: {County_ANSI_inputfile}")
print(f'Total "Counties" found (include PR): {len(County_ANSI):.0f}')
print(' ')

# Numeric county table: column 0 = State ANSI number, 1 = county ANSI number,
# 2 = county area (taken from the 4th column of the county file)
county_array = np.zeros([len(County_ANSI), 3])
county_records = County_ANSI.values
for row_idx in range(len(County_ANSI)):
    record = county_records[row_idx]
    county_array[row_idx, 0] = int(record[0])
    county_array[row_idx, 1] = int(record[1])
    county_array[row_idx, 2] = record[3]

# ---- 0.01 x 0.01 degree data ----
# State and county ANSI ID maps (stored as int32) and grid cell area (m2) map
state_ANSI_map = data_load_fn.load_state_ansi_map(Grid_state001_ansi_inputfile).astype('int32')
county_ANSI_map = data_load_fn.load_county_ansi_map(Grid_county001_ansi_inputfile).astype('int32')
area_map, lat001, lon001 = data_load_fn.load_area_map_001(Grid_area001_inputfile)

# ---- 0.1 x 0.1 degree data ----
# Grid cell area and coordinate vectors; keep only the continental US window
area_map01, Lat01, Lon01 = data_load_fn.load_area_map_01(Grid_area01_inputfile)[0:3]
Lat_01 = Lat01[ilat_start:ilat_end]
Lon_01 = Lon01[ilon_start:ilon_end]

# Regrid the 0.01 degree area map to 0.1 degree, then convert from m2 to cm2
area_matrix_01 = data_fn.regrid001_to_01(area_map, Lat_01, Lon_01)
area_matrix_01 *= 10000

# State ANSI map regridded to 0.1 degree
state_ANSI_map_01 = data_fn.regrid001_to_01(state_ANSI_map, Lat_01, Lon_01)

# Print time
ct = datetime.datetime.now()
print(f"current time: {ct}")

-------------
## Step 2: Read-in and Format Proxy Data
-------------

#### Step 2.1 Read In Proxy Mapping File & Make Proxy Arrays

In [None]:
# Load the GHGI source -> emission group mapping.
# A single read is enough: the sheet's own header row (after the skipped
# first row) already supplies the column names.
ghgi_rice_map = pd.read_excel(Rice_Mapping_inputfile, sheet_name="GHGI Map - Rice",
                              usecols="A:B", skiprows=1, header=0)

# Drop rows without an emission group ('na' text or empty cells). The .copy()
# makes the filtered frame independent so the column assignment below does
# not write into a slice of the original.
emi_group_col = ghgi_rice_map['GHGI_Emi_Group']
ghgi_rice_map = ghgi_rice_map[(emi_group_col != 'na') & emi_group_col.notna()].copy()

# Strip parentheses from the source names. regex=False makes this a literal
# replacement on every pandas version (the regex default changed in pandas 2.0,
# after which the pattern r"\(" no longer matches a plain "(").
ghgi_rice_map['GHGI_Source'] = (ghgi_rice_map['GHGI_Source']
                                .str.replace("(", "", regex=False)
                                .str.replace(")", "", regex=False))
ghgi_rice_map.reset_index(inplace=True, drop=True)
display(ghgi_rice_map)

# Load the emission group -> proxy mapping
proxy_rice_map = pd.read_excel(Rice_Mapping_inputfile, sheet_name="Proxy Map - Rice",
                               usecols="A:G", skiprows=1, header=0)
display((proxy_rice_map))

# Create empty proxy and emission group arrays, named after the entries in the
# mapping file (a month axis is added for proxies flagged as monthly).
# vars() at notebook top level is the global namespace, so each assignment
# creates a notebook-level variable.
for igroup in np.arange(0, len(proxy_rice_map)):
    proxy_name = proxy_rice_map.loc[igroup, 'Proxy_Group']
    state_proxy_name = proxy_rice_map.loc[igroup, 'State_Proxy_Group']
    county_proxy_name = proxy_rice_map.loc[igroup, 'County_Proxy_Group']

    # Gridded proxy and its out-of-domain ("nongrid") counterpart
    if proxy_rice_map.loc[igroup, 'Grid_Month_Flag'] == 0:
        vars()[proxy_name] = np.zeros([len(Lat_01), len(Lon_01), num_years])
        vars()[proxy_name + '_nongrid'] = np.zeros([num_years])
    else:
        vars()[proxy_name] = np.zeros([len(Lat_01), len(Lon_01), num_years, num_months])
        vars()[proxy_name + '_nongrid'] = np.zeros([num_years, num_months])

    # National emissions for the group, one value per year
    vars()[proxy_rice_map.loc[igroup, 'GHGI_Emi_Group']] = np.zeros([num_years])

    # State proxy: only created when one is assigned in the mapping file.
    # (Previously a missing state proxy also skipped the county proxy below,
    # because the loop hit `continue` before reaching it.)
    if state_proxy_name != '-':
        if proxy_rice_map.loc[igroup, 'State_Month_Flag'] == 0:
            vars()[state_proxy_name] = np.zeros([len(State_ANSI), num_years])
        else:
            vars()[state_proxy_name] = np.zeros([len(State_ANSI), num_years, num_months])

    # County proxy: only created when one is assigned in the mapping file
    if county_proxy_name != '-':
        if proxy_rice_map.loc[igroup, 'County_Month_Flag'] == 0:
            vars()[county_proxy_name] = np.zeros([len(State_ANSI), len(County_ANSI), num_years])
        else:
            vars()[county_proxy_name] = np.zeros([len(State_ANSI), len(County_ANSI), num_years, num_months])


emi_group_names = np.unique(ghgi_rice_map['GHGI_Emi_Group'])

print('QA/QC: Is the number of emission groups the same for the proxy and emissions tabs?')
if (len(emi_group_names) == len(np.unique(proxy_rice_map['GHGI_Emi_Group']))):
    print('PASS')
else:
    print('FAIL')

#### Step 2.2. Read in the State Emissions Data

In [None]:
# State emissions data from the GHGI workbook are used as the state-level proxy here.
# EPA methane emissions are in units of MMT CO2e and are converted to kt
# (divide by the factor 25 used below, then multiply by 1000).

# Initialize
state_emis = np.zeros([len(State_ANSI), num_years])
idx_2015 = year_range.index(2015)

# Read in emissions
EPA_Rice_Emissions = pd.read_excel(EPA_rice_inputfile, skiprows=3, sheet_name='Total Methane Emissions')
EPA_Rice_Emissions.dropna(axis=0, inplace=True)

# Remove non-state rows (tier headers, tier totals and the national total).
# A label filter is used instead of dropping rows inside a positional loop:
# after dropna() the index can have gaps, so looping over range(len(...)) and
# indexing by label can raise KeyError or skip rows.
non_state_rows = ['Tier 1', 'Tier 3', 'Tier 1 Total', 'Tier 3 Total', 'Total']
EPA_Rice_Emissions = EPA_Rice_Emissions[~EPA_Rice_Emissions['State'].isin(non_state_rows)]
EPA_Rice_Emissions = EPA_Rice_Emissions.drop(columns=[*range(1990, start_year, 1)])
EPA_Rice_Emissions = EPA_Rice_Emissions.replace('NE', 0)
EPA_Rice_Emissions.reset_index(inplace=True, drop=True)

# State-level values are used directly through 2015; for all later years the
# 2013-2015 average is applied (to be consistent with the GHGI)
for iyear in np.arange(0, num_years):
    if year_range[iyear] <= 2015:
        for istate in np.arange(0, len(EPA_Rice_Emissions)):
            # Row of State_ANSI whose name matches this workbook row
            match_state = np.where(EPA_Rice_Emissions['State'][istate] == State_ANSI['name'])[0][0]
            # Convert from MMT CO2e to kt
            state_emis[match_state, iyear] = EPA_Rice_Emissions.loc[istate, year_range[iyear]] / (25 * 1e-3)
    else:
        state_emis[:, iyear] = np.mean(state_emis[:, idx_2015 - 2:idx_2015 + 1], axis=1)

    print('Total emissions (kt)', year_range_str[iyear], np.sum(state_emis[:, iyear]))

#### Step 2.3. Read In USDA Census/Survey data

#### Step 2.3.1 2012 State Census Data

In [None]:
# Make state array of 2012 rice acreage

# Read in 2012 USDA Census Data
State_temp = pd.read_csv(State_2012rice_file)

# Independent copy of the two columns used, so the cleaning below does not
# write into a slice of State_temp
Census12_State = State_temp[['State ANSI', 'Value']].copy()

# Clean the 'Value' column in one pass:
#  - remove thousands separators,
#  - replace withheld values "(D)" with zero,
#  - convert everything to float (the builtin float replaces np.float,
#    which was removed from NumPy).
cleaned_values = Census12_State['Value'].astype(str).str.replace(',', '', regex=False).str.strip()
cleaned_values = cleaned_values.where(cleaned_values != '(D)', '0')
Census12_State['Value'] = cleaned_values.astype(float)

# Make array with ansi, state abbreviation, and area harvested
State_rice_12 = State_ANSI.iloc[:, [0, 1]].copy()
State_rice_12['Area_harvested'] = 0.0

# Place area harvested in the row of the matching state
for state_ansi, harvested in zip(Census12_State['State ANSI'], Census12_State['Value']):
    match_state = np.where(State_rice_12.ansi == state_ansi)[0][0]
    State_rice_12.loc[match_state, 'Area_harvested'] = harvested

display(State_rice_12.head())

#### Step 2.3.2. 2017 State Census Data

In [None]:
# Make state array of 2017 rice acreage

# Read in 2017 USDA Census Data
State_temp = pd.read_csv(State_2017rice_file)

# Independent copy of the two columns used, so the cleaning below does not
# write into a slice of State_temp
Census17_State = State_temp[['State ANSI', 'Value']].copy()

# Clean the 'Value' column in one pass:
#  - remove thousands separators,
#  - replace withheld values "(D)" with zero,
#  - convert everything to float (the builtin float replaces np.float,
#    which was removed from NumPy).
cleaned_values = Census17_State['Value'].astype(str).str.replace(',', '', regex=False).str.strip()
cleaned_values = cleaned_values.where(cleaned_values != '(D)', '0')
Census17_State['Value'] = cleaned_values.astype(float)

# Make array with ansi, state abbreviation, and area harvested.
# .copy() keeps the new column from being written into a slice of State_ANSI.
State_rice_17 = State_ANSI.iloc[:, [0, 1]].copy()
State_rice_17['Area_harvested'] = 0.0

# Place area harvested in the row of the matching state
for state_ansi, harvested in zip(Census17_State['State ANSI'], Census17_State['Value']):
    match_state = np.where(State_rice_17.ansi == state_ansi)[0][0]
    State_rice_17.loc[match_state, 'Area_harvested'] = harvested

State_rice_17.head()

#### Step 2.3.3. 2012 County data

In [None]:
# Make county array of 2012 rice acreage

# Read in 2012 County census data
County_temp = pd.read_csv(County_2012rice_file)

# Independent copy of the columns used, so the cleaning below does not write
# into a slice of County_temp
Census12_County = County_temp[['State ANSI', 'County ANSI', 'Value']].copy()

# Clean the 'Value' column in one pass:
#  - remove thousands separators,
#  - mark withheld values "(D)" with the sentinel -1 [for excess allocation],
#  - convert to int, then zero any value below the sentinel (prevent negative
#    emissions),
#  - store as float (the builtin float replaces np.float, removed from NumPy).
# NOTE(review): the previous code zeroed *every* negative value, which also
# erased the -1 sentinel written one step earlier. Step 2.4.1 distributes the
# unreported state acreage over counties flagged with -1, so the sentinel is
# preserved here. Confirm this matches the intended method.
cleaned_values = Census12_County['Value'].astype(str).str.replace(',', '', regex=False).str.strip()
cleaned_values = cleaned_values.where(cleaned_values != '(D)', '-1').astype(int)
cleaned_values = cleaned_values.where(cleaned_values >= -1, 0)
Census12_County['Value'] = cleaned_values.astype(float)

# County table (state ANSI, county ANSI, county area) plus area harvested.
# .copy() keeps the new column from being written into a slice of County_ANSI.
County_rice_12 = County_ANSI.iloc[:, [0, 1, 3]].copy()
County_rice_12['Area_harvested'] = 0.0

# Place each census value in the row of the matching (state, county) pair
for state_ansi, county_ansi, harvested in zip(Census12_County['State ANSI'],
                                              Census12_County['County ANSI'],
                                              Census12_County['Value']):
    match_county = np.where((County_rice_12['State'] == state_ansi) &
                            (County_rice_12['County'] == county_ansi))[0][0]
    County_rice_12.loc[match_county, 'Area_harvested'] = harvested

# Set one reporting Florida county to -1 to be consistent with other Florida counties
match_county = np.where((County_rice_12['State'] == 12) & (County_rice_12['County'] == 49))[0][0]
County_rice_12.loc[match_county, 'Area_harvested'] = -1

County_rice_12.head()

#### Step 2.3.4. 2017 County data

In [None]:
# Make county array of 2017 rice acreage

# Read in USDA Census data
County_temp = pd.read_csv(County_2017rice_file)

# Independent copy of the columns used, so the cleaning below does not write
# into a slice of County_temp
Census17_County = County_temp[['State ANSI', 'County ANSI', 'Value']].copy()

# Clean the 'Value' column in one pass:
#  - remove thousands separators,
#  - mark withheld values "(D)" with the sentinel -1 [for excess allocation],
#  - convert to int, then zero any value below the sentinel (prevent negative
#    emissions),
#  - store as float (the builtin float replaces np.float, removed from NumPy).
# NOTE(review): the previous code zeroed *every* negative value, which also
# erased the -1 sentinel written one step earlier. Step 2.4.1 distributes the
# unreported state acreage over counties flagged with -1, so the sentinel is
# preserved here. Confirm this matches the intended method.
cleaned_values = Census17_County['Value'].astype(str).str.replace(',', '', regex=False).str.strip()
cleaned_values = cleaned_values.where(cleaned_values != '(D)', '-1').astype(int)
cleaned_values = cleaned_values.where(cleaned_values >= -1, 0)
Census17_County['Value'] = cleaned_values.astype(float)

# County table (state ANSI, county ANSI, county area) plus area harvested.
# .copy() keeps the new column from being written into a slice of County_ANSI.
County_rice_17 = County_ANSI.iloc[:, [0, 1, 3]].copy()
County_rice_17['Area_harvested'] = 0.0

# Place each census value in the row of the matching (state, county) pair
for state_ansi, county_ansi, harvested in zip(Census17_County['State ANSI'],
                                              Census17_County['County ANSI'],
                                              Census17_County['Value']):
    match_county = np.where((County_rice_17['State'] == state_ansi) &
                            (County_rice_17['County'] == county_ansi))[0][0]
    County_rice_17.loc[match_county, 'Area_harvested'] = harvested

# Set one reporting Florida county to -1 to be consistent with other Florida counties
match_county = np.where((County_rice_17['State'] == 12) & (County_rice_17['County'] == 49))[0][0]
County_rice_17.loc[match_county, 'Area_harvested'] = -1

County_rice_17.head()

#### Step 2.4. Format USDA Census Data

##### Step 2.4.1 Update the USDA County-level rice harvested acreage data for 2012 and 2017

In [None]:
# The sum of the county data does not equal the state total acreage. Therefore, the
# state and county sums are compared for each state, and the 'missing' acreage for
# each non-reporting county is estimated from the ratio of 'missing' acres to the
# total area of the non-reporting counties in that state.
#
# For both 2012 and 2017:
# 1a) Record the area of rice harvested in each state (from the state census and
#     from the sum of the county census data).
# 1b) If county data is missing (area harvested == -1), add the total area of that
#     county to a running state sum.
# 2)  For states with missing county data, calculate the missing acres per unit of
#     non-reporting county area.
# 3)  Fill each non-reporting county with (missing acres per area) * (county area).


def _summarize_rice_by_state(state_rice, state_harvested, county_rice, county_harvested):
    """Return per-state (state census total, summed county total, non-reporting county area).

    All three arrays are ordered like the rows of State_ANSI.
    """
    n_states = len(State_ANSI)
    state_total = np.zeros(n_states)
    county_total = np.zeros(n_states)
    missing_area = np.zeros(n_states)

    # State census totals, placed at the matching State_ANSI position
    for istate in np.arange(0, len(state_rice)):
        match_state = np.where(State_ANSI['ansi'] == state_rice.ansi[istate])[0][0]
        state_total[match_state] = state_harvested[istate]

    for icounty in np.arange(0, len(county_rice)):
        match_state = np.where(State_ANSI['ansi'] == county_rice['State'][icounty])[0][0]
        # Add all county totals for the state together [don't add -1]
        if county_harvested[icounty] > 0:
            county_total[match_state] += county_harvested[icounty]
        # Accumulate the area of counties that do not report rice harvested
        if county_harvested[icounty] == -1:
            missing_area[match_state] += county_rice['Area'][icounty]

    # Set Florida to 1 to allocate to correct counties
    state_total[State_ANSI.index[State_ANSI['abbr'] == 'FL'].tolist()[0]] = 1.0
    return state_total, county_total, missing_area


def _missing_acres_per_area(state_total, county_total, missing_area):
    """Return (state total - county total) / non-reporting area per state, floored at zero."""
    per_area = np.zeros(len(state_total))
    has_missing = missing_area > 0
    per_area[has_missing] = ((state_total[has_missing] - county_total[has_missing])
                             / missing_area[has_missing])
    # Make sure we don't have negative areas
    per_area[per_area < 0] = 0.0
    return per_area


def _fill_missing_counties(county_rice, county_harvested, per_area):
    """Replace each -1 entry of county_harvested (in place) with per-area acreage * county area."""
    for icounty in np.arange(0, len(county_rice)):
        if county_harvested[icounty] == -1:
            # The state must be looked up with the *county* loop index. The 2012
            # loop previously used a stale state index here, which assigned every
            # county the per-area value of one and the same state.
            match_state = np.where(State_ANSI['ansi'] == county_rice['State'][icounty])[0][0]
            county_harvested[icounty] = per_area[match_state] * county_rice['Area'][icounty]


# Initialize variables (np.array makes copies, so the DataFrames keep their -1 flags)
State_rice_Area_harvested_12 = np.array(State_rice_12['Area_harvested'])
State_rice_Area_harvested_17 = np.array(State_rice_17['Area_harvested'])
County_rice_Area_harvested_12 = np.array(County_rice_12['Area_harvested'])
County_rice_Area_harvested_17 = np.array(County_rice_17['Area_harvested'])

## 1) Record area where data is missing
# 2012
(Census_summary_RH_State_12,
 Census_summary_RH_County_12,
 Census_summary_RH_Missing_Area_12) = _summarize_rice_by_state(
    State_rice_12, State_rice_Area_harvested_12, County_rice_12, County_rice_Area_harvested_12)

# 2017
(Census_summary_RH_State_17,
 Census_summary_RH_County_17,
 Census_summary_RH_Missing_Area_17) = _summarize_rice_by_state(
    State_rice_17, State_rice_Area_harvested_17, County_rice_17, County_rice_Area_harvested_17)

## 2) Calculate missing acres per area
# = (state data - available county data) / area of counties with missing data
Census_summary_RH_per_area_12 = _missing_acres_per_area(
    Census_summary_RH_State_12, Census_summary_RH_County_12, Census_summary_RH_Missing_Area_12)
Census_summary_RH_per_area_17 = _missing_acres_per_area(
    Census_summary_RH_State_17, Census_summary_RH_County_17, Census_summary_RH_Missing_Area_17)

## 3) Calculate missing county rice data ( = rice harvest per county area * county area)
_fill_missing_counties(County_rice_12, County_rice_Area_harvested_12, Census_summary_RH_per_area_12)
_fill_missing_counties(County_rice_17, County_rice_Area_harvested_17, Census_summary_RH_per_area_17)

##### Step 2.4.2. Use County data to find total area and total rice area harvest in each state

In [None]:
# ---- 2012 ----
print('QA/QC: Check formatting')

# Per-state totals built from the county table: total county area and total
# rice area harvested (after gap filling)
State_total_Area_12 = np.zeros(len(State_ANSI))
State_total_Area_harvested_12 = np.zeros(len(State_ANSI))

for county_pos, (county_state, county_area) in enumerate(zip(County_rice_12['State'],
                                                              County_rice_12['Area'])):
    state_pos = np.where(State_ANSI['ansi'] == county_state)[0][0]
    State_total_Area_12[state_pos] += float(county_area)
    State_total_Area_harvested_12[state_pos] += County_rice_Area_harvested_12[county_pos]

# The smallest county value is printed as a check that no negative flags remain
print('2012 Should be zero: ')
print(np.min(County_rice_Area_harvested_12))


# ---- 2017 ----
# Same per-state totals, built from the 2017 county table
State_total_Area_17 = np.zeros(len(State_ANSI))
State_total_Area_harvested_17 = np.zeros(len(State_ANSI))

for county_pos, (county_state, county_area) in enumerate(zip(County_rice_17['State'],
                                                              County_rice_17['Area'])):
    state_pos = np.where(State_ANSI['ansi'] == county_state)[0][0]
    State_total_Area_17[state_pos] += float(county_area)
    State_total_Area_harvested_17[state_pos] += County_rice_Area_harvested_17[county_pos]

print('2017 Should be zero: ')
print(np.min(County_rice_Area_harvested_17))

##### Step 2.4.3 Interpolate County Area Harvested to Gap Fill Remaining Years

In [None]:
# Linear interpolation/extrapolation of the 2012 and 2017 census values to every
# year in year_range. The offset of each year is measured from the 2012 census
# explicitly, so the result stays correct if start_year is ever changed
# (previously the loop index was used, which is only right when start_year == 2012).
census_first_year = 2012
census_last_year = 2017
census_span = census_last_year - census_first_year
years_since_census = np.asarray(year_range) - census_first_year

# 1) State data
# Average annual change 2012-2017
State_slope = (State_total_Area_harvested_17 - State_total_Area_harvested_12) / census_span

# Array (state x year) of total rice area harvested
State_total_Area_harvested = (State_total_Area_harvested_12[:, np.newaxis]
                              + years_since_census[np.newaxis, :] * State_slope[:, np.newaxis])


# 2) County data
# Average annual change 2012-2017
County_slope = (County_rice_Area_harvested_17 - County_rice_Area_harvested_12) / census_span

# Array (county x year) of rice area harvested; extrapolated negatives are set to zero
County_rice_Area_harvested = (County_rice_Area_harvested_12[:, np.newaxis]
                              + years_since_census[np.newaxis, :] * County_slope[:, np.newaxis])
County_rice_Area_harvested[County_rice_Area_harvested < 0] = 0

# Make proxy matrix (state x county x year). Each county's state is looked up
# once and all years are written together.
map_county_harvest_area = np.zeros([len(State_ANSI), len(County_ANSI), num_years])
for icounty in np.arange(0, len(County_ANSI)):
    istate = np.where(State_ANSI['ansi'] == County_ANSI['State'][icounty])[0][0]
    map_county_harvest_area[istate, icounty, :] = County_rice_Area_harvested[icounty, :]

#### Step 2.5 Read in and create yearly gridded rice crop area arrays (from the USDA)

In [None]:
# Make year-specific gridded arrays of the total area of rice crops (from USDA)
# on the 0.01 x 0.01 degree grid. Points outside the domain are summed
# separately in rice_array_nongrid.

rice_array = np.zeros([len(lat001), len(lon001), num_years])
rice_array_nongrid = np.zeros(num_years)
rice_county_sum = np.zeros([len(State_ANSI), len(County_ANSI), num_years])

print('QA/QC: Check Gridded Area Sums')
for iyear in np.arange(0, num_years):
    filename = Rice_list + year_range_str[iyear] + '_001x001.csv'
    Rice_list_temp = pd.read_csv(filename)

    point_lon = Rice_list_temp['Longitude'].values
    point_lat = Rice_list_temp['Latitude'].values
    point_area = Rice_list_temp['SUM_Area_Rice'].values

    # Filter inside domain (strict inequalities on all four edges)
    in_domain = ((point_lon > Lon_left) & (point_lon < Lon_right) &
                 (point_lat > Lat_low) & (point_lat < Lat_up))

    # Grid indices of the in-domain points; astype(int) truncates like int()
    ilat = ((point_lat[in_domain] - Lat_low) / Res_01).astype(int)
    ilon = ((point_lon[in_domain] - Lon_left) / Res_01).astype(int)

    # np.add.at accumulates correctly when several points fall in the same
    # cell (a plain fancy-index "+=" would keep only one of them)
    np.add.at(rice_array, (ilat, ilon, iyear), point_area[in_domain])
    rice_array_nongrid[iyear] += np.sum(point_area[~in_domain])

    # QA: gridded + out-of-domain area must reproduce the input total
    sum_area = np.sum(rice_array[:, :, iyear]) + np.sum(rice_array_nongrid[iyear])
    net_area = np.sum(Rice_list_temp['SUM_Area_Rice'])
    area_diff = abs((sum_area - net_area) / ((net_area + sum_area) / 2))
    if abs(area_diff) < 0.0001:
        print('Year '+ year_range_str[iyear]+': Difference < 0.01%: PASS')
    else:
        print('Year '+ year_range_str[iyear]+': Difference > 0.01%: FAIL, diff: '+str(area_diff))

-----------
## Step 3. Read in and Format US EPA GHGI Emissions
----------

In [None]:
# Read in total EPA emissions (in MMT CO2e, converted to kt)
EPA_Rice_Emissions = pd.read_excel(EPA_rice_inputfile, skiprows=3, sheet_name='Total Methane Emissions')
EPA_Rice_Emissions.dropna(axis=0, inplace=True)

# Drop the years before the start of the timeseries
EPA_Rice_Emissions = EPA_Rice_Emissions.drop(columns=[*range(1990, start_year, 1)])

# Keep only the national 'Total' row. This exact-match filter also excludes the
# tier header/total rows and the state rows, so no separate row-dropping loop is
# needed (the previous loop indexed rows by label after dropna() without
# resetting the index, which can raise KeyError or skip rows).
EPA_Rice_Emissions = EPA_Rice_Emissions[EPA_Rice_Emissions['State'] == 'Total']

# The first column is renamed to 'Source'; non-inplace rename avoids writing
# into a slice of the unfiltered frame
EPA_Rice_Emissions = EPA_Rice_Emissions.rename(columns={EPA_Rice_Emissions.columns[0]: 'Source'})
EPA_Rice_Emissions.reset_index(inplace=True, drop=True)

# Convert each year from MMT CO2e to kt (year columns start at position 1)
for iyear in np.arange(0, num_years):
    EPA_Rice_Emissions.iloc[0, iyear + 1] = EPA_Rice_Emissions.iloc[0, iyear + 1] / (25 * 1e-3)
display(EPA_Rice_Emissions)

#### 3.2. Split Emissions into Gridding Groups (each Group will have the same proxy applied during the state allocation/gridding)

In [None]:
# Split emissions into scaling groups.
# In this case, data are only available for total emissions.

DEBUG = 1

# Column positions bounding the emission years (end index is exclusive)
start_year_idx = EPA_Rice_Emissions.columns.get_loc((start_year))
end_year_idx = EPA_Rice_Emissions.columns.get_loc((end_year)) + 1
ghgi_rice_groups = ghgi_rice_map['GHGI_Emi_Group'].unique()
sum_emi = np.zeros([num_years])

# For every group, find the GHGI sources mapped to it and sum their emissions
# per year. The result is stored in a notebook-level variable named after the group.
for igroup in np.arange(0, len(ghgi_rice_groups)):
    group_name = ghgi_rice_groups[igroup]
    source_temp = ghgi_rice_map.loc[ghgi_rice_map['GHGI_Emi_Group'] == group_name, 'GHGI_Source']
    # The source names are joined into one alternation pattern for str.contains
    pattern_temp = '|'.join(source_temp)
    emi_temp = EPA_Rice_Emissions[EPA_Rice_Emissions['Source'].str.contains(pattern_temp)]
    vars()[group_name] = np.zeros([num_years])
    # Slice exactly start_year..end_year. The slice used to be open-ended, which
    # only worked while end_year was the last column of the workbook.
    vars()[group_name][:] = emi_temp.iloc[:, start_year_idx:end_year_idx].sum()


# Check against total summary emissions
print('QA/QC #1: Check Processing Emission Sum against GHGI Summary Emissions')
for iyear in np.arange(0, num_years):
    for igroup in np.arange(0, len(ghgi_rice_groups)):
        sum_emi[iyear] += vars()[ghgi_rice_groups[igroup]][iyear]

    summary_emi = EPA_Rice_Emissions.iloc[0, start_year_idx + iyear]
    # Relative difference (a fraction, not a percentage) between the two totals
    diff1 = abs(sum_emi[iyear] - summary_emi) / ((sum_emi[iyear] + summary_emi) / 2)
    if DEBUG == 1:
        print(summary_emi)
        print(sum_emi[iyear])
    if diff1 < 0.0001:
        print('Year ', year_range[iyear],': PASS, difference < 0.01%')
    else:
        print('Year ', year_range[iyear],': FAIL (check Production & summary tabs): ', diff1,'%')

--------------
## Step 4. Grid Data
-------------

#### Step 4.1. Allocate emissions

##### Step 4.1.1 Assign the Appropriate Proxy Variable Names (state & grid)

In [None]:
# The names on the *left* are expected to match the entries of the 'State_Proxy_Group',
# 'County_Proxy_Group' and 'Proxy_Group' columns of proxy_rice_map: the allocation cells below
# look the proxy arrays up by those names through vars()[...].
# The names on the *right* are the variable names used to calculate the proxies in this code.
# Names on the right need to match those from the code in Step 2

#national --> state proxies (state x year)
State_Emis = state_emis

#state --> county proxies (state x county x year)
County_rice_acreage = map_county_harvest_area

#county --> grid proxies (0.01x0.01)
# (the gridding cell also looks up the variable named <Proxy_Group>+'_nongrid')
Map_rice_area = rice_array
Map_rice_area_nongrid = rice_array_nongrid



# NOTE: no intermediate arrays are deleted here. The disabled 'del' statements that used to follow
# referred to variables from a different source category (seds*, arp_*, ghgrp_*, pop_den_map)
# and did not apply to this notebook.

##### Step 4.1.2 Allocate National EPA Emissions to the State-Level

In [None]:
# Distribute the national emissions (kt) of every group over the states:
#   state emissions = national group emissions * state proxy / (proxy summed over all states)
# Rows of proxy_rice_map whose 'State_Proxy_Group' is '-' or whose 'GHGI_Emi_Group' is
# 'Emi_not_mapped' are not distributed: their national value is stored unchanged in the
# matching 'NonState_' array.

DEBUG = 1

# Zero-initialised outputs, one pair per row of proxy_rice_map:
#   State_<group>    : [state, year]
#   NonState_<group> : [year]
for igroup in np.arange(0,len(proxy_rice_map)):
    group_name = proxy_rice_map.loc[igroup,'GHGI_Emi_Group']
    vars()['State_'+group_name] = np.zeros([len(State_ANSI),num_years])
    vars()['NonState_'+group_name] = np.zeros([num_years])

n_states = len(State_ANSI)
n_groups = len(proxy_rice_map)
for iyear in np.arange(num_years):
    for istate in np.arange(n_states):
        for igroup in np.arange(0,n_groups):
            group_name = proxy_rice_map.loc[igroup,'GHGI_Emi_Group']
            state_proxy_name = proxy_rice_map.loc[igroup,'State_Proxy_Group']
            national_emi = vars()[group_name][iyear]

            # No state proxy / unmapped group: keep the national value aside
            if state_proxy_name == '-' or group_name == 'Emi_not_mapped':
                vars()['NonState_'+group_name][iyear] = national_emi
                continue

            # Share of the national proxy total that belongs to this state in this year
            state_proxy = vars()[state_proxy_name]
            state_share = data_fn.safe_div(state_proxy[istate,iyear], np.sum(state_proxy[:,iyear]))
            vars()['State_'+group_name][istate,iyear] = national_emi * state_share

# QA: (sum over states + non-state remainder) must reproduce the national value of each year
print('QA/QC #1: Check weighted emissions against GHGI')
for iyear in np.arange(0,num_years):
    summary_emi = EPA_Rice_Emissions.iloc[0,iyear+1]
    calc_emi = 0
    for igroup in np.arange(0,n_groups):
        group_name = proxy_rice_map.loc[igroup,'GHGI_Emi_Group']
        state_part = np.sum(vars()['State_'+group_name][:,iyear])
        calc_emi += state_part + vars()['NonState_'+group_name][iyear]
    if DEBUG ==1:
        print(summary_emi)
        print(calc_emi)
    # relative difference with respect to the mean of the two totals
    diff = abs(summary_emi-calc_emi)/((summary_emi+calc_emi)/2)
    if diff < 0.0001:
        print('Year ', year_range[iyear], ': PASS, difference < 0.01%')
    else:
        print('Year ', year_range[iyear], ': FAIL -- Difference = ', diff*100,'%')

##### 4.1.3 Allocate emissions to the county level

In [None]:
# Calculate county-level emissions for each gridding group
# Emissions in kt
# County data = state emissions * county proxy / (county proxy summed over the state)
# If a state has emissions but its county proxy sums to zero, the state emissions are instead
# spread over that state's counties in proportion to the 'Area' column of County_ANSI.

DEBUG = 1

# Make placeholder emission arrays for each group
#   County_<group>    : [state, county, year]
#   NonCounty_<group> : [year]
for igroup in np.arange(0,len(proxy_rice_map)):
    vars()['County_'+proxy_rice_map.loc[igroup,'GHGI_Emi_Group']] = \
            np.zeros([len(State_ANSI),len(County_ANSI),num_years])
    vars()['NonCounty_'+proxy_rice_map.loc[igroup,'GHGI_Emi_Group']] = np.zeros([num_years])

#Loop over years
for iyear in np.arange(0,num_years):
    for icounty in np.arange(0,len(County_ANSI)):
        # index (in State_ANSI) of the state this county belongs to
        istate = np.where(State_ANSI['ansi']==County_ANSI['State'][icounty])[0][0]
        state_ansi = State_ANSI['ansi'][istate]
        for igroup in np.arange(0,len(proxy_rice_map)):
            group_name = proxy_rice_map.loc[igroup,'GHGI_Emi_Group']
            emi_temp = vars()['State_'+group_name][istate,iyear]
            if not emi_temp > 0:
                # nothing to distribute for this state/year
                continue

            county_proxy = vars()[proxy_rice_map.loc[igroup,'County_Proxy_Group']]
            proxy_state_sum = np.sum(county_proxy[istate,:,iyear])
            frac_temp = data_fn.safe_div(county_proxy[istate,icounty,iyear], proxy_state_sum)
            if frac_temp > 0:
                vars()['County_'+group_name][istate,icounty,iyear] = emi_temp * frac_temp
            elif proxy_state_sum == 0:
                # state has emissions but no county proxy data: weight by county area instead
                frac_temp = data_fn.safe_div(County_ANSI.loc[icounty,'Area'],np.sum(County_ANSI['Area'][County_ANSI['State'] == state_ansi]))
                vars()['County_'+group_name][istate,icounty,iyear] = emi_temp * frac_temp
            # Otherwise the county simply has a zero share of a state whose emissions are
            # distributed over its other counties, so there is nothing to record.
            # (Previously this case overwrote NonCounty_<group>[iyear] with the state's full
            # emissions, which made its final value depend on county order and could double count.)

# Check sum of all county emissions + emissions not allocated to counties against the GHGI value
print('QA/QC #1: Check weighted emissions against GHGI')
for iyear in np.arange(0,num_years):
    summary_emi = EPA_Rice_Emissions.iloc[0,iyear+1]
    calc_emi = 0
    for igroup in np.arange(0,len(proxy_rice_map)):
        calc_emi +=  np.sum(vars()['County_'+proxy_rice_map.loc[igroup,'GHGI_Emi_Group']][:,:,iyear])+\
            vars()['NonCounty_'+proxy_rice_map.loc[igroup,'GHGI_Emi_Group']][iyear]
    if DEBUG ==1:
        print(summary_emi)
        print(calc_emi)
    diff = abs(summary_emi-calc_emi)/((summary_emi+calc_emi)/2)
    if diff < 0.0001:
        print('Year ', year_range[iyear], ': PASS, difference < 0.01%')
    else:
        print('Year ', year_range[iyear], ': FAIL -- Difference = ', diff*100,'%')

##### 4.1.4 Allocate county emissions to the CONUS region (0.1x0.1)

In [None]:
# Allocate County-Level emissions (kt) onto a 0.1x0.1 grid using gridcell level 'Proxy_Groups'

DEBUG =1
#Define emission arrays
Emissions_array_01 = np.zeros([len(Lat_01),len(Lon_01),num_years])
#Emissions_array_001 = np.zeros([len(lat001),len(lon001),num_years])
Emissions_nongrid = np.zeros([num_years])


# For each group and county, county-level emissions are distributed onto the grid using the
# gridded proxy selected in proxy_rice_map ('Proxy_Group').
# To speed up the code, masks are used rather than looping individually through each lat/lon:
# a mask of 1's is made for the grid cells whose county AND state ANSI values match the county,
# all other cells are set to zero.
# The mask and proxy are combined on the fine grid (as required to calculate accurate county masks),
# normalised to sum to one, regridded with regrid001_to_01 and multiplied by the county emissions.
# Counties in AK and HI (and any state index >= 51) are not gridded; their emissions are added
# to Emissions_nongrid.
# For the state of MN - the EPA GHGI includes rice emissions for MN, however the USDA does not
# include rice harvested area for this state. Whenever a county has emissions but no proxy data
# inside its mask, the emissions are therefore allocated based on the area in that county.

print('**QA/QC Check: Sum of national gridded emissions vs. GHGI national emissions')

#1. Step through each gridding group
for igroup in np.arange(0,len(proxy_rice_map)):
    group_name = proxy_rice_map.loc[igroup,'GHGI_Emi_Group']
    proxy_temp = vars()[proxy_rice_map.loc[igroup,'Proxy_Group']]
    # looked up for parity with the proxy naming convention; not used in this cell
    proxy_temp_nongrid = vars()[proxy_rice_map.loc[igroup,'Proxy_Group']+'_nongrid']
    vars()['Ext_'+group_name] = np.zeros([len(Lat_01),len(Lon_01),num_years])
    ext_array = vars()['Ext_'+group_name]        # same array object, updated in place below
    county_emis_all = vars()['County_'+group_name]

    #2a. Step through each county (if group was previously allocated to county level)
    if proxy_rice_map.loc[igroup,'County_Proxy_Group'] != '-' and proxy_rice_map.loc[igroup,'County_Proxy_Group'] != 'state_not_mapped':
        for icounty in np.arange(0,len(County_ANSI)):
            print(icounty, 'of',len(County_ANSI))
            istate = np.where(State_ANSI['ansi']==County_ANSI['State'][icounty])[0][0]
            county_years = county_emis_all[istate,icounty,:]
            if State_ANSI['abbr'][istate] not in {'AK','HI'} and istate < 51:
                # Building the full-grid mask is expensive; skip counties without any emissions
                if not np.any(county_years > 0):
                    continue
                mask_county = np.ma.ones(np.shape(county_ANSI_map))
                mask_county = np.ma.masked_where(county_ANSI_map != County_ANSI['County'][icounty], mask_county)
                mask_county = np.ma.masked_where(state_ANSI_map != County_ANSI['State'][icounty], mask_county)
                mask_county = np.ma.filled(mask_county,0)
                for iyear in np.arange(0,num_years):
                    county_temp = county_years[iyear]
                    if county_temp > 0 :
                        # proxy restricted to this county (computed once per county/year)
                        masked_proxy = mask_county*proxy_temp[:,:,iyear]
                        proxy_sum = np.sum(masked_proxy)
                        if proxy_sum > 0:
                            weighted_array = data_fn.safe_div(masked_proxy,proxy_sum)
                        elif proxy_sum == 0:
                            # if there is no rice harvested data, but there are county emissions, weight by area in county...
                            masked_area = mask_county*area_map[:,:]
                            weighted_array = data_fn.safe_div(masked_area,np.sum(masked_area))
                        else:
                            # proxy sum is neither > 0 nor == 0 (e.g. NaN or negative): record as not gridded
                            Emissions_nongrid[iyear] += county_temp
                            continue
                        weighted_array_01 = data_fn.regrid001_to_01(weighted_array, Lat_01, Lon_01)
                        Emissions_array_01[:,:,iyear] += county_temp*weighted_array_01
                        ext_array[:,:,iyear] += county_temp*weighted_array_01
            else:
                for iyear in np.arange(0, num_years):
                    county_temp = county_years[iyear]
                    Emissions_nongrid[iyear] += county_temp

# QA: gridded + non-gridded emissions must reproduce the national value of each year
for iyear in np.arange(0, num_years):
    calc_emi = np.sum(Emissions_array_01[:,:,iyear]) + np.sum(Emissions_nongrid[iyear])
    # same total, rebuilt from the per-group 'Ext_' arrays (printed for comparison only)
    calc_emi2 = 0
    for igroup in np.arange(0,len(proxy_rice_map)):
        calc_emi2 += np.sum(vars()['Ext_'+proxy_rice_map.loc[igroup,'GHGI_Emi_Group']][:,:,iyear])
    calc_emi2 += np.sum(Emissions_nongrid[iyear])
    summary_emi = EPA_Rice_Emissions.iloc[0,iyear+1]
    if DEBUG ==1:
        print(summary_emi)
        print(calc_emi)
        print(calc_emi2)
    diff = abs(summary_emi-calc_emi)/((summary_emi+calc_emi)/2)
    if diff < 0.0001:
        print('Year ', year_range[iyear], ': PASS, difference < 0.01%')
    else:
        print('Year ', year_range[iyear], ': FAIL -- Difference = ', diff*100,'%')

##### Step 4.1.5 Save gridded emissions (kt)

In [None]:
#save gridded emissions for each gridding group - for extension
# One variable 'Ext_<group>' (lat, lon, year) is written per mapped group, plus the
# 'Emissions_nongrid' (year) array.

#Initialize file
data_IO_fn.initialize_netCDF(grid_emi_outputfile, netCDF_description, 0, year_range, loc_dimensions, Lat_01, Lon_01)

unique_groups = np.unique(proxy_rice_map['GHGI_Emi_Group'])
unique_groups = unique_groups[unique_groups != 'Emi_not_mapped']

nc_out = Dataset(grid_emi_outputfile, 'r+', format='NETCDF4')
try:
    for igroup in np.arange(0,len(unique_groups)):
        var_name = 'Ext_'+unique_groups[igroup]
        print(var_name)
        ext_data = vars()[var_name]
        if len(np.shape(ext_data)) ==4:
            # sum month data if data is monthly (4th axis); the 'Ext_' array itself must be summed
            ghgi_temp = np.sum(ext_data,axis=3)
        else:
            ghgi_temp = ext_data

        # Write data to netCDF
        data_out = nc_out.createVariable(var_name, 'f8', ('lat', 'lon','year'), zlib=True)
        data_out[:,:,:] = ghgi_temp[:,:,:]

    #save nongrid data to calculate non-grid fraction extension
    data_out = nc_out.createVariable('Emissions_nongrid', 'f8', ('year',), zlib=True)
    data_out[:] = Emissions_nongrid[:]
finally:
    # always release the file handle, even if a write fails
    nc_out.close()

#Confirm file location
print('** SUCCESS **')
print("Gridded emissions (kt) written to file: {}" .format(os.getcwd())+grid_emi_outputfile)
print(' ')

del data_out, ghgi_temp, nc_out

#### 4.2. Calculate Gridded Emission Fluxes (molec./cm2/s) (0.1x0.1)

In [None]:
#Convert emissions to emission flux
# conversion: kt emissions to molec/cm2/s flux
#   flux = emissions * 10**9 * Avogadro / (Molarch4 * seconds in the year) / area_matrix_01
import calendar

DEBUG = 1

Flux_array_01_annual = np.zeros([len(Lat_01),len(Lon_01),num_years])
print('**QA/QC Check: Sum of national gridded emissions vs. GHGI national emissions')

for iyear in np.arange(0,num_years):
    # number of days in this year (leap years determined from the calendar, not hard-coded)
    if calendar.isleap(int(year_range[iyear])):
        year_days = np.sum(month_day_leap)
    else:
        year_days = np.sum(month_day_nonleap)

    conversion_factor_01 = 10**9 * Avogadro / float(Molarch4 *year_days * 24 * 60 *60) / area_matrix_01
    Flux_array_01_annual[:,:,iyear] += Emissions_array_01[:,:,iyear]*conversion_factor_01

    # QA: convert the flux back to mass, add the non-gridded emissions and compare with the GHGI value
    calc_emi = np.sum(Flux_array_01_annual[:,:,iyear]/conversion_factor_01)+np.sum(Emissions_nongrid[iyear])
    summary_emi = EPA_Rice_Emissions.iloc[0,iyear+1]
    emi_diff = abs(summary_emi-calc_emi)/((summary_emi+calc_emi)/2)
    if DEBUG ==1:
        print(calc_emi)
        print(summary_emi)
    if abs(emi_diff) < 0.0001:
        print('Year '+ year_range_str[iyear]+': Difference < 0.01%: PASS')
    else:
        print('Year '+ year_range_str[iyear]+': Difference > 0.01%: FAIL, diff: '+str(emi_diff))

# NOTE: this is a second name for the same array object (no copy is made)
Flux_Emissions_Total_annual = Flux_array_01_annual

#### Step 4.3. Apply Gridded Month Scaling Factor

In [None]:
#Calculate Monthly emissions and emission fluxes (the same monthly scaling factor is applied to all years)
# Each month receives the fraction sum(month_map[month]) / sum(month_map) of the annual gridded
# emissions, i.e. one scalar factor per month applied uniformly to every grid cell.
import calendar

DEBUG =1

Flux_array_01 = np.zeros([len(Lat_01),len(Lon_01),num_years,num_months])
Emissions_array = np.zeros([len(Lat_01),len(Lon_01),num_years,num_months])

#Read in normalized monthly data from Anthony Bloom
month_file = Dataset(Bloom_month_factors_file)
month_map = np.array(month_file.variables['data'])
month_file.close()

# Monthly fractions do not depend on the year: compute them once
month_map_total = np.sum(month_map[:,:,:])
month_scaling_factors = [np.sum(month_map[imonth,:,:])/month_map_total for imonth in np.arange(0, num_months)]

#Scale the annual emissions data (apply the same scaling factors for each year)
for iyear in np.arange(0, num_years):
    for imonth in np.arange(0, num_months):
        Emissions_array[:,:,iyear,imonth] = month_scaling_factors[imonth] * Emissions_array_01[:,:,iyear]

    #Check against total
    calc_emi = 0
    # days per month for this year (leap years determined from the calendar, not hard-coded)
    if calendar.isleap(int(year_range[iyear])):
        month_days = month_day_leap
    else:
        month_days = month_day_nonleap
    for imonth in np.arange(0, num_months):
        # kt emitted in the month --> molec/cm2/s averaged over that month
        conversion_factor_01 = 10**9 * Avogadro / float(Molarch4 *month_days[imonth] * 24 * 60 *60) / area_matrix_01
        Flux_array_01[:,:,iyear,imonth] = Emissions_array[:,:,iyear,imonth]*conversion_factor_01
        calc_emi += np.sum(Flux_array_01[:,:,iyear,imonth]/conversion_factor_01)
    calc_emi += np.sum(Emissions_nongrid[iyear])
    summary_emi = EPA_Rice_Emissions.iloc[0,iyear+1]
    emi_diff = abs(summary_emi-calc_emi)/((summary_emi+calc_emi)/2)
    if DEBUG ==1:
        print(calc_emi)
        print(summary_emi)
    if abs(emi_diff) < 0.0001:
        print('Year '+ year_range_str[iyear]+': Difference < 0.01%: PASS')
    else:
        print('Year '+ year_range_str[iyear]+': Difference > 0.01%: FAIL, diff: '+str(emi_diff))

-------------
## Step 5. Write netCDF
------------

In [None]:
# Write the gridded fluxes to their final netCDF files: first the monthly data, then the yearly data.
# Each entry: (output file, netCDF description, flag passed to initialize_netCDF, flux array)
# NOTE(review): the flag is 1 for the monthly file and 0 for the yearly file - presumably it tells
# initialize_netCDF whether to add a month dimension; confirm in data_IO_fn.
flux_outputs = (
    (gridded_month_outputfile, netCDF_description_m, 1, Flux_array_01),
    (gridded_outputfile, netCDF_description, 0, Flux_Emissions_Total_annual),
)

for flux_outfile, flux_description, flux_time_flag, flux_data in flux_outputs:
    #Initialize file
    data_IO_fn.initialize_netCDF(flux_outfile, flux_description, flux_time_flag, year_range, loc_dimensions, Lat_01, Lon_01)

    # Write data to netCDF (full-slice assignment over every dimension of the array)
    nc_out = Dataset(flux_outfile, 'r+', format='NETCDF4')
    all_cells = (slice(None),) * np.ndim(flux_data)
    nc_out.variables['emi_ch4'][all_cells] = flux_data
    nc_out.close()

    #Confirm file location
    print('** SUCCESS **')
    print("Gridded fluxes written to file: {}" .format(os.getcwd())+flux_outfile)

----------
## Step 6. Plot Gridded Data
---------

#### Step 6.1. Plot Annual Emission Fluxes

In [None]:
#Plot Annual Data
# Passes the annual flux array (lat x lon x year) to the shared plotting helper.
# scale_max: upper limit handed to the helper (presumably the colour-scale maximum - confirm in data_plot_fn)
scale_max = 10
# save_flag = 0 with an empty save_outfile: presumably the figures are shown but not saved - confirm in data_plot_fn
save_flag =0
save_outfile = ''
data_plot_fn.plot_annual_emission_flux_map(Flux_Emissions_Total_annual, Lat_01, Lon_01, year_range, title_str,scale_max,save_flag,save_outfile)

#### Step 6.2 Plot Difference between first and last inventory year

In [None]:
# Plot difference between last and first year
# Passes the annual flux array (lat x lon x year) to the shared difference-plot helper.
# save_flag = 0 with an empty save_outfile: presumably the figure is shown but not saved - confirm in data_plot_fn
save_flag =0
save_outfile = ''
data_plot_fn.plot_diff_emission_flux_map(Flux_Emissions_Total_annual, Lat_01, Lon_01, year_range, title_diff_str,save_flag, save_outfile)

In [None]:
#Create arrays for plotting illustrative figures
# Builds two maps for year index 6 (labelled 2018 in the figures):
#   Emissions_array_national : every land cell carries the national total
#   Emissions_array_state    : every cell of a state carries that state's total
# NOTE(review): this cell reads Emissions_array_001, whose initialisation in the gridding cell is
# commented out - confirm that it is defined before running this cell.

#State
Emissions_array_state_01 = np.zeros([len(lat001),len(lon001)])
mask_land = np.zeros([len(lat001),len(lon001)])
Emissions_array_national_01 = np.zeros([len(lat001),len(lon001)])
national_sum = np.sum(Emissions_array_01[:,:,6])
for istate in np.arange(len(State_ANSI)):
    #find given state (map of values of 1)
    mask_state = np.ma.ones(np.shape(state_ANSI_map))
    mask_state = np.ma.masked_where(state_ANSI_map != State_ANSI['ansi'][istate], mask_state)
    mask_state = np.ma.filled(mask_state,0)
    if np.sum(mask_state)>0:
        # find the sum of 2018 emissions from that state
        # (named state_total_emis so the 'state_emis' proxy array used in Step 4.1.1 is not overwritten)
        state_total_emis = np.sum(mask_state * Emissions_array_001[:,:,6])
        # assign each grid cell for that state to the state total value
        Emissions_array_state_01[:,:] += (state_total_emis*mask_state)
        mask_land += mask_state
# clip the land mask to 1 (this must be an assignment; '==' only compared and discarded the result)
mask_land[mask_land >1] = 1
Emissions_array_national_01[:,:] = (national_sum*mask_land)
Emissions_array_state = data_fn.regrid001_to_01(Emissions_array_state_01, Lat_01, Lon_01)
Emissions_array_national = data_fn.regrid001_to_01(Emissions_array_national_01, Lat_01, Lon_01)
# NOTE(review): presumably regrid001_to_01 sums the 100 fine cells inside each 0.1 degree cell,
# so dividing by 100 restores the per-cell value - confirm in data_fn
Emissions_array_national /= 100
Emissions_array_state /= 100
del Emissions_array_national_01,Emissions_array_state_01

In [None]:
#County
# Builds a map in which every cell of a county carries that county's total emissions
# (year index 6, labelled 2018 in the figures).
Emissions_array_county_01 = np.zeros([len(lat001),len(lon001)])
for icounty in np.arange(len(County_ANSI)):
    county_code = County_ANSI['County'][icounty]
    state_code = County_ANSI['State'][icounty]
    # 1 inside the county (county AND state code must match), 0 elsewhere
    mask_county = np.ma.masked_where(county_ANSI_map != county_code, np.ma.ones(np.shape(county_ANSI_map)))
    mask_county = np.ma.masked_where(state_ANSI_map != state_code, mask_county)
    mask_county = np.ma.filled(mask_county,0)
    # total emissions that fall inside this county
    emis_year = Emissions_array_001[:,:,6]
    county_emis = np.sum(mask_county * emis_year)
    # paint the county total onto all of the county's cells
    Emissions_array_county_01[:,:] += county_emis*mask_county
    print(icounty,'of',len(County_ANSI))
Emissions_array_county = data_fn.regrid001_to_01(Emissions_array_county_01, Lat_01, Lon_01)
# NOTE(review): presumably compensates for regrid001_to_01 summing 100 fine cells per 0.1 degree cell - confirm
Emissions_array_county /= 100
    
  

In [None]:
# Illustrative maps: (1) national total, (2) each state's share of the national total.
# The arrays plotted here were built from year index 6, so the titles use that same index
# instead of whatever value a previous cell happened to leave in 'iyear'.
plot_year_idx = 6

scale_max = np.max(Emissions_array_national[43:300,50:632])

# Colormap whose first 'slopen' lookup entries fade in from fully transparent to opaque
my_cmap = copy(plt.cm.get_cmap('rainbow',lut=3000))
my_cmap._init()
slopen = 200
alphas_slope = np.abs(np.linspace(0, 1.0, slopen))
# remaining lookup-table rows (the table has the 3000 colours plus 3 extra entries) stay opaque
alphas_stable = np.ones(my_cmap._lut.shape[0]-slopen)
alphas = np.concatenate((alphas_slope, alphas_stable))
my_cmap._lut[:,-1] = alphas
my_cmap.set_under('gray', alpha=0)

# Plot window (index ranges 43:300 / 50:632 of the 0.1 degree grid); coordinates shifted by half a cell
Lon_cor = Lon_01[50:632]-0.05
Lat_cor = Lat_01[43:300]-0.05

xpoints = Lon_cor
ypoints = Lat_cor
yp,xp = np.meshgrid(ypoints,xpoints)

#NATIONAL
zp = Emissions_array_national[43:300,50:632]

fig, ax = plt.subplots(dpi=300)
m = Basemap(llcrnrlon=xp.min(), llcrnrlat=yp.min(), urcrnrlon=xp.max(),
            urcrnrlat=yp.max(), projection='merc', resolution='h', area_thresh=5000)
m.drawmapboundary(fill_color='Azure')
m.fillcontinents(color='FloralWhite', lake_color='Azure',zorder=1)
m.drawcoastlines(linewidth=0.5,zorder=3)
#m.drawstates(linewidth=0.25,zorder=3)
m.drawcountries(linewidth=0.5,zorder=3)

xpi,ypi = m(xp,yp)
plot = m.pcolor(xpi,ypi,zp.transpose(), cmap=my_cmap, vmin=10**-15, vmax=scale_max, snap=True,zorder=2)
cb = m.colorbar(plot, location = "bottom", pad = "1%")
tick_locator = ticker.MaxNLocator(nbins=5)
cb.locator = tick_locator
cb.update_ticks()

cb.ax.set_xlabel('2018 Methane Emissions (kt a$^{-1}$)',fontsize=10)
cb.ax.tick_params(labelsize=10)
Titlestring = str(year_range[plot_year_idx])+' '+title_str
fig1 = plt.gcf()
plt.title(Titlestring, fontsize=14);
plt.show();
fig1.savefig('Example_Rice_National'+'.tiff',transparent=True)

#STATE
# Plotted quantity: state total as a percentage of the national total (0-100)
state_proxy = 100*(Emissions_array_state/national_sum)
scale_max = 100
zp=state_proxy[43:300,50:632]
fig, ax = plt.subplots(dpi=300)
m = Basemap(llcrnrlon=xp.min(), llcrnrlat=yp.min(), urcrnrlon=xp.max(),
            urcrnrlat=yp.max(), projection='merc', resolution='h', area_thresh=5000)
m.drawmapboundary(fill_color='Azure')
m.fillcontinents(color='FloralWhite', lake_color='Azure',zorder=1)
m.drawcoastlines(linewidth=0.5,zorder=3)
m.drawstates(linewidth=0.25,zorder=3)
m.drawcountries(linewidth=0.5,zorder=3)

xpi,ypi = m(xp,yp)
plot = m.pcolor(xpi,ypi,zp.transpose(), cmap=my_cmap, vmin=10**-15, vmax=scale_max, snap=True,zorder=2)
cb = m.colorbar(plot, location = "bottom", pad = "1%")
tick_locator = ticker.MaxNLocator(nbins=5)
cb.locator = tick_locator
cb.update_ticks()

# label states the plotted quantity (percent of national), not kt
cb.ax.set_xlabel('2018 State Share of National Methane Emissions (%)',fontsize=10)
cb.ax.tick_params(labelsize=10)
Titlestring = str(year_range[plot_year_idx])+' '+title_str
fig1 = plt.gcf()
plt.title(Titlestring, fontsize=14);
plt.show();
fig1.savefig('Example_Rice_State_Proxy'+'.tiff',transparent=True)

In [None]:
#COUNTY
# Illustrative map of county totals. Reuses xp, yp and my_cmap from the national/state plotting cell.
# The county array was built from year index 6, so the title uses that same index instead of
# whatever value a previous cell happened to leave in 'iyear'.
plot_year_idx = 6

zp = Emissions_array_county[43:300,50:632]
scale_max = np.max(zp)
fig, ax = plt.subplots(dpi=300)
m = Basemap(llcrnrlon=xp.min(), llcrnrlat=yp.min(), urcrnrlon=xp.max(),
            urcrnrlat=yp.max(), projection='merc', resolution='h', area_thresh=5000)
m.drawmapboundary(fill_color='Azure')
m.fillcontinents(color='FloralWhite', lake_color='Azure',zorder=1)
m.drawcoastlines(linewidth=0.5,zorder=3)
m.drawstates(linewidth=0.25,zorder=3)
m.drawcountries(linewidth=0.5,zorder=3)

xpi,ypi = m(xp,yp)
plot = m.pcolor(xpi,ypi,zp.transpose(), cmap=my_cmap, vmin=10**-15, vmax=scale_max, snap=True,zorder=2)
cb = m.colorbar(plot, location = "bottom", pad = "1%")
tick_locator = ticker.MaxNLocator(nbins=5)
cb.locator = tick_locator
cb.update_ticks()

cb.ax.set_xlabel('2018 County Methane Emissions (kt a$^{-1}$)',fontsize=10)
cb.ax.tick_params(labelsize=10)
Titlestring = str(year_range[plot_year_idx])+' '+title_str
fig1 = plt.gcf()
plt.title(Titlestring, fontsize=14);
plt.show();
fig1.savefig('Example_Rice_County'+'.tiff',transparent=True)

In [None]:
#Plot Gridded
# Illustrative map of the gridded annual flux. Reuses xp, yp and my_cmap from the national/state
# plotting cell.
import calendar

# Year index plotted (labelled 2018 in the figure); also used for the title instead of a leaked 'iyear'
plot_year_idx = 6

# The flux array is in molec/cm2/s, while the colorbar is labelled in Mg a-1 km-2, so convert:
#   molec/cm2/s / Avogadro * Molarch4   --> g/cm2/s   (inverse of the kt --> flux conversion above)
#   * seconds in the year               --> g/cm2/a
#   * 1e10 (cm2 per km2) / 1e6 (g per Mg) --> Mg/km2/a
if calendar.isleap(int(year_range[plot_year_idx])):
    plot_year_days = np.sum(month_day_leap)
else:
    plot_year_days = np.sum(month_day_nonleap)
flux_to_Mg_km2_a = (plot_year_days * 24 * 60 * 60) * Molarch4 * float(1e10) / float(10**6 * Avogadro)

zp = Flux_Emissions_Total_annual[43:300,50:632,plot_year_idx] * flux_to_Mg_km2_a
scale_max = np.max(zp)
fig, ax = plt.subplots(dpi=300)
m = Basemap(llcrnrlon=xp.min(), llcrnrlat=yp.min(), urcrnrlon=xp.max(),
            urcrnrlat=yp.max(), projection='merc', resolution='h', area_thresh=5000)
m.drawmapboundary(fill_color='Azure')
m.fillcontinents(color='FloralWhite', lake_color='Azure',zorder=1)
m.drawcoastlines(linewidth=0.5,zorder=3)
m.drawstates(linewidth=0.25,zorder=3)
m.drawcountries(linewidth=0.5,zorder=3)

xpi,ypi = m(xp,yp)
plot = m.pcolor(xpi,ypi,zp.transpose(), cmap=my_cmap, vmin=10**-15, vmax=scale_max, snap=True,zorder=2)
cb = m.colorbar(plot, location = "bottom", pad = "1%")
tick_locator = ticker.MaxNLocator(nbins=5)
cb.locator = tick_locator
cb.update_ticks()

cb.ax.set_xlabel('2018 Gridded Methane Emissions (Mg a$^{-1}$ km$^{-2}$)',fontsize=10)
cb.ax.tick_params(labelsize=10)
Titlestring = str(year_range[plot_year_idx])+' '+title_str
fig1 = plt.gcf()
plt.title(Titlestring, fontsize=14);
plt.show();
fig1.savefig('Example_Rice_Grid'+'.tiff',transparent=True)

In [None]:
# Report how long the notebook took to run.
# 'it' is read from the notebook namespace (presumably the start timestamp in seconds, set in Step 0 - confirm)
ct = datetime.datetime.now()
ft = ct.timestamp()
seconds_per_hour = 60*60
time_elapsed = (ft-it)/seconds_per_hour
print('Time to run: {} hours'.format(time_elapsed))
print('** GEPA_3C_Rice: COMPLETE **')