# Gridded EPA Methane Inventory
## Extension - GHGI 2022

***
#### Authors: 
Erin E. McDuffie
#### Date Last Updated: 
see Step 0
#### Notebook Purpose: 
This Notebook extends and reports annual gridded (0.1°x0.1°) methane emission fluxes (molec./cm2/s) from Petroleum sources for the years 2012-2020, using updated inventory values from the 2022 National GHGI.  
#### Summary & Notes:
EPA annual national methane emissions are read in for the 2022 GHGI (either from the GHGI workbooks or public data). National emissions are then scaled down to CONUS emissions using the relative fraction of CONUS/total emissions from the v2 data (for each year, held constant after 2018). Remaining CONUS data are then allocated to proxy groups using the relevant proxy mapping files and allocated to the grid using the relative mass of emissions in each grid cell from each group from version 2 (for each year, held constant after 2018). Annual emission fluxes (molec./cm2/s) for 2012-2020 are then written to final netCDFs in the ‘/code/Final_Gridded_Data/Extension/v2_ext_final’ folder.
***

-------
## Step 0. Set-Up Notebook Modules, Functions, and Local Parameters and Constants
-------

In [None]:
# Report when this notebook file was last saved and which directory the kernel is running in
import os
import time

# Modification time of the notebook file (seconds since the epoch), then formatted in local time
modtime = os.stat('./1B2a_Petroleum_Systems_extension.ipynb').st_mtime
modificationTime = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(modtime))
print("This file was last modified on: ", modificationTime)
print('')

working_dir = os.getcwd()
print("The directory we are working in is {}" .format(working_dir))

In [None]:
## Include plots within notebook
# IPython line magic: selects the inline matplotlib backend so figures are rendered in the cell output
%matplotlib inline

In [None]:
# Import base modules
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import re
import datetime
from copy import copy

# Import additional modules
# Load plotting package Basemap 
from mpl_toolkits.basemap import Basemap

# Load netCDF (for manipulating netCDF file types)
from netCDF4 import Dataset

# Set up ticker
import matplotlib.ticker as ticker

#add path for the global function module (file)
import sys
module_path = os.path.abspath(os.path.join('../Global_Functions/'))
if module_path not in sys.path:
    sys.path.append(module_path)

# Load Tabula (for reading tables from PDFs)
import tabula as tb   
    
# Load user-defined global functions (modules)
import data_load_functions as data_load_fn
import data_functions as data_fn
import data_IO_functions as data_IO_fn
import data_plot_functions as data_plot_fn

In [None]:
#INPUT Files
# Assign global file names (the list is returned in a fixed order by the project helper)
global_filenames = data_load_fn.load_global_file_names()
State_ANSI_inputfile = global_filenames[0]
County_ANSI_inputfile = global_filenames[1]
pop_map_inputfile = global_filenames[2]
Grid_area01_inputfile = global_filenames[3]
Grid_area001_inputfile = global_filenames[4]
Grid_state001_ansi_inputfile = global_filenames[5]
Grid_county001_ansi_inputfile = global_filenames[6]
# NOTE(review): the first 20 characters of the first global file name are taken as the
# global input directory - this presumably matches the project's folder layout; confirm
# if the directory structure changes.
globalinputlocation = global_filenames[0][0:20]
print(globalinputlocation)

# EPA Inventory Data
EPA_Petr_inputfile = globalinputlocation+'GHGI/Ch3_Energy/PetroleumSystems_1990-2020_FINAL.xlsx'

#proxy mapping file
Petr_Mapping_inputfile = './InputData/Petroleum_ProxyMapping.xlsx'

#OUTPUT FILES
# One netCDF file, file description, map title and difference-map title per petroleum segment
gridded_expl_outputfile = '../Final_Gridded_Data/Extension/v2_ext_final/EPA_v2_1B2a_Petroleum_Systems_Exploration.nc'
netCDF_expl_description = 'EXTENSION to the Gridded EPA Inventory - Petroleum Systems Emissions - IPCC Source Category 1B2a - Exploration'
title_expl_str = "EPA methane emissions from exploration"
title_expl_diff_str = "Emissions from exploration difference: 2020-2012"

gridded_prod_outputfile = '../Final_Gridded_Data/Extension/v2_ext_final/EPA_v2_1B2a_Petroleum_Systems_Production.nc'
netCDF_prod_description = 'EXTENSION to the Gridded EPA Inventory - Petroleum Systems Emissions - IPCC Source Category 1B2a - Production'
title_prod_str = "EPA methane emissions from production"
title_prod_diff_str = "Emissions from production difference: 2020-2012"

gridded_trans_outputfile = '../Final_Gridded_Data/Extension/v2_ext_final/EPA_v2_1B2a_Petroleum_Systems_Transport.nc'
netCDF_trans_description = 'EXTENSION to the Gridded EPA Inventory - Petroleum Systems Emissions - IPCC Source Category 1B2a - Oil Transport'
title_trans_str = "EPA methane emissions from transport"
# Fixed: previously read "Emissions from gas transport: 2020-2012" (wrong commodity and
# missing the word "difference" used by the other three difference-plot titles)
title_trans_diff_str = "Emissions from transport difference: 2020-2012"

gridded_ref_outputfile = '../Final_Gridded_Data/Extension/v2_ext_final/EPA_v2_1B2a_Petroleum_Systems_Refining.nc'
netCDF_ref_description = 'EXTENSION to the Gridded EPA Inventory - Petroleum Systems Emissions - IPCC Source Category 1B2a - Refining'
title_ref_str = "EPA methane emissions from refining"
title_ref_diff_str = "Emissions from refining difference: 2020-2012"

#INPUT: gridded v2 group emissions (this file is opened read-only in Step 2)
grid_emi_inputfile = '../Final_Gridded_Data/Extension/v2_input_data/Petroleum_Grid_Emi.nc'


In [None]:
# Define local variables
start_year = 2012  #First year in emission timeseries
end_year = 2018    #Last year in emission timeseries
ext_year = 2020    #last year in extended dataset
# Index of end_year on the time axis (derived rather than hard-coded so that it stays
# consistent if start_year or end_year is changed)
end_year_idx = end_year - start_year
year_range = [*range(start_year, ext_year+1,1)] #List of emission years
year_range_str=[str(i) for i in year_range]
num_years = len(year_range)

# Define constants
Avogadro   = 6.02214129 * 10**(23)  #molecules/mol
Molarch4   = 16.04                  #g/mol
Res01      = 0.1                    # degrees
Res_01     = 0.01
tg_scale   = 0.001                  #Tg scale number [New file allows for the exclusion of the territories] 

# Continental US Lat/Lon Limits (for netCDF files)
Lon_left = -130       #deg
Lon_right = -60       #deg
Lat_low  = 20         #deg
Lat_up  = 55          #deg
loc_dimensions = [Lat_low, Lat_up, Lon_left, Lon_right]

# Start/end indices of the continental US window on the 0.1 degree latitude/longitude axes
ilat_start = int((90+Lat_low)/Res01) #1100:1450 (continental US range)
ilat_end = int((90+Lat_up)/Res01)
ilon_start = abs(int((-180-Lon_left)/Res01)) #500:1200 (continental US range)
ilon_end = abs(int((-180-Lon_right)/Res01))

# Number of days in each month
month_day_leap  = [  31,  29,  31,  30,  31,  30,  31,  31,  30,  31,  30,  31]
month_day_nonleap = [  31,  28,  31,  30,  31,  30,  31,  31,  30,  31,  30,  31]

# Month arrays
month_range_str = ['January','February','March','April','May','June','July','August','September','October','November','December']
num_months = len(month_range_str)

# Grid-cell area maps and their coordinate axes, loaded by the project helpers
area_map, lat001, lon001 = data_load_fn.load_area_map_001(Grid_area001_inputfile)
area_map01, Lat01, Lon01 = data_load_fn.load_area_map_01(Grid_area01_inputfile)[0:3]
#Select relevant Continental 0.1 x0.1 domain
Lat_01 = Lat01[ilat_start:ilat_end]
Lon_01 = Lon01[ilon_start:ilon_end]
area_matrix_01 = data_fn.regrid001_to_01(area_map, Lat_01, Lon_01)
area_matrix_01 *= 10000  #convert from m2 to cm2


In [None]:
%%javascript
// Raise the output-area auto-scroll threshold; presumably this keeps long cell outputs from collapsing into a scroll box (confirm against the notebook front end in use)
IPython.OutputArea.auto_scroll_threshold = 9999;

In [None]:
# Record the notebook start time; `it` (epoch seconds) is read again in the final cell
# to report the total run time
ct = datetime.datetime.now()
it = datetime.datetime.timestamp(ct)
print("current time:", ct)

## Step 1. Read in Gridding Groups

In [None]:
def _read_mapping_sheet(sheet_name, usecols, data_skiprows):
    """Read one sheet of the petroleum proxy-mapping workbook.

    The column names are taken from the header found after skipping the first
    row of the sheet; the data are then re-read starting `data_skiprows` rows
    down, with those column names applied.

    Parameters: sheet_name (str), usecols (Excel column range, e.g. "A:B"),
    data_skiprows (int, rows skipped before the data read).
    Returns: pandas.DataFrame with the sheet contents.
    """
    header = pd.read_excel(Petr_Mapping_inputfile, sheet_name = sheet_name, usecols = usecols,
                           skiprows = 1, header = 0)
    return pd.read_excel(Petr_Mapping_inputfile, sheet_name = sheet_name, usecols = usecols,
                         skiprows = data_skiprows, names = header.columns.values)


def _load_ghgi_map(sheet_name):
    """Load a 'GHGI Map' sheet (GHGI source -> emission group) and clean it.

    Rows whose group is 'na' or missing are dropped, and parentheses and double
    quotes are stripped from the source names ("(" becomes "- ").
    Returns: pandas.DataFrame with a fresh 0..n-1 index.
    """
    ghgi_map = _read_mapping_sheet(sheet_name, "A:B", 2)
    #drop rows with no data
    ghgi_map = ghgi_map[ghgi_map['GHGI_Emi_Group'] != 'na']
    # .copy() so the column assignment below does not write into a slice of the original frame
    ghgi_map = ghgi_map[ghgi_map['GHGI_Emi_Group'].notna()].copy()
    #remove the parentheses and ""
    # regex=False makes these literal replacements on every pandas version (the default
    # for `regex` changed to False in pandas 2.0, which made the old r"\(" pattern a no-op)
    ghgi_map['GHGI_Source'] = (ghgi_map['GHGI_Source']
                               .str.replace("(", "- ", regex=False)
                               .str.replace(")", "", regex=False)
                               .str.replace('"', "", regex=False))
    return ghgi_map.reset_index(drop=True)


#Exploration & Production
#load GHGI Mapping Groups
ghgi_prod_map = _load_ghgi_map("GHGI Map - E&P")
display(ghgi_prod_map)

#load emission group - proxy map
proxy_prod_map = _read_mapping_sheet("Proxy Map - E&P", "A:C", 1)
display(proxy_prod_map)


#Transport
#load GHGI Mapping Groups
ghgi_trans_map = _load_ghgi_map("GHGI Map - Trans")
display(ghgi_trans_map)   # fixed: previously displayed the E&P map again

#load emission group - proxy map
proxy_trans_map = _read_mapping_sheet("Proxy Map - Trans", "A:C", 1)
display(proxy_trans_map)  # fixed: previously displayed the E&P proxy map again


#Refining
#load GHGI Mapping Groups
ghgi_ref_map = _load_ghgi_map("GHGI Map - Ref")
display(ghgi_ref_map)     # fixed: previously displayed the E&P map again

#load emission group - proxy map
proxy_ref_map = _read_mapping_sheet("Proxy Map - Ref", "A:C", 1)
display(proxy_ref_map)    # fixed: previously displayed the E&P proxy map again



-----------
## Step 2. Read in v2 Grid Group Emissions
----------

In [None]:
#These data will be assigned to 'Proxy_'+<GHGI_Emi_Group> (because original proxy mapping is not 1:1 with GHGI group)
#All proxy data will be in 0.1x0.1xyear dimensions
#assign 2018 values to years 2019 and 2020

Emissions_expl_nongrid = np.zeros([num_years])
Emissions_prod_nongrid = np.zeros([num_years])
Emissions_trans_nongrid = np.zeros([num_years])
Emissions_ref_nongrid = np.zeros([num_years])

unique_groups2 = (np.unique(proxy_prod_map['GHGI_Emi_Group']))
unique_groups2 = list(unique_groups2[unique_groups2 != 'Emi_not_mapped'])
unique_groups3 = list(np.unique(proxy_trans_map['GHGI_Emi_Group']))
unique_groups4 = list(np.unique(proxy_ref_map['GHGI_Emi_Group']))
unique_groups = unique_groups2+unique_groups3+unique_groups4
print(unique_groups2)

# For each year of the extended series, the index to read on the v2 time axis:
# the year's own index up to end_year, and the end_year index afterwards
v2_year_idx = [iyear if year_range[iyear] <= end_year else end_year_idx for iyear in range(num_years)]

# Groups to read, in proxy-map row order. Only the E&P map skips 'Emi_not_mapped'.
prod_groups_to_read = [grp for grp in proxy_prod_map['GHGI_Emi_Group'] if grp != 'Emi_not_mapped']
trans_groups_to_read = list(proxy_trans_map['GHGI_Emi_Group'])
ref_groups_to_read = list(proxy_ref_map['GHGI_Emi_Group'])

nc_in = Dataset(grid_emi_inputfile, 'r', format='NETCDF4')
try:
    for group_name in prod_groups_to_read + trans_groups_to_read + ref_groups_to_read:
        temp = nc_in['Ext_'+group_name][:,:,:]
        proxy_ext = np.zeros([len(Lat_01),len(Lon_01),num_years])
        for iyear in np.arange(0,num_years):
            proxy_ext[:,:,iyear] = temp[:,:,v2_year_idx[iyear]]
        # Publish as a notebook-level variable named 'Proxy_<group>'; later cells look it up by name
        vars()['Proxy_'+group_name] = proxy_ext

    #assign 2018 values to years 2019 and 2020
    for iyear in np.arange(0,num_years):
        Emissions_expl_nongrid[iyear] = nc_in['Emissions_expl_nongrid'][v2_year_idx[iyear]]
        Emissions_prod_nongrid[iyear] = nc_in['Emissions_prod_nongrid'][v2_year_idx[iyear]]
        Emissions_trans_nongrid[iyear] = nc_in['Emissions_trans_nongrid'][v2_year_idx[iyear]]
        Emissions_ref_nongrid[iyear] = nc_in['Emissions_ref_nongrid'][v2_year_idx[iyear]]
finally:
    # Fixed: the input dataset was previously left open for the rest of the session
    nc_in.close()

# Ratio of the v2 non-gridded emissions to the summed v2 gridded group emissions, per year
# and segment. Step 4 applies these as (1 - ratio) when scaling the national totals.
CONUS_frac_expl = np.zeros([num_years])
CONUS_frac_prod = np.zeros([num_years])
CONUS_frac_trans = np.zeros([num_years])
CONUS_frac_ref = np.zeros([num_years])

# E&P groups that are counted as exploration; every other mapped E&P group is production
expl_group_names = ('Emi_OilWellExp', 'Emi_ConvCompExp', 'Emi_HFCompExp', 'Emi_OilWellDrilledExp')

for iyear in np.arange(0, num_years):
    sum_expl = 0
    sum_prod = 0
    sum_trans = 0
    sum_ref = 0
    for group_name in prod_groups_to_read:
        group_total = np.sum(vars()['Proxy_'+group_name][:,:,iyear])
        if group_name in expl_group_names:
            sum_expl += group_total
        else:
            sum_prod += group_total
    for group_name in trans_groups_to_read:
        sum_trans += np.sum(vars()['Proxy_'+group_name][:,:,iyear])
    for group_name in ref_groups_to_read:
        sum_ref += np.sum(vars()['Proxy_'+group_name][:,:,iyear])
    CONUS_frac_expl[iyear] = Emissions_expl_nongrid[iyear]/sum_expl
    CONUS_frac_prod[iyear] = Emissions_prod_nongrid[iyear]/sum_prod
    CONUS_frac_trans[iyear] = Emissions_trans_nongrid[iyear]/sum_trans
    CONUS_frac_ref[iyear] = Emissions_ref_nongrid[iyear]/sum_ref

print(CONUS_frac_expl)    
print(CONUS_frac_prod)
print(CONUS_frac_trans)
print(CONUS_frac_ref)

-----------
## Step 3. Read in and Format 2022 US EPA GHGI Emissions
----------

### Step 3.1. Production and Exploration Emissions

In [None]:
# Emissions are in units of MG (= 1x10-6 Tg, 1e-3 kt)
# Emissions on this tab account for reductions

# The header row is read first to get the column names; the data rows are then read with
# those names applied
names = pd.read_excel(EPA_Petr_inputfile, sheet_name = "Production_CH4 (MT)", usecols = "A:AG", skiprows = 3, header = 0, nrows = 1)
colnames = names.columns.values
EPA_emi_prod_Petr = pd.read_excel(EPA_Petr_inputfile, sheet_name = "Production_CH4 (MT)", usecols = "A:AG", skiprows = 5, names = colnames, nrows = 125)
EPA_emi_prod_Petr = EPA_emi_prod_Petr.drop(columns = ['Emission\nSource No.'])
EPA_emi_prod_Petr = EPA_emi_prod_Petr.rename(columns={EPA_emi_prod_Petr.columns[0]:'Source'})
# Strip parentheses and double quotes from the source names ("(" becomes "- ").
# regex=False makes these literal replacements on every pandas version (the default for
# `regex` changed to False in pandas 2.0, which made the old r"\(" pattern a no-op).
EPA_emi_prod_Petr['Source'] = (EPA_emi_prod_Petr['Source']
                               .str.replace("(", "- ", regex=False)
                               .str.replace(")", "", regex=False)
                               .str.replace('"', "", regex=False))
# Empty cells become '' (Step 3.5 treats '' as zero emissions)
EPA_emi_prod_Petr = EPA_emi_prod_Petr.fillna('')
# Keep only the years from start_year onward
EPA_emi_prod_Petr = EPA_emi_prod_Petr.drop(columns = [*range(1990, start_year,1)])
EPA_emi_prod_Petr.reset_index(inplace=True, drop=True)
display(EPA_emi_prod_Petr)

### Step 3.2. Read in Petroleum Transport 

In [None]:
# Emissions are in units of MT (= 1x10-6 Tg)

# Sheet and column range shared by the header read and the data read
trans_sheet = dict(sheet_name = "Transportation Emissions", usecols = "A:AI")

# Header row -> column names
names = pd.read_excel(EPA_Petr_inputfile, skiprows = 32, header = 0, nrows = 1, **trans_sheet)
colnames = names.columns.values

# Data rows: drop the non-emission columns, call the first remaining column 'Source',
# blank out missing values and keep only the years from start_year onward
EPA_emi_trans_Petr = (
    pd.read_excel(EPA_Petr_inputfile, skiprows = 34, names = colnames, nrows = 18, **trans_sheet)
    .drop(columns = ['Emission\nSource No.', 'Unnamed: 2', 'Emission Units'])
    .pipe(lambda tbl: tbl.rename(columns={tbl.columns[0]:'Source'}))
    .fillna('')
    .drop(columns = list(range(1990, start_year)))
    .reset_index(drop=True)
)
display(EPA_emi_trans_Petr)


### Step 3.3. Read in Petroleum Refining 

In [None]:
# Emissions are in units of MT (= 1x10-6 Tg)

# Sheet and column range shared by the header read and the data read
ref_sheet = dict(sheet_name = "Refinery Emissions", usecols = "A:AI")

# Header row -> column names
names = pd.read_excel(EPA_Petr_inputfile, skiprows = 7, header = 0, nrows = 1, **ref_sheet)
colnames = names.columns.values

# Data rows: drop the non-emission columns, call the first remaining column 'Source',
# blank out missing values and keep only the years from start_year onward
EPA_emi_ref_Petr = (
    pd.read_excel(EPA_Petr_inputfile, skiprows = 8, names = colnames, nrows = 25, **ref_sheet)
    .drop(columns = ['Emission\nSource No.', 'Scaling Factor for 1990-2009 ','Units'])
    .pipe(lambda tbl: tbl.rename(columns={tbl.columns[0]:'Source'}))
    .fillna('')
    .drop(columns = list(range(1990, start_year)))
    .reset_index(drop=True)
)
display(EPA_emi_ref_Petr)

### Step 3.4. Read in Total Petroleum Emissions

In [None]:
# Read in total production + exploration emissions (with methane reductions accounted for)
# data are in kt

# Sheet and column range shared by the header read and the data read
summary_sheet = dict(sheet_name = "CH4 Summary", usecols = "A:AF")

# Header row -> column names
names = pd.read_excel(EPA_Petr_inputfile, skiprows = 4, header = 0, nrows = 1, **summary_sheet)
colnames = names.columns.values

# Summary rows: call the first column 'Source' and keep only the years from start_year onward
EPA_emi_total_Petr = (
    pd.read_excel(EPA_Petr_inputfile, skiprows = 19, names = colnames, nrows = 5, **summary_sheet)
    .pipe(lambda tbl: tbl.rename(columns={tbl.columns[0]:'Source'}))
    .drop(columns = list(range(1990, start_year)))
    .reset_index(drop=True)
)
display(EPA_emi_total_Petr)

### Step 3.5. Split Emissions into Gridding Groups

In [None]:
# Column positions of the first/last v2 years in the production table (kept for reference)
start_year_idx = EPA_emi_prod_Petr.columns.get_loc(start_year)
stop_year_idx = EPA_emi_prod_Petr.columns.get_loc(end_year)+1
ghgi_prod_groups = ghgi_prod_map['GHGI_Emi_Group'].unique()
ghgi_trans_groups = ghgi_trans_map['GHGI_Emi_Group'].unique()
ghgi_ref_groups = ghgi_ref_map['GHGI_Emi_Group'].unique()


def _group_emissions_kt(emi_table, ghgi_map, group_name, extra_sources=()):
    """Sum the annual emissions of every inventory row belonging to one emission group.

    Parameters:
        emi_table: inventory table with a 'Source' column followed by one column per year.
        ghgi_map: mapping table with 'GHGI_Source' and 'GHGI_Emi_Group' columns.
        group_name: emission group to collect.
        extra_sources: additional source-name patterns to include for this group.
    Returns: one value per year column from start_year onward, divided by 1000.

    Source names are joined with '|' and matched with `str.contains`, i.e. they are
    interpreted as a regular expression.
    """
    sources = list(ghgi_map.loc[ghgi_map['GHGI_Emi_Group'] == group_name, 'GHGI_Source'])
    pattern = '|'.join(sources + list(extra_sources))
    # Locate the first year column in *this* table rather than reusing the production
    # table's offset, so that a different column layout cannot silently shift the years
    first_year_col = emi_table.columns.get_loc(start_year)
    yearly = emi_table[emi_table['Source'].str.contains(pattern)].iloc[:, first_year_col:]
    # Blank cells ('' after fillna) count as zero
    return np.where(yearly == '', [0], yearly).sum(axis=0)/float(1000)


# Each group's annual totals are published as a notebook-level array named after the group
for group_name in ghgi_prod_groups:
    vars()[group_name] = np.zeros(num_years)
    extra_sources = ()
    if group_name == 'Emi_OilWellProd':
        extra_sources = ('Produced Water - Regular Pressure Wells', 'Produced Water - Low Pressure Wells')
    vars()[group_name][:] = _group_emissions_kt(EPA_emi_prod_Petr, ghgi_prod_map, group_name, extra_sources)

for group_name in ghgi_trans_groups:
    vars()[group_name] = np.zeros(num_years)
    vars()[group_name][:] = _group_emissions_kt(EPA_emi_trans_Petr, ghgi_trans_map, group_name)

for group_name in ghgi_ref_groups:
    vars()[group_name] = np.zeros(num_years)
    vars()[group_name][:] = _group_emissions_kt(EPA_emi_ref_Petr, ghgi_ref_map, group_name)


print('QA/QC: Check Production, Transport, Refining Emission Sum against GHGI Summary Emissions')
for iyear in np.arange(0,num_years): 
    sum_emi = 0
    for group_name in ghgi_prod_groups:
        sum_emi += vars()[group_name][iyear]
    for group_name in ghgi_trans_groups:
        sum_emi += vars()[group_name][iyear]
    for group_name in ghgi_ref_groups:
        sum_emi += vars()[group_name][iyear]

    # First four rows of the summary table, for this year (column 0 is 'Source')
    summary_emi = EPA_emi_total_Petr.iloc[0,iyear+1]+EPA_emi_total_Petr.iloc[1,iyear+1] +EPA_emi_total_Petr.iloc[2,iyear+1]+\
                    EPA_emi_total_Petr.iloc[3,iyear+1]
    #Check 1 - make sure that the sums from all the regions equal the totals reported
    # diff1 is a relative difference expressed as a fraction (0.0001 == 0.01%)
    diff1 = abs(sum_emi - summary_emi)/((sum_emi + summary_emi)/2)
    print(summary_emi)
    print(sum_emi)
    if diff1 < 0.0001:
        print('Year ', year_range[iyear],': PASS, difference < 0.01%')
    else:
        # Fixed: the fraction is now converted to percent before being printed with '%'
        print('Year ', year_range[iyear],': FAIL (check Production & summary tabs): ', diff1*100,'%') 

## Note: The numbers will not be exactly the same due to conversions and rounding in the Transport sector (between the 
## Transportation Emissions tab and the CH4 summary tab). This is not an error, just a difference. 

--------------
## Step 4. Grid Data
-------------

##### Step 4.1 Allocate emissions to the CONUS region (0.1x0.1)

In [None]:
# Allocate national emissions (Tg) onto a 0.1x0.1 grid using gridcell level 'Proxy_Groups'

DEBUG =1
#Define emission arrays
Emissions_array_01_expl = np.zeros([len(Lat_01),len(Lon_01),num_years])
Emissions_array_01_prod = np.zeros([len(Lat_01),len(Lon_01),num_years])
Emissions_array_01_trans = np.zeros([len(Lat_01),len(Lon_01),num_years])
Emissions_array_01_ref = np.zeros([len(Lat_01),len(Lon_01),num_years])
Emissions_nongrid = np.zeros([num_years])

# For each year, distribute national emissions onto the grid using the proxies specified in the Proxy_Mapping file

print('**QA/QC Check: Sum of national gridded emissions vs. GHGI national emissions')

# E&P groups that are gridded into the exploration array; all other mapped E&P groups
# go to the production array
expl_group_names = ('Emi_OilWellExp', 'Emi_ConvCompExp', 'Emi_HFCompExp', 'Emi_OilWellDrilledExp')

##Exploration & Production
for igroup in np.arange(0,len(proxy_prod_map)):
    group_name = proxy_prod_map.loc[igroup,'GHGI_Emi_Group']
    national_emi = vars()[group_name]
    if group_name == 'Emi_not_mapped':
        # Unmapped emissions are not gridded; they are carried in the non-gridded total
        for iyear in np.arange(0,num_years):
            Emissions_nongrid[iyear] += national_emi[iyear]
        continue

    if group_name in expl_group_names:
        target_array, nongrid_frac = Emissions_array_01_expl, CONUS_frac_expl
    else:
        target_array, nongrid_frac = Emissions_array_01_prod, CONUS_frac_prod
    proxy_temp = vars()['Proxy_'+group_name]
    for iyear in np.arange(0,num_years):
        # Share of this group's v2 emissions in each grid cell
        proxy_frac = data_fn.safe_div(proxy_temp[:,:,iyear], np.sum(proxy_temp[:,:,iyear]))
        # Portion of the national total that is gridded
        ghgi_temp = national_emi[iyear] * (1-nongrid_frac[iyear])
        # Nothing to add when both the proxy and the emissions are zero. (The original
        # exploration branch added zeros to the production array here - a copy-paste no-op.)
        if not (np.sum(proxy_frac)==0 and ghgi_temp ==0):
            target_array[:,:,iyear] += ghgi_temp * proxy_frac[:,:]
        Emissions_nongrid[iyear] += national_emi[iyear] - ghgi_temp

##Transport and Refining (same allocation, no exploration/production split)
for proxy_map, target_array, nongrid_frac in (
        (proxy_trans_map, Emissions_array_01_trans, CONUS_frac_trans),
        (proxy_ref_map, Emissions_array_01_ref, CONUS_frac_ref)):
    for igroup in np.arange(0,len(proxy_map)):
        group_name = proxy_map.loc[igroup,'GHGI_Emi_Group']
        national_emi = vars()[group_name]
        proxy_temp = vars()['Proxy_'+group_name]
        for iyear in np.arange(0,num_years):
            proxy_frac = data_fn.safe_div(proxy_temp[:,:,iyear], np.sum(proxy_temp[:,:,iyear]))
            ghgi_temp = national_emi[iyear] * (1-nongrid_frac[iyear])
            target_array[:,:,iyear] += ghgi_temp * proxy_frac[:,:]
            Emissions_nongrid[iyear] += national_emi[iyear] - ghgi_temp


# QA/QC: gridded + non-gridded emissions should reproduce the GHGI summary totals
for iyear in np.arange(0, num_years):    
    calc_emi = np.sum(Emissions_array_01_expl[:,:,iyear])+np.sum(Emissions_array_01_prod[:,:,iyear])+ \
                np.sum(Emissions_array_01_trans[:,:,iyear])+np.sum(Emissions_array_01_ref[:,:,iyear])+ \
                np.sum(Emissions_nongrid[iyear]) 

    summary_emi = EPA_emi_total_Petr.iloc[0,iyear+1]+EPA_emi_total_Petr.iloc[1,iyear+1] +EPA_emi_total_Petr.iloc[2,iyear+1]+\
                    EPA_emi_total_Petr.iloc[3,iyear+1] 
    # Relative difference as a fraction (0.0001 == 0.01%)
    emi_diff = abs(summary_emi-calc_emi)/((summary_emi+calc_emi)/2)
    if DEBUG==1:
        print(calc_emi)
        print(summary_emi)
    if abs(emi_diff) < 0.0001:
        print('Year '+ year_range_str[iyear]+': Difference < 0.01%: PASS')
    else: 
        print('Year '+ year_range_str[iyear]+': Difference > 0.01%: FAIL, diff: '+str(emi_diff))
        
ct = datetime.datetime.now() 
print("current time:", ct)

#### 4.2 Calculate Gridded Emission Fluxes (molec./cm2/s) (0.1x0.1)

In [None]:
#Convert emissions to emission flux
# conversion: kt emissions to molec/cm2/s flux

Flux_array_01_annual_expl = np.zeros([len(Lat_01),len(Lon_01),num_years])
Flux_array_01_annual_prod = np.zeros([len(Lat_01),len(Lon_01),num_years])
Flux_array_01_annual_trans = np.zeros([len(Lat_01),len(Lon_01),num_years])
Flux_array_01_annual_ref = np.zeros([len(Lat_01),len(Lon_01),num_years])
print('**QA/QC Check: Sum of national gridded emissions vs. GHGI national emissions')
  
for iyear in np.arange(0,num_years):
    calc_emi = 0
    # Fixed: leap years were hard-coded as 2012 and 2016, so 2020 (part of the extended
    # series) was converted with 365 days. Use the Gregorian leap-year rule instead.
    year = year_range[iyear]
    is_leap_year = (year % 4 == 0 and year % 100 != 0) or (year % 400 == 0)
    if is_leap_year:
        year_days = np.sum(month_day_leap)
    else:
        year_days = np.sum(month_day_nonleap)

    # kt -> g (1e9), g -> molecules (Avogadro / molar mass), per second of the year,
    # per cm2 of each grid cell
    conversion_factor_01 = 10**9 * Avogadro / float(Molarch4 *year_days * 24 * 60 *60) / area_matrix_01
    Flux_array_01_annual_expl[:,:,iyear] = Emissions_array_01_expl[:,:,iyear]*conversion_factor_01
    Flux_array_01_annual_prod[:,:,iyear] = Emissions_array_01_prod[:,:,iyear]*conversion_factor_01
    Flux_array_01_annual_trans[:,:,iyear] = Emissions_array_01_trans[:,:,iyear]*conversion_factor_01
    Flux_array_01_annual_ref[:,:,iyear] = Emissions_array_01_ref[:,:,iyear]*conversion_factor_01
    #convert back to mass to check
    conversion_factor_annual = 10**9 * Avogadro / float(Molarch4 *year_days * 24 * 60 *60) / area_matrix_01
    calc_emi = np.sum(Flux_array_01_annual_expl[:,:,iyear]/conversion_factor_annual)+\
                np.sum(Flux_array_01_annual_prod[:,:,iyear]/conversion_factor_annual)+\
                np.sum(Flux_array_01_annual_trans[:,:,iyear]/conversion_factor_annual)+\
                np.sum(Flux_array_01_annual_ref[:,:,iyear]/conversion_factor_annual)+\
                np.sum(Emissions_nongrid[iyear])
    summary_emi = EPA_emi_total_Petr.iloc[0,iyear+1]+EPA_emi_total_Petr.iloc[1,iyear+1] +EPA_emi_total_Petr.iloc[2,iyear+1]+\
                    EPA_emi_total_Petr.iloc[3,iyear+1] 
    # Relative difference as a fraction (0.0001 == 0.01%)
    emi_diff = abs(summary_emi-calc_emi)/((summary_emi+calc_emi)/2)
    if DEBUG==1:
        print(calc_emi)
        print(summary_emi)
    if abs(emi_diff) < 0.0001:
        print('Year '+ year_range_str[iyear]+': Difference < 0.01%: PASS')
    else: 
        print('Year '+ year_range_str[iyear]+': Difference > 0.01%: FAIL, diff: '+str(emi_diff))
        
# Names used by the netCDF-writing and plotting cells (same arrays, not copies)
Flux_Emissions_Total_annual_expl = Flux_array_01_annual_expl
Flux_Emissions_Total_annual_prod = Flux_array_01_annual_prod
Flux_Emissions_Total_annual_trans = Flux_array_01_annual_trans
Flux_Emissions_Total_annual_ref = Flux_array_01_annual_ref

-------------
## Step 5. Write netCDF
------------

In [None]:
# yearly data
# For each petroleum segment: initialize the output file with the project helper, write
# the annual flux array into its 'emi_ch4' variable, and confirm the file location.
netcdf_outputs = (
    (gridded_expl_outputfile, netCDF_expl_description, Flux_Emissions_Total_annual_expl),
    (gridded_prod_outputfile, netCDF_prod_description, Flux_Emissions_Total_annual_prod),
    (gridded_trans_outputfile, netCDF_trans_description, Flux_Emissions_Total_annual_trans),
    (gridded_ref_outputfile, netCDF_ref_description, Flux_Emissions_Total_annual_ref),
)

for out_file, out_description, out_flux in netcdf_outputs:
    #Initialize file
    data_IO_fn.initialize_netCDF(out_file, out_description, 0, year_range, loc_dimensions, Lat_01, Lon_01)

    # Write data to netCDF (the context manager closes the file even if the write fails)
    with Dataset(out_file, 'r+', format='NETCDF4') as nc_out:
        nc_out.variables['emi_ch4'][:,:,:] = out_flux

    #Confirm file location
    print('** SUCCESS **')
    # Fixed: the message used to glue os.getcwd() and the relative '../...' path together
    # without a separator; report the resolved absolute path instead
    print("Gridded emissions written to file: {}" .format(os.path.abspath(out_file)))
    print('')


----------
## Step 6. Plot Gridded Data
---------

#### Step 6.1. Plot Annual Emission Fluxes

In [None]:
# Map the annual exploration fluxes for every year of the series.
# scale_max, save_flag and save_outfile are passed straight to the project plotting helper
# (save_flag = 0 with an empty file name presumably means "do not save" - confirm in
# data_plot_functions).
scale_max, save_flag, save_outfile = 10, 0, ''
data_plot_fn.plot_annual_emission_flux_map(
    Flux_Emissions_Total_annual_expl, Lat_01, Lon_01, year_range,
    title_expl_str, scale_max, save_flag, save_outfile)

In [None]:
# Map the annual fluxes for every year of the series, one call per remaining segment
# (production, transport, refining), all with the same plot settings
scale_max, save_flag, save_outfile = 10, 0, ''
for segment_flux, segment_title in (
        (Flux_Emissions_Total_annual_prod, title_prod_str),
        (Flux_Emissions_Total_annual_trans, title_trans_str),
        (Flux_Emissions_Total_annual_ref, title_ref_str)):
    data_plot_fn.plot_annual_emission_flux_map(
        segment_flux, Lat_01, Lon_01, year_range,
        segment_title, scale_max, save_flag, save_outfile)

#### Step 6.2 Plot Difference between first and last inventory year

In [None]:
# Map the difference between the last and first year of the series for each segment
# (exploration, production, transport, refining), all with the same plot settings
save_flag, save_outfile = 0, ''
for segment_flux, segment_diff_title in (
        (Flux_Emissions_Total_annual_expl, title_expl_diff_str),
        (Flux_Emissions_Total_annual_prod, title_prod_diff_str),
        (Flux_Emissions_Total_annual_trans, title_trans_diff_str),
        (Flux_Emissions_Total_annual_ref, title_ref_diff_str)):
    data_plot_fn.plot_diff_emission_flux_map(
        segment_flux, Lat_01, Lon_01, year_range,
        segment_diff_title, save_flag, save_outfile)


In [None]:
# Report the total run time: seconds elapsed since the start timestamp `it` recorded in
# Step 0, expressed in hours
ct = datetime.datetime.now()
ft = datetime.datetime.timestamp(ct)
seconds_per_hour = 60*60
time_elapsed = (ft-it)/seconds_per_hour
print('Time to run: '+str(time_elapsed)+' hours')
print('** EXTENSION_GEPA_1B2a_Petroleum_Systems: COMPLETE **')