# Gridded EPA Methane Inventory
## Category: 1B2b Natural Gas Systems - Processing Segment

***
#### Authors: 
Joannes D. Maasakkers, Erin E. McDuffie
#### Date Last Updated: 
see Step 0
#### Notebook Purpose: 
This notebook calculates and reports annual gridded (0.1°x0.1°) methane emission fluxes (molec./cm2/s) from the processing segment of natural gas systems in the CONUS region between 2012-2018. 
#### Summary & Notes:
EPA GHGI gas processing emissions are read in from the GHGI Natural Gas Systems workbook at the national level. Emissions are then distributed onto a 0.1x0.1 degree grid as a function of emission group. The activity/proxy data used to spatially distribute emissions from each group include Enverus Processing plant locations and GHGRP processing plant emissions. Emissions data are calculated as a function of year, using annual data from GHGRP and a single snapshot of current processing plant locations from Enverus. Annual emission fluxes (molec./cm2/s) are written to final netCDFs in the ‘/code/Final_Gridded_Data/’ folder. 
***

**Potential update:** Could alternatively read in O&G Journal data and match it with GHGRP plant emissions by county. Then calculate the emissions/throughput ratio by county first, and take the national average of those ratios. Relative emissions could then be calculated from the O&G Journal throughput and distributed by county (for all plants not matched to GHGRP data). This would require matching the GHGRP and O&G Journal data based on county, then mapping based on county.

-------
## Step 0. Set-Up Notebook Modules, Functions, and Local Parameters and Constants
_____

In [None]:
# Report when this notebook file was last saved and where it is being run from
import os
import time

modtime = os.path.getmtime('./1B2b_Processing.ipynb')
last_saved_local = time.localtime(modtime)
modificationTime = time.strftime('%Y-%m-%d %H:%M:%S', last_saved_local)
print("This file was last modified on: ", modificationTime)

working_directory = os.getcwd()
print("The directory we are working in is {}".format(working_directory))

In [None]:
## Include plots within notebook
%matplotlib inline

In [None]:
# Import base modules
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import re
import pyodbc
import PyPDF2 as pypdf
import tabula as tb
from datetime import datetime
from copy import copy

# Import additional modules
from mpl_toolkits.basemap import Basemap

# Load netCDF (for manipulating netCDF file types)
from netCDF4 import Dataset

# Set up ticker
import matplotlib.ticker as ticker

#add path for the global function module (file)
import sys
module_path = os.path.abspath(os.path.join('../Global_Functions/'))
#print(module_path)
if module_path not in sys.path:
    sys.path.append(module_path)

# Load functions
import data_load_functions as data_load_fn
import data_functions as data_fn
import data_IO_functions as data_IO_fn
import data_plot_functions as data_plot_fn

In [None]:
#INPUT Files
# Global (shared) file names. Entries 1 and 6 of the returned list are not
# assigned in this notebook (the original cell had them commented out as the
# county ANSI file and the 0.01 degree county ANSI grid).
global_filenames = data_load_fn.load_global_file_names()
State_ANSI_inputfile = global_filenames[0]
(pop_map_inputfile,
 Grid_area01_inputfile,
 Grid_area001_inputfile,
 Grid_state001_ansi_inputfile) = (global_filenames[i] for i in (2, 3, 4, 5))

# The first 20 characters of the state ANSI path are used as the global input folder
# NOTE(review): this assumes the folder prefix is exactly 20 characters long - confirm
globalinputlocation = State_ANSI_inputfile[0:20]
print(globalinputlocation)

# EPA Inventory Data
EPA_NG_inputfile = f"{globalinputlocation}GHGI/Ch3_Energy/NaturalGasSystems_1990-2018_GHGI_2020-04-11.xlsx"

# Proxy mapping file
NG_Mapping_inputfile = './InputData/NaturalGas_Processing_ProxyMapping.xlsx'

# Activity Data
Enverus_NG_ProcPlant_inputfile = f"{globalinputlocation}Enverus/Midstream/Processing_Plants_CONUS_onshore_WGS84_01x01.xls"
GHGRP_facility_inputfile = './InputData/GHGRP_Facility_Info.csv'
GHGRP_subpartw_inputfile = './InputData/EF_W_EMISSIONS_SOURCE_GHG.xlsx'

OGJ_ProcPlant_inputfile = './InputData/OGJ_2015_processing_plants.pdf'


#OUTPUT FILES
# Final gridded fluxes and the strings used to describe/plot them
gridded_outputfile = '../Final_Gridded_Data/EPA_v2_1B2b_Natural_Gas_Processing.nc'
netCDF_description = 'Gridded EPA Inventory - Natural Gas Systems Emissions - IPCC Source Category 1B2b - Processing'
title_str = "EPA methane emissions from gas processing"
title_diff_str = "Emissions from gas processing difference: 2018-2012"

# Output gridded proxy data
grid_emi_outputfile = '../Final_Gridded_Data/Extension/v2_input_data/NG_Processing_Grid_Emi.nc'

In [None]:
# Emission time series (both bounds inclusive)
start_year = 2012  # first year in emission timeseries
end_year = 2018    # last year in emission timeseries
year_range = list(range(start_year, end_year + 1))  # list of emission years
year_range_str = list(map(str, year_range))
num_years = len(year_range)

# Physical constants and grid resolution
Avogadro = 6.02214129 * 10**(23)  # molecules/mol
Molarch4 = 16.04                  # g/mol
Res01 = 0.1                       # degrees

# Continental US lat/lon limits in degrees (for netCDF files)
Lat_low, Lat_up = 20, 55
Lon_left, Lon_right = -130, -60
loc_dimensions = [Lat_low, Lat_up, Lon_left, Lon_right]

# Index bounds of the continental US window, counted from -90 latitude / -180 longitude
ilat_start, ilat_end = (int((90 + lat_edge) / Res01) for lat_edge in (Lat_low, Lat_up))          # 1100:1450
ilon_start, ilon_end = (abs(int((-180 - lon_edge) / Res01)) for lon_edge in (Lon_left, Lon_right))  # 500:1200

# Month names, numbers and two-digit tags
month_range_str = ['January', 'February', 'March', 'April', 'May', 'June',
                   'July', 'August', 'September', 'October', 'November', 'December']
num_months = len(month_range_str)
month_dict = {month_name: month_number
              for month_number, month_name in enumerate(month_range_str, start=1)}
month_tag = ['%02d' % month_number for month_number in range(1, num_months + 1)]

# Number of days in each month (leap years only differ in February)
month_day_nonleap = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
month_day_leap = [31, 29] + month_day_nonleap[2:]
#num_regions = 7

In [None]:
%%javascript
IPython.OutputArea.auto_scroll_threshold = 9999;
//prevent auto-scrolling

In [None]:
# Record the notebook start time (ct) and the same instant as a POSIX timestamp (it)
ct = datetime.now()
it = datetime.timestamp(ct)
print("current time:", ct)

____
## Step 1. Load in State ANSI data and Area Maps
_____

In [None]:
# State-level ANSI data: keep the first three items returned by the loader
state_ansi_outputs = data_load_fn.load_state_ansi(State_ANSI_inputfile)
State_ANSI, name_dict, abbr_dict = state_ansi_outputs[0:3]
# QA: report the file that was read and the number of "states" it holds
print(f'Read input file: {State_ANSI_inputfile}')
print(f'Total "States" found: {len(State_ANSI):.0f}')
print(' ')

# 0.01 x 0.01 degree data:
# state ANSI ID map and grid cell area (m2) map
state_ANSI_map = data_load_fn.load_state_ansi_map(Grid_state001_ansi_inputfile)
area_map, lat001, lon001 = data_load_fn.load_area_map_001(Grid_area001_inputfile)

# 0.1 x 0.1 degree data:
# only the 2nd and 3rd outputs of the loader (the lat/lon axes) are kept
Lat01, Lon01 = data_load_fn.load_area_map_01(Grid_area01_inputfile)[1:3]
# Cut the axes down to the continental US window
conus_lat_window = slice(ilat_start, ilat_end)
conus_lon_window = slice(ilon_start, ilon_end)
Lat_01 = Lat01[conus_lat_window]
Lon_01 = Lon01[conus_lon_window]
# Regrid the 0.01 degree maps onto the 0.1 degree CONUS window
area_matrix_01 = data_fn.regrid001_to_01(area_map, Lat_01, Lon_01)
area_matrix_01 *= 10000  # convert from m2 to cm2
state_ANSI_map_01 = data_fn.regrid001_to_01(state_ANSI_map, Lat_01, Lon_01)
# The 0.01 degree area inputs are no longer needed
del area_map, lat001, lon001

# Print time
ct = datetime.now()
print("current time:", ct)

-------------
## Step 2: Read-in and Format Proxy Data
-------------

#### Step 2.0 Read in and Process O&G Journal Data - NOT USED (potential update)

#### Step 2.1 Read In Proxy Mapping File & Make Proxy Arrays

#### Step 2.1.1 Format Proxy Group Arrays

In [None]:
#load GHGI Mapping Groups
# The column names are read from the header row first and then re-applied to the data rows
names = pd.read_excel(NG_Mapping_inputfile, sheet_name = "GHGI Map - Proc", usecols = "A:B",skiprows = 1, header = 0)
colnames = names.columns.values
ghgi_proc_map = pd.read_excel(NG_Mapping_inputfile, sheet_name = "GHGI Map - Proc", usecols = "A:B", skiprows = 2, names = colnames)
#drop rows with no data ('na' text or empty cells); copy so the column edits below act on an independent frame
has_emi_group = (ghgi_proc_map['GHGI_Emi_Group'] != 'na') & ghgi_proc_map['GHGI_Emi_Group'].notna()
ghgi_proc_map = ghgi_proc_map[has_emi_group].copy()
#remove the parentheses from the source names.
# regex=False strips them as literal characters, independent of the pandas version
# (the default of `regex` changed from True to False in pandas 2.0, which would
# leave a pattern such as r"\(" unmatched).
for paren in ("(", ")"):
    ghgi_proc_map['GHGI_Source'] = ghgi_proc_map['GHGI_Source'].str.replace(paren, "", regex=False)
ghgi_proc_map.reset_index(inplace=True, drop=True)
display(ghgi_proc_map)

#load emission group - proxy map
names = pd.read_excel(NG_Mapping_inputfile, sheet_name = "Proxy Map - Proc", usecols = "A:D",skiprows = 1, header = 0)
colnames = names.columns.values
proxy_proc_map = pd.read_excel(NG_Mapping_inputfile, sheet_name = "Proxy Map - Proc", usecols = "A:D", skiprows = 1, names = colnames)
display((proxy_proc_map))

#create empty proxy and emission group arrays (add months for proxy variables that have monthly data)
# Each row of the proxy map pairs one emission group with one proxy group, so both
# names are taken from that same row. (ghgi_proc_map has one row per GHGI *source*,
# so indexing it with the proxy-map row number would pick unrelated groups.)
for igroup in np.arange(0,len(proxy_proc_map)):
    proxy_name = proxy_proc_map.loc[igroup,'Proxy_Group']
    emi_group_name = proxy_proc_map.loc[igroup,'GHGI_Emi_Group']
    if proxy_proc_map.loc[igroup, 'Month_Flag'] == 1:
        time_shape = [num_years,num_months]
    else:
        time_shape = [num_years]
    vars()[proxy_name] = np.zeros([len(Lat_01),len(Lon_01)] + time_shape)  #gridded proxy
    vars()[proxy_name+'_nongrid'] = np.zeros(time_shape)                   #off-grid proxy total
    vars()[emi_group_name] = np.zeros(time_shape)                          #national emissions of the group

emi_group_names = np.unique(ghgi_proc_map['GHGI_Emi_Group'])
print('QA/QC: Is the number of emission groups the same for the proxy and emissions tabs?')
if (len(emi_group_names) == len(np.unique(proxy_proc_map['GHGI_Emi_Group']))):
    print('PASS')
else:
    print('FAIL')

#### Step 2.1.2 Read In Enverus Processing Plant locations (pre-processed in ArcMap)

In [None]:
# Enverus processing plant table (columns are selected by spreadsheet letter)
Env_ProcPlant_loc = pd.read_excel(Enverus_NG_ProcPlant_inputfile, usecols= "C,F,I,K,L,AE,AF,AI,AH,AI:AK", header = 0)
# Plant counts per 0.1 degree cell; the data are a snapshot in time that is applied to the entire timeseries
Map_EnvProcPlants = np.zeros([len(Lat_01),len(Lon_01)])
Map_EnvProcPlants_nongrid = np.zeros([1])

# A plant is on the grid when it lies strictly inside the lat/lon limits
env_plant_lat = Env_ProcPlant_loc['Lat'].values
env_plant_lon = Env_ProcPlant_loc['Lon'].values
plant_on_grid = (env_plant_lon > Lon_left) & (env_plant_lon < Lon_right) \
    & (env_plant_lat > Lat_low) & (env_plant_lat < Lat_up)

# Count on-grid plants in their cell (np.add.at accumulates repeated cells) and tally the rest
cell_rows = ((env_plant_lat[plant_on_grid] - Lat_low)/Res01).astype(int)
cell_cols = ((env_plant_lon[plant_on_grid] - Lon_left)/Res01).astype(int)
np.add.at(Map_EnvProcPlants, (cell_rows, cell_cols), 1)
Map_EnvProcPlants_nongrid += np.count_nonzero(~plant_on_grid)
print('Total Processing Plants on grid: ',np.sum(Map_EnvProcPlants[:,:]))
print('Total Processing Plants off grid: ',np.sum(Map_EnvProcPlants_nongrid))

#correct data so no thruput > 1000
#Env_ProcPlant_loc.loc[Env_ProcPlant_loc['THROUGHPUT']>1000,'THROUGHPUT']=\
#Env_ProcPlant_loc.loc[Env_ProcPlant_loc['THROUGHPUT']>1000,'THROUGHPUT']/10


#to be applied later to calculate fraction of AK emissions (no HI plants)
# NOTE(review): the 5 is hard-coded, presumably the number of Alaska plants - confirm
AK_plant_fraction = 5/len(Env_ProcPlant_loc)
#print(AK_plant_fraction)

#### Step 2.1.3 Read In GHGRP Facility Data

In [None]:
#a) Read in the GHGRP data
facility_info = pd.read_csv(GHGRP_facility_inputfile)
facility_emissions = pd.read_excel(GHGRP_subpartw_inputfile)
# Keep onshore gas processing facilities with positive reported CH4, for reporting years before 2019.
# The filtered table is copied so the columns added below are set on an independent frame.
keep_rows = (facility_emissions['INDUSTRY_SEGMENT'] =='Onshore natural gas processing [98.230(a)(3)]') \
    & (facility_emissions['TOTAL_REPORTED_CH4_EMISSIONS'] >0) \
    & (facility_emissions['REPORTING_YEAR'] <2019)
facility_emissions = facility_emissions[keep_rows].copy()
facility_emissions.reset_index(drop=True,inplace=True)

# Location columns filled in below. Lat/Lon start as floats (as in the per-plant
# tables built in the next cell) so assigning coordinates does not change the column dtype.
facility_emissions['State'] = ''
facility_emissions['County'] = ''
facility_emissions['City'] = ''
facility_emissions['Zip'] = 0
facility_emissions['Lat'] = 0.0
facility_emissions['Lon'] = 0.0

#b) match GHGRP facility and emissions data
# for each entry in the data file (each facility each year), match the facility ID to the ID in the
# GHGRP facility info file, then append the corresponding location data to the emissions array
location_columns = {'State': 'V_GHG_EMITTER_FACILITIES.STATE',
                    'County': 'V_GHG_EMITTER_FACILITIES.COUNTY',
                    'City': 'V_GHG_EMITTER_FACILITIES.CITY',
                    'Zip': 'V_GHG_EMITTER_FACILITIES.ZIP',
                    'Lat': 'V_GHG_EMITTER_FACILITIES.LATITUDE',
                    'Lon': 'V_GHG_EMITTER_FACILITIES.LONGITUDE'}
info_facility_ids = facility_info['V_GHG_EMITTER_FACILITIES.FACILITY_ID']
for index in np.arange(len(facility_emissions)):
    facility_id = facility_emissions['FACILITY_ID'][index]
    info_rows = np.where(info_facility_ids == facility_id)[0]
    if info_rows.size == 0:
        # same exception type as the bare [0][0] lookup, but saying which facility is missing
        raise IndexError('GHGRP facility ID {} not found in {}'.format(facility_id, GHGRP_facility_inputfile))
    ilocation = info_rows[0]  #first occurrence in the facility info file
    for emis_col, info_col in location_columns.items():
        facility_emissions.loc[index, emis_col] = facility_info[info_col][ilocation]

In [None]:
# Build one per-plant summary table for every inventory year
# (stored in the notebook namespace as GHGRP_plants_<year>)

# (per-plant column, facility_emissions column) pairs that are copied over as-is
copied_columns = [('Name', 'FACILITY_NAME'), ('State', 'State'), ('County', 'County'),
                  ('City', 'City'), ('Zip', 'Zip'), ('Lat', 'Lat'), ('Lon', 'Lon')]

print('QA/QC: Check that all GHGRP emissions are allocated to specific plants')
for iyear in np.arange(0,num_years):
    # all emission records reported for this year
    reported_this_year = facility_emissions['REPORTING_YEAR'] ==year_range[iyear]
    facility_emissions_temp = facility_emissions[reported_this_year].reset_index(drop=True)

    # one row per facility ID, starting from placeholder values
    GHGRP_plants = pd.DataFrame({'FID':facility_emissions_temp['FACILITY_ID'].unique()})
    for text_column in ('Name', 'State', 'County', 'City'):
        GHGRP_plants[text_column] = ' '
    GHGRP_plants['Zip'] = 0
    for float_column in ('Lat', 'Lon', 'TgCH4'):
        GHGRP_plants[float_column] = 0.0

    #Put everything in per-plant array: attributes are overwritten by each record of the
    # facility, emissions are accumulated (reported value divided by 1e6)
    row_of_fid = {fid: irow for irow, fid in enumerate(GHGRP_plants['FID'])}
    for irecord in range(len(facility_emissions_temp)):
        iFID = row_of_fid[facility_emissions_temp['FACILITY_ID'][irecord]]
        for plant_column, record_column in copied_columns:
            GHGRP_plants.loc[iFID, plant_column] = facility_emissions_temp[record_column][irecord]
        GHGRP_plants.loc[iFID,'TgCH4'] += facility_emissions_temp['TOTAL_REPORTED_CH4_EMISSIONS'][irecord]/1e6

    vars()['GHGRP_plants'+'_'+year_range_str[iyear]] = GHGRP_plants

    # QA: relative difference between the reported total and the per-plant total
    reported_total = facility_emissions_temp['TOTAL_REPORTED_CH4_EMISSIONS'].sum()/1e6
    per_plant_total = GHGRP_plants['TgCH4'].sum()
    diff1 = abs(reported_total -per_plant_total)/ \
        ((reported_total + per_plant_total)/2)
    if diff1 < 0.0001:
        print('Year ', year_range[iyear],': PASS, difference < 0.01%')
    else:
        print('Year ', year_range[iyear],': FAIL: ', diff1,'%')
    print('Number of GHGRP Plants: ', len(vars()['GHGRP_plants'+'_'+year_range_str[iyear]]))

In [None]:
#correct County data for GHGRP plants for each year data

# City -> county lookup for plants whose county is missing in the GHGRP data.
# City names are matched exactly (case-sensitive), spelled as in the original if/elif chain.
city_to_county = {
    'Carlsbad': 'Eddy', 'MALAGA': 'Eddy',
    'Crockett': 'Houston',
    'pecos': 'Reeves', 'Balmorhea': 'Reeves', 'Orla': 'Reeves', 'Toyah': 'Reeves',
    'Kermit': 'Winkler',
    'Coyanosa': 'Pecos',
    'Rangely': 'Rio Blanco',
    'PITTSBURG': 'Camp',
    'Stanley': 'Mountrail',
    'Lindsay': 'Cooke',
    'Loving County': 'Loving',
    'Stanton': 'Martin',
    'McKittrick': 'Kern',
    'Briggsdale': 'Weld',
    'Old Ocean': 'Brazoria',
    'Coushatta': 'Red River',
    'Garrison': 'Nacogdoches',
    'Odessa': 'Ector',
}

for year_tag in year_range_str:
    plants_name = 'GHGRP_plants'+'_'+year_tag
    GHGRP_temp_data = vars()[plants_name].copy()

    # only plants without a county are touched; cities not in the lookup stay blank
    rows_without_county = np.flatnonzero(pd.isna(GHGRP_temp_data['County']).values)
    for iplant in rows_without_county:
        plant_city = GHGRP_temp_data.loc[iplant,'City']
        if plant_city in city_to_county:
            GHGRP_temp_data.loc[iplant,'County'] = city_to_county[plant_city]

    vars()[plants_name] = GHGRP_temp_data.copy()

    # QA print: sum of the TgCH4 column of this year's plant table
    print('Year: ', year_tag)
    print('On grid (Tg): ',np.sum(vars()[plants_name]['TgCH4']))

In [None]:
# For each year of GHGRP data, match GHGRP plants to Enverus data
# note there is only one available year of Enverus data


def _record_enverus_match(ghgrp_data, ghgrp_row, env_data, env_label):
    """Flag one GHGRP plant as matched and copy the Enverus plant attributes onto its row.

    ghgrp_data : per-year GHGRP plant DataFrame (modified in place)
    ghgrp_row  : row label of the GHGRP plant
    env_data   : Enverus DataFrame with NAME, CNTY_NAME, STATE_NAME and THROUGHPUT columns
    env_label  : row label of the matched Enverus plant in env_data
    """
    ghgrp_data.loc[ghgrp_row, 'match_flag'] = 1
    ghgrp_data.loc[ghgrp_row, 'Env_name'] = env_data.loc[env_label, 'NAME']
    ghgrp_data.loc[ghgrp_row, 'Env_county'] = env_data.loc[env_label, 'CNTY_NAME']
    ghgrp_data.loc[ghgrp_row, 'Env_state'] = env_data.loc[env_label, 'STATE_NAME']
    ghgrp_data.loc[ghgrp_row, 'Env_throughput'] = env_data.loc[env_label, 'THROUGHPUT']


def _nearest_in_lat_and_lon(candidates, lat, lon):
    """Return ((min |dLat|, position), (min |dLon|, position)) among the candidate plants.

    Positions are positional offsets into `candidates`, not index labels.
    """
    nearest_lat = min((val, idx) for (idx, val) in enumerate(abs(candidates.loc[:, 'Lat'] - lat)))
    nearest_lon = min((val, idx) for (idx, val) in enumerate(abs(candidates.loc[:, 'Lon'] - lon)))
    return nearest_lat, nearest_lon


def _county_search_text(county):
    """Drop the last 7 characters (' County'/' Parish') or 8 characters (' Borough') from a GHGRP county name."""
    lowered = county.lower()
    if 'county' in lowered or 'parish' in lowered:
        return county[:-7]
    if 'borough' in lowered:
        return county[:-8]
    return county


# The Enverus table is identical for every year, so round its coordinates only once
env_lat_rounded = Env_ProcPlant_loc['Lat'].round(2)
env_lon_rounded = Env_ProcPlant_loc['Lon'].round(2)

print('QA/QC: Number of GHGRP plants not in Enverus data set')
for iyear in np.arange(0,num_years):
    GHGRP_temp_data = vars()['GHGRP_plants'+'_'+year_range_str[iyear]].copy()

    GHGRP_temp_data['match_flag'] = 0
    GHGRP_temp_data['Env_name'] = ''
    GHGRP_temp_data['Env_county'] = ''
    GHGRP_temp_data['Env_state'] = ''
    GHGRP_temp_data['Env_throughput'] = 0.0  #float so assigned throughputs do not change the column dtype
    # integer array from the start, so the bookkeeping also works when nothing matches
    rows_to_delete = np.array([], dtype=int)

    #First, find exact matching lat/lon facilities (coordinates compared at 2 decimals)
    for iplant in np.arange(0,len(GHGRP_temp_data)):
        lat_temp = round(GHGRP_temp_data['Lat'][iplant], 2)
        lon_temp = round(GHGRP_temp_data['Lon'][iplant], 2)
        lat_hits = np.where(env_lat_rounded == lat_temp)[0]
        lon_hits = np.where(env_lon_rounded == lon_temp)[0]
        if lat_hits.size == 0 or lon_hits.size == 0:
            continue
        # a single latitude hit is accepted regardless of throughput; when several
        # Enverus plants share the latitude, only those with throughput > 0 are accepted
        require_throughput = lat_hits.size > 1
        for env_row in lat_hits:
            if env_row not in lon_hits: #the same Enverus row must match in both lat and lon
                continue
            if require_throughput and not (Env_ProcPlant_loc.loc[env_row, 'THROUGHPUT'] > 0):
                continue
            _record_enverus_match(GHGRP_temp_data, iplant, Env_ProcPlant_loc, env_row)
            rows_to_delete = np.append(rows_to_delete, env_row)

    rows_to_delete = rows_to_delete.astype(int)
    Env_ProcPlants_notmatched = Env_ProcPlant_loc.drop(rows_to_delete)

    #SECOND, find all Enverus plants within each county that did not match and try to match based on proximity to GHGRP plants
    # NOTE: the candidate list is not updated inside this loop, so one Enverus plant can be
    # assigned to more than one GHGRP plant of the same county.
    for iplant in np.arange(0,len(GHGRP_temp_data)):
        if GHGRP_temp_data.loc[iplant,'match_flag'] == 1:
            continue
        county = GHGRP_temp_data['County'][iplant]
        if pd.isna(county):
            continue #this should no longer trigger since county data are corrected above

        # Enverus plants (not already matched) whose county name contains the GHGRP county name.
        # regex=False: the county is plain text, not a pattern; na=False: plants without a
        # county name are never treated as candidates.
        in_county = Env_ProcPlants_notmatched['CNTY_NAME'].str.contains(
            _county_search_text(county), case=False, na=False, regex=False)
        list_envmat = Env_ProcPlants_notmatched[in_county]
        if len(list_envmat) == 0:
            continue

        # if both lat and lon are closest to a single plant, assign that plant regardless of throughput
        # if lat and lon are closest to two different plants, then filter the throughput for non-zeros
        #      if all throughputs are zeros, assign based on whichever lat/lons are closest
        #      if some throughputs are non-zero, then assign based on whichever of those are closest to GHGRP
        #      (to increase chance of non-zero throughput)
        #MATCH IS NOT THE BEST SO COULD UPDATE TO MATCH BASED ON FACILITY NAME TOO
        lat_temp = round(GHGRP_temp_data['Lat'][iplant], 2)
        lon_temp = round(GHGRP_temp_data['Lon'][iplant], 2)
        (vallat, idxlat), (vallon, idxlon) = _nearest_in_lat_and_lon(list_envmat, lat_temp, lon_temp)
        env_label = None
        if idxlat == idxlon:
            env_label = list_envmat.index[idxlat]
        else:
            candidates = list_envmat
            list_envmat_filter = list_envmat[list_envmat['THROUGHPUT']>0]
            if not list_envmat_filter.empty:
                # prefer plants with non-zero throughput and redo the distance search among them
                candidates = list_envmat_filter
                (vallat, idxlat), (vallon, idxlon) = _nearest_in_lat_and_lon(candidates, lat_temp, lon_temp)
            #assign location based on whether the lat or lon value in enverus is closer to GHGRP loc data
            if vallat < vallon:
                env_label = candidates.index[idxlat]
            elif vallat > vallon:
                env_label = candidates.index[idxlon]
            # exact tie between the two distances: the plant is left unmatched

        if env_label is not None:
            _record_enverus_match(GHGRP_temp_data, iplant, Env_ProcPlants_notmatched, env_label)
            rows_to_delete = np.append(rows_to_delete, env_label)

    # Enverus plants that were never matched to a GHGRP plant in this year
    rows_to_delete = rows_to_delete.astype(int)
    Env_ProcPlants_notmatched = Env_ProcPlant_loc.drop(rows_to_delete)
    Env_ProcPlants_notmatched.reset_index(inplace=True, drop=True)

    vars()['Env_ProcPlants_notmatched'+'_'+year_range_str[iyear]] = Env_ProcPlants_notmatched.copy()
    vars()['GHGRP_plants'+'_'+year_range_str[iyear]] = GHGRP_temp_data.copy()

    print('Year ', year_range_str[iyear],': ', len(GHGRP_temp_data[GHGRP_temp_data['match_flag']==0]), ' of ', len(GHGRP_temp_data))

#### Step 2.1.4. Calculate the average emissions/throughput ratio for matched plants

In [None]:
# Emissions-to-throughput ratio for each year.
# NOTE: despite the "avg" name, the value stored is the *median* of the per-plant ratios.
avg_emis_throughput_ratio = np.zeros([num_years])

print('QA/QC: Average Emissions to Throughput Ratio')
for iyear in np.arange(0,num_years):
    GHGRP_temp_data = vars()['GHGRP_plants'+'_'+year_range_str[iyear]].copy()

    # float column from the start, so assigning ratios does not change its dtype
    GHGRP_temp_data['Emis_thru_ratio'] = 0.0
    for iplant in np.arange(0,len(GHGRP_temp_data)):
        GHGRP_temp_data.loc[iplant, 'Emis_thru_ratio'] = data_fn.safe_div(GHGRP_temp_data.loc[iplant, 'TgCH4'], \
                                                                        GHGRP_temp_data.loc[iplant, 'Env_throughput'])
    # ratios equal to zero are excluded from the median by turning them into NaN
    GHGRP_temp_data['Emis_thru_ratio'] = GHGRP_temp_data['Emis_thru_ratio'].replace({0:np.nan})
    avg_emis_throughput_ratio[iyear] = np.nanmedian(GHGRP_temp_data['Emis_thru_ratio'])
    if np.isnan(avg_emis_throughput_ratio[iyear]):
        # a NaN ratio would propagate into every emission estimated from throughput for this year
        print('WARNING: no valid emissions/throughput ratio for year ', year_range_str[iyear])

    vars()['GHGRP_plants'+'_'+year_range_str[iyear]] = GHGRP_temp_data.copy()
    print('Year ', year_range_str[iyear],': ', avg_emis_throughput_ratio[iyear])
    


#### Step 2.2. Make map of 'emissions' from each plant

In [None]:
# Map of plant-level emissions (Tg CH4) per year:
#   - matched plants contribute their GHGRP emissions at the GHGRP location
#   - unmatched Enverus plants contribute throughput * the yearly emissions/throughput ratio

def _inside_domain(lat, lon):
    """Return True when the point lies strictly inside the lat/lon limits of the grid."""
    return Lon_left < lon < Lon_right and Lat_low < lat < Lat_up


Map_CombProcPlants = np.zeros([len(Lat_01),len(Lon_01),num_years])
Map_CombProcPlants_nongrid = np.zeros([num_years])

print('QA/QC: Processing Plant Emissions Gridded:')
for iyear in np.arange(0, num_years):
    ghgrp_name = 'GHGRP_plants'+'_'+year_range_str[iyear]
    env_name = 'Env_ProcPlants_notmatched'+'_'+year_range_str[iyear]

    #first add GHGRP emissions for matched plants
    GHGRP_temp_data = vars()[ghgrp_name].copy()
    for iplant in range(len(GHGRP_temp_data)):
        if GHGRP_temp_data.loc[iplant,'match_flag'] != 1:
            continue
        plant_lat = GHGRP_temp_data['Lat'][iplant]
        plant_lon = GHGRP_temp_data['Lon'][iplant]
        plant_emis = GHGRP_temp_data.loc[iplant, 'TgCH4']
        if _inside_domain(plant_lat, plant_lon):
            cell_row = int((plant_lat - Lat_low)/Res01)
            cell_col = int((plant_lon - Lon_left)/Res01)
            Map_CombProcPlants[cell_row,cell_col,iyear] += plant_emis
        else:
            Map_CombProcPlants_nongrid[iyear] += plant_emis

    #then add calculated enverus emissions for all non-matched plants
    Env_temp_data = vars()[env_name].copy()
    year_ratio = avg_emis_throughput_ratio[iyear]
    for iplant in range(len(Env_temp_data)):
        plant_lat = Env_temp_data['Lat'][iplant]
        plant_lon = Env_temp_data['Lon'][iplant]
        estimated_emis = Env_temp_data.loc[iplant, 'THROUGHPUT']*year_ratio
        if not _inside_domain(plant_lat, plant_lon):
            Map_CombProcPlants_nongrid[iyear] += estimated_emis
            continue
        if Env_temp_data['NAME'][iplant]=='Sherwood I-XIII' and iyear ==0:
            print('here!')
            continue #skip the Sherwood plant in the first year, 2012 (not producing yet)
        cell_row = int((plant_lat - Lat_low)/Res01)
        cell_col = int((plant_lon - Lon_left)/Res01)
        Map_CombProcPlants[cell_row,cell_col,iyear] += estimated_emis

    # store the (unchanged) yearly tables back under their names
    vars()[ghgrp_name] = GHGRP_temp_data.copy()
    vars()[env_name] = Env_temp_data.copy()

    print('Year: ', year_range_str[iyear])
    print('On grid (Tg): ',np.sum(Map_CombProcPlants[:,:, iyear]))
    print('Off grid (Tg): ',np.sum(Map_CombProcPlants_nongrid[iyear]))
    


----------------
## Step 3. Read In EPA GHGI Data
---------------

### Step 3.1. Processing Emissions

In [None]:
# Emissions are in units of Mg (= 1x10-6 Tg)

# METHANE
# Column names come from the header row; the processing rows are then read with those names
names = pd.read_excel(EPA_NG_inputfile, sheet_name = "Inventory Emissions", usecols = "A:AG", skiprows = 5, header = 0, nrows = 1)
colnames = names.columns.values
EPA_emi_proc_NG = pd.read_excel(EPA_NG_inputfile, sheet_name = "Inventory Emissions", usecols = "A:AG", skiprows = 132, names = colnames, nrows = 15)
EPA_emi_proc_NG= EPA_emi_proc_NG.drop(columns = ['Unnamed: 0', 'Unnamed: 1', 'Unnamed: 3'])
# Remove parentheses from the source names so they can be matched against the mapping file.
# regex=False strips them as literal characters, independent of the pandas version
# (the default of `regex` changed from True to False in pandas 2.0, which would
# leave a pattern such as r"\(" unmatched).
for paren in ("(", ")"):
    EPA_emi_proc_NG['Source'] = EPA_emi_proc_NG['Source'].str.replace(paren, "", regex=False)
EPA_emi_proc_NG = EPA_emi_proc_NG.fillna('')
# Drop the years before the start of the gridded time series
EPA_emi_proc_NG = EPA_emi_proc_NG.drop(columns = [*range(1990, start_year,1)])
EPA_emi_proc_NG.reset_index(inplace=True, drop=True)
display(EPA_emi_proc_NG)

### Step 3.1.2. Read in Total Processing Emissions

In [None]:
# Total processing emissions from the GHGI summary sheet
# (with methane reductions accounted for); data are in kt

# Header row first, to recover the column names
names = pd.read_excel(EPA_NG_inputfile, sheet_name = "SUMMARY CH4", usecols = "A:AD", skiprows = 10, header = 0, nrows = 1)
colnames = names.columns.values

# Then the five summary rows, labelled with those names
EPA_emi_total_NG_CH4 = pd.read_excel(EPA_NG_inputfile, sheet_name = "SUMMARY CH4", usecols = "A:AD", skiprows = 17, names = colnames, nrows = 5)
first_column = EPA_emi_total_NG_CH4.columns[0]
years_before_start = list(range(1990, start_year))
EPA_emi_total_NG_CH4 = (EPA_emi_total_NG_CH4
                        .rename(columns={first_column: 'Source'})
                        .drop(columns=years_before_start)
                        .reset_index(drop=True))

print("EPA GHGI Emissions with Reductions (kt)")
display(EPA_emi_total_NG_CH4)

##### 3.1.3. Split Emissions into Gridding Groups (each Group will have the same proxy applied during the gridding)

In [None]:
# Final Emissions in Units of kt
# Use mapping proxy and source files to split the GHGI emissions

# Column positions of the first and (one past) the last inventory year
start_year_idx = EPA_emi_proc_NG.columns.get_loc(start_year)
end_year_idx = EPA_emi_proc_NG.columns.get_loc(end_year)+1
sum_emi = np.zeros(num_years)

DEBUG=1

ghgi_proc_groups = ghgi_proc_map['GHGI_Emi_Group'].unique()

#loop through all groups, finding the GHGI sources in that group and summing emissions for each year
for igroup in np.arange(0,len(ghgi_proc_groups)):
    vars()[ghgi_proc_groups[igroup]] = np.zeros([num_years])
    source_temp = ghgi_proc_map.loc[ghgi_proc_map['GHGI_Emi_Group'] == ghgi_proc_groups[igroup], 'GHGI_Source']
    # the source names are plain text: escape them so regex metacharacters are matched literally
    pattern_temp  = '|'.join(re.escape(source_name) for source_name in source_temp)
    emi_temp = EPA_emi_proc_NG[EPA_emi_proc_NG['Source'].str.contains(pattern_temp)]
    # only the start_year..end_year columns, so the result always has num_years entries
    emi_years = emi_temp.iloc[:,start_year_idx:end_year_idx]
    vars()[ghgi_proc_groups[igroup]][:] = np.where(emi_years =='',[0],emi_years).sum(axis=0)/float(1000) #convert Mg to kt

#Check against total summary emissions
print('QA/QC #1: Check Processing Emission Sum against GHGI Summary Emissions')
for iyear in np.arange(0,num_years):
    for igroup in np.arange(0,len(ghgi_proc_groups)):
        sum_emi[iyear] += vars()[ghgi_proc_groups[igroup]][iyear]

    # third row of the summary table (positional); the first column is the source name
    # NOTE(review): presumably the processing-segment row - confirm against the workbook
    summary_emi = EPA_emi_total_NG_CH4.iloc[2,iyear+1]
    #Check 1 - make sure that the sum over all groups equals the total reported
    diff1 = abs(sum_emi[iyear] - summary_emi)/((sum_emi[iyear] + summary_emi)/2)
    if DEBUG==1:
        print(summary_emi)
        print(sum_emi[iyear])
    if diff1 < 0.0001:
        print('Year ', year_range[iyear],': PASS, difference < 0.01%')
    else:
        # diff1 is a fraction; report it as a percentage to match the '%' label
        print('Year ', year_range[iyear],': FAIL (check Production & summary tabs): ', diff1*100,'%')

----------------
## Step 4. Grid Data (using spatial proxies)
---------------

### Step 4.1. Calculate the monthly and regional weighted arrays

#### Step 4.1.1 Assign the Appropriate Proxy Variable Names

In [None]:
# The names on the *left* need to match the 'NaturalGas_Processing_ProxyMapping' 'Proxy_Group' names 
# (these are initialized in Step 2). 
# The names on the right are the variable names used to calculate the proxies in this code.
# Names on the *right* need to match those from the code in Step 2.5
# NOTE: these are plain name bindings, not copies -- the left-hand and right-hand names refer
# to the same object, so in-place changes made through one name are visible through the other.

#These represent the calculated emissions at each processing plant (Tg CH4)
Map_Plants = Map_CombProcPlants
Map_Plants_nongrid = Map_CombProcPlants_nongrid

#### Step 4.1.2 Calculate the fractional proxies

In [None]:
# Calculate weighting arrays
# Find the fraction of processing plants in each grid cell, relative to the total counts (on and off grid)
# also weight by the number of days in each year
#
# Every proxy array named in proxy_proc_map['Proxy_Group'] (and its '<name>_nongrid'
# companion) is normalized IN PLACE, year by year, so that
#     sum(on-grid cells) + off-grid value = 1

import calendar

proxy_proc_map_unique = np.unique(proxy_proc_map['Proxy_Group'])

for iyear in np.arange(0,num_years):
    # Select the days-per-month table for this year (works for any leap year)
    if calendar.isleap(int(year_range[iyear])):
        year_days = np.sum(month_day_leap)
        month_days = month_day_leap
    else:
        year_days = np.sum(month_day_nonleap)
        month_days = month_day_nonleap  
    
    #Step 1a: weighted proxy ongrid = ongrid proxy * days each year
    #Step 1b: weighted proxy offgrid = offgrid proxy * days each year
    #Step 2a: normalized weighted proxy ongrid = weighted proxy in each grid cell / (sum weighted proxy ongrid + weighted proxy offgrid)
    #Step 2b: normalized weighted proxy offgrid = weighted proxy offgrid / (sum weighted proxy ongrid + weighted proxy offgrid)
    print('Check Sum of Proc. Proxy Arrays = 1 for: ', year_range[iyear])
    # Iterate over the unique proxy NAMES. Indexing proxy_proc_map rows by position while
    # looping len(unique) times would skip proxies (and repeat others) whenever several
    # mapping rows share one proxy group.
    for proxy_name in proxy_proc_map_unique:
        proxy_name = str(proxy_name)
        proxy_grid = vars()[proxy_name]                # same object as the notebook-level array
        proxy_nongrid = vars()[proxy_name+'_nongrid']
        proxy_grid[:,:,iyear] *= np.sum(month_days)
        proxy_nongrid[iyear] *= np.sum(month_days)
        temp_sum = float(np.sum(proxy_grid[:,:,iyear]) + np.sum(proxy_nongrid[iyear]))
        proxy_grid[:,:,iyear] = data_fn.safe_div(proxy_grid[:,:,iyear], temp_sum)
        proxy_nongrid[iyear] = data_fn.safe_div(proxy_nongrid[iyear], temp_sum)
        # QA: on-grid plus off-grid fractions should add up to one
        proxy_sum = np.sum(proxy_grid[:,:,iyear])+np.sum(proxy_nongrid[iyear])
        if proxy_sum >1.0001 or proxy_sum <0.9999:
            print('CHECK ', proxy_name,': ', proxy_sum)   
        else:
            print('PASS:', proxy_name)


### Step 4.2. Grid the National Emissions Data, then Calculate 0.1x0.1 degree flux maps

In [None]:
# For the processing segment...
# 1) make emission arrays with correct dimensions
# 2) calculate gridded emissions as: GHGI emissions * Proxy Map
# 3) move the Alaska share (AK_plant_fraction) from the grid to the non-gridded total
# 4) check that gridded + non-gridded emissions reproduce the GHGI summary total

Emissions = np.zeros([len(Lat_01),len(Lon_01),num_years])
Emissions_nongrid = np.zeros([num_years])
Emi_not_mapped_sum = np.zeros(num_years)

DEBUG =1
# Relative tolerance applied in the QA/QC check below (fraction, i.e. 0.0002 = 0.02%)
qa_tolerance = 0.0002

#loop through each emission group, where: Gridded emissions = National emissions * proxy map
for igroup in np.arange(0,len(proxy_proc_map)):
    emi_group = proxy_proc_map.loc[igroup,'GHGI_Emi_Group']
    proxy_group = proxy_proc_map.loc[igroup,'Proxy_Group']
    # Per-group arrays are created as notebook-level variables 'Ext_<group>' / 'Ext_<group>_nongrid'
    vars()['Ext_'+emi_group] = np.zeros([len(Lat_01),len(Lon_01),num_years])
    vars()['Ext_'+emi_group+'_nongrid'] = np.zeros([num_years])
    for iyear in np.arange(0,num_years):
        vars()['Ext_'+emi_group][:,:,iyear] += \
            vars()[emi_group][iyear] * vars()[proxy_group][:,:,iyear]
        vars()['Ext_'+emi_group+'_nongrid'][iyear] += \
            vars()[emi_group][iyear] * vars()[proxy_group+'_nongrid'][iyear]
        # Running totals over all groups
        Emissions[:,:,iyear] += vars()['Ext_'+emi_group][:,:,iyear]
        Emissions_nongrid[iyear] += vars()['Ext_'+emi_group+'_nongrid'][iyear]

#Subtract out AK emissions fraction (based on plant count ratio)
for iyear in np.arange(0,num_years):
    for igroup in np.arange(len(proxy_proc_map)):
        emi_group = proxy_proc_map.loc[igroup,'GHGI_Emi_Group']
        vars()['Ext_'+emi_group][:,:,iyear] -=  \
            vars()['Ext_'+emi_group][:,:,iyear]* AK_plant_fraction
    # The removed share is recorded separately and added to the non-gridded total.
    # Order matters: the share must be computed before Emissions itself is reduced.
    Emi_not_mapped_sum[iyear] = np.sum(Emissions[:,:,iyear])* AK_plant_fraction
    Emissions_nongrid[iyear] += Emi_not_mapped_sum[iyear]
    Emissions[:,:,iyear] -= Emissions[:,:,iyear]* AK_plant_fraction

    
# QA/QC gridded emissions
# Check sum of all gridded emissions + emissions not included in gridding (e.g., AK), and other non-gridded areas
print('QA/QC #1: Check weighted emissions against GHGI')   
for iyear in np.arange(0,num_years):
    calc_emi=0
    # Row 2 of the summary table is used as the reference total; column 0 is 'Source'
    summary_emi = EPA_emi_total_NG_CH4.iloc[2,iyear+1]
    for igroup in np.arange(0,len(proxy_proc_map)):
        calc_emi +=  np.sum(vars()['Ext_'+proxy_proc_map.loc[igroup,'GHGI_Emi_Group']][:,:,iyear]) 
    calc_emi += Emissions_nongrid[iyear]
    if DEBUG==1:
        print(summary_emi)
        print(calc_emi)
    # Symmetric relative difference (fraction)
    diff = abs(summary_emi-calc_emi)/((summary_emi+calc_emi)/2)
    if diff < qa_tolerance:
        # Report the tolerance that was actually applied
        print('Year ', year_range[iyear], ': PASS, difference < {:g}%'.format(qa_tolerance*100))
    else:
        print('Year ', year_range[iyear], ': FAIL -- Difference = ', diff*100,'%')

#### Step 4.2.1 Save gridded emissions (kt)

In [None]:
#save gridded emissions for each gridding group - for extension
# One netCDF variable 'Ext_<group>' (lat, lon, year) is written per gridding group, plus
# 'Emissions_nongrid' (year) holding the emissions that are not placed on the grid.

#Initialize file
data_IO_fn.initialize_netCDF(grid_emi_outputfile, netCDF_description, 0, year_range, loc_dimensions, Lat_01, Lon_01)

unique_groups = np.unique(proxy_proc_map['GHGI_Emi_Group'])
unique_groups = unique_groups[unique_groups != 'Emi_not_mapped']

# Pre-bind so the final 'del' cannot fail when there are no groups to write
data_out = None
ghgi_temp = None

nc_out = Dataset(grid_emi_outputfile, 'r+', format='NETCDF4')
try:
    for igroup in np.arange(0,len(unique_groups)):
        ext_name = 'Ext_'+unique_groups[igroup]
        print(ext_name)
        if len(np.shape(vars()[ext_name])) ==4:
            # sum month data if data is monthly -- the sum must be taken over the gridded
            # 'Ext_' array whose shape was just tested, not the national emission array
            ghgi_temp = np.sum(vars()[ext_name],axis=3)
        else:
            ghgi_temp = vars()[ext_name]

        # Write data to netCDF
        data_out = nc_out.createVariable(ext_name, 'f8', ('lat', 'lon','year'), zlib=True)
        data_out[:,:,:] = ghgi_temp[:,:,:]

    #save nongrid data to calculate non-grid fraction extension
    data_out = nc_out.createVariable('Emissions_nongrid', 'f8', ('year'), zlib=True)  
    data_out[:] = Emissions_nongrid[:]
finally:
    # Always release the file handle, even if a write fails part-way through
    nc_out.close()

#Confirm file location
print('** SUCCESS **')
print("Gridded emissions (kt) written to file: {}" .format(os.getcwd())+grid_emi_outputfile)
print(' ')

del data_out, ghgi_temp, nc_out

### Step 4.3 Calculate Gridded Fluxes (molec/s/cm2)

In [None]:
#Step 2 -- Calculate fluxes (molec./s/cm2)
#
# NOTE: the 'Ext_<group>' arrays are converted from kt to flux IN PLACE below, so re-running
# this cell without first re-running Step 4.2 would apply the conversion a second time.

import calendar

#Initialize arrays
check_sum_annual = np.zeros([num_years])
Flux_Emissions_Total_annual = np.zeros([len(Lat_01),len(Lon_01),num_years])
for igroup in np.arange(0,len(proxy_proc_map)):
    vars()['Flux_'+proxy_proc_map.loc[igroup,'GHGI_Emi_Group']+'_annual'] = np.zeros([len(Lat_01),len(Lon_01),num_years])


#Calculate fluxes
for iyear in np.arange(0,num_years):
    # Select the days-per-month table for this year (works for any leap year)
    if calendar.isleap(int(year_range[iyear])):
        year_days = np.sum(month_day_leap)
        month_days = month_day_leap
    else:
        year_days = np.sum(month_day_nonleap)
        month_days = month_day_nonleap 
    
    # calculate fluxes for annual data  (=kt * grams/kt *molec/mol *mol/g *s^-1 * cm^-2)
    conversion_factor_annual = 10**9 * Avogadro / float(Molarch4 * np.sum(month_days) * 24 * 60 *60) / area_matrix_01
    for igroup in np.arange(0,len(proxy_proc_map)):
        emi_group = proxy_proc_map.loc[igroup,'GHGI_Emi_Group']
        vars()['Ext_'+emi_group][:,:,iyear] *= conversion_factor_annual
        vars()['Flux_'+emi_group+'_annual'][:,:,iyear] = vars()['Ext_'+emi_group][:,:,iyear]
    # Total flux does not depend on the group, so it is computed once per year
    Flux_Emissions_Total_annual[:,:,iyear] = Emissions[:,:,iyear]*conversion_factor_annual
    check_sum_annual[iyear] += np.sum(Flux_Emissions_Total_annual[:,:,iyear]/conversion_factor_annual) #convert back to emissions to check at end

print(' ')
print('QA/QC #2: Check final gridded fluxes against GHGI')  
# for the sum, check the converted annual emissions (convert back from flux) plus all the non-gridded emissions
for iyear in np.arange(0,num_years):
    # Emissions_nongrid holds ALL non-gridded emissions (the off-grid proxy share plus the
    # Alaska share added in Step 4.2); Emi_not_mapped_sum alone is only the Alaska share.
    calc_emi = check_sum_annual[iyear] + Emissions_nongrid[iyear]
    # Row 2 of the summary table is used as the reference total; column 0 is 'Source'
    summary_emi = EPA_emi_total_NG_CH4.iloc[2,iyear+1]
    if DEBUG==1:
        print(calc_emi)
        print(summary_emi)
    # Symmetric relative difference (fraction)
    diff = abs(summary_emi-calc_emi)/((summary_emi+calc_emi)/2)
    if diff < 0.0001:
        print('Year ', year_range[iyear], ': PASS, difference < 0.01%')
    else:
        print('Year ', year_range[iyear], ': FAIL -- Difference = ', diff*100,'%')

-------------
## Step 5. Write gridded (0.1⁰x0.1⁰) data to netCDF files.
-------------

In [None]:
# Create the output file, then store the total annual flux array in its 'emi_ch4' variable.
data_IO_fn.initialize_netCDF(
    gridded_outputfile, netCDF_description, 0, year_range, loc_dimensions, Lat_01, Lon_01
)

# Re-open the freshly initialized file in update mode and fill the flux variable
nc_out = Dataset(gridded_outputfile, 'r+', format='NETCDF4')
emi_ch4_var = nc_out.variables['emi_ch4']
emi_ch4_var[:,:,:] = Flux_Emissions_Total_annual
nc_out.close()

# Tell the user where the file was written
print('** SUCCESS **')
print("Gridded annual natural gas processing fluxes written to file: " + os.getcwd() + gridded_outputfile)
print('')

-------------
## Step 6. Plot Data
-------------

#### 6.1 Plot Annual Emission Fluxes

In [None]:
# Draw the annual emission flux map for each inventory year.
# save_flag / save_outfile are passed straight to the plotting helper
# (presumably 0 / '' mean "do not save" -- confirm against data_plot_fn).
scale_max, save_flag, save_outfile = 10, 0, ''
data_plot_fn.plot_annual_emission_flux_map(
    Flux_Emissions_Total_annual, Lat_01, Lon_01, year_range,
    title_str, scale_max, save_flag, save_outfile,
)

#### 6.2 Plot Difference Between First and Last Inventory Year

In [None]:
# Draw the map of the flux difference between the last and first inventory year.
# save_flag / save_outfile are passed straight to the plotting helper
# (presumably 0 / '' mean "do not save" -- confirm against data_plot_fn).
save_flag, save_outfile = 0, ''
data_plot_fn.plot_diff_emission_flux_map(
    Flux_Emissions_Total_annual, Lat_01, Lon_01, year_range,
    title_diff_str, save_flag, save_outfile,
)

#### 6.3 Plot Activity Data Heat Maps

In [None]:
#Map_Plants

# Activity_Map = 0.1x0.1 map of activity data (counts or absolute units)
# Plot_Frac    = 0 or 1 (0= plot activity data in absolute counts, 1= plot fractional activity data)
# Lat          = 0.1 degree Lat values (select range)
# Lon          = 0.1 degree Lon values (select range)
# year_range   = array of inventory years
# title_str    = title of map
# legend_str   = title of legend
# scale_max    = maximum of color scale

Activity_Map = Map_CombProcPlants#Map_Plants
Plot_Frac = 1
Lat = Lat_01
Lon = Lon_01
title_str2 = "Proxy - Processing Plant Emissions"
legend_str = "Annual Fraction of National Processing Plant Emissions"
scale_max = 0.05

# Index window of the full grid that is plotted
lat_window = slice(43, 300)
lon_window = slice(50, 632)

# Work out once which axis of Activity_Map is the year axis; fail loudly if neither
# the first nor the third axis matches, instead of hitting an undefined 'zp' later.
if np.shape(Activity_Map)[0] == len(year_range):
    year_axis = 0
elif np.shape(Activity_Map)[2] == len(year_range):
    year_axis = 2
else:
    raise ValueError('Activity_Map has shape {} but neither axis 0 nor axis 2 has length {}'
                     .format(np.shape(Activity_Map), len(year_range)))

# Colormap with a transparency ramp: the first 'slopen' entries fade in from fully
# transparent, the remaining entries are opaque. Built once; it is the same for every year.
my_cmap = copy(plt.cm.get_cmap('rainbow',lut=3000))
my_cmap._init()
slopen = 200
alphas_slope = np.abs(np.linspace(0, 1.0, slopen))
# the lookup table holds the requested colors plus extra under/over/bad rows
alphas_stable = np.ones(my_cmap._lut.shape[0]-slopen)
alphas = np.concatenate((alphas_slope, alphas_stable))
my_cmap._lut[:,-1] = alphas
my_cmap.set_under('gray', alpha=0)

# Plot coordinates, offset by 0.05 (presumably half a 0.1-degree cell -- confirm)
Lon_cor = Lon[lon_window]-0.05
Lat_cor = Lat[lat_window]-0.05
xpoints = Lon_cor
ypoints = Lat_cor
yp,xp = np.meshgrid(ypoints,xpoints)

for iyear in np.arange(0,len(year_range)): 
    # Select this year's 2-D map regardless of where the year axis sits
    if year_axis == 0:
        year_map = Activity_Map[iyear,:,:]
    else:
        year_map = Activity_Map[:,:,iyear]
    zp = year_map[lat_window,lon_window]
    if Plot_Frac ==1:
        # fraction of the full-grid total for this year
        zp = zp/np.sum(year_map)
    
    fig, ax = plt.subplots(dpi=300)
    m = Basemap(llcrnrlon=xp.min(), llcrnrlat=yp.min(), urcrnrlon=xp.max(),
                urcrnrlat=yp.max(), projection='merc', resolution='h', area_thresh=5000)
    m.drawmapboundary(fill_color='Azure')
    m.fillcontinents(color='FloralWhite', lake_color='Azure',zorder=1)
    m.drawcoastlines(linewidth=0.5,zorder=3)
    m.drawstates(linewidth=0.25,zorder=3)
    m.drawcountries(linewidth=0.5,zorder=3)
    
    # Project lon/lat to map coordinates and draw one marker per grid cell
    xpi,ypi = m(xp,yp)
    plot = m.scatter(xpi,ypi,s=20,c=zp.transpose(),cmap=my_cmap,zorder=2,vmin = 10**-15,snap = True,vmax = scale_max)
    cb = m.colorbar(plot, location = "bottom", pad = "1%")        
    tick_locator = ticker.MaxNLocator(nbins=5)
    cb.locator = tick_locator
    cb.update_ticks()
    
    cb.ax.set_xlabel(legend_str,fontsize=10)
    cb.ax.tick_params(labelsize=10)
    Titlestring = str(year_range[iyear])+' '+title_str2
    plt.title(Titlestring, fontsize=14);
    plt.show();

In [None]:
# Report the elapsed run time in hours.
# 'it' is defined outside this cell (presumably the start timestamp in seconds -- confirm).
SECONDS_PER_HOUR = 60*60
ct = datetime.now()
ft = ct.timestamp()
time_elapsed = (ft-it)/SECONDS_PER_HOUR
print('Time to run: {!s} hours'.format(time_elapsed))
print('** GEPA_1B2b_Natural_Gas_Systems_Processing: COMPLETE **')