# Gridded EPA Methane Inventory
## Category: 2B8 Petrochemical Production & 2C2 Ferroalloy Production

***
#### Authors: 
Joannes D. Maasakkers, Candice F. Z. Chen, Erin E. McDuffie, Revathi Muralidharan
#### Date Last Updated: 
see Step 0
#### Notebook Purpose: 
This Notebook calculates and reports annual gridded (0.1⁰x0.1⁰) methane emission fluxes (molec./cm2/s) from production of ferroalloy and petrochemicals in the CONUS region between 2012-2018. 
#### Summary & Notes:
EPA GHGI emissions from petrochemical and ferroalloy production facilities are read in at the national level. Emissions are first allocated to the facility level using relative facility-level methane emissions from the Greenhouse Gas Reporting Program (GHGRP, Subparts X and K). Resulting facility-level emissions are then distributed onto a 0.1⁰x0.1⁰ grid using a map of grid-level petrochemical and ferroalloy facility locations (from GHGRP). Emissions are then converted to emission fluxes, and the annual emission fluxes (molec./cm2/s) are written to final netCDFs in the ‘/code/Final_Gridded_Data/’ folder.
***

-------
## Step 0. Set-Up Notebook Modules, Functions, and Local Parameters and Constants
_____

In [None]:
import os
import time

# Time stamp (seconds since the epoch) of the last save of this notebook file
modtime = os.path.getmtime('./2B8_2C2_Industry.ipynb')
# Render that time stamp in local time as 'YYYY-MM-DD HH:MM:SS'
local_struct_time = time.localtime(modtime)
modificationTime = time.strftime('%Y-%m-%d %H:%M:%S', local_struct_time)

# Report the last-modified date and the directory the notebook is run from
print("This file was last modified on: ", modificationTime)
print('')
working_directory = os.getcwd()
print("The directory we are working in is {}" .format(working_directory))

In [None]:
## Include plots within notebook
# (IPython line magic; it only works inside a notebook/IPython session)
%matplotlib inline

In [None]:
# Import base modules
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import re
import datetime
from copy import copy

# Import additional modules
# Load plotting package Basemap 
# first, set the project library for basemap
# 'r' in front of string is necessary for this block of code to run
from mpl_toolkits.basemap import Basemap

# Load netCDF (for manipulating netCDF file types)
from netCDF4 import Dataset

# Set up ticker
import matplotlib.ticker as ticker

#add path for the global function module (file)
import sys
module_path = os.path.abspath(os.path.join('../Global_Functions/'))
if module_path not in sys.path:
    sys.path.append(module_path)

# Load Tabula (for reading tables from PDFs)
import tabula as tb   
    
# Load user-defined global functions (modules)
import data_load_functions as data_load_fn
import data_functions as data_fn
import data_IO_functions as data_IO_fn
import data_plot_functions as data_plot_fn

In [None]:
# INPUT FILES
# Global (shared) input files; the order of the returned list is fixed by
# data_load_fn.load_global_file_names()
global_filenames = data_load_fn.load_global_file_names()
State_ANSI_inputfile = global_filenames[0]
County_ANSI_inputfile = global_filenames[1]
pop_map_inputfile = global_filenames[2]
Grid_area01_inputfile = global_filenames[3]
Grid_area001_inputfile = global_filenames[4]
Grid_state001_ansi_inputfile = global_filenames[5]
Grid_county001_ansi_inputfile = global_filenames[6]

# Input files used only in this notebook
# National GHGI emissions (ferroalloys and petrochemicals)
EPA_inputfile = '../Global_InputData/GHGI/Ch2_Industry/CH4 emissions from ferroalloys and petrochemicals 1990-2018.xlsx'

# Mapping between GHGI sources, emission groups and spatial proxies
Ind_Mapping_inputfile = './InputData/Industry_ProxyMapping.xlsx'
# Activity Data
# GHGRP files (Subpart X = petrochemical, Subpart K = ferroalloy)
EPA_ghgrp_petrofacility_inputfile = './InputData/SubpartX_Petrochemical_Facilities.csv'
EPA_ghgrp_petro_inputfile = './InputData/SubpartX_Petrochemical.csv'
EPA_ghgrp_ferrofacility_inputfile = './InputData/SubpartK_Ferroalloy_Facilities.csv'
EPA_ghgrp_ferro_inputfile = './InputData/SubpartK_Ferroalloy.csv'


# OUTPUT FILES
gridded_outputfile = '../Final_Gridded_Data/EPA_v2_2B8_2C2_Industry.nc'
# Fixed: the description previously named categories 2B5 and 2C1, which does not
# match this notebook (2B8 petrochemical and 2C2 ferroalloy) or the output file name
netCDF_description = 'Gridded EPA Inventory - Industry - IPCC Source Category 2B8 and 2C2'
gridded_petro_outputfile = '../Final_Gridded_Data/EPA_v2_2B8_Industry_Petrochemical.nc'
netCDF_petro_description = 'Gridded EPA Inventory - Industry Emissions - IPCC Source Category 2B8 - Petrochemical'
gridded_ferro_outputfile = '../Final_Gridded_Data/EPA_v2_2C2_Industry_Ferroalloy.nc'
netCDF_ferro_description = 'Gridded EPA Inventory - Industry Emissions - IPCC Source Category 2C2 - Ferroalloy'

# Plot titles
title_str = "EPA methane emissions from industry"
title_petro_str = "EPA methane emissions from petrochemical industry"
title_ferro_str = "EPA methane emissions from ferroalloy production"
title_diff_str = "Emissions from industry total difference: 2018-2012"
title_petro_diff_str = "Emissions from petrochemical difference: 2018-2012"
title_ferro_diff_str = "Emissions from ferroalloy total difference: 2018-2012"

# Gridded emissions (kt) per gridding group, saved for the inventory extension
grid_emi_outputfile = '../Final_Gridded_Data/Extension/v2_input_data/Ind_Petro_Ferro_Grid_Emi2.nc'

In [None]:
# Emission time series covered by this notebook (both end points inclusive)
start_year = 2012
end_year = 2018
year_range = list(range(start_year, end_year + 1))
year_range_str = list(map(str, year_range))
num_years = len(year_range)

# Physical constants and unit-conversion parameters
Avogadro   = 6.02214129 * 10**(23)  # molecules/mol
Molarch4   = 16.04                  # g/mol
Res01      = 0.1                    # grid resolution, degrees
tg_scale   = 0.001                  # Tg scale number [New file allows for the exclusion of the territories]
GWP_CH4    = 25                     # global warming potential of CH4 relative to CO2 (mass -> CO2e)

# Continental US lat/lon limits in degrees (for netCDF files)
Lon_left, Lon_right = -130, -60
Lat_low, Lat_up = 20, 55
loc_dimensions = [Lat_low, Lat_up, Lon_left, Lon_right]

# Index bounds of the CONUS window inside the global 0.1-degree grid
# (rows 1100:1450, columns 500:1200)
ilat_start = int((90+Lat_low)/Res01)
ilat_end = int((90+Lat_up)/Res01)
ilon_start = abs(int((-180-Lon_left)/Res01))
ilon_end = abs(int((-180-Lon_right)/Res01))

# Number of days in each month; the two lists differ only in February
month_day_leap = [31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
month_day_nonleap = list(month_day_leap)
month_day_nonleap[1] = 28

# Month names
month_range_str = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December',
]
num_months = len(month_range_str)

In [None]:
%%javascript
// Set the notebook output-area auto-scroll threshold to 9999
// (presumably so that long cell outputs are not collapsed into a scroll box -- confirm)
IPython.OutputArea.auto_scroll_threshold = 9999;

In [None]:
# Record when the run started; `it` (POSIX seconds) is the reference point
# for the elapsed-time report at the end of the notebook
ct = datetime.datetime.now()
it = ct.timestamp()
print("current time:", ct)

____
## Step 1. Load in State ANSI data and Area Maps
_____

In [None]:
# Load the grid-cell area maps used to convert gridded emissions to fluxes.
#
# NOTE: this notebook allocates emissions from facility locations only, so the
# state/county ANSI tables and maps (data_load_fn.load_state_ansi,
# load_state_ansi_map, load_county_ansi_map) are intentionally not loaded here.

# 0.01 x 0.01 degree data: grid-cell area map (m2) with its lat/lon axes.
# (Previously this file was loaded twice in this cell; once is sufficient.)
area_map, lat001, lon001 = data_load_fn.load_area_map_001(Grid_area001_inputfile)

# 0.1 x 0.1 degree data: grid-cell area map with its lat/lon axes
area_map01, Lat01, Lon01 = data_load_fn.load_area_map_01(Grid_area01_inputfile)[0:3]

# Select the Continental US window of the 0.1 x 0.1 degree axes
Lat_01 = Lat01[ilat_start:ilat_end]
Lon_01 = Lon01[ilon_start:ilon_end]

# Regrid the 0.01-degree area map onto the CONUS 0.1-degree grid
area_matrix_01 = data_fn.regrid001_to_01(area_map, Lat_01, Lon_01)
area_matrix_01 *= 10000  # convert from m2 to cm2

# Print time
ct = datetime.datetime.now()
print("current time:", ct)

-------------
## Step 2: Read-in and Format Proxy Data
-------------

#### Step 2.1 Read In Proxy Mapping File & Make Proxy Arrays

In [None]:
# Load the GHGI source -> emission group map.
# The first spreadsheet row is skipped and the next row supplies the column names,
# so a single read is enough (the sheet used to be read twice just to get the names).
ghgi_ind_map = pd.read_excel(Ind_Mapping_inputfile, sheet_name="GHGI Map - Ind",
                             usecols="A:B", skiprows=1, header=0)

# Drop rows that are not assigned to an emission group ('na' text or empty cell)
has_group = (ghgi_ind_map['GHGI_Emi_Group'] != 'na') & ghgi_ind_map['GHGI_Emi_Group'].notna()
ghgi_ind_map = ghgi_ind_map[has_group].copy()

# Remove parentheses from the source names. The names are later joined into a
# regular expression for str.contains(), where '(' and ')' would be read as groups.
# regex=True is passed explicitly: pandas >= 2.0 defaults to literal matching, in
# which case the old patterns r"\(" and r"\)" would silently match nothing.
ghgi_ind_map['GHGI_Source'] = ghgi_ind_map['GHGI_Source'].str.replace(r"[()]", "", regex=True)
ghgi_ind_map.reset_index(inplace=True, drop=True)
display(ghgi_ind_map)

# Load the emission group -> proxy map
proxy_ind_map = pd.read_excel(Ind_Mapping_inputfile, sheet_name="Proxy Map - Ind",
                              usecols="A:D", skiprows=1, header=0)
display((proxy_ind_map))

# Create empty, zero-filled arrays named after each proxy and emission group:
#   <Proxy_Group>          gridded proxy          [lat, lon, year]
#   <Proxy_Group>_nongrid  proxy outside the grid [year]
#   <GHGI_Emi_Group>       national emissions     [year]
# vars() at notebook (module) level is the global namespace, so these become
# ordinary notebook variables that later cells look up by name.
for igroup in np.arange(0, len(proxy_ind_map)):
    proxy_name = proxy_ind_map.loc[igroup, 'Proxy_Group']
    emi_group_name = proxy_ind_map.loc[igroup, 'GHGI_Emi_Group']
    vars()[proxy_name] = np.zeros([len(Lat_01), len(Lon_01), num_years])
    vars()[proxy_name + '_nongrid'] = np.zeros([num_years])
    vars()[emi_group_name] = np.zeros([num_years])

#### Step 2.1. Read in GHGRP Data

In [None]:
#### Read in GHGRP Subpart X / Subpart K data, CH4 based allocation (metric for carbon input)
# NOTE that the facility level methane emissions are different when downloaded from ENVIRO facts vs FLIGHT
# fewer facilities have emissions in Envirofacts than FLIGHT and emissions are overall lower
# FLIGHT downloaded data contains Subpart C and Subpart X/K emissions.
# Subpart C is general stationary combustion emissions.
# We have adjusted the code here to only read in Subpart X/K data from Envirofacts
# We also confirmed that methane emissions from FLIGHT download = Envirofacts Subpart C + Subpart X in many cases


def _load_ghgrp_subpart(facility_file, emissions_file, subpart):
    """Read one GHGRP subpart and return facility-level methane emissions.

    The petrochemical (Subpart X) and ferroalloy (Subpart K) files share the same
    layout, differing only in the subpart letter that prefixes the emission columns.

    Parameters
    ----------
    facility_file : str
        CSV with the facility information table (V_GHG_EMITTER_FACILITIES.* columns).
    emissions_file : str
        CSV with the subpart-level emissions (<subpart>_SUBPART_LEVEL_INFORMATION.* columns).
    subpart : str
        Subpart letter used as column prefix ('X' or 'K').

    Returns
    -------
    pandas.DataFrame
        Facility information merged with the methane emission rows for the years in
        `year_range`, with an added 'emis_tg_tot' column (= reported GHG_QUANTITY / 1e6).
    """
    info_prefix = 'V_GHG_EMITTER_FACILITIES.'
    emis_prefix = subpart + '_SUBPART_LEVEL_INFORMATION.'

    facility_info = pd.read_csv(facility_file)
    facility_emis = pd.read_csv(emissions_file)

    # Keep methane rows only (in metric tonnes CH4) and only the years of interest
    facility_emis = facility_emis[facility_emis[emis_prefix + 'GHG_NAME'] == 'Methane']
    facility_emis = facility_emis[facility_emis[emis_prefix + 'REPORTING_YEAR'].isin(year_range)]
    facility_info = facility_info[facility_info[info_prefix + 'YEAR'].isin(year_range)]
    facility_info.reset_index(inplace=True, drop=True)
    facility_emis.reset_index(inplace=True, drop=True)

    # Give the shared keys the same names in both tables so they can be merged
    facility_info.rename(columns={info_prefix + 'YEAR': 'Year',
                                  info_prefix + 'FACILITY_ID': 'Facility_ID',
                                  info_prefix + 'LONGITUDE': 'LONGITUDE',
                                  info_prefix + 'LATITUDE': 'LATITUDE'}, inplace=True)
    facility_emis.rename(columns={emis_prefix + 'REPORTING_YEAR': 'Year',
                                  emis_prefix + 'FACILITY_ID': 'Facility_ID'}, inplace=True)

    # pd.merge without `on` joins on every column name the two tables share
    # (presumably only 'Year' and 'Facility_ID' after the renames -- confirm)
    merged = pd.merge(facility_info, facility_emis)
    merged['emis_tg_tot'] = merged[emis_prefix + 'GHG_QUANTITY']/1e6
    return merged


# Petrochemical facilities (GHGRP Subpart X)
ghgrp_petro = _load_ghgrp_subpart(EPA_ghgrp_petrofacility_inputfile, EPA_ghgrp_petro_inputfile, 'X')
#display(ghgrp_petro)

# Ferroalloy facilities (GHGRP Subpart K)
ghgrp_ferro = _load_ghgrp_subpart(EPA_ghgrp_ferrofacility_inputfile, EPA_ghgrp_ferro_inputfile, 'K')
#display(ghgrp_ferro)


#if reading in FLIGHT data: (includes subpart C emissions plus K and X)
#Read Petrochemical GHGRP (convert metric tons CO2e to Tg)
#ghgrp_petro = pd.read_csv('Data/Flight_SubpartX_Petrochemical.csv')
#ghgrp_petro['emis_tg_tot'] = ghgrp_petro['GHG QUANTITY (METRIC TONS CO2e)']*0.000001/GWP_CH4 #tg

#Read Ferroalloy GHGRP
#ghgrp_ferro = pd.read_csv('Data/Flight_SubpartK_Ferro.csv')
#ghgrp_ferro['emis_tg_tot'] = ghgrp_ferro['GHG QUANTITY (METRIC TONS CO2e)']*0.000001/GWP_CH4 #tg

### Step 2.2 Allocate Proxy data to Grid Array

In [None]:
# For each facility, use the fraction of reported emissions for each facility relative to the national total
# (= reported facility emissions / sum of all reported facility emissions) to allocate the national GHGI emissions
# Facility emissions for each year are first stored on a grid array for facilities in and outside CONUS region


def _grid_facility_emissions(ghgrp_df):
    """Accumulate facility emissions onto the CONUS 0.1-degree grid, year by year.

    Parameters
    ----------
    ghgrp_df : pandas.DataFrame
        Facility table with 'Year', 'LATITUDE', 'LONGITUDE' and 'emis_tg_tot' columns.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        Gridded emissions [lat, lon, year] and, per year, the summed emissions of
        facilities that fall outside the grid limits (or have missing coordinates).
    """
    gridded = np.zeros([len(Lat_01), len(Lon_01), num_years])
    nongrid = np.zeros([num_years])

    for iyear, year in enumerate(year_range):
        year_df = ghgrp_df[ghgrp_df['Year'] == year]
        for lat, lon, emis in zip(year_df['LATITUDE'], year_df['LONGITUDE'], year_df['emis_tg_tot']):
            # Strict inequalities: a facility exactly on the domain edge counts as off-grid.
            # NaN coordinates fail every comparison and therefore also end up off-grid.
            if Lon_left < lon < Lon_right and Lat_low < lat < Lat_up:
                ilat = int((lat - Lat_low)/Res01)
                ilon = int((lon - Lon_left)/Res01)
                gridded[ilat, ilon, iyear] += emis
            else:
                nongrid[iyear] += emis
    return gridded, nongrid


# Petro emissions
Map_ghgrppetro, Map_ghgrppetro_nongrid = _grid_facility_emissions(ghgrp_petro)

# Ferro emissions
# Fixed: the grid indices of ferroalloy facilities were previously computed from the
# *petrochemical* table (petro_temp), which placed ferroalloy emissions at
# petrochemical facility locations (or raised a KeyError for a missing row).
Map_ghgrpferro, Map_ghgrpferro_nongrid = _grid_facility_emissions(ghgrp_ferro)

----------------
## Step 3. Read In EPA GHGI Data
---------------

#### Step 3.1. Read in Data

In [None]:
# Read national GHGI emissions for petrochemicals and ferroalloys (1990-2018), in kt

# Inventory years before the gridded time series starts; these columns are dropped
pre_series_years = list(range(1990, start_year))

# Petrochemicals: first column becomes 'Source', early years are removed
EPA_petro_emissions = pd.read_excel(EPA_inputfile, sheet_name="Petrochemicals", skiprows=2)
EPA_petro_emissions = EPA_petro_emissions.rename(columns={EPA_petro_emissions.columns[0]: 'Source'})
EPA_petro_emissions = EPA_petro_emissions.drop(columns=pre_series_years)
EPA_petro_emissions['Source'] = 'Total Petrochemicals'

# Ferroalloy: same treatment, after removing the unnamed second column;
# the rows labelled 0 and 2 are discarded as well
EPA_ferro_emissions = pd.read_excel(EPA_inputfile, sheet_name="Ferroalloys", skiprows=2)
EPA_ferro_emissions = EPA_ferro_emissions.drop(columns=['Unnamed: 1'])
EPA_ferro_emissions = EPA_ferro_emissions.rename(columns={EPA_ferro_emissions.columns[0]: 'Source'})
EPA_ferro_emissions = EPA_ferro_emissions.drop(columns=pre_series_years)
EPA_ferro_emissions = EPA_ferro_emissions.drop(index=[0, 2])
EPA_ferro_emissions['Source'] = 'Total Ferroalloy'

# One table holding both sources
EPA_Industry = pd.concat([EPA_petro_emissions, EPA_ferro_emissions])
display(EPA_Industry)

# National industry total per year: first remaining row of each table, year columns only
ferro_total_row = EPA_ferro_emissions.iloc[0, 1:]
petro_total_row = EPA_petro_emissions.iloc[0, 1:]
Total_EPA_Industry_Emissions = ferro_total_row + petro_total_row

#### 3.2. Split Emissions into Gridding Groups (each Group will have the same proxy applied during the gridding)

In [None]:
# Final Emissions in Units of kt
# Use mapping proxy and source files to split the GHGI emissions

DEBUG =1

# Column positions of the first and (one past) the last year of the time series
start_year_idx = EPA_Industry.columns.get_loc(start_year)
end_year_idx = EPA_Industry.columns.get_loc(end_year)+1
sum_emi = np.zeros(num_years)

ghgi_ind_groups = ghgi_ind_map['GHGI_Emi_Group'].unique()

# For every emission group: find the GHGI sources mapped to it and sum their
# emissions per year. The result is stored in a notebook variable named after the group.
for igroup in np.arange(0,len(ghgi_ind_groups)):
    group_name = ghgi_ind_groups[igroup]
    source_temp = ghgi_ind_map.loc[ghgi_ind_map['GHGI_Emi_Group'] == group_name, 'GHGI_Source']
    # str.contains treats the joined names as one regular expression ("A|B|...")
    pattern_temp  = '|'.join(source_temp)
    ##DEBUG## display(pattern_temp)
    emi_temp = EPA_Industry[EPA_Industry['Source'].str.contains(pattern_temp)]
    ##DEBUG## display(emi_temp)
    # Restrict to start_year..end_year (end_year_idx used to be computed but ignored,
    # which only worked while the table happened to end at end_year)
    emi_years = emi_temp.iloc[:,start_year_idx:end_year_idx]
    vars()[group_name] = np.zeros([num_years])
    # Empty-string cells count as zero emissions
    vars()[group_name][:] = np.where(emi_years =='',[0],emi_years).sum(axis=0)#/float(1000) #convert Mg to kt

#Check against total summary emissions
print('QA/QC #1: Check Processing Emission Sum against GHGI Summary Emissions')
for iyear in np.arange(0,num_years):
    for igroup in np.arange(0,len(ghgi_ind_groups)):
        sum_emi[iyear] += vars()[ghgi_ind_groups[igroup]][iyear]

    summary_emi = Total_EPA_Industry_Emissions[year_range[iyear]]
    #Check 1 - make sure that the sums from all the groups equal the totals reported
    # diff1 is a relative difference expressed as a fraction (0.0001 = 0.01%)
    diff1 = abs(sum_emi[iyear] - summary_emi)/((sum_emi[iyear] + summary_emi)/2)
    if DEBUG ==1:
        print(summary_emi)
        print(sum_emi[iyear])
    if diff1 < 0.0001:
        print('Year ', year_range[iyear],': PASS, difference < 0.01%')
    else:
        # Fixed: the fraction is converted to percent before being labelled '%'
        print('Year ', year_range[iyear],': FAIL (check Production & summary tabs): ', diff1*100,'%')

----------------
## Step 4. Grid Data (using spatial proxies)
---------------

#### Step 4.1.1 Define Proxy maps

In [None]:
# Bind the proxy names used in the 'Industry_ProxyMapping' file ('Proxy_Group'
# column, left-hand side) to the GHGRP-based arrays calculated earlier in this
# notebook (right-hand side). Each proxy has a gridded array and a matching
# '_nongrid' array. These are plain name bindings, so both names refer to the
# same array object.

Map_Ferro, Map_Ferro_nongrid = Map_ghgrpferro, Map_ghgrpferro_nongrid
Map_Petro, Map_Petro_nongrid = Map_ghgrppetro, Map_ghgrppetro_nongrid

#### Step 4.1.2 Calculate the fractional proxies

In [None]:
# Calculate weighting arrays (i.e., fractional arrays)
# also weight by the number of days in each year
import calendar

proxy_ind_map_unique = np.unique(proxy_ind_map['Proxy_Group'])
#print(proxy_ind_map_unique)

for iyear in np.arange(0,num_years):
    # Leap years are detected from the calendar rather than hard-coded (was: 2012, 2016)
    if calendar.isleap(year_range[iyear]):
        month_days = month_day_leap
    else:
        month_days = month_day_nonleap
    year_days = np.sum(month_days)

    #Step 1a: weighted proxy ongrid = ongrid proxy * days each year
    #Step 1b: weighted proxy offgrid = offgrid proxy * days each year
    #Step 2a: normalized weighted proxy ongrid = weighted proxy in each grid cell / (sum weighted proxy ongrid + weighted proxy offgrid)
    #Step 2b: normalized weighted proxy offgrid = weighted proxy offgrid / (sum weighted proxy ongrid + weighted proxy offgrid)
    print('Check That Sum of Ind. Proxy Arrays = 1 for: ', year_range[iyear])
    # Fixed: iterate over the unique proxy *names*. The loop used to count the unique
    # proxies but index rows of proxy_ind_map with that counter, so a proxy shared by
    # several rows could be normalized twice while another was skipped.
    for proxy_name in proxy_ind_map_unique:
        proxy_grid = vars()[proxy_name]
        proxy_nongrid = vars()[proxy_name+'_nongrid']

        # Steps 1a/1b (slices are views, so the named arrays are updated in place)
        proxy_grid[:,:,iyear] *= year_days
        proxy_nongrid[iyear] *= year_days

        # Steps 2a/2b
        temp_sum = float(np.sum(proxy_grid[:,:,iyear]) + np.sum(proxy_nongrid[iyear]))
        ##DEBUG## print(temp_sum)
        proxy_grid[:,:,iyear] = data_fn.safe_div(proxy_grid[:,:,iyear], temp_sum)
        proxy_nongrid[iyear] = data_fn.safe_div(proxy_nongrid[iyear], temp_sum)

        # QA: on-grid plus off-grid fractions should add up to one
        proxy_sum = np.sum(proxy_grid[:,:,iyear])+np.sum(proxy_nongrid[iyear])
        ##DEBUG## print(proxy_sum)
        if proxy_sum >1.0001 or proxy_sum <0.9999:
            print('Check ', proxy_name,': ', proxy_sum)
        else:
            print('Pass')

#### Step. 4.2. Allocate emissions to the CONUS region (0.1x0.1)

In [None]:
# Process...
# 1) make emissions array with correct dimensions
# 2) calculate emissions as emissions = GHGI emissions * Proxy Map
# 3) check the allocated total against the national GHGI total

DEBUG =1
# Relative difference (fraction) tolerated by the QA check below
qa_tolerance = 0.0002

grid_shape = [len(Lat_01),len(Lon_01),num_years]
Emissions = np.zeros(grid_shape)
Emissions_Ferro = np.zeros(grid_shape)
Emissions_Petro = np.zeros(grid_shape)
Emissions_nongrid = np.zeros([num_years])
Emi_not_mapped_sum = np.zeros(num_years)  # stays zero here: nothing is left unmapped

#loop through each emission group, where: Gridded emissions = National emissions * proxy map
for igroup in np.arange(0,len(proxy_ind_map)):
    emi_group = proxy_ind_map.loc[igroup,'GHGI_Emi_Group']
    proxy_group = proxy_ind_map.loc[igroup,'Proxy_Group']

    # Per-group outputs, published as notebook variables Flux_<group>,
    # Flux_<group>_nongrid and Ext_<group>
    flux_grid = np.zeros(grid_shape)
    flux_nongrid = np.zeros([num_years])
    ext_grid = np.zeros(grid_shape)
    vars()['Flux_'+emi_group] = flux_grid
    vars()['Flux_'+emi_group+'_nongrid'] = flux_nongrid
    vars()['Ext_'+emi_group] = ext_grid

    national_emi = vars()[emi_group]              # [year]
    proxy_grid = vars()[proxy_group]              # [lat, lon, year]
    proxy_nongrid = vars()[proxy_group+'_nongrid']  # [year]

    for iyear in np.arange(0,num_years):
        flux_grid[:,:,iyear] += national_emi[iyear] * proxy_grid[:,:,iyear]
        flux_nongrid[iyear] += national_emi[iyear] * proxy_nongrid[iyear]
        # Sort the group into the ferroalloy and/or petrochemical totals by its name
        if 'Ferro' in emi_group:
            ext_grid[:,:,iyear] += flux_grid[:,:,iyear]
            Emissions_Ferro[:,:,iyear] += flux_grid[:,:,iyear]
        if 'Petro' in emi_group:
            ext_grid[:,:,iyear] += flux_grid[:,:,iyear]
            Emissions_Petro[:,:,iyear] += flux_grid[:,:,iyear]
        Emissions[:,:,iyear] += flux_grid[:,:,iyear]
        Emissions_nongrid[iyear] += flux_nongrid[iyear]


# QA/QC gridded emissions
# Check sum of all gridded emissions + emissions not included in gridding (e.g., AK), and other non-gridded areas
print('QA/QC #1: Check weighted emissions against GHGI')
for iyear in np.arange(0,num_years):
    summary_emi = Total_EPA_Industry_Emissions[year_range[iyear]]
    calc_emi =  np.sum(Emissions_Ferro[:,:,iyear]) +np.sum(Emissions_Petro[:,:,iyear]) + Emi_not_mapped_sum[iyear] + Emissions_nongrid[iyear]
    if DEBUG==1:
        print(summary_emi)
        print(calc_emi)
    diff = abs(summary_emi-calc_emi)/((summary_emi+calc_emi)/2)
    if diff < qa_tolerance:
        # Fixed: the message now reports the tolerance actually applied
        # (it used to claim "< 0.01%" while the test allowed 0.02%)
        print('Year ', year_range[iyear], ': PASS, difference < {:.2f}%'.format(qa_tolerance*100))
    else:
        print('Year ', year_range[iyear], ': FAIL -- Difference = ', diff*100,'%')

#### Step 4.2.2 Save gridded emissions (kt)

In [None]:
#save gridded emissions for each gridding group - for extension

#Initialize file
data_IO_fn.initialize_netCDF(grid_emi_outputfile, netCDF_description, 0, year_range, loc_dimensions, Lat_01, Lon_01)

unique_groups = np.unique(proxy_ind_map['GHGI_Emi_Group'])
unique_groups = unique_groups[unique_groups != 'Emi_not_mapped']

# The context manager closes the file even if one of the writes fails
with Dataset(grid_emi_outputfile, 'r+', format='NETCDF4') as nc_out:
    for igroup in np.arange(0,len(unique_groups)):
        var_name = 'Ext_'+unique_groups[igroup]
        print(var_name)
        ghgi_temp = vars()[var_name]
        # Fixed: the Ext_ arrays in this notebook are annual [lat, lon, year], so they
        # are written as they are. The old test (ndim == 3 -> sum over axis 2) summed
        # over the *year* axis and no longer matched the (lat, lon, year) variable.
        # Only an array with an extra trailing axis is collapsed
        # (assumed to be a month axis -- confirm if monthly groups are ever added).
        if np.ndim(ghgi_temp) == 4:
            ghgi_temp = np.sum(ghgi_temp, axis=3)

        # Write data to netCDF
        data_out = nc_out.createVariable(var_name, 'f8', ('lat', 'lon','year'), zlib=True)
        data_out[:,:,:] = ghgi_temp

    #save nongrid data to calculate non-grid fraction extension
    # ('year',) is a one-element tuple; the former ('year') was just a string
    data_out = nc_out.createVariable('Emissions_nongrid', 'f8', ('year',), zlib=True)
    data_out[:] = Emissions_nongrid[:]

#Confirm file location
print('** SUCCESS **')
# Fixed: the path used to be glued onto the working directory without a separator
print("Gridded emissions (kt) written to file: {}" .format(os.path.abspath(grid_emi_outputfile)))
print(' ')

del data_out, ghgi_temp, nc_out

#### Step 4.3 Calculate Gridded Emission Fluxes (molec./cm2/s) (0.1x0.1)

In [None]:
#Step 2 -- #Convert emissions to emission flux
# conversion: kt emissions to molec/cm2/s flux
import calendar

DEBUG=1

#Initialize arrays
check_sum_annual = np.zeros([num_years])
Flux_array_01_annual = np.zeros([len(Lat_01),len(Lon_01),num_years])
Flux_array_01_ferro_annual = np.zeros([len(Lat_01),len(Lon_01),num_years])
Flux_array_01_petro_annual = np.zeros([len(Lat_01),len(Lon_01),num_years])
for igroup in np.arange(0,len(proxy_ind_map)):
    vars()['Flux_'+proxy_ind_map.loc[igroup,'GHGI_Emi_Group']+'_annual'] = np.zeros([len(Lat_01),len(Lon_01),num_years])


#Calculate fluxes
for iyear in np.arange(0,num_years):
    # Leap years are detected from the calendar rather than hard-coded (was: 2012, 2016)
    if calendar.isleap(year_range[iyear]):
        month_days = month_day_leap
    else:
        month_days = month_day_nonleap

    # calculate fluxes for annual data  (=kt * grams/kt *molec/mol *mol/g *s^-1 * cm^-2)
    # area_matrix_01 is in cm2, so the factor is an array with one value per grid cell
    conversion_factor_annual = 10**9 * Avogadro / float(Molarch4 * np.sum(month_days) * 24 * 60 *60) / area_matrix_01
    print(np.median(conversion_factor_annual))

    # Per-group fluxes: Flux_<group> is converted in place from kt to molec/cm2/s
    # and copied into Flux_<group>_annual
    for igroup in np.arange(0,len(proxy_ind_map)):
        flux_name = 'Flux_'+proxy_ind_map.loc[igroup,'GHGI_Emi_Group']
        vars()[flux_name][:,:,iyear] *= conversion_factor_annual
        vars()[flux_name+'_annual'][:,:,iyear] = vars()[flux_name][:,:,iyear]

    # Totals do not depend on the group, so they are computed once per year
    # (they used to be recomputed inside the group loop)
    Flux_array_01_annual[:,:,iyear] = Emissions[:,:,iyear]*conversion_factor_annual
    Flux_array_01_ferro_annual[:,:,iyear] = Emissions_Ferro[:,:,iyear]*conversion_factor_annual
    Flux_array_01_petro_annual[:,:,iyear] = Emissions_Petro[:,:,iyear]*conversion_factor_annual

    # convert back to emissions to check at end
    check_sum_annual[iyear] = np.sum(Flux_array_01_ferro_annual[:,:,iyear]/conversion_factor_annual) +\
                                np.sum(Flux_array_01_petro_annual[:,:,iyear]/conversion_factor_annual)

print(' ')
print('QA/QC #2: Check final gridded fluxes against GHGI')
# for the sum, check the converted annual emissions (convert back from flux) plus all the non-gridded emissions
for iyear in np.arange(0,num_years):
    calc_emi = check_sum_annual[iyear] + Emissions_nongrid[iyear]
    summary_emi = Total_EPA_Industry_Emissions[year_range[iyear]]
    if DEBUG==1:
        print(calc_emi)
        print(summary_emi)
    # relative difference as a fraction (0.0001 = 0.01%)
    diff = abs(summary_emi-calc_emi)/((summary_emi+calc_emi)/2)
    if diff < 0.0001:
        print('Year ', year_range[iyear], ': PASS, difference < 0.01%')
    else:
        print('Year ', year_range[iyear], ': FAIL -- Difference = ', diff*100,'%')

# Names used by the output and plotting cells
Flux_Emissions_Total_annual = Flux_array_01_annual
Flux_Emissions_Petro_annual = Flux_array_01_petro_annual
Flux_Emissions_Ferro_annual = Flux_array_01_ferro_annual

-------------
## Step 5. Write gridded (0.1⁰x0.1⁰) data to netCDF files.
-------------

In [None]:
# Initialize netCDF files

data_IO_fn.initialize_netCDF(gridded_outputfile, netCDF_description, 0, year_range, loc_dimensions, Lat_01, Lon_01)
data_IO_fn.initialize_netCDF(gridded_petro_outputfile, netCDF_petro_description, 0, year_range, loc_dimensions, Lat_01, Lon_01)
data_IO_fn.initialize_netCDF(gridded_ferro_outputfile, netCDF_ferro_description, 0, year_range, loc_dimensions, Lat_01, Lon_01)

# Write the data to netCDF: total, petrochemical only, ferroalloy only.
# Each file receives its flux array in the 'emi_ch4' variable of the initialized file.
flux_outputs = [
    (gridded_outputfile, Flux_Emissions_Total_annual),
    (gridded_petro_outputfile, Flux_Emissions_Petro_annual),
    (gridded_ferro_outputfile, Flux_Emissions_Ferro_annual),
]

for flux_outfile, flux_data in flux_outputs:
    # The context manager closes the file even if the write fails
    with Dataset(flux_outfile, 'r+', format='NETCDF4') as nc_out:
        nc_out.variables['emi_ch4'][:,:,:] = flux_data
    #Confirm file location
    print('** SUCCESS **')
    # Fixed: the path used to be glued onto the working directory without a separator
    print("Gridded industry fluxes written to file: {}" .format(os.path.abspath(flux_outfile)))
    print('')

-------------
## Step 6. Plot Data
-------------

#### Step 6.1. Plot Annual Emission Fluxes

In [None]:
# Map the total annual flux for every year of the time series.
# The three settings below are reused by the other plotting cells.
scale_max = 2       # passed to the plot function as its scale maximum
save_flag = 0       # 0 here; an empty output file name is passed alongside it
save_outfile = ''
data_plot_fn.plot_annual_emission_flux_map(
    Flux_Emissions_Total_annual,
    Lat_01,
    Lon_01,
    year_range,
    title_str,
    scale_max,
    save_flag,
    save_outfile,
)

In [None]:
# One set of annual maps per row of the proxy mapping table, titled with the
# emission group name (could be changed to plot each group once)
for igroup in range(len(proxy_ind_map)):
    group_title = proxy_ind_map.loc[igroup, 'GHGI_Emi_Group']
    temp_plot = vars()['Flux_' + group_title]
    data_plot_fn.plot_annual_emission_flux_map(
        temp_plot, Lat_01, Lon_01, year_range, group_title,
        scale_max, save_flag, save_outfile,
    )

#### Step 6.2 Plot Difference between first and last inventory year

In [None]:
# Difference map (last minus first inventory year) for the industry total
data_plot_fn.plot_diff_emission_flux_map(
    Flux_Emissions_Total_annual,
    Lat_01,
    Lon_01,
    year_range,
    title_diff_str,
    save_flag,
    save_outfile,
)

In [None]:
# Report the total run time, measured from the start time stamp `it`
# recorded at the top of the notebook
ct = datetime.datetime.now()
ft = ct.timestamp()
elapsed_seconds = ft-it
time_elapsed = elapsed_seconds/(60*60)  # seconds -> hours
print('Time to run: ', time_elapsed, ' hours', sep='')
print('** GEPA_2B8_2C2_Industry: COMPLETE **')