# Gridded EPA Methane Inventory
## Category: 1B2b Natural Gas Transmission and Storage

***
#### Authors: 
Erin E. McDuffie
#### Date Last Updated: 
see Step 0
#### Notebook Purpose: 
This Notebook calculates and reports annual gridded (0.1°x0.1°) methane emission fluxes (molec./cm2/s) from the Transmission and Storage segment of Natural Gas Systems in the CONUS region between 2012-2018. 
#### Summary & Notes:
EPA GHGI Transmission and Storage emissions are read in at the national level from the GHGI workbook. Emissions are split into contributions from Transmission and Storage Compressor Stations, Transmission pipelines, Import and Export Terminals, Storage Wells, M&R stations on farm land, and LNG storage stations. The activity/proxy data used to allocate emissions from each group include DOE import and export terminal locations, Enverus midstream data for transmission pipeline locations, Enverus non-associated gas well locations, and a combination of EIA, PHMSA, Enverus, and GHGRP data for LNG storage stations, Transmission, and Storage Compressor Stations. National emissions are spatially distributed onto a 0.1°x0.1° grid based on the emissions/locations of wells, pipeline, stations, and storage facilities.  Emissions are converted to emission flux. Annual emission fluxes (molec./cm2/s) are written to final netCDFs in the ‘/code/Final_Gridded_Data/’ folder.
***

-------
## Step 0. Set-Up Notebook Modules, Functions, and Local Parameters and Constants
_____

In [None]:
# Report when this notebook file was last saved and which directory the kernel is running in
import os
import time
notebook_stat = os.stat('./1B2b_TransmissionStorage.ipynb')
modtime = notebook_stat.st_mtime                      # seconds since the epoch
local_modtime = time.localtime(modtime)
modificationTime = time.strftime('%Y-%m-%d %H:%M:%S', local_modtime)
print("This file was last modified on: ", modificationTime)
print('')
working_dir = os.getcwd()
print("The directory we are working in is {}" .format(working_dir))

In [None]:
## Include plots within notebook
%matplotlib inline

In [None]:
# Import base modules
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import re
import pyodbc
import PyPDF2 as pypdf
import tabula as tb
import shapefile as shp
from datetime import datetime
from copy import copy

# Import additional modules
from mpl_toolkits.basemap import Basemap

# Load netCDF (for manipulating netCDF file types)
from netCDF4 import Dataset

# Set up ticker
import matplotlib.ticker as ticker

#add path for the global function module (file)
import sys
module_path = os.path.abspath(os.path.join('../Global_Functions/'))
#print(module_path)
if module_path not in sys.path:
    sys.path.append(module_path)

# Load functions
import data_load_functions as data_load_fn
import data_functions as data_fn
import data_IO_functions as data_IO_fn
import data_plot_functions as data_plot_fn

In [None]:
#INPUT Files
# This cell only defines file-path strings; nothing is opened or read here.
# Assign global file names
# (positions in the list returned by load_global_file_names are used as-is; the commented-out
#  entries below are positions that this notebook does not use)
global_filenames = data_load_fn.load_global_file_names()
State_ANSI_inputfile = global_filenames[0]
#County_ANSI_inputfile = global_filenames[1]
#pop_map_inputfile = global_filenames[2]
Grid_area01_inputfile = global_filenames[3]
Grid_area001_inputfile = global_filenames[4]
#Grid_state001_ansi_inputfile = global_filenames[5]
#Grid_county001_ansi_inputfile = global_filenames[6]
# The first 20 characters of the State ANSI file path are reused as the prefix for every "global" input below.
# NOTE(review): the length 20 is hard-coded - presumably it is the length of the shared input-data
# directory prefix; confirm it still holds if that directory is renamed or moved.
globalinputlocation = global_filenames[0][0:20]
print(globalinputlocation)

# EPA Inventory Data
EPA_NG_inputfile = globalinputlocation+'GHGI/Ch3_Energy/NaturalGasSystems_1990-2018_GHGI_2020-04-11.xlsx'

#proxy mapping file
NG_Mapping_inputfile = './InputData/NaturalGas_TransmissionStorage_ProxyMapping.xlsx'

#Activity Data
#LNG Import/Export terminals
LNGTerminal_inputfile = './InputData/LNG_ImportExport_Terminals.xlsx'

#Active non-associated gas wells
NAgaswell_ongrid_inputfile = './InputData/Map_Enverus_NAGasWellLocations_ongrid.nc'
NAgaswell_offgrid_inputfile = './InputData/Map_Enverus_NAGasWellLocations_offgrid.csv'


#LNG storage facilities
# (LNG_storage_inputfile is a path prefix: the year and '.xlsx' are appended where the files are read)
LNG_storage_inputfile = './InputData/annual-liquefied-natural-gas-2010-present/annual_liquefied_natural_gas_'
LNG_storage_Enverus_inputfile = globalinputlocation +'Enverus/Midstream/LNG_Terminals_AllUS_WGS84.xls'
FracTracker_inputfile = './InputData/FracTracker_PeakShaving_WGS84.xls'

#Transmission Pipelines
Enverus_NG_Transpipeline_inputfile = globalinputlocation+ 'Enverus/Midstream/Transmission_Pipelines_CONUS_WGS84.xls'
AKHI_pipelines_shp = globalinputlocation+ 'Enverus/Midstream/Transmission_pipelines_AKHI_wgs84.shp'
CONUS_pipelines_shp = globalinputlocation+ 'Enverus/Midstream/Transmission_pipelines_CONUS_wgs84.shp'

#Crop Land
# (path prefix only - NOTE(review): the remainder of the file name is presumably appended where the
#  cropland data are read; that code is not in this part of the notebook)
Cropland_001_inputfile = globalinputlocation + 'Gridded/AllCrops_'

#Transmission Compressor Stations
Enverus_NG_TransStations_inputfile = globalinputlocation+ 'Enverus/Midstream/Transmission_CompressorStations_CONUS_onshore_WGS84.xls'
AKHI_transstat_shp = globalinputlocation+ 'Enverus/Midstream/Transmission_CompressorStations_AKHI_WGS84.shp'
CONUS_transstat_shp = globalinputlocation+ 'Enverus/Midstream/Transmission_CompressorStations_CONUS_onshore_WGS84.shp'

#GHGRP Data
GHGRP_facility_inputfile = './InputData/GHGRP_Facility_Info.csv'
GHGRP_subpartw_inputfile = './InputData/ef_w_emissions_source_ghg.xlsx'

#Storage Compressor Stations
Enverus_NG_StorStations_inputfile = globalinputlocation+ 'Enverus/Midstream/Storage_CompressorStations_AllUS_WGS84.xls'
Enverus_NG_StorFields_inputfile = globalinputlocation+ 'Enverus/Midstream/GasStorage_AllUS_WGS84.xls'
EIA_StorFields_inputfile = './InputData/191 Field Level Storage Data (Annual).xlsx'
EIA_StorFields_locs_inputfile = './InputData/EIA_Natural_Gas_Underground_Storage.xlsx'

#OUTPUT FILES
# Final gridded flux netCDF, plus the description and plot-title strings that accompany it
gridded_outputfile = '../Final_Gridded_Data/EPA_v2_1B2b_Natural_Gas_TransmissionStorage.nc'
netCDF_description = 'Gridded EPA Inventory - Natural Gas Systems Emissions - IPCC Source Category 1B2b - Transmission and Storage'
title_str = "EPA methane emissions from gas transmission and storage"
title_diff_str = "Emissions from gas transmission and storage difference: 2018-2012"

#output gridded proxy data
grid_emi_outputfile = '../Final_Gridded_Data/Extension/v2_input_data/NG_Transmission_Grid_Emi.nc'

In [None]:
# SPECIFY RECALCS
# Switches that choose between recomputing an intermediate product and re-using a previously saved one.

# ReCalculate Cropland array = 1, load from previous file = 0
# (checked in Step 2.5 as `ReCalc_Cropland == 1`)
ReCalc_Cropland = 0

In [None]:
# Emission time series covered by this notebook
start_year = 2012  # first inventory year
end_year = 2018    # last inventory year (inclusive)
year_range = list(range(start_year, end_year + 1))
year_range_str = list(map(str, year_range))
num_years = len(year_range)

# Physical constants and grid resolution
Avogadro   = 6.02214129 * 10**(23)  # molecules/mol
Molarch4   = 16.04                  # g/mol
Res01      = 0.1                    # grid resolution, degrees

# Bounding box of the continental-US output domain (degrees), used for the netCDF files
Lon_left, Lon_right = -130, -60
Lat_low, Lat_up = 20, 55
loc_dimensions = [Lat_low, Lat_up, Lon_left, Lon_right]

# Index limits of that box inside the global 0.1-degree lat/lon vectors
ilat_start = int((90+Lat_low)/Res01)          # 1100:1450 (continental US range)
ilat_end = int((90+Lat_up)/Res01)
ilon_start = abs(int((-180-Lon_left)/Res01))  # 500:1200 (continental US range)
ilon_end = abs(int((-180-Lon_right)/Res01))

# Calendar helpers: days per month (leap years differ only in February), month tags and names
month_day_nonleap = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
month_day_leap = [29 if imonth == 1 else ndays for imonth, ndays in enumerate(month_day_nonleap)]
month_tag = ['%02d' % imonth for imonth in range(1, 13)]
month_range_str = ['January', 'February', 'March', 'April', 'May', 'June',
                   'July', 'August', 'September', 'October', 'November', 'December']
month_dict = {month_name: month_number for month_number, month_name in enumerate(month_range_str, start=1)}
num_months = len(month_range_str)

num_regions = 7

In [None]:
%%javascript
IPython.OutputArea.auto_scroll_threshold = 9999;
//prevent auto-scrolling

In [None]:
# Record the wall-clock start of the run: `ct` is the datetime, `it` the same instant as a POSIX timestamp
ct = datetime.now()
it = datetime.timestamp(ct)
print("current time:", ct)

____
## Step 1. Load in State ANSI data, and Area Maps
_____

In [None]:
# --- State-level ANSI data ---
# Only the first three items returned by the loader are kept: the state table and two lookup dictionaries
State_ANSI, name_dict, abbr_dict = data_load_fn.load_state_ansi(State_ANSI_inputfile)[0:3]
#QA: number of states
print(f'Read input file: {State_ANSI_inputfile}')
print(f'Total "States" found: {len(State_ANSI):.0f}')
print(' ')

# --- 0.01 x 0.01 degree data ---
# Grid cell area map (m2) with its lat/lon vectors
# (the 0.01-degree state ANSI map is not needed in this notebook)
#state_ANSI_map = data_load_fn.load_state_ansi_map(Grid_state001_ansi_inputfile)
area_map, lat001, lon001 = data_load_fn.load_area_map_001(Grid_area001_inputfile)

# --- 0.1 x 0.1 degree data ---
# Only the lat/lon vectors (items 1 and 2 of the loader output) are used
Lat01, Lon01 = data_load_fn.load_area_map_01(Grid_area01_inputfile)[1:3]
# Cut the vectors down to the continental-US index window
Lat_01 = Lat01[ilat_start:ilat_end]
Lon_01 = Lon01[ilon_start:ilon_end]
# Aggregate the fine-resolution area map onto the 0.1-degree window, then convert m2 -> cm2 in place
area_matrix_01 = data_fn.regrid001_to_01(area_map, Lat_01, Lon_01)
area_matrix_01 *= 10000
#state_ANSI_map_01 = data_fn.regrid001_to_01(state_ANSI_map, Lat_01, Lon_01)

# Drop the large intermediates that are no longer needed
del area_map
del lat001, lon001
del global_filenames

# Print time
ct = datetime.now()
print("current time:", ct)

-------------
## Step 2: Read-in and Format Proxy Data
-------------

### Step 2.1 Read In Proxy Mapping File & Make Proxy Arrays

#### Step 2.1.1 Format Proxy Group Arrays

In [None]:
#load GHGI Mapping Groups
# Two-step read: the first call only recovers the column names (header on the second sheet row); the second
# call skips one further row before the data start and re-applies those names.
names = pd.read_excel(NG_Mapping_inputfile, sheet_name = "GHGI Map - T&S", usecols = "A:B",skiprows = 1, header = 0)
colnames = names.columns.values
ghgi_ts_map = pd.read_excel(NG_Mapping_inputfile, sheet_name = "GHGI Map - T&S", usecols = "A:B", skiprows = 2, names = colnames)
#drop rows with no data (missing, 'na' or '-' emission group)
emi_group_col = ghgi_ts_map['GHGI_Emi_Group']
ghgi_ts_map = ghgi_ts_map[emi_group_col.notna() & ~emi_group_col.isin(['na', '-'])]
#remove the parentheses and "+" from the source names
# The characters are passed as plain literals with regex=False. The previous escaped patterns (r"\(", r"\)")
# only worked while pandas treated the pattern as a regular expression by default; with the default
# regex=False of pandas >= 2.0 they are searched for literally and the parentheses are left in place.
for unwanted_char in ("(", ")", "+"):
    ghgi_ts_map['GHGI_Source'] = ghgi_ts_map['GHGI_Source'].str.replace(unwanted_char, "", regex=False)
ghgi_ts_map.reset_index(inplace=True, drop=True)
display(ghgi_ts_map)

#load emission group - proxy map
# (a single read is enough here: the header row and the data rows start at the same offset)
proxy_ts_map = pd.read_excel(NG_Mapping_inputfile, sheet_name = "Proxy Map - T&S", usecols = "A:D",skiprows = 1, header = 0)
colnames = proxy_ts_map.columns.values
display((proxy_ts_map))

#create empty proxy and emission group arrays (add months for proxy variables that have monthly data)
# One zero-filled array per name is created as a notebook-level variable, because later steps refer to the
# arrays by the names listed in the mapping file:
#   <Proxy_Group>          gridded proxy        [lat, lon, year(, month)]
#   <Proxy_Group>_nongrid  off-grid proxy total [year(, month)]
#   <GHGI_Emi_Group>       national emissions   [year(, month)]
for igroup in np.arange(0,len(proxy_ts_map)):
    proxy_group_name = proxy_ts_map.loc[igroup,'Proxy_Group']
    emi_group_name = proxy_ts_map.loc[igroup,'GHGI_Emi_Group']
    if proxy_ts_map.loc[igroup, 'Month_Flag'] == 1:
        time_shape = [num_years,num_months]
    else:
        time_shape = [num_years]
    globals()[proxy_group_name] = np.zeros([len(Lat_01),len(Lon_01)] + time_shape)
    globals()[proxy_group_name+'_nongrid'] = np.zeros(time_shape)
    globals()[emi_group_name] = np.zeros(time_shape)

emi_group_names = np.unique(ghgi_ts_map['GHGI_Emi_Group'])
print('QA/QC: Is the number of emission groups the same for the proxy and emissions tabs?')
if (len(emi_group_names) == len(np.unique(proxy_ts_map['GHGI_Emi_Group']))):
    print('PASS')
else:
    print('FAIL')

#### Step 2.2 Read In DOE Import/Export Terminals

In [None]:
def _clean_terminal_coordinate(value):
    """Return a terminal latitude/longitude cell as a number.

    Text cells are stripped of leading/trailing left-to-right marks (U+200E) and converted with float();
    any other value is returned unchanged.
    """
    if isinstance(value, str):
        return float(value.strip('\u200e'))
    return value


#Read in the DOE Data (from pre-processed input file, see Data and Assumptions document for more details)
# (the header row is used directly for the column names, so the sheet only needs to be parsed once)
DOE_LNGTerminals = pd.read_excel(LNGTerminal_inputfile, sheet_name = 'Sheet1', usecols = "A:F",skiprows = 5, header = 0)
#select only years within range
# Both ends of the range are enforced (a year before start_year has no slot in the yearly arrays), and the
# index is renumbered because the loop below addresses rows by the labels 0..len-1.
DOE_LNGTerminals = DOE_LNGTerminals[DOE_LNGTerminals['Year'].between(year_range[0], year_range[-1])]
DOE_LNGTerminals = DOE_LNGTerminals.reset_index(drop=True)
#display(DOE_LNGTerminals)

#initialize the map arrays (terminal counts per grid cell and year; *_nongrid holds counts outside the domain)
map_InputTerminals = np.zeros([len(Lat_01),len(Lon_01), num_years]) 
map_InputTerminals_nongrid = np.zeros([num_years])
map_ExportTerminals = np.zeros([len(Lat_01),len(Lon_01),num_years]) 
map_ExportTerminals_nongrid = np.zeros([num_years])

# Place the DOE data onto the map arrays
for iterminal in range(len(DOE_LNGTerminals)):
    term_lat = _clean_terminal_coordinate(DOE_LNGTerminals.loc[iterminal, 'Terminal Latitude'])
    term_lon = _clean_terminal_coordinate(DOE_LNGTerminals.loc[iterminal, 'Terminal Longitude'])
    terminal_type = DOE_LNGTerminals.loc[iterminal, 'Terminal Type']
    iyear = year_range.index(int(DOE_LNGTerminals.loc[iterminal, 'Year']))

    # terminals strictly inside the domain are counted in their grid cell, all others in the off-grid total
    # (rows whose type is neither 'Import' nor 'Export' are not counted)
    if term_lon > Lon_left and term_lon < Lon_right \
        and term_lat > Lat_low and term_lat < Lat_up:
        ilat = int((term_lat - Lat_low)/Res01)
        ilon = int((term_lon - Lon_left)/Res01)
        if terminal_type == 'Import':
            map_InputTerminals[ilat,ilon,iyear] += 1
        elif terminal_type == 'Export':
            map_ExportTerminals[ilat,ilon,iyear] += 1
    else:
        if terminal_type == 'Import':
            map_InputTerminals_nongrid[iyear] += 1
        elif terminal_type == 'Export':
            map_ExportTerminals_nongrid[iyear] += 1

###NOTE: CORRECT FOR YEARS WITH NO EXPORT TERMINAL DATA
# Year slots 0, 2 and 3 are overwritten with the maps of slots 1, 4 and 4.
# NOTE(review): these are fixed positions in year_range (2012, 2014, 2015 filled from 2013, 2016, 2016 with
# the current start_year) - revisit them if start_year/end_year or the DOE input file change.
map_ExportTerminals[:,:,0] = map_ExportTerminals[:,:,1]
map_ExportTerminals[:,:,2] = map_ExportTerminals[:,:,4]
map_ExportTerminals[:,:,3] = map_ExportTerminals[:,:,4]

for iyear in np.arange(0,num_years):
    print('Year: ',year_range_str[iyear])
    print('Total Import Terminals on grid: ',np.sum(map_InputTerminals[:,:,iyear]))
    print('Total Import Terminals off grid: ',np.sum(map_InputTerminals_nongrid[iyear]))
    print('Total Export Terminals on grid: ',np.sum(map_ExportTerminals[:,:,iyear]))
    print('Total Export Terminals off grid: ',np.sum(map_ExportTerminals_nongrid[iyear]))

#### Step 2.3 Read In Enverus Non-Associated Gas Wells (pre-processed in GEPA Production script)
New proxy now formatted in Step 2.8

#### Step 2.4 LNG Storage Station Proxy Data

##### Step 2.4.1 Read In PHMSA Data (master list)

In [None]:
#Read in the PHMSA Data (see Data and Assumptions document for more details)
# One annual workbook is read per inventory year; only in-service facilities with a storage function are
# kept. The yearly tables are collected in a list and stacked once at the end with pd.concat
# (DataFrame.append is no longer available in pandas >= 2.0).
phmsa_columns = ['REPORT_YEAR','FACILITY_NAME','PARTA2NAMEOFCOMP','TOTAL_CAPACITY_BBLS','TOTAL_CAPACITY_MMCF',
                 'FACILITY_STATE','FACILITY_ZIP_CODE','FACILITY_STATUS','TYPE_OF_FACILITY','FUNCTION_OF_FACILITY']
storage_functions = ['Storage w/ Liquefaction','Storage w/o Liquefaction','Storage w/ Both']
phmsa_yearly_tables = []

print("Number of In Service LNG Stations:")
for iyear in np.arange(0,num_years):
    # the header row supplies the column names directly, so each workbook is parsed only once
    temp = pd.read_excel(LNG_storage_inputfile+year_range_str[iyear]+'.xlsx', sheet_name = 'LNG AR Part B', skiprows = 2, header = 0)
    temp = temp[phmsa_columns]
    temp = temp[temp['FUNCTION_OF_FACILITY'].isin(storage_functions)]
    temp = temp[temp['FACILITY_STATUS'] == 'In Service']
    temp = temp.fillna('')
    phmsa_yearly_tables.append(temp)
    print('Year',year_range_str[iyear],': ', len(temp))

PHMSA_LNGstorage = pd.concat(phmsa_yearly_tables)

# Lat/Lon = 0 marks "location not found yet"; the matching steps that follow fill these in.
# Float zeros are used so that assigning decimal coordinates later does not change the column dtype.
PHMSA_LNGstorage['Lat']=0.0
PHMSA_LNGstorage['Lon']=0.0
PHMSA_LNGstorage.reset_index(drop=True,inplace=True)

##### Step 2.4.2 Read In Enverus Midstream LNG station data

In [None]:
#Read in Enverus Midstream LNG station data (not complete and includes non-peak shaving facilities as well)
# The first row of the sheet is the header, so one read gives both the column names and the data
# (the earlier second parse of the same file and a stray no-op expression statement were removed).
Enverus_LNGstations = pd.read_excel(LNG_storage_Enverus_inputfile, skiprows = 0, header = 0)
# keep only the columns used for name/location matching
Enverus_LNGstations = Enverus_LNGstations[['NAME','OPERATOR','TYPE','CAP_STO','STATE_NAME','CNTY_NAME','Longitude','Latitude']]
# drop Alaska and Hawaii stations, then renumber the rows 0..n-1 for label-based access
Enverus_LNGstations = Enverus_LNGstations[~Enverus_LNGstations['STATE_NAME'].isin(['Alaska','Hawaii'])]
Enverus_LNGstations.reset_index(drop=True,inplace=True)

##### Step 2.4.3 Find matching plant locations (PHMSA/Enverus)

In [None]:
# Give each PHMSA station the coordinates of an Enverus LNG station in the same state when the two share at
# least one word. Compared words: the PHMSA facility name vs. the Enverus station name plus its county name
# (lower case, with the substring "lng" removed from both names).
for istation in range(len(PHMSA_LNGstorage)):
    phmsa_state = PHMSA_LNGstorage.loc[istation,'FACILITY_STATE']
    phmsa_words = PHMSA_LNGstorage.loc[istation,'FACILITY_NAME'].lower().replace("lng","").split()
    # PHMSA lists state abbreviations, Enverus full state names
    full_state_name = State_ANSI.loc[State_ANSI['abbr']==phmsa_state,'name'].values[0]
    in_state = Enverus_LNGstations['STATE_NAME']== full_state_name
    state_stations = Enverus_LNGstations[in_state].reset_index(drop=True)

    n_hits = 0
    for icandidate in range(len(state_stations)):
        candidate_words = state_stations.loc[icandidate,'NAME'].lower().replace("lng","").split()
        candidate_words.append(state_stations.loc[icandidate,'CNTY_NAME'].lower())

        shares_word = not set(phmsa_words).isdisjoint(candidate_words)
        # Enverus spelling 'chatanooga' vs. PHMSA spelling 'chattanooga'
        chattanooga_spelling = 'chatanooga' in candidate_words and 'chattanooga' in phmsa_words

        # each satisfied test counts as one match and (re)assigns the coordinates
        for is_match in (shares_word, chattanooga_spelling):
            if is_match:
                n_hits += 1
                PHMSA_LNGstorage.loc[istation,'Lat'] = state_stations.loc[icandidate,'Latitude']
                PHMSA_LNGstorage.loc[istation,'Lon'] = state_stations.loc[icandidate,'Longitude']

        # more than one match: report it and stop searching for this station
        # (the coordinates of the most recent match are kept)
        if n_hits > 1:
            print('STOP')
            print(phmsa_words)
            print(state_stations)
            break
    # stations without any match keep Lat/Lon = 0 and are handled in the following steps

#display(PHMSA_LNGstorage)
n_missing = len(PHMSA_LNGstorage[PHMSA_LNGstorage['Lon']==0])
print('QA/QC: Number of PHMSA LNG storage stations Missing Lat/Lons:', n_missing)

##### Step 2.4.4 Read In FracTracker Peak Shavers Data

In [None]:
#Read in FracTracker Peak Shaving station data (see Data and Assumptions for further details)
# First pass recovers the header names, second pass reads the table with those names applied
fractracker_colnames = pd.read_excel(FracTracker_inputfile, skiprows = 0, header = 0).columns.values
FracTracker_PeakShavers = pd.read_excel(FracTracker_inputfile, skiprows = 0, names = fractracker_colnames)

# Keep only the columns needed for matching, and only stations outside Alaska and Hawaii
fractracker_keep_cols = ['Company','City','State','Zip','Longitude','Latitude']
in_ak_or_hi = FracTracker_PeakShavers['State'].isin(['Alaska','Hawaii'])
FracTracker_PeakShavers = FracTracker_PeakShavers.loc[~in_ak_or_hi, fractracker_keep_cols].reset_index(drop=True)

# Normalise the zip codes (text values): every '.' becomes '0', then left-pad with zeros to 5 characters
FracTracker_PeakShavers['Zip'] = [zip_text.replace('.','0').zfill(5) for zip_text in FracTracker_PeakShavers['Zip']]

##### Step 2.4.5 Find remaining matching plant locations (PHMSA/FracTracker)

In [None]:
# Match PHMSA data based on zip code first, then any matching words in the operator and city names
#
# For every PHMSA station that still has Lat == 0 after the Enverus matching:
#   1) use the FracTracker station with the same zip code, if there is one;
#   2) skip AK and HI stations (no location needed);
#   3) otherwise compare words: PHMSA company name (with "lng" removed) + facility name against
#      FracTracker company (with "lng" removed) + city, within the same state:
#        - exactly one candidate shares a word  -> use it
#        - several candidates                   -> use the one sharing the most words; if all tie, use the
#                                                  hand-assigned location below
#        - no candidate                         -> use the hand-assigned location below
#      Stations that cannot be resolved are printed for review and keep Lat/Lon = 0.

# Hand-assigned (lat, lon) for stations whose FracTracker candidates all share the same number of words.
# Keys are the complete set of words built from the PHMSA company + facility name.
_tied_match_locations = {
    #use Anderson location
    frozenset(['alabama', 'gas', 'corporation', 'east', 'lauderdale', 'lng', 'facility']): (34.928418, -87.266407),
    frozenset(['spire', 'alabama', 'inc.', 'east', 'lauderdale', 'lng', 'facility']): (34.928418, -87.266407),
    #haverhill lat/lon
    frozenset(['essex', 'county', 'gas', 'co', 'lng', 'plant', 'haverhill-ma']): (42.785666, -71.088676),
    #portland loc
    frozenset(['northwest', 'natural', 'gas', 'co', 'gasco', 'lng', 'plant']): (45.525211, -122.672080),
    frozenset(['philadelphia', 'gas', 'works', 'passyunk_lng']): (39.978667, -75.148777),
    frozenset(['philadelphia', 'gas', 'works', 'richmond_lng']): (39.984250, -75.088662),
    frozenset(['energy', 'north', 'natural', 'gas', 'inc', 'lng', 'plant', 'manchester-nh']): (42.995640, -71.454789),
    frozenset(['energy', 'north', 'natural', 'gas', 'inc', 'lng', 'plant', 'tilton-nh']): (43.456485, -71.565167),
    frozenset(['energy', 'north', 'natural', 'gas', 'inc', 'lng', 'plant', 'concord-nh']): (43.198320, -71.540134),
    frozenset(['midamerican', 'energy', 'company', 'wat', 'lng', 'plant']): (42.508137, -92.347521),
    frozenset(['midamerican', 'energy', 'company', 'bet', 'lng', 'plant']): (41.564233, -90.476182),
    frozenset(['ugi', 'energy', 'services', 'temple', 'lng', 'plant']): (40.421699, -75.927026),
    frozenset(['ugi', 'energy', 'services', 'steelton', 'lng']): (40.237811, -76.851624),
}

# Hand-assigned (lat, lon) for stations with no matching word in any FracTracker candidate of their state.
_unmatched_locations = {
    frozenset(['puget', 'sound', 'energy', 'lng', 'mobile', 'system']): (47.327118, -122.579219),
    frozenset(['puget', 'sound', 'energy', 'gig', 'harbor', 'satellite']): (47.327118, -122.579219),
    frozenset(['north', 'dakota', 'llc', 'tioga', 'plant']): (48.402600, -102.918507),
    frozenset(['hr', 'nu', 'blu', 'energy,', 'llc', 'hr', 'nu', 'blu', 'energy']): (30.496414, -91.223587),
    frozenset(['indy,', 'llc', 'lng', 'south']): (39.717421, -86.068010),
    frozenset(['indy,', 'llc', 'lng', 'north']): (39.915464, -86.239457),
    frozenset(['centerpoint', 'energy', 'resources', 'corp.,', 'dba', 'centerpoint', 'energy', 'minnesota',
               'gas', 'waterbath', 'portable']): (30.031979, -95.441862),
}

#Find matching plant locations between PHMSA and FracTracker and add Lat/Lons
for istation in np.arange(0,len(PHMSA_LNGstorage)):
    if PHMSA_LNGstorage.loc[istation,'Lat'] != 0:
        #already located in the Enverus step
        continue

    temp_state = PHMSA_LNGstorage.loc[istation,'FACILITY_STATE']
    temp_zip = PHMSA_LNGstorage.loc[istation,'FACILITY_ZIP_CODE']

    # 1) zip code match
    match = np.where(FracTracker_PeakShavers['Zip'] == temp_zip)
    if match[0].size:
        # The former test `len(match[0]==1)` measured the length of a boolean array and was therefore always
        # true, so ambiguous zip codes were never reported. The first match is still used (unchanged
        # results), but several FracTracker stations sharing the zip code are now flagged for review.
        if len(match[0]) > 1:
            print('error', istation, '- more than one FracTracker station with zip code', temp_zip,
                  '(first match used)')
        PHMSA_LNGstorage.loc[istation,'Lat'] = FracTracker_PeakShavers.loc[match[0][0],'Latitude']
        PHMSA_LNGstorage.loc[istation,'Lon'] = FracTracker_PeakShavers.loc[match[0][0],'Longitude']
        continue

    # 2) don't need to find lat/lon for AK or HI plants
    if temp_state =='AK' or temp_state =='HI':
        continue

    # 3) try matching based on company name, state, and city (also hand-correct stations that can't be found)
    #make list of company and facility name words to try to match later
    temp_name = PHMSA_LNGstorage.loc[istation,'PARTA2NAMEOFCOMP'].lower()
    temp_name = temp_name.replace("lng","")
    temp_name = temp_name+' '+PHMSA_LNGstorage.loc[istation,'FACILITY_NAME'].lower()
    splitname1 = temp_name.split()
    station_words = set(splitname1)
    state_name = State_ANSI.loc[State_ANSI['abbr']==temp_state,'name'].values[0]
    #select FracTracker facilities within the current state
    temp_frac = FracTracker_PeakShavers[FracTracker_PeakShavers['State']== state_name].reset_index(drop=True)

    #capture all the FracTracker rows within the state that share words with the PHMSA company/facility name
    ifrac_list = []   #row numbers (in temp_frac) of the candidates
    match_list = []   #number of shared words for each candidate
    for ifrac in np.arange(0,len(temp_frac)):
        #make list of company, city name words to compare to the PHMSA list
        temp_name2 = temp_frac.loc[ifrac,'Company'].lower()
        temp_name2 = temp_name2.replace("lng","")
        temp_name2 = temp_name2+' '+temp_frac.loc[ifrac,'City'].lower()
        shared_words = station_words & set(temp_name2.split())
        if shared_words:
            #if there are words that match, record that datapoint from FracTracker
            ifrac_list.append(ifrac)
            match_list.append(len(shared_words))
    num_match = len(ifrac_list)

    location = None
    if num_match == 1:
        #if only one match - assign value
        location = (temp_frac.loc[ifrac_list[0],'Latitude'], temp_frac.loc[ifrac_list[0],'Longitude'])
    elif num_match > 1 and min(match_list) != max(match_list):
        #more than one match: assign based on which entry has the largest number of matching words
        imax = match_list.index(max(match_list))
        location = (temp_frac.loc[ifrac_list[imax],'Latitude'], temp_frac.loc[ifrac_list[imax],'Longitude'])
    elif num_match > 1:
        #or assign by hand if all candidates have the same number of matching words
        location = _tied_match_locations.get(frozenset(station_words))
        if location is None:
            print('Review', istation, 'here1')
            print(PHMSA_LNGstorage.iloc[istation,:])
            print(splitname1)
            print(temp_frac)
    else:
        #if there were no matching words, then assign by hand
        location = _unmatched_locations.get(frozenset(station_words))
        if location is None:
            print('Review', istation, 'here2')
            print(PHMSA_LNGstorage.iloc[istation,:])
            print(splitname1)

    if location is not None:
        PHMSA_LNGstorage.loc[istation,'Lat'] = location[0]
        PHMSA_LNGstorage.loc[istation,'Lon'] = location[1]

#display(PHMSA_LNGstorage)
print('QA/QC: Number of PHMSA LNG storage stations Missing Lat/Lons (should be at least 2 AK plants per year):', len(PHMSA_LNGstorage[PHMSA_LNGstorage['Lon']==0]))

##### Step 2.4.6 Put LNG Storage Station (Storage Capacity) onto CONUS Grid

In [None]:
# Grid the PHMSA LNG storage capacity (TOTAL_CAPACITY_BBLS) by station location and report year.
# Stations outside the CONUS box - including AK & HI and any station still at Lat/Lon = 0 - are summed
# into the off-grid totals instead.

# capacity per grid cell and year, and off-grid capacity per year
Map_LNGstations = np.zeros([len(Lat_01),len(Lon_01), num_years])
Map_LNGstations_nongrid = np.zeros([num_years])

year_values = np.asarray(year_range)
station_records = zip(PHMSA_LNGstorage['Lat'], PHMSA_LNGstorage['Lon'],
                      PHMSA_LNGstorage['REPORT_YEAR'], PHMSA_LNGstorage['TOTAL_CAPACITY_BBLS'])
for station_lat, station_lon, report_year, capacity_bbls in station_records:
    # position of the report year on the time axis
    iyear = np.where(year_values == report_year)[0][0]

    inside_domain = (Lon_left < station_lon < Lon_right) and (Lat_low < station_lat < Lat_up)
    if not inside_domain:
        Map_LNGstations_nongrid[iyear] += capacity_bbls
        continue

    ilat = int((station_lat - Lat_low)/Res01)
    ilon = int((station_lon - Lon_left)/Res01)
    Map_LNGstations[ilat,ilon,iyear] += capacity_bbls

print('QA/QC: Total Storage Capacity (bbl)')
for iyear, year_label in enumerate(year_range_str[:num_years]):
    print('YEAR ',year_label)
    print('on grid: ',np.sum(Map_LNGstations[:,:,iyear]))
    print('off grid: ',np.sum(Map_LNGstations_nongrid[iyear]))


#### Step 2.5 Read In Transmission Pipeline Miles & Transmission Miles Over Cropland

In [None]:
# 1) Read in the Enverus transmission pipeline data (pre-processed in ArcMap), only contains CONUS info
# 2) Read in cropland data (and re-grid from 0.01x0.01 to 0.1x0.1 resolution)
# 3) Make a proxy map with the length of pipeline in each grid cell. For grid cells where the cropland area
#    in that grid cell is > 0, count those miles towards the sum of pipeline miles intersecting crop land.
# 4) Calculate the fraction of transmission pipeline miles in AK & HI relative to the national
#    (CONUS + AK/HI) total
#    (assume all M&R Farm tap emissions occur in the CONUS region - alternatively, could also apply the same trans.
#    pipeline AKHI fraction to this emissions group in Step 4)
#
# When ReCalc_Cropland != 1 the previously saved intermediate outputs are loaded instead of recomputed.

if ReCalc_Cropland ==1:
    # Step 1)
    Env_TransPipelines_loc = pd.read_excel(Enverus_NG_Transpipeline_inputfile, usecols= "C:G", header = 0)
    # pipeline data represent a snapshot in time that is applied to the entire timeseries
    Map_EnvTrans_pipelines = np.zeros([len(Lat_01),len(Lon_01),num_years])
    Map_EnvTrans_pipelines_nongrid = np.zeros([num_years])
    Map_Farm_pipelines = np.zeros([len(Lat_01),len(Lon_01),num_years])
    # never incremented below: M&R farm-tap miles are assumed to lie entirely on the CONUS grid
    Map_Farm_pipelines_nongrid = np.zeros([num_years])
    Cropland = np.zeros([len(Lat_01),len(Lon_01),num_years])
    Cropland_nongrid = np.zeros([num_years])

    # allocation is based on the relative pipeline length in each grid cell (pre-processed in ArcGIS)
    # Note that the sum mileage in each grid cell != original dataset mileage due to changes when data was projected

    # Pull the pipeline columns out once; per-element pandas indexing inside the loops is very slow
    pipe_lon = Env_TransPipelines_loc['Longitude'].values
    pipe_lat = Env_TransPipelines_loc['Latitude'].values
    pipe_len = Env_TransPipelines_loc['SUM_Shape_'].values

    for iyear in np.arange(0,num_years):
        # Step 2) re-grid this year's cropland area from 0.01 to 0.1 degree resolution
        Cropland_001 = pd.read_csv(Cropland_001_inputfile+year_range_str[iyear]+'_001x001.csv')
        crop_lon = Cropland_001['FIRST_Longitude'].values
        crop_lat = Cropland_001['FIRST_Latitude'].values
        crop_area = Cropland_001['SUM_Area_AllCrops'].values
        del Cropland_001
        for lon, lat, area in zip(crop_lon, crop_lat, crop_area):
            if Lon_left < lon < Lon_right and Lat_low < lat < Lat_up:
                ilat = int((lat - Lat_low)/Res01)
                ilon = int((lon - Lon_left)/Res01)
                Cropland[ilat,ilon,iyear] += area
            else:
                Cropland_nongrid[iyear] += area

        # Step 3) grid the pipeline length; cells with any cropland also count towards the farm map
        for lon, lat, length in zip(pipe_lon, pipe_lat, pipe_len):
            if Lon_left < lon < Lon_right and Lat_low < lat < Lat_up:
                ilat = int((lat - Lat_low)/Res01)
                ilon = int((lon - Lon_left)/Res01)
                Map_EnvTrans_pipelines[ilat,ilon,iyear] += length
                if Cropland[ilat,ilon,iyear] > 0:
                    Map_Farm_pipelines[ilat,ilon,iyear] += length
            else:
                Map_EnvTrans_pipelines_nongrid[iyear] += length

        print('Year: ',year_range[iyear])
        print('Total Transmission Pipeline length on grid: ',np.sum(Map_EnvTrans_pipelines[:,:,iyear]))
        print('Total Transmission Pipeline length off grid: ',np.sum(Map_EnvTrans_pipelines_nongrid[iyear]))
        print('Total Transmission Pipeline length on cropland: ',np.sum(Map_Farm_pipelines[:,:,iyear]))
        # NOTE: this value is always zero because the farm "nongrid" array is never filled (see above)
        print('Total Transmission Pipeline length not on cropland: ',np.sum(Map_Farm_pipelines_nongrid[iyear]))


    #Step 4)
    #1. Open the CONUS transmission pipeline shapefile and sum the miles field
    #2. Open the AK/HI transmission pipeline shapefile and sum the miles field
    #3. Ratio the AKHI miles / (conus + AKHI miles)
    #4. Apply this ratio and subtract from the national pipeline emissions (save this fraction as 'not_mapped')
    # Assume all M&R tap emissions occur in the CONUS region.

    # 'with' closes the underlying shapefile handles once the records have been read
    with shp.Reader(AKHI_pipelines_shp) as shape:
        AKHI_miles = sum(rec['MILES'] for rec in shape.iterRecords())
    print('Miles in AK & HI: ', AKHI_miles)

    with shp.Reader(CONUS_pipelines_shp) as shape:
        CONUS_miles = sum(rec['MILES'] for rec in shape.iterRecords())
    print('Miles in CONUS: ', CONUS_miles)

    # Fraction of national miles located outside CONUS. The denominator is the national total
    # (CONUS + AK/HI), as described in the step list above; the previous code divided by the
    # CONUS miles only, which overstates the fraction.
    # NOTE(review): assumes CONUS_pipelines_shp holds CONUS-only mileage - confirm against the shapefile.
    CONUS_transpipe_ratio = AKHI_miles/(CONUS_miles + AKHI_miles)
    print('Fraction of Miles Outside CONUS: ', CONUS_transpipe_ratio)

    np.savez('./IntermediateOutputs/Pipelines_Transmission', x=Map_EnvTrans_pipelines, y=Map_EnvTrans_pipelines_nongrid)
    np.savez('./IntermediateOutputs/Pipelines_Farms', x=Map_Farm_pipelines, y=Map_Farm_pipelines_nongrid)
    np.save('./IntermediateOutputs/CONUS_TransPipeline_Ratio', CONUS_transpipe_ratio)
else:
    # Load the cached intermediate outputs written by a previous ReCalc_Cropland == 1 run.
    # NOTE: the cached ratio reflects the formula in use when the file was written; re-run with
    # ReCalc_Cropland = 1 to refresh it after any change to the calculation above.
    npzfile = np.load('./IntermediateOutputs/Pipelines_Transmission.npz')
    Map_EnvTrans_pipelines = npzfile['x']
    Map_EnvTrans_pipelines_nongrid = npzfile['y']
    npzfile = np.load('./IntermediateOutputs/Pipelines_Farms.npz')
    Map_Farm_pipelines = npzfile['x']
    Map_Farm_pipelines_nongrid = npzfile['y']
    CONUS_transpipe_ratio = np.load('./IntermediateOutputs/CONUS_TransPipeline_Ratio.npy')

    for iyear in np.arange(0, num_years):
        print('Year: ',year_range[iyear])
        print('Total Transmission Pipeline length on grid: ',np.sum(Map_EnvTrans_pipelines[:,:,iyear]))
        print('Total Transmission Pipeline length off grid: ',np.sum(Map_EnvTrans_pipelines_nongrid[iyear]))
        print('Total Transmission Pipeline length on cropland: ',np.sum(Map_Farm_pipelines[:,:,iyear]))
        print('Total Transmission Pipeline length not on cropland: ',np.sum(Map_Farm_pipelines_nongrid[iyear]))
    print('Fraction of Miles Outside CONUS: ', CONUS_transpipe_ratio)

#### Step 2.6 Transmission Compressor Stations

##### Step 2.6.1 Read in Enverus data

In [None]:
#Read in Enverus Midstream Transmission Compressor station data (onshore only)
# Need to read in AK/HI data and CONUS onshore data

#Step 1)
# Read in CONUS onshore stations. The workbook is parsed once; 'names'/'colnames' are kept
# because they were module-level variables in the original cell.
names = pd.read_excel(Enverus_NG_TransStations_inputfile, skiprows = 0, header = 0)
colnames = names.columns.values
# .copy() gives an independent frame so the column updates below do not act on a slice of 'names'
Enverus_Trans_CompStations = names[['NAME','OPERATOR','TYPE','FUEL_MCFD','HP','STATE_NAME','CNTY_NAME','Longitude','Latitude']].copy()
#Enverus_Trans_CompStations = Enverus_Trans_CompStations[~Enverus_Trans_CompStations['STATE_NAME'].isin(['Alaska','Hawaii'])]
Enverus_Trans_CompStations.reset_index(drop=True,inplace=True)
#print(colnames)

#Step 2)
# Estimate missing fuel usage from horsepower; fuel usage is used to allocate emissions below.
fuel_mcfd = Enverus_Trans_CompStations['FUEL_MCFD']
horsepower = Enverus_Trans_CompStations['HP']

# average fuel-to-HP ratio over the stations that report both values
both_reported = (fuel_mcfd > 0) & (horsepower > 0)
Fuel_HP_ratio = np.mean(fuel_mcfd[both_reported] / horsepower[both_reported])

# where fuel usage is exactly zero, fill with ratio * HP (stays zero when HP is zero)
fuel_mcfd = fuel_mcfd.mask(fuel_mcfd == 0, Fuel_HP_ratio * horsepower)

#if Fuel_MCFD is still zero, fill with the median of the positive values
median_mcfd = np.median(fuel_mcfd[fuel_mcfd > 0])
#print(median_mcfd)
fuel_mcfd = fuel_mcfd.mask(fuel_mcfd == 0, median_mcfd)
Enverus_Trans_CompStations['FUEL_MCFD'] = fuel_mcfd
display(Enverus_Trans_CompStations)

#Step 3)
#1. Count AK/HI transmission compressor stations
#2. Count CONUS onshore transmission compressor stations
# The AK/HI count is used later to remove the AK/HI fraction of the
# Transmission Compressor Stations emissions group (saved as 'not_mapped')

# 'with' closes the shapefile handles once the record count has been read
with shp.Reader(AKHI_transstat_shp) as shape:
    AKHI_counts = len(shape)
print('Stations in AK & HI: ', AKHI_counts)

with shp.Reader(CONUS_transstat_shp) as shape:
    CONUS_counts = len(shape)
print('Stations in CONUS: ', CONUS_counts)

#apply this fraction and subtract from the national pipeline emissions in step 4 (the non-grid data is zero)
#CONUS_transstat_ratio = AKHI_counts/(CONUS_counts + AKHI_counts)
#print('Fraction of Stations Outside CONUS: ', CONUS_transstat_ratio)

##### Step 2.6.2 Read In GHGRP Data

In [None]:
#a) Read in the GHGRP data
# emissions of methane reported in metric ton
facility_info = pd.read_csv(GHGRP_facility_inputfile)
facility_emissions = pd.read_excel(GHGRP_subpartw_inputfile,sheet_name = 'Export Worksheet')

# keep transmission-compression facilities with positive CH4 emissions, up to the last inventory year
keep_rows = (facility_emissions['INDUSTRY_SEGMENT'] =='Onshore natural gas transmission compression [98.230(a)(4)]') & \
            (facility_emissions['TOTAL_REPORTED_CH4_EMISSIONS'] >0) & \
            (facility_emissions['REPORTING_YEAR'] <= year_range[-1])
# reset_index (not in-place) returns a fresh frame, so the assignments below do not act on a slice
facility_emissions = facility_emissions[keep_rows].reset_index(drop=True)
#print(facility_emissions)

facility_emissions['State'] = ''
facility_emissions['County'] = ''
facility_emissions['City'] = ''
facility_emissions['Zip'] = 0
# coordinates are floats: initialise as float (as the per-year frame below does) to avoid int->float upcasts
facility_emissions['Lat'] = 0.0
facility_emissions['Lon'] = 0.0

#b) match GHGRP facility and emissions data
# for each entry in the data file (each facility each year), match the facility ID to the ID in the
# GHGRP facility info file, then append the corresponding location data to the emissions array
info_ids = facility_info['V_GHG_EMITTER_FACILITIES.FACILITY_ID'].values
location_columns = {'State':  'V_GHG_EMITTER_FACILITIES.STATE',
                    'County': 'V_GHG_EMITTER_FACILITIES.COUNTY',
                    'City':   'V_GHG_EMITTER_FACILITIES.CITY',
                    'Zip':    'V_GHG_EMITTER_FACILITIES.ZIP',
                    'Lat':    'V_GHG_EMITTER_FACILITIES.LATITUDE',
                    'Lon':    'V_GHG_EMITTER_FACILITIES.LONGITUDE'}
for index in np.arange(len(facility_emissions)):
    # first row of the facility table with this ID (IndexError if the ID is absent)
    ilocation = np.where(info_ids == facility_emissions.loc[index, 'FACILITY_ID'])[0][0]
    for target_col, info_col in location_columns.items():
        facility_emissions.loc[index, target_col] = facility_info[info_col][ilocation]


# make station-specific arrays for each year (with emissions in Tg)
print('QA/QC: Check that all GHGRP emissions are allocated to specific stations')
for iyear in np.arange(0,num_years):
    facility_emissions_temp = facility_emissions[facility_emissions['REPORTING_YEAR'] ==year_range[iyear]].reset_index(drop=True)
    GHGRP_transstations = pd.DataFrame({'FID':facility_emissions_temp['FACILITY_ID'].unique()})
    GHGRP_transstations['Name'] = ' '
    GHGRP_transstations['State'] = ' '
    GHGRP_transstations['County'] = ' '
    GHGRP_transstations['City'] = ' '
    GHGRP_transstations['Zip'] = 0
    GHGRP_transstations['Lat'] = 0.0
    GHGRP_transstations['Lon'] = 0.0
    GHGRP_transstations['TgCH4'] = 0.0

    #Put everything in per-station array: descriptive fields take the value of the last
    # matching row, emissions are summed over all rows of the facility (metric ton -> Tg)
    station_ids = GHGRP_transstations['FID'].values
    for idx in np.arange(len(facility_emissions_temp)):
        iFID = np.where(station_ids == facility_emissions_temp['FACILITY_ID'][idx])[0][0]
        GHGRP_transstations.loc[iFID,'Name']   = facility_emissions_temp['FACILITY_NAME'][idx]
        GHGRP_transstations.loc[iFID,'State']  = facility_emissions_temp['State'][idx]
        GHGRP_transstations.loc[iFID,'County'] = facility_emissions_temp['County'][idx]
        GHGRP_transstations.loc[iFID,'City']   = facility_emissions_temp['City'][idx]
        GHGRP_transstations.loc[iFID,'Zip']    = facility_emissions_temp['Zip'][idx]
        GHGRP_transstations.loc[iFID,'Lat']    = facility_emissions_temp['Lat'][idx]
        GHGRP_transstations.loc[iFID,'Lon']    = facility_emissions_temp['Lon'][idx]
        GHGRP_transstations.loc[iFID,'TgCH4'] += facility_emissions_temp['TOTAL_REPORTED_CH4_EMISSIONS'][idx]/1e6

    vars()['GHGRP_transstations'+'_'+year_range_str[iyear]] = GHGRP_transstations

    # QA: relative difference (as a fraction) between reported and allocated totals
    reported_Tg = facility_emissions_temp['TOTAL_REPORTED_CH4_EMISSIONS'].sum()/1e6
    allocated_Tg = GHGRP_transstations['TgCH4'].sum()
    mean_Tg = (reported_Tg + allocated_Tg)/2
    # a year without any reported emissions has nothing to mis-allocate: treat as zero difference
    diff1 = abs(reported_Tg - allocated_Tg)/mean_Tg if mean_Tg != 0 else 0.0
    #print(summary_emi)
    #print(sum_emi2[iyear])
    if diff1 < 0.0001:
        print('Year ', year_range[iyear],': PASS, difference < 0.01%')
    else:
        # diff1 is a fraction; convert to percent so the value matches the printed unit
        print('Year ', year_range[iyear],': FAIL: ', diff1*100,'%')
    print('Number of GHGRP Transmission Stations: ', len(vars()['GHGRP_transstations'+'_'+year_range_str[iyear]]))

##### Step 2.6.3. Match Transmission Compressor Stations between Enverus and GHGRP

In [None]:
# For each year of GHGRP data, match GHGRP transmission compressor stations to Enverus data
# (based on nearest location, not name)
# note there is only one available year of Enverus data
# also record the station daily fuel usage for later calculations
#
# Matching order for each GHGRP station:
#   1) exactly one Enverus station within +/-0.2 degrees in both lat and lon -> use it
#   2) several candidates -> use the closest; if several are equally close, flag them all and
#      assign the average of their non-zero fuel usage
#   3) no candidate -> fall back to the hand-curated table below (keyed by GHGRP facility ID)

DEBUG=0

# Hand-matched stations: GHGRP FID -> (Enverus column to match, value in that column, Enverus state)
hand_matches = {
    1008158: ('OPERATOR',  'Iroquois Gas Transmission System, LP', 'Connecticut'),
    1004777: ('NAME',      'Shevlin - 3',   'Minnesota'),
    1007536: ('NAME',      'Tionesta',      'California'),
    1007450: ('NAME',      'Kemmerer',      'Wyoming'),
    1003191: ('NAME',      'CS - 10',       'Mississippi'),
    1006232: ('NAME',      'Elberta',       'Utah'),
    1006523: ('CNTY_NAME', 'Ellis',         'Texas'),
    1004932: ('CNTY_NAME', 'Hansford',      'Texas'),
    1010481: ('CNTY_NAME', 'Elko',          'Nevada'),
    1002763: ('NAME',      'Dry Lake',      'Nevada'),
    1002762: ('NAME',      'Goodsprings',   'Nevada'),
    1005806: ('NAME',      'Anshutz',       'Wyoming'),
    1008941: ('NAME',      'Green River B', 'Wyoming'),
    1002761: ('NAME',      'Muddy Creek',   'Wyoming'),
    1009608: ('NAME',      'CS - 159',      'Oklahoma'),
    1008080: ('NAME',      'CS - 194',      'Kansas'),
    1006497: ('NAME',      'Plymouth',      'Washington'),
    1003308: ('NAME',      'Searcy',        'Arkansas'),
    1012158: ('NAME',      'CS - 310',      'Pennsylvania'),
    1011897: ('NAME',      'Webb County',   'Texas'),
}

# Enverus column -> GHGRP column that receives its value for a matched station
copied_columns = (('NAME', 'Env_name'), ('CNTY_NAME', 'Env_county'),
                  ('STATE_NAME', 'Env_state'), ('FUEL_MCFD', 'Env_FuelUseage'))

# Enverus coordinates do not change between years: extract them once
env_lat = Enverus_Trans_CompStations['Latitude'].values
env_lon = Enverus_Trans_CompStations['Longitude'].values

print('QA/QC: Number of GHGRP Trans. Compressor Stations not in Enverus dataset')
for iyear in np.arange(0,num_years):
    GHGRP_temp_data = vars()['GHGRP_transstations'+'_'+year_range_str[iyear]].copy()

    GHGRP_temp_data['match_flag'] = 0
    GHGRP_temp_data['Env_name'] = ''
    GHGRP_temp_data['Env_county'] = ''
    GHGRP_temp_data['Env_state'] = ''
    GHGRP_temp_data['Env_FuelUseage'] = 0.0  # float: receives fuel usage values
    Enverus_Trans_CompStations['match_flag'] = 0  # flags are re-evaluated for every year

    for istation in np.arange(0,len(GHGRP_temp_data)):
        station_lat = GHGRP_temp_data.loc[istation,'Lat']
        station_lon = GHGRP_temp_data.loc[istation,'Lon']

        # Enverus stations inside a +/-0.2 degree box around the GHGRP station
        matched = np.where((np.abs(env_lat - station_lat) < 0.2) & \
                           (np.abs(env_lon - station_lon) < 0.2))[0]

        env_idx = None  # Enverus row whose attributes are copied to this GHGRP station

        if np.size(matched)==1: #exactly one candidate
            env_idx = matched[0]

        elif np.size(matched) > 1: #several candidates: find the nearest
            # squared distance in degrees (only used for ranking)
            dist_calc = np.abs(station_lat - env_lat[matched])**2 + \
                        np.abs(station_lon - env_lon[matched])**2
            closest = matched[dist_calc == dist_calc.min()]  # Enverus rows at minimum distance

            if len(closest) == 1: #a single closest station
                env_idx = closest[0]
            else: #equally close stations: flag them all, assign the average of the non-zero fuel usage
                total_use = 0.0
                nonzero_use = 0
                for env_row in closest:
                    fuel_use = Enverus_Trans_CompStations.loc[env_row, 'FUEL_MCFD']
                    if fuel_use > 0:
                        total_use += fuel_use
                        nonzero_use += 1
                    Enverus_Trans_CompStations.loc[env_row,'match_flag'] = 1
                GHGRP_temp_data.loc[istation,'match_flag'] = 1
                # Env_name is intentionally left blank: no single Enverus station applies.
                # County/state come from the first tied Enverus row. (The index must be the Enverus
                # row, not the position inside the candidate list, which previously selected an
                # unrelated station.)
                GHGRP_temp_data.loc[istation,'Env_county'] = Enverus_Trans_CompStations.loc[closest[0], 'CNTY_NAME']
                GHGRP_temp_data.loc[istation,'Env_state'] = Enverus_Trans_CompStations.loc[closest[0], 'STATE_NAME']
                GHGRP_temp_data.loc[istation,'Env_FuelUseage'] = data_fn.safe_div(total_use,nonzero_use)

        else: #no nearby station: match stations by hand
            rule = hand_matches.get(GHGRP_temp_data.loc[istation,'FID'])
            if rule is None:
                if DEBUG ==1:
                    print(istation, matched)
                    display(GHGRP_temp_data.loc[istation,:])
            else:
                match_col, match_value, match_state = rule
                matched = np.where((Enverus_Trans_CompStations[match_col] == match_value) & \
                                   (Enverus_Trans_CompStations['STATE_NAME'] == match_state))[0]
                if len(matched) > 0:
                    env_idx = matched[0]
                elif DEBUG ==1:
                    # hand-match entry no longer present in the Enverus data: the station stays
                    # unmatched (and is counted in the QA/QC total) instead of raising IndexError
                    print(istation, 'hand match not found in Enverus data:', rule)

        if env_idx is not None:
            Enverus_Trans_CompStations.loc[env_idx,'match_flag'] = 1
            GHGRP_temp_data.loc[istation,'match_flag'] = 1
            for env_col, ghgrp_col in copied_columns:
                GHGRP_temp_data.loc[istation,ghgrp_col] = Enverus_Trans_CompStations.loc[env_idx, env_col]

    # store this year's results: annotated GHGRP stations and the Enverus stations left unmatched
    GHGRP_notmatched = GHGRP_temp_data[GHGRP_temp_data['match_flag'] == 0]
    vars()['GHGRP_transstations'+'_'+year_range_str[iyear]] = GHGRP_temp_data.copy()
    Env_notmatched = Enverus_Trans_CompStations[Enverus_Trans_CompStations['match_flag'] == 0].reset_index(drop=True)
    vars()['Env_Trans_CompStations_notmatched'+'_'+year_range_str[iyear]] = Env_notmatched.copy()

    print('Year ', year_range_str[iyear],': ', len(GHGRP_notmatched), ' of ', len(GHGRP_temp_data))

##### Step 2.6.4. Calculate the Average Emission to Fuel Usage Ratio for Matched Plants

In [None]:
# For each year, compute every GHGRP station's emissions (Tg CH4) per unit of matched Enverus
# fuel usage, then average over the stations with a non-zero ratio. The per-station ratio is
# stored in the 'Emis_fuel_ratio' column of that year's GHGRP station table.
avg_emis_fueluse_ratio = np.zeros([num_years])
GHGRP_station_emi_median = np.zeros([num_years]) #in Tg (currently not filled, see commented line below)

print('QA/QC: Average Emissions to Fuel Usage Ratio')
for iyear in np.arange(0,num_years):
    GHGRP_temp_data = vars()['GHGRP_transstations'+'_'+year_range_str[iyear]].copy()

    # float column: the ratios are fractional, so an integer column would have to be upcast
    GHGRP_temp_data['Emis_fuel_ratio'] = 0.0
    for iplant in np.arange(0,len(GHGRP_temp_data)):
        GHGRP_temp_data.loc[iplant, 'Emis_fuel_ratio'] = data_fn.safe_div(GHGRP_temp_data.loc[iplant, 'TgCH4'], \
                                                                        GHGRP_temp_data.loc[iplant, 'Env_FuelUseage'])
    # zero ratios are turned into NaN so that they are left out of the average
    GHGRP_temp_data['Emis_fuel_ratio'] = GHGRP_temp_data['Emis_fuel_ratio'].replace({0:np.nan})
    avg_emis_fueluse_ratio[iyear] = GHGRP_temp_data['Emis_fuel_ratio'].mean(skipna=True)
    #GHGRP_station_emi_median[iyear] = np.median(GHGRP_temp_data.loc[:, 'TgCH4'])

    vars()['GHGRP_transstations'+'_'+year_range_str[iyear]] = GHGRP_temp_data.copy()
    print('Year ', year_range_str[iyear],': ', 'avg ratio', avg_emis_fueluse_ratio[iyear])# ',','median emi (Tg)', GHGRP_station_emi_median[iyear])
    


##### Step 2.6.5. Map Emissions to CONUS grid

In [None]:
# Grid the transmission compressor station emissions (Tg CH4) for each year:
#   - every GHGRP station contributes its reported emissions at its GHGRP coordinates
#   - every Enverus station that was not matched to a GHGRP station contributes an estimate,
#     fuel usage * that year's average emissions-to-fuel-usage ratio
# Stations outside the grid bounds are accumulated in the per-year "nongrid" total.
# AK/HI Note: AK/HI compressor transmission stations are not in the Enverus datasets.
# Therefore, AK/HI emissions will be split from the CONUS region using the relative station counts on and off-grid.
# The Enverus AK/HI station counts are processed from shapefiles above in 2.6.1. Counts of off-grid stations
# found here are added to that total each year and the ratio of off-grid vs all stations is used to remove
# the AK/HI fraction of national emissions in Step 4 below.

map_TransCompStations = np.zeros((len(Lat_01), len(Lon_01), num_years))
map_TransCompStations_nongrid = np.zeros(num_years)
CONUS_transstat_ratio = np.zeros(num_years)

print('QA/QC: Transmission Station Emissions Gridded:')
for iyear in range(num_years):
    year_tag = year_range_str[iyear]
    stations_ongrid = 0
    stations_nongrid = 0

    # --- GHGRP stations: reported emissions (all stations, matched or not) ---
    GHGRP_temp_data = vars()['GHGRP_transstations_' + year_tag].copy()
    for istation in range(len(GHGRP_temp_data)):
        station_lon = GHGRP_temp_data.loc[istation, 'Lon']
        station_lat = GHGRP_temp_data.loc[istation, 'Lat']
        inside_domain = (Lon_left < station_lon < Lon_right) and (Lat_low < station_lat < Lat_up)

        if not inside_domain:
            map_TransCompStations_nongrid[iyear] += GHGRP_temp_data.loc[istation, 'TgCH4']
            stations_nongrid += 1
            continue

        ilat = int((station_lat - Lat_low)/Res01)
        ilon = int((station_lon - Lon_left)/Res01)
        map_TransCompStations[ilat, ilon, iyear] += GHGRP_temp_data.loc[istation, 'TgCH4']
        stations_ongrid += 1

    # --- Enverus stations without a GHGRP match: emissions estimated from fuel usage ---
    Env_temp_data = vars()['Env_Trans_CompStations_notmatched_' + year_tag].copy()
    for istation in range(len(Env_temp_data)):
        station_lon = Env_temp_data.loc[istation, 'Longitude']
        station_lat = Env_temp_data.loc[istation, 'Latitude']
        fuel_use = Env_temp_data.loc[istation, 'FUEL_MCFD']
        inside_domain = (Lon_left < station_lon < Lon_right) and (Lat_low < station_lat < Lat_up)

        # every station is counted, but only positive fuel usage adds emissions
        if inside_domain:
            stations_ongrid += 1
        else:
            stations_nongrid += 1
        if not fuel_use > 0:
            continue

        if inside_domain:
            ilat = int((station_lat - Lat_low)/Res01)
            ilon = int((station_lon - Lon_left)/Res01)
            map_TransCompStations[ilat, ilon, iyear] += fuel_use*avg_emis_fueluse_ratio[iyear]
        else:
            map_TransCompStations_nongrid[iyear] += fuel_use*avg_emis_fueluse_ratio[iyear]

    # keep per-year copies of the tables used above
    vars()['GHGRP_plants_' + year_tag] = GHGRP_temp_data.copy()
    vars()['Env_Trans_CompStations_notmatched_' + year_tag] = Env_temp_data.copy()

    # AK/HI: add the Enverus AK/HI station count (held constant each year) to the off-grid count,
    # then store the off-grid share of all stations for use in Step 4
    stations_nongrid += AKHI_counts
    CONUS_transstat_ratio[iyear] = stations_nongrid/(stations_ongrid + stations_nongrid)

    print('Year: ', year_tag)
    print('On grid (Tg):',np.sum(map_TransCompStations[:, :, iyear]), ', stations:',stations_ongrid)
    print('Off grid (Tg):',np.sum(map_TransCompStations_nongrid[iyear]),', stations:', stations_nongrid)

#### Step 2.7  Storage Compressor Stations

##### Step 2.7.1 Read In EIA Storage Field Data

In [None]:
#EIA Storage Field Capacities
# The workbook is parsed once; 'names'/'colnames' are kept because they were module-level
# variables in the original cell.
names = pd.read_excel(EIA_StorFields_inputfile, skiprows = 0, header = 0)
colnames = names.columns.values
EIA_StorFields = names[['Year','Report State ', 'Company Name','Field Name','Reservoir Name','County Name',\
                        'Status','Total Field Capacity(Mcf)']]
# filter for active storage fields only; .copy() makes the result an independent frame so that
# later column assignments do not act on a slice of the full table
EIA_StorFields = EIA_StorFields[EIA_StorFields['Status']== 'Active'].copy()
EIA_StorFields.reset_index(drop=True,inplace=True)


##### Step 2.7.2 Read In Enverus Gas Storage Field Data

In [None]:
# Enverus natural gas storage fields (Enverus_NG_StorFields_inputfile)
# The workbook is parsed once (the first row is the header); re-reading it with its own column
# names passed back in produced an identical table at twice the I/O cost.
names = pd.read_excel(Enverus_NG_StorFields_inputfile, skiprows=0, header=0)
colnames = names.columns.values
#print(colnames)

# Columns retained for the storage-field matching
Env_keep_columns = ['STATUS', 'RESERVOIR', 'NAME', 'OPERATOR', 'STATE_NAME', 'CNTY_NAME', 'FLDCAPMMCF',
                    'Latitude', 'Longitude']

# keep operational fields only, then renumber the rows 0..N-1
# (reset_index returns a new frame, so later column assignments act on an independent table)
Env_StorFields = names.loc[names['STATUS'] == 'Operational', Env_keep_columns].reset_index(drop=True)

##### Step 2.7.3 Read In Enverus Storage Compressor Data

In [None]:
# Read In Enverus Compressor Station Data (then use this to find the fields with compressor stations)
# The workbook is parsed once (the first row is the header); re-reading it with its own column
# names passed back in produced an identical table at twice the I/O cost.
names = pd.read_excel(Enverus_NG_StorStations_inputfile, skiprows=0, header=0)
colnames = names.columns.values

# Keep only the identifying and location columns; rows are numbered 0..N-1
Enverus_Storage_CompStations = names[['NAME', 'OPERATOR', 'TYPE', 'STATE_NAME', 'CNTY_NAME',
                                      'Longitude', 'Latitude']].reset_index(drop=True)

##### Step 2.7.4. Find Which Enverus Storage Fields have Storage Compressor Stations, save location

In [None]:
# Loop through each Enverus storage field to find where there is also a storage compressor station.
# Matching cascade (the first rule that succeeds wins):
#   1. Exact match on NAME. If several stations share the name, the OPERATOR must match as well.
#   2. If no station matches on name: a station within 0.01 degrees in both latitude and longitude
#      (name mismatches are likely due to slight spelling differences).
#   3. If still nothing: a station within 0.05 degrees in both directions AND in the same county.
# If none of these criteria are met, there is no compressor station at that field (Comp_flag stays 0).
#
# For matched fields, Comp_flag is set to 1 and the station's lat/lon are stored in Comp_lat/Comp_lon.
# The state abbreviation of every field is also stored in 'State'.

Env_StorFields['Comp_flag'] = 0
# Coordinates are initialised as floats: station coordinates are copied into these columns below, and
# recent pandas versions deprecate silently upcasting an integer column when a float is assigned.
Env_StorFields['Comp_lat'] = 0.0
Env_StorFields['Comp_lon'] = 0.0
Env_StorFields['State'] = ''


def assign_comp_station(ifield, istation):
    """Flag storage field row `ifield` as having a compressor station and copy the
    coordinates of compressor station row `istation` onto it.

    When several stations match a field (e.g. duplicate entries that are identical except for
    the Enverus ID), the caller passes the first one, so exactly one station is assigned per field.
    (Comp_flag could alternatively hold the number of matches if there is actually >1 station per field.)
    """
    Env_StorFields.loc[ifield, 'Comp_flag'] = 1
    Env_StorFields.loc[ifield, 'Comp_lat'] = Enverus_Storage_CompStations.loc[istation, 'Latitude']
    Env_StorFields.loc[ifield, 'Comp_lon'] = Enverus_Storage_CompStations.loc[istation, 'Longitude']


nomatch = 0
for ifield in range(len(Env_StorFields)):
    # Translate the full state name into its abbreviation
    # (an IndexError is raised here if the state name is not present in State_ANSI)
    matched_state = np.where(Env_StorFields.loc[ifield, 'STATE_NAME'] == State_ANSI['name'])[0]
    Env_StorFields.loc[ifield, 'State'] = State_ANSI.loc[matched_state[0], 'abbr']

    # Rule 1: compressor stations with exactly the same name as the field
    matched = np.where(Env_StorFields.loc[ifield, 'NAME'] == Enverus_Storage_CompStations['NAME'])[0]
    if np.size(matched) > 1:
        # several stations share the name - narrow down by operator
        # (best_match holds positions within `matched`, not station row numbers)
        best_match = np.where(Env_StorFields.loc[ifield, 'OPERATOR'] ==
                              Enverus_Storage_CompStations.loc[matched, 'OPERATOR'])[0]
        if np.size(best_match) >= 1:
            assign_comp_station(ifield, matched[best_match[0]])
        else:
            # name matches more than one station, but the operator matches none of them
            nomatch += 1
    elif np.size(matched) == 1:
        assign_comp_station(ifield, matched[0])
    else:
        # Rule 2: no name match - look for a station at (nearly) the same location
        lat_offset = np.abs(Env_StorFields.loc[ifield, 'Latitude'] - Enverus_Storage_CompStations['Latitude'])
        lon_offset = np.abs(Env_StorFields.loc[ifield, 'Longitude'] - Enverus_Storage_CompStations['Longitude'])
        best_match = np.where((lat_offset < 0.01) & (lon_offset < 0.01))[0]
        if np.size(best_match) >= 1:
            assign_comp_station(ifield, best_match[0])
        else:
            # Rule 3: widen the search radius, but require the same county
            same_county = Env_StorFields.loc[ifield, 'CNTY_NAME'] == Enverus_Storage_CompStations['CNTY_NAME']
            best_match = np.where((lat_offset < 0.05) & (lon_offset < 0.05) & same_county)[0]
            if np.size(best_match) >= 1:
                assign_comp_station(ifield, best_match[0])
            else:
                nomatch += 1
#print('NO MATCH', nomatch)

print('Number of Enverus Fields w/ Compressor Stations: ',len(Env_StorFields[Env_StorFields['Comp_flag']==1]))
print('Number of Enverus Fields w/out Compressor Stations: ',len(Env_StorFields[Env_StorFields['Comp_flag']==0]))

##### Step 2.7.5 Match the EIA and Enverus Storage Field/Station Data, record associated Stor. Compressor Station locations

In [None]:
# First clean up/correct mistakes in arrays
# Normalise the text columns that are used to match EIA and Enverus storage fields:
#   - Enverus county, reservoir, field name and operator are lower-cased
#   - missing EIA reservoir/county names are replaced by the placeholder string 'NaN'
#   - punctuation is removed from the reservoir names (and '.' from the Enverus field names)
#   - trailing whitespace is removed from the EIA field and reservoir names
# All removals use literal characters with regex=False so the result does not depend on the pandas
# version: the default of `regex` in Series.str.replace changed from True to False in pandas 2.0,
# which leaves an escaped pattern such as r"\(" unmatched and the parentheses in place.
for clean_col in ['CNTY_NAME', 'RESERVOIR', 'NAME', 'OPERATOR']:
    Env_StorFields[clean_col] = Env_StorFields[clean_col].str.lower()

for clean_col in ['Reservoir Name', 'County Name']:
    EIA_StorFields[clean_col] = EIA_StorFields[clean_col].replace({np.nan: 'NaN'})

for strip_char in ['-', '.']:
    Env_StorFields['RESERVOIR'] = Env_StorFields['RESERVOIR'].str.replace(strip_char, '', regex=False)
Env_StorFields['NAME'] = Env_StorFields['NAME'].str.replace('.', '', regex=False)

for strip_char in ['-', '(', ')', '.']:
    EIA_StorFields['Reservoir Name'] = EIA_StorFields['Reservoir Name'].str.replace(strip_char, '', regex=False)

for clean_col in ['Field Name', 'Reservoir Name']:
    EIA_StorFields[clean_col] = EIA_StorFields[clean_col].str.rstrip()
# Manual corrections to individual storage-field records.
# Each EIA entry is: (report state, {column: value the row must currently hold, ...},
#                     column to overwrite, replacement value)
# A row is corrected only if it matches the state AND every listed column value exactly.
# Entries are applied in the order listed.
_CNTY, _FLD, _RES = 'County Name', 'Field Name', 'Reservoir Name'

EIA_manual_corrections = [
    ('CA', {_CNTY: 'Butte'}, _CNTY, 'colusa'),
    ('IA', {_CNTY: 'Winnebago'}, _CNTY, 'washington'),
    ('IL', {_CNTY: 'La Salle'}, _CNTY, 'lasalle'),
    ('IL', {_CNTY: 'Coles', _RES: 'NIAGARIAN'}, _CNTY, 'peoria'),
    ('IL', {_CNTY: 'Coles', _RES: 'NIAGARAN'}, _CNTY, 'peoria'),
    ('IL', {_CNTY: 'Coles', _RES: 'GLASFORD'}, _CNTY, 'peoria'),
    ('IL', {_CNTY: 'Logan', _RES: 'GALESVILLE'}, _CNTY, 'warren'),
    ('IL', {_CNTY: 'Coles', _RES: 'BENOIST'}, _CNTY, 'bond'),
    ('IL', {_CNTY: 'Douglas', _RES: 'CYPRESS  ROSICL'}, _CNTY, 'moultrie'),
    ('IL', {_CNTY: 'Douglas', _RES: 'CYPRESS ROSICLARE'}, _CNTY, 'moultrie'),
    ('IL', {_CNTY: 'Mclean', _FLD: 'PECATONICA'}, _CNTY, 'winnebago'),
    ('IL', {_CNTY: 'Montgomery', _FLD: 'HILLSBORO'}, _CNTY, 'st. clair'),
    ('IN', {_CNTY: 'Daviess', _FLD: 'WHITE RIVER'}, _CNTY, 'pike'),
    ('IN', {_CNTY: 'Clark', _FLD: 'WOLCOTT'}, _CNTY, 'white'),
    ('KS', {_CNTY: 'Woodson', _FLD: 'PIQUA'}, _CNTY, 'allen'),
    ('KS', {_CNTY: 'NaN', _FLD: 'PIQUA'}, _CNTY, 'allen'),
    ('KS', {_CNTY: 'Morris', _FLD: 'BOEHM'}, _CNTY, 'morton'),
    ('KY', {_CNTY: 'Hart', _FLD: 'MAGNOLIA UPPER'}, _CNTY, 'larue'),
    ('KY', {_CNTY: 'Hart', _FLD: 'MAGNOLIA DEEP'}, _CNTY, 'larue'),
    ('KY', {_CNTY: 'Meade', _FLD: 'DOE RUN'}, _CNTY, 'hardin'),
    ('KY', {_CNTY: 'Daviess', _FLD: 'EAST DIAMOND'}, _CNTY, 'hopkins'),
    ('KY', {_CNTY: 'Christian', _FLD: 'CROFTON EAST'}, _CNTY, 'hopkins'),
    ('LA', {_CNTY: 'Ascension', _FLD: 'NAPOLEON'}, _CNTY, 'assumption parish'),
    ('LA', {_CNTY: 'Ascension', _FLD: 'NAPOLEONVILLE'}, _CNTY, 'assumption parish'),
    ('LA', {_CNTY: 'East Carroll', _FLD: 'EPPS'}, _CNTY, 'west carroll parish'),
    ('LA', {_CNTY: 'W. Carroll', _FLD: 'EPPS'}, _CNTY, 'west carroll parish'),
    ('LA', {_CNTY: 'Iberia', _FLD: 'JEFFERSON ISLAN'}, _CNTY, 'vermilion parish'),
    ('LA', {_CNTY: 'Iberia', _FLD: 'JEFFERSON ISLAND'}, _CNTY, 'vermilion parish'),
    ('MI', {_CNTY: 'Oakland', _FLD: 'LYON 29'}, _CNTY, 'washtenaw'),
    # the next two entries overwrite the reservoir name rather than the county
    ('MI', {_CNTY: 'St. Clair', _FLD: 'MARYSVILLE STORAGE'}, _RES, 'morton 16'),
    ('MI', {_CNTY: 'St. Clair', _FLD: 'MARYSVILLE STOR'}, _RES, 'morton 16'),
    ('MI', {_CNTY: 'St. Clair', _FLD: 'LEE 2'}, _CNTY, 'calhoun'),
    ('MI', {_CNTY: 'St. Clair', _FLD: 'LEE 11'}, _CNTY, 'calhoun'),
    ('MI', {_CNTY: 'NaN', _FLD: 'WINTERFIELD'}, _CNTY, 'clare'),
    ('MI', {_CNTY: 'NaN', _FLD: 'CRANBERRY LAKE'}, _CNTY, 'clare'),
    ('MI', {_CNTY: 'NaN', _FLD: 'HESSEN'}, _CNTY, 'st. clair'),
    ('MI', {_CNTY: 'NaN', _FLD: 'IRA'}, _CNTY, 'st. clair'),
    ('MI', {_CNTY: 'NaN', _FLD: 'FOUR CORNERS'}, _CNTY, 'st. clair'),
    ('MI', {_CNTY: 'NaN', _FLD: 'SWAN CREEK'}, _CNTY, 'st. clair'),
    ('MI', {_CNTY: 'NaN', _FLD: 'PUTTYGUT'}, _CNTY, 'st. clair'),
    ('MI', {_CNTY: 'NaN', _FLD: 'WINFIELD'}, _CNTY, 'montcalm'),
    ('MI', {_CNTY: 'St. Clair', _FLD: 'TAGGART'}, _CNTY, 'montcalm'),
    ('MI', {_CNTY: 'NaN', _FLD: 'LOREED'}, _CNTY, 'osceola'),
    ('MN', {_CNTY: 'Waseca', _FLD: 'WATERVILLE'}, _CNTY, 'steele'),
    ('MS', {_CNTY: 'Adams', _FLD: 'NEW HOME DOME'}, _CNTY, 'smith'),
    ('MS', {_CNTY: 'Jasper', _FLD: 'NEW HONE DOME'}, _CNTY, 'smith'),
    ('MS', {_CNTY: 'Monroe', _FLD: 'GOODWIN'}, _CNTY, 'itawamba'),
    ('MS', {_CNTY: 'Monroe', _FLD: 'GOODWIN STORAGE'}, _CNTY, 'itawamba'),
    ('MS', {_CNTY: 'NaN', _FLD: 'HATTIESBURG'}, _CNTY, 'forrest'),
    ('MS', {_CNTY: 'Montgomery', _FLD: 'SOUTHERN PINES'}, _CNTY, 'greene'),
    ('MT', {_CNTY: 'Blaine', _FLD: 'DRY CREEK'}, _CNTY, 'carbon'),
    ('NY', {_CNTY: 'Medina', _FLD: 'BENNINGTON STOR'}, _CNTY, 'wyoming'),
    ('NY', {_CNTY: 'Erie', _FLD: 'BENNINGTON STOR'}, _CNTY, 'wyoming'),
    ('NY', {_CNTY: 'Erie', _FLD: 'BENNINGTON STORAGE'}, _CNTY, 'wyoming'),
    ('NY', {_CNTY: 'Kings', _FLD: 'BEECH HILL STORAGE'}, _CNTY, 'allegany'),
    ('NY', {_CNTY: 'Putnam', _FLD: 'SENECA LAKE STORAGE'}, _CNTY, 'schuyler'),
    ('NY', {_CNTY: 'Putnam', _FLD: 'DUNDEE'}, _CNTY, 'schuyler'),
    ('OH', {_CNTY: 'Hocking', _FLD: 'CRAWFORD'}, _CNTY, 'fairfield'),
    ('OH', {_CNTY: 'NaN', _FLD: 'BRINKER'}, _CNTY, 'columbiana'),
    ('OH', {_CNTY: 'Wayne', _FLD: 'GABOR WERTZ'}, _CNTY, 'summit'),
    ('OH', {_CNTY: 'Hancock', _FLD: 'BENTON'}, _CNTY, 'hocking'),
    ('OH', {_CNTY: 'Wayne', _FLD: 'HOLMES'}, _CNTY, 'holmes'),
    ('OK', {_CNTY: 'Grady', _FLD: 'SALT PLAINS STO'}, _CNTY, 'grant'),
    ('PA', {_CNTY: 'Warren', _FLD: 'EAST BRANCH STO'}, _CNTY, 'mckean'),
    ('PA', {_CNTY: 'Warren', _FLD: 'EAST BRANCH STORAGE'}, _CNTY, 'mckean'),
    ('PA', {_CNTY: 'Potter', _FLD: 'LEIDY TAMARACK'}, _CNTY, 'clinton'),
    ('PA', {_CNTY: 'NaN', _FLD: 'LEIDY TAMARACK'}, _CNTY, 'clinton'),
    ('PA', {_CNTY: 'Allegheny', _FLD: 'WEBSTER'}, _CNTY, 'westmoreland'),
    ('PA', {_CNTY: 'Allegheny', _FLD: 'RAGER MOUNTAIN'}, _CNTY, 'cambria'),
    ('PA', {_CNTY: 'Mercer', _FLD: 'HENDERSON STORA'}, _CNTY, 'venango'),
    ('PA', {_CNTY: 'Mercer', _FLD: 'HENDERSON STORAGE'}, _CNTY, 'venango'),
    ('TX', {_CNTY: 'Fort Bend', _FLD: 'KATY HUB & STOR'}, _CNTY, 'waller'),
    ('TX', {_CNTY: 'Fort Bend', _FLD: 'KATY HUB & STORA'}, _CNTY, 'waller'),
    ('WV', {_CNTY: 'Doddridge', _FLD: 'SHIRLEY'}, _CNTY, 'tyler'),
    ('WV', {_CNTY: 'Raleigh', _FLD: 'RALEIGH CITY'}, _CNTY, 'wyoming'),
    ('WV', {_CNTY: 'Kanawha', _FLD: 'RALEIGH CITY'}, _CNTY, 'wyoming'),
    ('WY', {_CNTY: 'Fremont', _FLD: 'BUNKER HILL'}, _CNTY, 'carbon'),
    ('WV', {_CNTY: 'Wirt', _FLD: 'ROCKPORT'}, _CNTY, 'wood'),
    ('WV', {_CNTY: 'Ritchie', _FLD: 'RACKET  NEW BER'}, _CNTY, 'gilmer'),
    ('WV', {_CNTY: 'Ritchie', _FLD: 'RACHET-NEWBERNE'}, _CNTY, 'gilmer'),
    ('TX', {_CNTY: 'NaN', _FLD: 'WEST CLEAR LAKE'}, _CNTY, 'harris'),
    ('TX', {_CNTY: 'Bastrop', _FLD: 'PIERCE JUNCTION'}, _CNTY, 'harris'),
    # reservoir / field name spelling fixes (no county criterion)
    ('TX', {_RES: 'DW69'}, _RES, 'dw 6'),
    ('TX', {_RES: 'DW 69'}, _RES, 'dw 6'),
    ('PA', {_FLD: 'ST  MARYS STORAGE'}, _FLD, 'ST MARYS STORAGE'),
]

for fix_state, fix_criteria, fix_column, fix_value in EIA_manual_corrections:
    # start from all rows of the state, then require every listed column to hold the listed value
    fix_rows = EIA_StorFields['Report State '] == fix_state
    for crit_column, crit_value in fix_criteria.items():
        fix_rows = fix_rows & (EIA_StorFields[crit_column] == crit_value)
    EIA_StorFields.loc[fix_rows, fix_column] = fix_value

# Enverus field names in Kansas: rename 'welda (north)' / 'welda (south)'
for old_name, new_name in [('welda (north)', 'north welda'), ('welda (south)', 'south welda')]:
    env_rows = (Env_StorFields['State'] == 'KS') & (Env_StorFields['NAME'] == old_name)
    Env_StorFields.loc[env_rows, 'NAME'] = new_name

# Second, loop through each EIA storage field, find the matching field in Enverus (by county, state, reservoir,
# company, operator, etc.) and record the associated storage compressor station location (if the field has one).
# NOTE: As of March 2022, there are ~15 EIA fields that could not be matched to the Enverus dataset. In this
# case, these fields are assumed to have zero compressor stations and are not accounted for in the national
# or CONUS total capacity calculations (used to calculate emissions based on ratio to GHGRP data).

# Result columns filled in by the matching loop below.
# Lat/Lon start as floats because the Enverus compressor station coordinates (Comp_lat/Comp_lon) are
# copied into them; recent pandas versions deprecate silently upcasting an integer column on assignment.
EIA_StorFields['Lat'] = 0.0
EIA_StorFields['Lon'] = 0.0
EIA_StorFields['Comp_flag'] = 0
DEBUG = 0

print('QA/QC: The following EIA fields could not be matched to Enverus Gas Storage Fields')
print('Assume that these fields have 0 storage compressor stations')

for ifield in np.arange(0,len(EIA_StorFields)):
    #first match based on state and county, then match either reservoir or name
    matched = np.where((EIA_StorFields['Report State '][ifield] == Env_StorFields['State']) & \
                           (Env_StorFields['CNTY_NAME'].str.contains(EIA_StorFields['County Name'][ifield][0:5].lower())))[0]
    #print(ifield)
    if EIA_StorFields['Reservoir Name'][ifield][0:10] != 'NaN':
        #for all the fields in the same state and county, choose the field that is in the same reservoir
        best_match =  np.where(Env_StorFields['RESERVOIR'][matched].str.contains(EIA_StorFields['Reservoir Name'][ifield][0:10].lower()))[0]
        
        if np.size(best_match) == 1:
            loc = matched[best_match[0]]
            EIA_StorFields.loc[ifield,'Lat'] = Env_StorFields.loc[loc,'Comp_lat']
            EIA_StorFields.loc[ifield,'Lon'] = Env_StorFields.loc[loc,'Comp_lon']
            EIA_StorFields.loc[ifield,'Comp_flag'] = Env_StorFields.loc[loc,'Comp_flag']
        elif np.size(best_match) >1:
            # there is more than one field in the state, county, and reservoir - assign based on either matching company or field name, if
            # still more than one match, assign manually
            #print('>1 match')
            if 'liberty north' in EIA_StorFields.loc[ifield,'Field Name'].lower():
                better_match = np.where(Env_StorFields['NAME'][matched[best_match]].str.contains('liberty north'))[0]
                loc = matched[best_match[better_match[0]]]
            elif 'liberty south' in EIA_StorFields.loc[ifield,'Field Name'].lower():
                better_match = np.where(Env_StorFields['NAME'][matched[best_match]].str.contains('liberty south'))[0]
                loc = matched[best_match[better_match[0]]]
            elif 'st  charles' in EIA_StorFields.loc[ifield,'Field Name'].lower():
                better_match = np.where(Env_StorFields['NAME'][matched[best_match]].str.contains('st charles'))[0]
                loc = matched[best_match[better_match[0]]] 
            elif 'east diamond' in EIA_StorFields.loc[ifield,'Field Name'].lower():
                better_match = np.where(Env_StorFields['NAME'][matched].str.contains('east diamond'))[0]
                loc = matched[better_match[0]]
            elif 'crofton east' in EIA_StorFields.loc[ifield,'Field Name'].lower():
                better_match = np.where(Env_StorFields['NAME'][matched[best_match]].str.contains('kirkwood springs'))[0]
                loc = matched[best_match[better_match[0]]] 
            elif 'cold springs' in EIA_StorFields.loc[ifield,'Field Name'].lower():
                better_match = np.where(Env_StorFields['NAME'][matched][0:14].str.contains(EIA_StorFields.loc[ifield,'Field Name'].lower()))[0]
                loc = matched[better_match[0]]
            elif 'niagaran' in EIA_StorFields.loc[ifield,'Reservoir Name'].lower() :
                if 'belle river' in EIA_StorFields.loc[ifield,'Field Name'].lower():
                    better_match = np.where(Env_StorFields['NAME'][matched].str.contains(EIA_StorFields.loc[ifield,'Field Name'].lower()))[0]
                    loc = matched[better_match[0]]
                else:
                    better_match = np.where(Env_StorFields['NAME'][matched[best_match]].str.contains(EIA_StorFields.loc[ifield,'Field Name'][0:12].lower()))[0]
                    loc = matched[best_match[better_match[0]]]
            elif EIA_StorFields.loc[ifield,'Reservoir Name'].lower() == 'niagaran':
                better_match = np.where(Env_StorFields['NAME'][matched].str.contains(EIA_StorFields.loc[ifield,'Field Name'].lower()))[0]
                loc = matched[better_match[0]]
            elif 'washington ' in EIA_StorFields.loc[ifield,'Field Name'].lower():
                better_match = np.where(Env_StorFields['NAME'][matched][0:13].str.contains(EIA_StorFields.loc[ifield,'Field Name'].lower()))[0]
                loc = matched[better_match[0]]
            elif 'greenwood' in EIA_StorFields.loc[ifield,'Field Name'].lower():
                better_match = np.where(Env_StorFields['NAME'][matched][0:13].str.contains(EIA_StorFields.loc[ifield,'Field Name'].lower()))[0]
                loc = matched[better_match[0]]
            elif 'amory storage' in EIA_StorFields.loc[ifield,'Field Name'].lower():
                better_match = np.where(Env_StorFields['NAME'][matched].str.contains(EIA_StorFields.loc[ifield,'Field Name'].lower()[0:5]))[0]
                loc = matched[better_match[0]]
            elif 'derby storage' in EIA_StorFields.loc[ifield,'Field Name'].lower():
                better_match = np.where(Env_StorFields['NAME'][matched].str.contains(EIA_StorFields.loc[ifield,'Field Name'].lower()[0:5]))[0]
                loc = matched[better_match[0]]
            elif 'zane storage' in EIA_StorFields.loc[ifield,'Field Name'].lower():
                better_match = np.where(Env_StorFields['NAME'][matched[best_match]].str.contains('zane'))[0]
                loc = matched[best_match[better_match[0]]]
            elif 'artemas ' in EIA_StorFields.loc[ifield,'Field Name'].lower():
                better_match = np.where(Env_StorFields['NAME'][matched][0:11].str.contains(EIA_StorFields.loc[ifield,'Field Name'].lower()))[0]
                loc = matched[better_match[0]]
            elif 'ellisburg' in EIA_StorFields.loc[ifield,'Field Name'].lower():
                better_match = np.where(Env_StorFields['OPERATOR'][matched[best_match]].str.contains('berkshire'))[0]
                loc = matched[best_match[better_match[0]]]
            elif 'stratton ridge' in EIA_StorFields.loc[ifield,'Field Name'].lower():
                better_match = np.where(Env_StorFields['OPERATOR'][matched[best_match]].str.contains('freeport'))[0]
                loc = matched[best_match[better_match[0]]]
            elif 'spindletop' in EIA_StorFields.loc[ifield,'Field Name'].lower():
                better_match = np.where(Env_StorFields['OPERATOR'][matched].str.contains(EIA_StorFields.loc[ifield,'Company Name'].lower()[0:6]))[0]
                loc = matched[better_match[0]]
            elif 'ambassador' in EIA_StorFields.loc[ifield,'Field Name'].lower():
                better_match = np.where(Env_StorFields['NAME'][matched[best_match]].str.contains('la-pan'))[0]
                loc = matched[best_match[better_match[0]]]
            elif 'terra alta' in EIA_StorFields.loc[ifield,'Field Name'].lower():
                better_match = np.where(Env_StorFields['NAME'][matched][0:15].str.contains(EIA_StorFields.loc[ifield,'Field Name'].lower()))[0]
                loc = matched[better_match[0]]
            else:
                better_match =  np.where(Env_StorFields['NAME'][matched[best_match]].str.contains(EIA_StorFields['Company Name'][ifield][0:6].lower()) | \
                               (Env_StorFields['NAME'][matched[best_match]].str.contains(EIA_StorFields['Field Name'][ifield][0:6].lower())))[0]
                loc = matched[best_match[better_match[0]]]
                if np.size(better_match) != 1:
                    print('> 1 match - Check Manually')
                    print(EIA_StorFields.loc[ifield,:])
                    display(Env_StorFields.loc[matched[best_match[better_match]],:])
            
            #Assign lat/Lon, whether there is compressor station there
            EIA_StorFields.loc[ifield,'Lat'] = Env_StorFields.loc[loc,'Comp_lat']
            EIA_StorFields.loc[ifield,'Lon'] = Env_StorFields.loc[loc,'Comp_lon']
            EIA_StorFields.loc[ifield,'Comp_flag'] = Env_StorFields.loc[loc,'Comp_flag']
        else:
            #print('NO match')
            #this will occur if there is a match based on county/state, but not on reservoir
            # (or some cases where no match on county/state)
            # in this case, look at company and field name and assign mannually if needed
            if 'totem storage' in EIA_StorFields.loc[ifield,'Field Name'].lower():
                best_match = np.where(Env_StorFields['NAME'][matched].str.contains('totem'))[0]
            elif 'lincoln storage' in EIA_StorFields.loc[ifield,'Field Name'].lower():
                best_match = np.where(Env_StorFields['NAME'][matched].str.contains('lincoln'))[0]
            elif 'cecilia storage' in EIA_StorFields.loc[ifield,'Field Name'].lower():
                best_match = np.where(Env_StorFields['NAME'][matched].str.contains('cecilia'))[0]
            elif 'egan storage do' in EIA_StorFields.loc[ifield,'Field Name'].lower():
                best_match = np.where(Env_StorFields['NAME'][matched].str.contains('egan'))[0]
            elif 'washington ' in EIA_StorFields.loc[ifield,'Field Name'].lower():
                best_match = np.where(Env_StorFields['NAME'][matched].str.contains(EIA_StorFields.loc[ifield,'Field Name'].lower()[0:13]))[0]
            elif 'petal' in EIA_StorFields.loc[ifield,'Field Name'].lower():
                best_match = np.where(Env_StorFields['NAME'][matched][0:5].str.contains('hattiesburg'))[0]
            elif 'zoar storage' in EIA_StorFields.loc[ifield,'Field Name'].lower():
                best_match = np.where(Env_StorFields['NAME'][matched].str.contains('zoar'))[0]
            elif 'love storage' in EIA_StorFields.loc[ifield,'Field Name'].lower():
                best_match = np.where(Env_StorFields['NAME'][matched][0:5].str.contains('perry'))[0]
            elif 'swarts and swar' in EIA_StorFields.loc[ifield,'Field Name'].lower():
                best_match = np.where(Env_StorFields['NAME'][matched].str.contains('swarts'))[0]
            elif 'clemens  n.e.' in EIA_StorFields.loc[ifield,'Field Name'].lower():
                best_match = np.where(Env_StorFields['NAME'][matched][0:5].str.contains('clemens'))[0]
            elif 'worsham steed' in EIA_StorFields.loc[ifield,'Field Name'].lower():
                best_match = np.where(Env_StorFields['NAME'][matched][0:6].str.contains('worsham-steed'))[0]
            elif 'early grove' in EIA_StorFields.loc[ifield,'Field Name'].lower():
                best_match = np.where(Env_StorFields['NAME'][matched][0:6].str.contains('early grove'))[0]
            elif 'terra alta' in EIA_StorFields.loc[ifield,'Field Name'].lower():
                if 'south' in EIA_StorFields.loc[ifield,'Field Name'].lower():
                    best_match = np.where(Env_StorFields['NAME'][matched].str.contains('south'))[0]
                else:
                    best_match = np.where(~Env_StorFields['NAME'][matched].str.contains('south'))[0]           
            elif 'racket ' in EIA_StorFields.loc[ifield,'Field Name'].lower():
                best_match = np.where(Env_StorFields['NAME'][matched][0:15].str.contains('rachet-newberne'))[0]
            elif 'rachet' in EIA_StorFields.loc[ifield,'Field Name'].lower():
                best_match = np.where(Env_StorFields['NAME'][matched][0:15].str.contains('rachet-newberne'))[0]
            elif 'ryckman creek' in EIA_StorFields.loc[ifield,'Field Name'].lower():
                best_match = np.where(Env_StorFields['NAME'][matched][0:15].str.contains('belle butte'))[0]
            elif 'east mahoney' in EIA_StorFields.loc[ifield,'Field Name'].lower():
                best_match = np.where(Env_StorFields['NAME'][matched][0:15].str.contains('oil springs'))[0]
            else:
                best_match =  np.where(Env_StorFields['NAME'][matched][0:8].str.contains(EIA_StorFields['Company Name'][ifield][0:8].lower()) | \
                               (Env_StorFields['NAME'][matched][0:8].str.contains(EIA_StorFields['Field Name'][ifield][0:8].lower())))[0]
            if np.size(best_match) != 1:
                #continue
                if DEBUG ==1:
                    print('No best match - Check Mannually')
                    print(EIA_StorFields.loc[ifield,:])
                    display(Env_StorFields.loc[matched[best_match],:])
            else:
                loc = matched[best_match[0]]
                #Assign lat/Lon, whether there is compressor station there
                EIA_StorFields.loc[ifield,'Lat'] = Env_StorFields.loc[loc,'Comp_lat']
                EIA_StorFields.loc[ifield,'Lon'] = Env_StorFields.loc[loc,'Comp_lon']
                EIA_StorFields.loc[ifield,'Comp_flag'] = Env_StorFields.loc[loc,'Comp_flag']

    else:
        #in this case, the EIA data has no reservoir information and need to match based on company/field name
        #print(ifield)
        #print('NO res')
        best_match =  np.where((Env_StorFields['NAME'][matched][0:6].str.contains(EIA_StorFields['Company Name'][ifield][0:6].lower())) | \
                               (Env_StorFields['NAME'][matched][0:6].str.contains(EIA_StorFields['Field Name'][ifield][0:6].lower())))[0]
        if np.size(best_match) ==1:
            loc = matched[best_match[0]]
            EIA_StorFields.loc[ifield,'Lat'] = Env_StorFields.loc[loc,'Comp_lat']
            EIA_StorFields.loc[ifield,'Lon'] = Env_StorFields.loc[loc,'Comp_lon']
            EIA_StorFields.loc[ifield,'Comp_flag'] = Env_StorFields.loc[loc,'Comp_flag']
        elif np.size(best_match) >1:
            #if more than one match based on county/state, and no reservoir data...look at operator
            better_match =  np.where((Env_StorFields['OPERATOR'][matched[best_match]][0:6].str.contains(EIA_StorFields['Company Name'][ifield][0:6].lower())))[0]
            if np.size(better_match) ==1:
                # if one operator match...
                loc = matched[best_match[better_match[0]]]
                EIA_StorFields.loc[ifield,'Lat'] = Env_StorFields.loc[loc,'Comp_lat']
                EIA_StorFields.loc[ifield,'Lon'] = Env_StorFields.loc[loc,'Comp_lon']
                EIA_StorFields.loc[ifield,'Comp_flag'] = Env_StorFields.loc[loc,'Comp_flag']
            elif np.size(better_match) >1:
                # if more than one operator match...
                if EIA_StorFields.loc[ifield,'Field Name'].lower() == 'kirby hills wagenet':
                    finalmatch = np.where(Env_StorFields['RESERVOIR'][matched[best_match[better_match]]].str.contains('wagenet'))[0]
                    loc = matched[best_match[better_match[finalmatch[0]]]]
                elif 'early grove' in EIA_StorFields.loc[ifield,'Field Name'].lower():
                    finalmatch = np.where(Env_StorFields['NAME'][matched[best_match[better_match]]].str.contains('early grove'))[0] 
                    loc = matched[best_match[better_match[finalmatch[0]]]]
                else:
                    print('HERE STOP*****************************')
                    loc = -99
            else:
                # if no operator match
                if EIA_StorFields.loc[ifield,'Field Name'].lower() == 'markham':
                    finalmatch = np.where(Env_StorFields['NAME'][matched[best_match]].str.contains('markham'))[0]
                    loc = matched[best_match[finalmatch[0]]]
                else:
                    print('STOP HERE2*******************************')
                    loc = -99
            # Assign lat/lon values
            EIA_StorFields.loc[ifield,'Lat'] = Env_StorFields.loc[loc,'Comp_lat']
            EIA_StorFields.loc[ifield,'Lon'] = Env_StorFields.loc[loc,'Comp_lon']
            EIA_StorFields.loc[ifield,'Comp_flag'] = Env_StorFields.loc[loc,'Comp_flag']     
    
        else:
            #if no reservoir, or company or field match
            if 'egan storage dome' in EIA_StorFields.loc[ifield,'Field Name'].lower():
                best_match = np.where(Env_StorFields['NAME'][matched].str.contains('egan'))[0]
                loc = matched[best_match[0]]
            elif 'new home dome' in EIA_StorFields.loc[ifield,'Field Name'].lower():
                best_match = np.where(Env_StorFields['NAME'].str.contains('new home dome'))[0]
                loc =best_match[0]
            else:
                #print(best_match)
                print('No Res Data - Check Mannually')
                print(EIA_StorFields.loc[ifield,:])
                display(Env_StorFields.loc[matched,:])
                loc =-99
            if loc > 0:
                # Assign lat/lon values
                EIA_StorFields.loc[ifield,'Lat'] = Env_StorFields.loc[loc,'Comp_lat']
                EIA_StorFields.loc[ifield,'Lon'] = Env_StorFields.loc[loc,'Comp_lon']
                EIA_StorFields.loc[ifield,'Comp_flag'] = Env_StorFields.loc[loc,'Comp_flag']  

# QA/QC summary: for each inventory year, count the EIA storage fields and how many of
# them carry Comp_flag == 1 (i.e. were assigned a compressor station above)
print('QA/QC: Report the number of EIA fields and Enverus storage compressor stations')
for iyear in range(num_years):
    in_year = EIA_StorFields['Year'] == year_range[iyear]
    with_station = in_year & (EIA_StorFields['Comp_flag'] == 1)
    # int(...) keeps plain Python integers, so the percentage arithmetic is unchanged
    total_stations = int(with_station.sum())
    total_fields = int(in_year.sum())
    print('Year: ', year_range_str[iyear])
    print('Fields, stations, (%):', total_fields,',', total_stations,',',
          round((total_stations/total_fields)*100,2))

##### Step 2.7.6 Read In GHGRP Storage Compressor Station Data

In [None]:
#a) Read in the GHGRP data
# emissions of methane reported in metric ton
facility_info = pd.read_csv(GHGRP_facility_inputfile)
facility_emissions = pd.read_excel(GHGRP_subpartw_inputfile,sheet_name = 'Export Worksheet')
# keep underground-storage records with non-zero reported methane, up to the last inventory year
keep_rows = (facility_emissions['INDUSTRY_SEGMENT'] =='Underground natural gas storage [98.230(a)(5)]') & \
            (facility_emissions['TOTAL_REPORTED_CH4_EMISSIONS'] >0) & \
            (facility_emissions['REPORTING_YEAR'] <= year_range[-1])
facility_emissions = facility_emissions[keep_rows].reset_index(drop=True)
#print(facility_emissions)

#b) match GHGRP facility and emissions data
# for each entry in the data file (each facility each year), match the facility ID to the ID in the
# GHGRP facility info file, then append the corresponding location data to the emissions array
ghgrp_id_col = 'V_GHG_EMITTER_FACILITIES.FACILITY_ID'
ghgrp_location_cols = {'State':  'V_GHG_EMITTER_FACILITIES.STATE',
                       'County': 'V_GHG_EMITTER_FACILITIES.COUNTY',
                       'City':   'V_GHG_EMITTER_FACILITIES.CITY',
                       'Zip':    'V_GHG_EMITTER_FACILITIES.ZIP',
                       'Lat':    'V_GHG_EMITTER_FACILITIES.LATITUDE',
                       'Lon':    'V_GHG_EMITTER_FACILITIES.LONGITUDE'}
# one row per facility ID (the first occurrence wins, as with the previous row-by-row search)
facility_lookup = facility_info.drop_duplicates(subset=ghgrp_id_col, keep='first').set_index(ghgrp_id_col)
unknown_ids = facility_emissions.loc[~facility_emissions['FACILITY_ID'].isin(facility_lookup.index),
                                     'FACILITY_ID'].unique()
if len(unknown_ids) > 0:
    # fail loudly and name the facilities instead of raising an opaque IndexError
    raise ValueError('GHGRP facility IDs missing from the facility info file: {}'.format(list(unknown_ids)))
# mapping whole columns keeps the source dtypes (float lat/lon) rather than writing floats
# one-by-one into integer-initialised columns
for new_col, info_col in ghgrp_location_cols.items():
    facility_emissions[new_col] = facility_emissions['FACILITY_ID'].map(facility_lookup[info_col])

#c) make station-specific arrays for each year (with emissions in Tg)
print('QA/QC: Check that all GHGRP emissions are allocated to specific plants')
station_cols = [('Name','FACILITY_NAME'), ('State','State'), ('County','County'), ('City','City'),
                ('Zip','Zip'), ('Lat','Lat'), ('Lon','Lon')]
for iyear in np.arange(0,num_years):
    facility_emissions_temp = facility_emissions[facility_emissions['REPORTING_YEAR'] ==year_range[iyear]]
    facility_emissions_temp = facility_emissions_temp.reset_index(drop=True)

    # one row per facility, in order of first appearance
    GHGRP_storstations = pd.DataFrame({'FID':facility_emissions_temp['FACILITY_ID'].unique()})
    # descriptive fields come from the last record of each facility in this year
    last_report = facility_emissions_temp.drop_duplicates(subset='FACILITY_ID', keep='last').set_index('FACILITY_ID')
    for out_col, in_col in station_cols:
        GHGRP_storstations[out_col] = GHGRP_storstations['FID'].map(last_report[in_col])
    # emissions are summed over all records of a facility (reported value / 1e6 -> Tg)
    emis_Tg = facility_emissions_temp['TOTAL_REPORTED_CH4_EMISSIONS']/1e6
    GHGRP_storstations['TgCH4'] = GHGRP_storstations['FID'].map(
        emis_Tg.groupby(facility_emissions_temp['FACILITY_ID'], sort=False).sum())

    vars()['GHGRP_storstations'+'_'+year_range_str[iyear]] = GHGRP_storstations

    # QA/QC: relative difference between the reported total and the per-station total
    reported_Tg = facility_emissions_temp['TOTAL_REPORTED_CH4_EMISSIONS'].sum()/1e6
    allocated_Tg = GHGRP_storstations['TgCH4'].sum()
    mean_Tg = (reported_Tg + allocated_Tg)/2
    # a year without any records has nothing to allocate; avoid a 0/0 division
    diff1 = abs(reported_Tg - allocated_Tg)/mean_Tg if mean_Tg > 0 else 0.0
    if diff1 < 0.0001:
        print('Year ', year_range[iyear],': PASS, difference < 0.01%')
    else:
        # diff1 is a fraction; convert so the printed number really is a percentage
        print('Year ', year_range[iyear],': FAIL: ', diff1*100,'%') 
    print('Number of GHGRP Storage Stations: ', len(vars()['GHGRP_storstations'+'_'+year_range_str[iyear]]))

##### Step 2.7.7 Match EIA storage compressor stations to GHGRP based on location

In [None]:
# For each year of GHGRP data, match GHGRP storage stations to EIA storage fields
# (based on nearest location, not name)

# An EIA field is a candidate for a GHGRP station when both its latitude and its
# longitude differ from the station's by less than this many degrees
GHGRP_EIA_MATCH_TOL = 0.12

def _record_eia_match(ghgrp_df, istation, eia_df, ifield):
    """Flag EIA row `ifield` as matched and copy its descriptors onto GHGRP row `istation`."""
    eia_df.loc[ifield,'GHGRP_match'] = 1
    ghgrp_df.loc[istation,'match_flag'] = 1
    ghgrp_df.loc[istation,'EIA_name'] = eia_df.loc[ifield, 'Company Name']
    ghgrp_df.loc[istation,'EIA_county'] = eia_df.loc[ifield, 'County Name']
    ghgrp_df.loc[istation,'EIA_state'] = eia_df.loc[ifield, 'Report State ']
    ghgrp_df.loc[istation,'EIA_fieldcap'] = eia_df.loc[ifield, 'Total Field Capacity(Mcf)']

print('QA/QC: Number of GHGRP plants not in Enverus data set')
for iyear in np.arange(0,num_years):
    # use the correct year of data (fields are NOT pre-filtered on Comp_flag here)
    GHGRP_temp_data = vars()['GHGRP_storstations'+'_'+year_range_str[iyear]].copy()
    # explicit copy: columns are added/modified below and must not touch EIA_StorFields
    EIA_StorFields_temp = EIA_StorFields[(EIA_StorFields['Year']==year_range[iyear])].copy()
    EIA_StorFields_temp.reset_index(inplace=True, drop=True)
    
    GHGRP_temp_data['match_flag'] = 0
    GHGRP_temp_data['EIA_name'] = ''
    GHGRP_temp_data['EIA_county'] = ''
    GHGRP_temp_data['EIA_state'] = ''
    GHGRP_temp_data['EIA_fieldcap'] = 0.0   # float: capacities (and averages) are written here
    EIA_StorFields_temp['GHGRP_match'] = 0

    for istation in np.arange(0,len(GHGRP_temp_data)):
        # candidate EIA fields inside the lat/lon tolerance window around this station
        dlat = np.abs(EIA_StorFields_temp['Lat']-GHGRP_temp_data.loc[istation,'Lat'])
        dlon = np.abs(EIA_StorFields_temp['Lon']-GHGRP_temp_data.loc[istation,'Lon'])
        matched = np.where((dlat < GHGRP_EIA_MATCH_TOL) & (dlon < GHGRP_EIA_MATCH_TOL))[0]

        if np.size(matched)==1:
            _record_eia_match(GHGRP_temp_data, istation, EIA_StorFields_temp, matched[0])

        elif np.size(matched) > 1:
            # several candidates: keep the one(s) with the smallest squared lat/lon distance
            dist_calc = (dlat.iloc[matched]**2 + dlon.iloc[matched]**2).to_numpy()
            closest = matched[dist_calc == dist_calc.min()]
            if len(closest) == 1:
                _record_eia_match(GHGRP_temp_data, istation, EIA_StorFields_temp, closest[0])
            else:
                # equidistant fields: flag all of them and assign the average of their
                # non-zero capacities; EIA_name stays blank because no single field applies
                capacities = EIA_StorFields_temp.loc[closest, 'Total Field Capacity(Mcf)']
                positive_caps = capacities[capacities > 0]
                total_stor = 0.0 + positive_caps.sum()
                nonzero_stor = len(positive_caps)
                EIA_StorFields_temp.loc[closest,'GHGRP_match'] = 1
                GHGRP_temp_data.loc[istation,'match_flag'] = 1
                GHGRP_temp_data.loc[istation,'EIA_county'] = EIA_StorFields_temp.loc[closest[0], 'County Name']
                GHGRP_temp_data.loc[istation,'EIA_state'] = EIA_StorFields_temp.loc[closest[0], 'Report State ']
                GHGRP_temp_data.loc[istation,'EIA_fieldcap'] = data_fn.safe_div(total_stor,nonzero_stor)

        else:
            # no field within tolerance: manual assignments for two known stations
            if GHGRP_temp_data.loc[istation,'Name'] == 'SNG Station 4020 Bear Creek Storage, LA':
                matched = np.where((EIA_StorFields_temp['Company Name'] == 'BEAR CREEK STORAGE COMPANY') & \
                                   (EIA_StorFields_temp['Report State '] == 'LA'))[0]
            elif GHGRP_temp_data.loc[istation,'FID'] == 1009849:
                matched = np.where((EIA_StorFields_temp['Field Name'] == 'BOLING') & \
                                   (EIA_StorFields_temp['Report State '] == 'TX'))[0]
            # if the manually assigned field is absent from this year's EIA data the station
            # simply stays unmatched (and is counted below) instead of raising an IndexError
            if np.size(matched) > 0:
                _record_eia_match(GHGRP_temp_data, istation, EIA_StorFields_temp, matched[0])

    GHGRP_notmatched = GHGRP_temp_data[GHGRP_temp_data['match_flag'] == 0]
    vars()['GHGRP_storstations'+'_'+year_range_str[iyear]] = GHGRP_temp_data.copy()   
    #save a list of all the EIA fields with storage compressor stations that are not in the GHGRP dataset
    EIA_notmatched = EIA_StorFields_temp[(EIA_StorFields_temp['GHGRP_match'] == 0) & (EIA_StorFields_temp['Comp_flag'] == 1)]
    EIA_notmatched = EIA_notmatched.reset_index(drop=True)
    vars()['EIA_StorCompStations_notmatched'+'_'+year_range_str[iyear]] = EIA_notmatched.copy()                          

    print('Year ', year_range_str[iyear],': ', len(GHGRP_notmatched), ' of ', len(GHGRP_temp_data))
    #display(GHGRP_notmatched)

##### Step 2.7.8 Calculate average ratio of emissions per total field capacity (for fields with compressor stations)

In [None]:
# Average ratio of GHGRP emissions (Tg CH4) to matched EIA field capacity, one value per year
avg_emis_cap_ratio = np.zeros([num_years])

print('QA/QC: Average Emissions to Field Capacity Ratio')
for iyear in np.arange(0,num_years):
    GHGRP_temp_data = vars()['GHGRP_storstations'+'_'+year_range_str[iyear]].copy()

    # per-station ratio; built as a float column in one step instead of writing floats
    # element-by-element into an integer-initialised column
    station_ratios = [data_fn.safe_div(emis, cap) for emis, cap in
                      zip(GHGRP_temp_data['TgCH4'].to_numpy(), GHGRP_temp_data['EIA_fieldcap'].to_numpy())]
    GHGRP_temp_data['Emis_cap_ratio'] = pd.Series(station_ratios, index=GHGRP_temp_data.index, dtype=float)
    # zero ratios are excluded from the average by turning them into NaN
    # (presumably safe_div yields 0 for stations without a matched capacity - TODO confirm)
    GHGRP_temp_data['Emis_cap_ratio'] = GHGRP_temp_data['Emis_cap_ratio'].replace({0:np.nan})
    # Series.mean skips NaN entries
    avg_emis_cap_ratio[iyear] = GHGRP_temp_data['Emis_cap_ratio'].mean()
    
    vars()['GHGRP_storstations'+'_'+year_range_str[iyear]] = GHGRP_temp_data.copy()
    print('Year ', year_range_str[iyear],': ', avg_emis_cap_ratio[iyear])

##### Step 2.7.9 Assign emissions to grid

In [None]:
# Gridded map = GHGRP emissions for every station that matched an EIA field, plus estimated
# emissions (field capacity x yearly average emission/capacity ratio) for the EIA compressor
# stations that had no GHGRP match.
# AK/HI Note: AK/HI compressor stations and storage fields are included in the Enverus, EIA, and
# GHGRP datasets. No further action is required to split out AK/HI emissions from the CONUS
# region, other than the location test below, which sends anything outside the grid bounds
# to the 'nongrid' totals.

map_StorStations = np.zeros([len(Lat_01),len(Lon_01),num_years]) #data represent a snapshot in time that is applied to entire timeseries
map_StorStations_nongrid = np.zeros([num_years])

print('QA/QC: Storage Compressor Station Emissions Gridded:')
for iyear in np.arange(0, num_years):
    year_str = year_range_str[iyear]
    stations_ongrid = 0
    stations_nongrid = 0

    # 1) reported GHGRP emissions for the matched stations
    GHGRP_temp_data = vars()['GHGRP_storstations_'+year_str].copy()
    ghgrp_matched = GHGRP_temp_data[GHGRP_temp_data['match_flag']==1]
    for lat, lon, emis in zip(ghgrp_matched['Lat'].to_numpy(), ghgrp_matched['Lon'].to_numpy(),
                              ghgrp_matched['TgCH4'].to_numpy()):
        if Lon_left < lon < Lon_right and Lat_low < lat < Lat_up:
            ilat = int((lat - Lat_low)/Res01)
            ilon = int((lon - Lon_left)/Res01)
            map_StorStations[ilat,ilon,iyear] += emis
            stations_ongrid += 1
        else:
            map_StorStations_nongrid[iyear] += emis
            stations_nongrid += 1

    # 2) calculated emissions for the EIA compressor stations without a GHGRP match
    EIA_temp_data = vars()['EIA_StorCompStations_notmatched_'+year_str].copy()
    for lat, lon, capacity in zip(EIA_temp_data['Lat'].to_numpy(), EIA_temp_data['Lon'].to_numpy(),
                                  EIA_temp_data['Total Field Capacity(Mcf)'].to_numpy()):
        station_emis = capacity*avg_emis_cap_ratio[iyear]
        if Lon_left < lon < Lon_right and Lat_low < lat < Lat_up:
            ilat = int((lat - Lat_low)/Res01)
            ilon = int((lon - Lon_left)/Res01)
            map_StorStations[ilat,ilon,iyear] += station_emis
            stations_ongrid += 1
        else:
            map_StorStations_nongrid[iyear] += station_emis
            stations_nongrid += 1

    # store the per-year tables back under their dynamic names
    vars()['GHGRP_storstations_'+year_str] = GHGRP_temp_data.copy()
    vars()['EIA_StorCompStations_notmatched_'+year_str] = EIA_temp_data.copy()

    print('Year: ', year_str)
    print('On grid (Tg): ',np.sum(map_StorStations[:,:, iyear]), ', stations:', stations_ongrid)
    print('Off grid (Tg): ',np.sum(map_StorStations_nongrid[iyear]), ', stations:', stations_nongrid)

#### Step 2.8 - Make Storage Well Proxy

In [None]:
#Read in EIA storage field timeseries data
# Read in EIA storage field location data
# assign lat/lon to timeseries data based on matching the gas field code

#place data onto grid


In [None]:
#Make array of gas storage capacities at EIA underground gas storage facilities
# Get gas capacities at each field overtime from the full 191 survey data (EIA)
# Get lat/lons of storage facilities (for the current year only) from the EIA data explorer dataset
# Match the fields based on EIA field code to get the locations of all fields over time

Map_Storage = np.zeros([len(Lat_01),len(Lon_01),num_years])
Map_Storage_nongrid = np.zeros([num_years])

# States whose fields are reported as 'non-grid' when they fall outside the grid bounds
OFFGRID_STATES = ('AK','HI')

#Re-load EIA Storage Field Capacities
names = pd.read_excel(EIA_StorFields_inputfile, skiprows = 0, header = 0)
colnames = names.columns.values
EIA_StorFields = pd.read_excel(EIA_StorFields_inputfile, skiprows = 0, names = colnames)
EIA_StorFields = EIA_StorFields[['Year','Gas Field Code','Report State ','Status','Reservoir Code','Total Field Capacity(Mcf)']]
# filter for active storage fields only (copy, because columns are added below)
EIA_StorFields = EIA_StorFields[EIA_StorFields['Status']== 'Active'].copy()
EIA_StorFields.reset_index(drop=True,inplace=True)
#display(EIA_StorFields)

#load field locations
names = pd.read_excel(EIA_StorFields_locs_inputfile, skiprows = 0, header = 0)
colnames = names.columns.values
EIA_StorFields_locs = pd.read_excel(EIA_StorFields_locs_inputfile, skiprows = 0, names = colnames)
EIA_StorFields_locs = EIA_StorFields_locs[['fld_code','res_code','Longitude','Latitude']]
EIA_StorFields_locs.reset_index(drop=True,inplace=True)
#display(EIA_StorFields_locs)

#find lat/lon values from the locations array (matching based on gas field code)
# float zeros: coordinates are written into these columns; fields that find no location keep
# (0, 0), which lies outside the grid and is therefore ignored unless the field is in AK/HI
EIA_StorFields['Lat'] = 0.0
EIA_StorFields['Lon'] = 0.0

for ifield in np.arange(0, len(EIA_StorFields)):
    imatch = np.where(EIA_StorFields_locs['fld_code'] == EIA_StorFields.loc[ifield,'Gas Field Code'])[0]
    if len(imatch) == 1:
        EIA_StorFields.loc[ifield,'Lat'] = EIA_StorFields_locs.loc[imatch[0],'Latitude']
        EIA_StorFields.loc[ifield,'Lon'] = EIA_StorFields_locs.loc[imatch[0],'Longitude']                    
    elif len(imatch) > 1: 
        # several locations share the field code: disambiguate with the reservoir code
        new_match = np.where((EIA_StorFields_locs['fld_code'] == EIA_StorFields.loc[ifield,'Gas Field Code']) &\
                             (EIA_StorFields_locs['res_code'] == EIA_StorFields.loc[ifield,'Reservoir Code']))[0]
        if len(new_match) >0:
            EIA_StorFields.loc[ifield,'Lat'] = EIA_StorFields_locs.loc[new_match[0],'Latitude']
            EIA_StorFields.loc[ifield,'Lon'] = EIA_StorFields_locs.loc[new_match[0],'Longitude']

#place the field capacities onto the grid, year by year
for iyear in np.arange(0, num_years):
    fields_ongrid = 0
    fields_nongrid = 0
    temp_data = EIA_StorFields[EIA_StorFields['Year'] == year_range[iyear]].reset_index(drop=True)
    for ifield in np.arange(0,len(temp_data)):
        if temp_data['Lon'][ifield] > Lon_left and temp_data['Lon'][ifield] < Lon_right \
            and temp_data['Lat'][ifield] > Lat_low and temp_data['Lat'][ifield] < Lat_up:
            ilat = int((temp_data['Lat'][ifield] - Lat_low)/Res01)
            ilon = int((temp_data['Lon'][ifield] - Lon_left)/Res01)
            Map_Storage[ilat,ilon,iyear] += temp_data.loc[ifield, 'Total Field Capacity(Mcf)']
            fields_ongrid +=1
        else:
            # exact membership test: the previous `in 'AK|HI'` was a substring test, which also
            # accepted '' (and fragments such as 'K|') and raised TypeError on missing states
            if temp_data.loc[ifield, 'Report State '] in OFFGRID_STATES: 
                #only include AK/HI fields in the 'non-grid' category. This essentially ignores any fields where locations were not found
                Map_Storage_nongrid[iyear] += temp_data.loc[ifield, 'Total Field Capacity(Mcf)']  
                fields_nongrid +=1
    print('Year',year_range[iyear])
    print('Total Gas Capacity (mcf) ongrid: ', np.sum(Map_Storage[:,:,iyear]), 'fields:',fields_ongrid)
    print('Total Gas Capacity (mcf) offgrid:', np.sum(Map_Storage_nongrid[iyear]),'fields:',fields_nongrid)

----------------
## Step 3. Read In EPA GHGI Data
---------------

### Step 3.1. Transmission & Storage Emissions

In [None]:
# Emissions are in units of Mg (= 1x10-6 Tg)

# first read: only the header row, to recover the column names
names = pd.read_excel(EPA_NG_inputfile, sheet_name = "Inventory Emissions", usecols = "A:AG", skiprows = 5, header = 0, nrows = 1)
colnames = names.columns.values
# second read: the hard-coded row window (skiprows/nrows) of this sheet, labelled with those names
EPA_emi_ts_NG = pd.read_excel(EPA_NG_inputfile, sheet_name = "Inventory Emissions", usecols = "A:AG", skiprows = 149, names = colnames, nrows = 54)
EPA_emi_ts_NG= EPA_emi_ts_NG.drop(columns = ['Unnamed: 0', 'Unnamed: 1', 'Unnamed: 3'])
# strip '(', ')' and '+' from the source names. regex=False makes the literal replacement
# explicit: with the pandas >= 2.0 default, the old pattern r"\(" is taken literally
# (backslash included) and never matches, so the parentheses would be left in place
for literal_char in ('(', ')', '+'):
    EPA_emi_ts_NG['Source'] = EPA_emi_ts_NG['Source'].str.replace(literal_char, '', regex=False)
EPA_emi_ts_NG = EPA_emi_ts_NG.fillna('')
# drop the years before the start of the inventory period
EPA_emi_ts_NG = EPA_emi_ts_NG.drop(columns = [*range(1990, start_year,1)])
EPA_emi_ts_NG.reset_index(inplace=True, drop=True)
display(EPA_emi_ts_NG)

### Step 3.1.2. Read in Total Transmission and Storage Emissions

In [None]:
# Total Transmission and Storage emissions from the "SUMMARY CH4" sheet
# (methane reductions already accounted for); data are in kt

summary_sheet = "SUMMARY CH4"
summary_cols = "A:AD"

# header row -> column names
names = pd.read_excel(EPA_NG_inputfile, sheet_name = summary_sheet, usecols = summary_cols,
                      skiprows = 10, header = 0, nrows = 1)
colnames = names.columns.values

# data rows, labelled with the column names read above
EPA_emi_total_NG_CH4 = pd.read_excel(EPA_NG_inputfile, sheet_name = summary_sheet, usecols = summary_cols,
                                     skiprows = 17, names = colnames, nrows = 5)
first_col = EPA_emi_total_NG_CH4.columns[0]
years_before_start = list(range(1990, start_year))
EPA_emi_total_NG_CH4 = (EPA_emi_total_NG_CH4
                        .rename(columns={first_col: 'Source'})   # first column holds the source names
                        .drop(columns = years_before_start)      # keep the inventory period only
                        .reset_index(drop=True))

print("EPA GHGI Emissions with Reductions (kt)")
display(EPA_emi_total_NG_CH4)

##### Step 3.1.3 Read in and Format NG GasSTAR Reductions (kt)

In [None]:
# Read in and format Gas STAR reductions data (reported in Mg)
# NOTE(review): the step title mentions kt, but no Mg -> kt conversion happens in this cell
# For NG CH4, current reductions include those for Gas Engines, Compressor Starts, and 'Other'

# get column names from top of spreadsheet
col_range = 'A:AG'
names = pd.read_excel(EPA_NG_inputfile, sheet_name = "Gas STAR Reductions", usecols = col_range, skiprows = 5, header = 0, nrows = 1)
colnames = names.columns.values

# Load full Gas STAR page and save required reductions
EPA_Gas_STAR_NG_CH4 = pd.read_excel(EPA_NG_inputfile, sheet_name = "Gas STAR Reductions", usecols = col_range, skiprows = 76, names = colnames, nrows = 57)
EPA_Gas_STAR_NG_CH4 = EPA_Gas_STAR_NG_CH4.fillna('')
# strip '(', ')' and '+' from the source names. regex=False makes the literal replacement
# explicit: with the pandas >= 2.0 default, the old pattern r"\(" is taken literally
# (backslash included) and never matches, so the parentheses would be left in place
for literal_char in ('(', ')', '+'):
    EPA_Gas_STAR_NG_CH4['Source'] = EPA_Gas_STAR_NG_CH4['Source'].str.replace(literal_char, '', regex=False)
# keep only the rows whose first column mentions 'Engines' or 'Other'
EPA_Gas_STAR_NG_CH4 = EPA_Gas_STAR_NG_CH4[EPA_Gas_STAR_NG_CH4['Unnamed: 0'].str.contains('Engines|Other')]
EPA_Gas_STAR_NG_CH4= EPA_Gas_STAR_NG_CH4.drop(columns = ['Unnamed: 0', 'Unnamed: 1', 'Unnamed: 3'])
EPA_Gas_STAR_NG_CH4 = EPA_Gas_STAR_NG_CH4.drop(columns = [*range(1990, start_year,1)])
EPA_Gas_STAR_NG_CH4.reset_index(inplace=True, drop = True)
# hard-coded: the second remaining row is labelled 'Other'
EPA_Gas_STAR_NG_CH4.loc[1,'Source'] = 'Other'
print('EPA GHGI Gas STAR Reductions (row 0-1 in Mg):')
display(EPA_Gas_STAR_NG_CH4)

#Last row (OTHER REDUCTIONS) apply to pipelines, dehydrator vents, and transmission station venting 

In [None]:
# Break the 'Other' Gas STAR reductions down into their subcategories
# (pipeline leaks, dehydrator vents, station venting). 2013 data are held constant moving forward.
# NOTE: This code can be changed in future years where this 'other' category is broken down in the Gas STAR tab
# NOTE: Current code has hardcoded rows (not ideal and should be changed in the future)

# Last year with reported values; every later inventory year reuses this year's value
gasstar_last_reported_year = 2013

# get column names from top of spreadsheet
col_range = 'A:AF'
names = pd.read_excel(EPA_NG_inputfile, sheet_name = "Data Input", usecols = col_range, skiprows = 5, header = 0, nrows = 1)
colnames = names.columns.values

# Load the Gas STAR rows of the 'Data Input' sheet
EPA_input_gasstar = pd.read_excel(EPA_NG_inputfile, sheet_name = "Data Input", usecols = col_range, skiprows = 86, names = colnames, nrows = 16)
EPA_input_gasstar = EPA_input_gasstar.fillna('')
# Strip '(', ')' and '+' from the input names. regex=True is explicit because the
# default of Series.str.replace differs between pandas versions.
EPA_input_gasstar['Input'] = EPA_input_gasstar['Input'].str.replace(r"[()+]", "", regex=True)
EPA_input_gasstar = EPA_input_gasstar.drop(columns = ['Input No.', 'Units'])
EPA_input_gasstar.reset_index(inplace=True, drop = True)

# Year columns that precede the first inventory year
pre_start_years = [*range(1990, start_year,1)]


def _one_year_plus_ongoing(label):
    """Return the per-year sum of the '<label> 1 Year' and '<label> Ongoing' rows.

    Row 1 of the two matched rows is replaced by its cumulative sum over all years
    (computed before the pre-inventory years are dropped, so earlier years count).
    NOTE(review): row 1 is presumably the 'Ongoing' row -- this relies on the sheet order; confirm.
    """
    # .copy() so the cumulative sum is written to an independent frame, not a slice
    rows = EPA_input_gasstar[EPA_input_gasstar['Input'].str.contains(label+' 1 Year|'+label+' Ongoing')].copy()
    rows.reset_index(inplace=True, drop = True)
    rows.iloc[1,1:] = rows.iloc[1,1:].cumsum()
    rows = rows.drop(columns = pre_start_years)
    return rows.iloc[:,1:].sum(axis=0)


pipeline_red = EPA_input_gasstar[EPA_input_gasstar['Input'].str.contains('Transmission: Pipeline Leaks')]
pipeline_red = pipeline_red.drop(columns = pre_start_years)
pipeline_red = pipeline_red.iloc[:,1:].sum(axis=0)

dehy_vents_red = _one_year_plus_ongoing('Dehy Vents')
stat_vent_red = _one_year_plus_ongoing('Stat Vent')

# Set all inventory years after the last reported year to that year's values
# (selected by year rather than by position, so this does not assume start_year == 2012)
for year in year_range:
    if year > gasstar_last_reported_year:
        pipeline_red[year] = pipeline_red[gasstar_last_reported_year]
        dehy_vents_red[year] = dehy_vents_red[gasstar_last_reported_year]
        stat_vent_red[year] = stat_vent_red[gasstar_last_reported_year]

# Convert values to the correct units
# NOTE(review): the 19.26/1000 factor is applied as-is; confirm the source and target units
pipeline_red = pipeline_red*(19.26/1000)
dehy_vents_red = dehy_vents_red*(19.26/1000)
stat_vent_red = stat_vent_red*(19.26/1000)
# Replace the year labels with positions 0..n-1 so the series can be indexed by iyear later
pipeline_red.reset_index(inplace=True, drop = True)
dehy_vents_red.reset_index(inplace=True, drop = True)
stat_vent_red.reset_index(inplace=True, drop = True)


In [None]:
# Apply Gas STAR reductions to emissions from
# Gas Engines, Pipelines, Dehydrator Vents, and transmission Station Venting.
## NOTE: negative values are a known issue by ERG (updates are currently being made to the 2022 GHGI)
# units in Mg
# NOTE(review): assumes the reduction series are in the same units as EPA_emi_ts_NG --
# confirm against the unit conversion applied where the 'Other' reductions are built.


def _subtract_reduction(source_name, reductions):
    """Subtract reductions[iyear] from the national emissions of source_name, then display the row.

    source_name: exact value of the 'Source' column in EPA_emi_ts_NG.
    reductions:  sequence indexed by year position (0 = first inventory year).
    """
    is_source = EPA_emi_ts_NG['Source'] == source_name
    # snapshot of the uncorrected row; reset_index(drop=True) returns a new frame
    emi_row = EPA_emi_ts_NG[is_source].reset_index(drop = True)
    for iyear in range(num_years):
        EPA_emi_ts_NG.loc[is_source, year_range[iyear]] = emi_row.loc[0, year_range[iyear]] - reductions[iyear]
    display(EPA_emi_ts_NG[is_source])


print('Net Corrected National Emissions (Mg)')

# Gas engine reductions come straight from the Gas STAR table.
# The index is reset so row 0 is the matched row regardless of its position in that table.
engine_red_row = EPA_Gas_STAR_NG_CH4[EPA_Gas_STAR_NG_CH4['Source'].str.contains('Engines Transmission')].reset_index(drop = True)
engine_red = [engine_red_row.loc[0, year_range[iyear]] for iyear in range(num_years)]

# correct gas engine emissions (subtract the reduction from national totals)
_subtract_reduction('Engines Transmission', engine_red)
# correct pipeline leak emissions
_subtract_reduction('Pipeline Leaks', pipeline_red)
# correct dehydrator vent emissions
_subtract_reduction('Dehydrator vents Transmission', dehy_vents_red)
# correct transmission station venting emissions
_subtract_reduction('Station Venting Transmission', stat_vent_red)

##### Step 3.1.4. Read In and Format NG Regulation Reductions (kt)

In [None]:
# There are no transmission and storage regulatory emission reductions 

#### 3.2. Split Generator Emissions into Storage vs. Transmission contributions

In [None]:
# Calculate the fraction of generator emissions that occur at transmission vs. storage compressor stations
# Calculated as the average of two ratios: transmission/(transmission+storage) for engines and for turbines
# In the GHGI: Horsepower data is calculated for 1992 based on GRI study. Factors relative to 1992 are applied to 1992 values to complete the timeseries

names = pd.read_excel(EPA_NG_inputfile, sheet_name = "Activity Factors", usecols = "B:AI", skiprows = 6, header = 0, nrows = 1)
colnames = names.columns.values
EPA_Gen_AD = pd.read_excel(EPA_NG_inputfile, sheet_name = "Activity Factors", usecols = "B:AI", skiprows = 47, names = colnames, nrows = 4)
# Strip parentheses from the source names. regex=True is explicit because the default
# of Series.str.replace differs between pandas versions.
EPA_Gen_AD['Source'] = EPA_Gen_AD['Source'].str.replace(r"[()]", "", regex=True)
EPA_Gen_AD = EPA_Gen_AD.drop(columns = ['Unnamed: 1','Unnamed: 2', 'Unnamed: 3', 'Units'])
EPA_Gen_AD = EPA_Gen_AD.drop(columns = [*range(1990, start_year,1)])
EPA_Gen_AD.reset_index(inplace=True, drop=True)

frac_gen_trans = np.zeros([num_years])

# One activity-data row per (equipment, station type) combination
eng_trans = EPA_Gen_AD.loc[EPA_Gen_AD['Source'].str.contains('Engines Transmission')]
turb_trans = EPA_Gen_AD.loc[EPA_Gen_AD['Source'].str.contains('Turbines Transmission')]
eng_stor = EPA_Gen_AD.loc[EPA_Gen_AD['Source'].str.contains('Engines Storage')]
turb_stor = EPA_Gen_AD.loc[EPA_Gen_AD['Source'].str.contains('Turbines Storage')]

print('Fraction of Generator Emissions from Transmission Stations (relative to Storage Stations):')
for iyear in np.arange(0, num_years):
    icol = iyear+1  # column 0 is 'Source'; year columns start at 1
    eng_frac = eng_trans.iloc[0,icol]/(eng_trans.iloc[0,icol] + eng_stor.iloc[0,icol])
    turb_frac = turb_trans.iloc[0,icol]/(turb_trans.iloc[0,icol] + turb_stor.iloc[0,icol])
    frac_gen_trans[iyear] = (eng_frac + turb_frac) / 2

    print('Year', year_range_str[iyear], ':', frac_gen_trans[iyear])

#### 3.3. Split Emissions into Gridding Groups (each Group will have the same proxy applied during the gridding)

In [None]:
# Final Emissions in Units of kt
# Use mapping proxy and source files to split the GHGI emissions into gridding groups.
# Each group becomes a notebook-level array named after the group (one value per inventory year).
DEBUG=1

# Column positions bounding the inventory years (end index is exclusive)
start_year_idx = EPA_emi_ts_NG.columns.get_loc(start_year)
end_year_idx = EPA_emi_ts_NG.columns.get_loc(end_year)+1
sum_emi = np.zeros(num_years)

ghgi_ts_groups = ghgi_ts_map['GHGI_Emi_Group'].unique()

# Loop through all groups, finding the GHGI sources in each group and summing their emissions per year
for group_name in ghgi_ts_groups:
    vars()[group_name] = np.zeros([num_years])
    source_temp = ghgi_ts_map.loc[ghgi_ts_map['GHGI_Emi_Group'] == group_name, 'GHGI_Source']
    pattern_temp  = '|'.join(source_temp)
    ##DEBUG## display(pattern_temp)
    emi_temp = EPA_emi_ts_NG[EPA_emi_ts_NG['Source'].str.contains(pattern_temp)]
    # 'Station Total Emissions' matches two rows; keep the one belonging to this group
    # (hard-coded order: the first matched row is kept for 'Trans' groups, the second for 'Stat' groups)
    if 'Station Total Emissions' in pattern_temp:
        if 'Trans' in group_name:
            emi_temp = emi_temp.drop(emi_temp.index[1])
        elif 'Stat' in group_name:
            emi_temp = emi_temp.drop(emi_temp.index[0])
    ##DEBUG## display(emi_temp)
    # Restrict to start_year..end_year so any trailing columns cannot break the assignment
    year_values = emi_temp.iloc[:,start_year_idx:end_year_idx]
    # blanks count as zero; convert Mg to kt
    vars()[group_name][:] = np.where(year_values =='',[0],year_values).sum(axis=0)/float(1000)

#Check against total summary emissions
print('QA/QC #1: Check Transmission and Storage Emission Sum against GHGI Summary Emissions')
for iyear in np.arange(0,num_years):
    for group_name in ghgi_ts_groups:
        sum_emi[iyear] += vars()[group_name][iyear]
    # row 3 of the summary table, year columns start at column 1
    summary_emi = EPA_emi_total_NG_CH4.iloc[3,iyear+1]
    #Check 1 - make sure that the sum over all groups equals the total reported
    diff1 = abs(sum_emi[iyear] - summary_emi)/((sum_emi[iyear] + summary_emi)/2)
    if DEBUG==1:
        print(summary_emi)
        print(sum_emi[iyear])
    if diff1 < 0.0001:
        print('Year ', year_range[iyear],': PASS, difference < 0.01%')
    else:
        # diff1 is a fraction; scale to percent to match the printed unit
        print('Year ', year_range[iyear],': FAIL (check Production & summary tabs): ', diff1*100,'%')

----------------
## Step 4. Grid Data (using spatial proxies)
---------------

### Step 4.1. Calculate the monthly and regional weighted arrays

#### Step 4.1.1 Assign the Appropriate Proxy Variable Names

In [None]:
# The names on the *left* need to match the 'NaturalGas_TransmissionStorage_ProxyMapping' 'Proxy_Group' names
# (these are initialized in Step 2).
# The names on the right are the variable names used to calculate the proxies in this code.
# Names on the *right* need to match those from the code in Step 2.
# NOTE: each assignment below only binds a second name to the same object (no copy is made), so any
# in-place change made later through a left-hand name is also visible through the right-hand name.

# On-grid proxy maps
Map_TransCompStations = map_TransCompStations
Map_StorageCompStations = map_StorStations
Map_TransPipelines = Map_EnvTrans_pipelines
Map_InputTerminals = map_InputTerminals
Map_ExportTerminals = map_ExportTerminals
Map_StorageWells = Map_Storage
Map_FarmPipelines = Map_Farm_pipelines  # alternative proxy: Map_EnvTrans_pipelines
Map_LNGStorage = Map_LNGstations
Map_Generators = np.ones([len(Lat_01),len(Lon_01),num_years])  # placeholder; national emissions are split into Transmission and Storage contributions below

# Matching off-grid ('_nongrid') proxy totals, one name per on-grid proxy above
Map_TransCompStations_nongrid = map_TransCompStations_nongrid
Map_StorageCompStations_nongrid = map_StorStations_nongrid
Map_TransPipelines_nongrid = Map_EnvTrans_pipelines_nongrid
Map_InputTerminals_nongrid = map_InputTerminals_nongrid
Map_ExportTerminals_nongrid = map_ExportTerminals_nongrid
Map_StorageWells_nongrid = Map_Storage_nongrid
Map_FarmPipelines_nongrid = Map_Farm_pipelines_nongrid  # alternative proxy: Map_EnvTrans_pipelines_nongrid
Map_LNGStorage_nongrid = Map_LNGstations_nongrid
Map_Generators_nongrid = np.ones([num_years])  # placeholder; national emissions are split into Transmission and Storage contributions below


#### Step 4.1.2 Calculate the fractional proxies

In [None]:
# Calculate weighting arrays
# For each proxy, find the fraction in each grid cell relative to the total (on and off grid)
# Each year is also weighted by its number of days (the weight cancels in the per-year normalization)
import calendar

proxy_ts_map_unique = np.unique(proxy_ts_map['Proxy_Group'])

for iyear in np.arange(0,num_years):
    # calendar.isleap replaces the hard-coded 2012/2016 test so other inventory years work too
    if calendar.isleap(int(year_range[iyear])):
        month_days = month_day_leap
    else:
        month_days = month_day_nonleap
    year_days = np.sum(month_days)

    #Step 1a: weighted proxy ongrid = ongrid proxy * days each year
    #Step 1b: weighted proxy offgrid = offgrid proxy * days each year
    #Step 2a: normalized weighted proxy ongrid = weighted proxy in each grid cell / (sum weighted proxy ongrid + weighted proxy offgrid)
    #Step 2b: normalized weighted proxy offgrid = weighted proxy offgrid / (sum weighted proxy ongrid + weighted proxy offgrid)
    print('Check Sum of T & S Proxy Arrays = 1 for: ', year_range[iyear])
    # Iterate over the unique proxy names themselves. Indexing proxy_ts_map rows by the
    # count of unique names skips proxies whenever a proxy group appears in more than one row.
    for proxy_name in proxy_ts_map_unique:
        proxy_name = str(proxy_name)
        proxy_grid = vars()[proxy_name]
        proxy_nongrid = vars()[proxy_name+'_nongrid']
        # both arrays are modified in place
        proxy_grid[:,:,iyear] *= year_days
        proxy_nongrid[iyear] *= year_days
        temp_sum = float(np.sum(proxy_grid[:,:,iyear]) + np.sum(proxy_nongrid[iyear]))
        proxy_grid[:,:,iyear] = data_fn.safe_div(proxy_grid[:,:,iyear], temp_sum)
        proxy_nongrid[iyear] = data_fn.safe_div(proxy_nongrid[iyear], temp_sum)
        proxy_sum = np.sum(proxy_grid[:,:,iyear])+np.sum(proxy_nongrid[iyear])
        if proxy_sum >1.0001 or proxy_sum <0.9999:
            print('CHECK ', proxy_name,': ', proxy_sum)
        else:
            print('PASS:', proxy_name)

### Step 4.2. Grid the National Emissions Data, then Calculate 0.1x0.1 degree flux maps

In [None]:
# Gridded emissions = national GHGI emissions * normalized proxy map
#
# Special cases handled below:
#  - Transmission compressor stations and transmission pipelines: the AK/HI fraction of the national
#    emissions is removed before gridding and booked as non-gridded emissions.
#  - Generators: national emissions are split between the transmission and storage compressor station
#    proxies using frac_gen_trans (relative activity data from the national GHGI).
#  - Storage wells: the one-time Aliso Canyon emissions are placed in a single grid cell and the
#    remainder is distributed with the storage well proxy.

Emissions = np.zeros([len(Lat_01),len(Lon_01),num_years])
Emissions_nongrid = np.zeros([num_years])
Emi_not_mapped_sum = np.zeros(num_years)
CONUS_red= np.zeros(num_years)
DEBUG =1

# One pass per row of the proxy mapping; each row creates the arrays
# 'Ext_<group>' (lat, lon, year) and 'Ext_<group>_nongrid' (year)
for igroup in np.arange(0,len(proxy_ts_map)):
    emi_name = proxy_ts_map.loc[igroup,'GHGI_Emi_Group']
    proxy_name = proxy_ts_map.loc[igroup,'Proxy_Group']
    ext_grid = np.zeros([len(Lat_01),len(Lon_01),num_years])
    ext_nongrid = np.zeros([num_years])
    vars()['Ext_'+emi_name] = ext_grid
    vars()['Ext_'+emi_name+'_nongrid'] = ext_nongrid

    for iyear in np.arange(0,num_years):
        national_emi = vars()[emi_name][iyear]

        if proxy_name == 'Map_TransCompStations':
            # remove the AK/HI fraction before gridding
            removed_emi = national_emi* CONUS_transstat_ratio[iyear]
            ext_grid[:,:,iyear] += (national_emi - removed_emi) * vars()[proxy_name][:,:,iyear]
            ext_nongrid[iyear] += removed_emi

        elif proxy_name == 'Map_TransPipelines':
            # remove the AK/HI fraction before gridding (single ratio for all years)
            removed_emi = national_emi* CONUS_transpipe_ratio
            ext_grid[:,:,iyear] += (national_emi - removed_emi) * vars()[proxy_name][:,:,iyear]
            ext_nongrid[iyear] += removed_emi

        elif proxy_name == 'Map_Generators':
            # = GHGI * Trans frac * Map_TransCompStations + GHGI * Storage frac * Map_StorageCompStations
            trans_emi = national_emi * frac_gen_trans[iyear]
            stor_emi = national_emi * (1-frac_gen_trans[iyear])
            ext_grid[:,:,iyear] += \
                (trans_emi * Map_TransCompStations[:,:,iyear]) +\
                (stor_emi * Map_StorageCompStations[:,:,iyear])
            ext_nongrid[iyear] += \
                (trans_emi * Map_TransCompStations_nongrid[iyear]) +\
                (stor_emi * Map_StorageCompStations_nongrid[iyear])

        elif proxy_name == 'Map_StorageWells':
            # Aliso Canyon storage event (one-time event added to 'Inventory Emissions' tab in GHGI workbook)
            # grid cell of the Aliso Canyon storage facility (lat/lon from google maps)
            ilat = int((34.31307 - Lat_low)/Res01)
            ilon = int((-118.56462 - Lon_left)/Res01)
            # event emissions in kt; zero outside 2015-2016
            aliso_red = {2015: 78.350, 2016: 21.288}.get(year_range[iyear], 0)
            ghgi_temp = national_emi - aliso_red
            ext_grid[ilat,ilon,iyear] += aliso_red
            # allocate remaining emissions based on storage well proxy
            ext_grid[:,:,iyear] += ghgi_temp * vars()[proxy_name][:,:,iyear]
            ext_nongrid[iyear] += ghgi_temp * vars()[proxy_name+'_nongrid'][iyear]

        else:
            # default: straight allocation with the group's own proxy
            ext_grid[:,:,iyear] += national_emi * vars()[proxy_name][:,:,iyear]
            ext_nongrid[iyear] += national_emi * vars()[proxy_name+'_nongrid'][iyear]

        # running totals over all groups
        Emissions[:,:,iyear] += ext_grid[:,:,iyear]
        Emissions_nongrid[iyear] += ext_nongrid[iyear]


# QA/QC gridded emissions
# Check sum of all gridded emissions + emissions not included in gridding (e.g., AK), and other non-gridded areas
print('QA/QC #1: Check weighted emissions against GHGI')
for iyear in np.arange(0,num_years):
    summary_emi = EPA_emi_total_NG_CH4.iloc[3,iyear+1]
    calc_emi = np.sum(Emissions[:,:,iyear]) + Emissions_nongrid[iyear]
    if DEBUG ==1:
        print(summary_emi)
        print(calc_emi)
    # relative difference with respect to the mean of the two totals
    diff = abs(summary_emi-calc_emi)/((summary_emi+calc_emi)/2)
    if not diff < 0.0001:
        print('Year ', year_range[iyear], ': FAIL -- Difference = ', diff*100,'%')
    else:
        print('Year ', year_range[iyear], ': PASS, difference < 0.01%')

#### Step 4.2.2 Save gridded emissions (kt)

In [None]:
# Save gridded emissions for each gridding group - for extension

#Initialize file
data_IO_fn.initialize_netCDF(grid_emi_outputfile, netCDF_description, 0, year_range, loc_dimensions, Lat_01, Lon_01)

# One output variable per emission group (the 'Emi_not_mapped' group is excluded)
unique_groups = np.unique(proxy_ts_map['GHGI_Emi_Group'])
unique_groups = unique_groups[unique_groups != 'Emi_not_mapped']

nc_out = Dataset(grid_emi_outputfile, 'r+', format='NETCDF4')
try:
    for group_name in unique_groups:
        ext_name = 'Ext_'+group_name
        print(ext_name)
        ghgi_temp = vars()[ext_name]
        if len(np.shape(ghgi_temp)) ==4:
            # sum month data if data is monthly (sum the gridded 'Ext_' array, not the national totals)
            ghgi_temp = np.sum(ghgi_temp,axis=3)

        # Write data to netCDF
        data_out = nc_out.createVariable(ext_name, 'f8', ('lat', 'lon','year'), zlib=True)
        data_out[:,:,:] = ghgi_temp[:,:,:]

    #save nongrid data to calculate non-grid fraction extension
    data_out = nc_out.createVariable('Emissions_nongrid', 'f8', ('year',), zlib=True)
    data_out[:] = Emissions_nongrid[:]
finally:
    # always release the file handle, even if a write fails
    nc_out.close()

#Confirm file location
print('** SUCCESS **')
print("Gridded emissions (kt) written to file: {}" .format(os.getcwd())+grid_emi_outputfile)
print(' ')

del data_out, ghgi_temp, nc_out

### Step 4.3 Calculate Gridded Fluxes (molec/s/cm2)

In [None]:
#Step 2 -- Calculate fluxes (molec./s/cm2)
import calendar

DEBUG =1

#Initialize arrays
check_sum_annual = np.zeros([num_years])
Flux_Emissions_Total_annual = np.zeros([len(Lat_01),len(Lon_01),num_years])
for igroup in np.arange(0,len(proxy_ts_map)):
    vars()['Flux_'+proxy_ts_map.loc[igroup,'GHGI_Emi_Group']+'_annual'] = np.zeros([len(Lat_01),len(Lon_01),num_years])


#Calculate fluxes
for iyear in np.arange(0,num_years):
    # calendar.isleap replaces the hard-coded 2012/2016 test so other inventory years work too
    if calendar.isleap(int(year_range[iyear])):
        month_days = month_day_leap
    else:
        month_days = month_day_nonleap
    year_days = np.sum(month_days)

    # calculate fluxes for annual data  (=kt * grams/kt *molec/mol *mol/g *s^-1 * cm^-2)
    conversion_factor_annual = 10**9 * Avogadro / float(Molarch4 * year_days * 24 * 60 *60) / area_matrix_01

    # Per-group fluxes. The 'Ext_' emission arrays are read, not scaled in place, so they
    # keep their original values and re-running this cell cannot apply the conversion twice.
    for igroup in np.arange(0,len(proxy_ts_map)):
        emi_name = proxy_ts_map.loc[igroup,'GHGI_Emi_Group']
        vars()['Flux_'+emi_name+'_annual'][:,:,iyear] = vars()['Ext_'+emi_name][:,:,iyear] * conversion_factor_annual

    # Total flux is independent of the group, so it is computed once per year
    Flux_Emissions_Total_annual[:,:,iyear] = Emissions[:,:,iyear]*conversion_factor_annual
    check_sum_annual[iyear] += np.sum(Flux_Emissions_Total_annual[:,:,iyear]/conversion_factor_annual) #convert back to emissions to check at end

print(' ')
print('QA/QC #2: Check final gridded fluxes against GHGI')
# for the sum, check the converted annual emissions (convert back from flux) plus all the non-gridded emissions
for iyear in np.arange(0,num_years):
    calc_emi = check_sum_annual[iyear] + Emissions_nongrid[iyear]
    summary_emi = EPA_emi_total_NG_CH4.iloc[3,iyear+1]
    if DEBUG ==1:
        print(calc_emi)
        print(summary_emi)
    # relative difference with respect to the mean of the two totals
    diff = abs(summary_emi-calc_emi)/((summary_emi+calc_emi)/2)
    if diff < 0.0001:
        print('Year ', year_range[iyear], ': PASS, difference < 0.01%')
    else:
        print('Year ', year_range[iyear], ': FAIL -- Difference = ', diff*100,'%')

-------------
## Step 5. Write gridded (0.1⁰x0.1⁰) data to netCDF files.
-------------

In [None]:
# Initialize netCDF files
data_IO_fn.initialize_netCDF(gridded_outputfile, netCDF_description, 0, year_range, loc_dimensions, Lat_01, Lon_01)

# Write the total annual flux array into the file's 'emi_ch4' variable
nc_out = Dataset(gridded_outputfile, 'r+', format='NETCDF4')
try:
    nc_out.variables['emi_ch4'][:,:,:] = Flux_Emissions_Total_annual
finally:
    # always release the file handle, even if the write fails
    nc_out.close()
#Confirm file location
print('** SUCCESS **')
print("Gridded annual natural gas transmission & storage fluxes written to file: {}" .format(os.getcwd())+gridded_outputfile)
print('')

-------------
## Step 6. Plot Data
-------------

#### 6.1 Plot Annual Emission Fluxes

In [None]:
# Plot annual emissions for each year
# Map the total annual flux array for every inventory year using the shared plotting helper.
scale_max = 10   # value passed as the helper's scale_max argument
save_fig = 0     # presumably 0 = do not save the figure -- TODO confirm in data_plot_fn
save_file = ''   # output file name; left empty here
data_plot_fn.plot_annual_emission_flux_map(Flux_Emissions_Total_annual, Lat_01, Lon_01, year_range, title_str, scale_max, save_fig, save_file)

#### 6.2 Plot Difference Between First and Last Inventory Year

In [None]:
# Map the difference in total annual flux between inventory years using the shared plotting helper.
save_flag = 0      # presumably 0 = do not save the figure -- TODO confirm in data_plot_fn
save_outfile = ''  # output file name; left empty here
data_plot_fn.plot_diff_emission_flux_map(Flux_Emissions_Total_annual, Lat_01, Lon_01, year_range, title_diff_str,save_flag,save_outfile)

#### 6.3 Plot Activity Data 

In [None]:
# Plot Location Points (one scatter map per inventory year)

# Activity_Map = 0.1x0.1 map of activity data (counts or absolute units)
# Plot_Frac    = 0 or 1 (0= plot activity data in absolute counts, 1= plot fractional activity data)
# Lat          = 0.1 degree Lat values (select range)
# Lon          = 0.1 degree Lon values (select range)
# year_range   = array of inventory years
# title_str2   = title of map
# legend_str   = title of legend
# scale_max    = maximum of color scale

Activity_Map = Map_LNGstations#Map_Plants
Plot_Frac = 1
Lat = Lat_01
Lon = Lon_01
title_str2 = "Proxy - LNG Storage Station Capacity"
legend_str = "Annual Fraction of National LNG Storage Station Capacity"
scale_max = 0.05

# Colormap: 'rainbow' with the alpha channel ramping from 0 to 1 over the first `slopen` entries.
# It does not depend on the year, so it is built once.
my_cmap = copy(plt.cm.get_cmap('rainbow',lut=3000))
my_cmap._init()
slopen = 200
alphas_slope = np.abs(np.linspace(0, 1.0, slopen))
alphas_stable = np.ones(3003-slopen)  # the lookup table is written with 3003 rows in total
alphas = np.concatenate((alphas_slope, alphas_stable))
my_cmap._lut[:,-1] = alphas
my_cmap.set_under('gray', alpha=0)

# Plotted sub-domain (fixed index window), shifted by half a grid cell
Lon_cor = Lon[50:632]-0.05
Lat_cor = Lat[43:300]-0.05

xpoints = Lon_cor
ypoints = Lat_cor
yp,xp = np.meshgrid(ypoints,xpoints)

for iyear in np.arange(0,len(year_range)):
    # Select this year's 2-D slice; the year axis may be first or last.
    map_shape = np.shape(Activity_Map)
    if map_shape[0] == len(year_range):
        year_map = Activity_Map[iyear,:,:]
    elif map_shape[2] == len(year_range):
        year_map = Activity_Map[:,:,iyear]
    else:
        # fail loudly instead of plotting a stale or undefined zp
        raise ValueError('Activity_Map must have the year dimension (length len(year_range)) as its first or last axis')
    zp = year_map[43:300,50:632]
    if Plot_Frac ==1:
        # fraction of the full-domain total for this year
        zp = zp/np.sum(year_map)

    fig, ax = plt.subplots(dpi=300)
    m = Basemap(llcrnrlon=xp.min(), llcrnrlat=yp.min(), urcrnrlon=xp.max(),
                urcrnrlat=yp.max(), projection='merc', resolution='h', area_thresh=5000)
    m.drawmapboundary(fill_color='Azure')
    m.fillcontinents(color='FloralWhite', lake_color='Azure',zorder=1)
    m.drawcoastlines(linewidth=0.5,zorder=3)
    m.drawstates(linewidth=0.25,zorder=3)
    m.drawcountries(linewidth=0.5,zorder=3)

    # project lon/lat to map coordinates and draw one marker per grid cell
    xpi,ypi = m(xp,yp)
    plot = m.scatter(xpi,ypi,s=20,c=zp.transpose(),cmap=my_cmap,zorder=2,vmin = 10**-15,snap = True,vmax = scale_max)
    cb = m.colorbar(plot, location = "bottom", pad = "1%")
    tick_locator = ticker.MaxNLocator(nbins=5)
    cb.locator = tick_locator
    cb.update_ticks()

    cb.ax.set_xlabel(legend_str,fontsize=10)
    cb.ax.tick_params(labelsize=10)
    Titlestring = str(year_range[iyear])+' '+title_str2
    plt.title(Titlestring, fontsize=14)
    plt.show()

In [None]:
# Plot Heat Map (one pcolor map per inventory year)

# Activity_Map = 0.1x0.1 map of activity data (counts or absolute units)
# Plot_Frac    = 0 or 1 (0= plot activity data in absolute counts, 1= plot fractional activity data)
# Lat          = 0.1 degree Lat values (select range)
# Lon          = 0.1 degree Lon values (select range)
# year_range   = array of inventory years
# title_str2   = title of map
# legend_str   = title of legend
# scale_max    = maximum of color scale

Activity_Map = Map_TransPipelines
Plot_Frac = 1
Lat = Lat_01
Lon = Lon_01
title_str2 = "Proxy - Transmission Pipeline Mileage"
legend_str = "Annual Fraction of National Transmission Pipeline Mileage"
scale_max = 0.005

# Colormap: 'rainbow' with the alpha channel ramping from 0 to 1 over the first `slopen` entries.
# It does not depend on the year, so it is built once.
my_cmap = copy(plt.cm.get_cmap('rainbow',lut=3000))
my_cmap._init()
slopen = 200
alphas_slope = np.abs(np.linspace(0, 1.0, slopen))
alphas_stable = np.ones(3003-slopen)  # the lookup table is written with 3003 rows in total
alphas = np.concatenate((alphas_slope, alphas_stable))
my_cmap._lut[:,-1] = alphas
my_cmap.set_under('gray', alpha=0)

# Plotted sub-domain (fixed index window), shifted by half a grid cell
Lon_cor = Lon[50:632]-0.05
Lat_cor = Lat[43:300]-0.05

xpoints = Lon_cor
ypoints = Lat_cor
yp,xp = np.meshgrid(ypoints,xpoints)

for iyear in np.arange(0,len(year_range)):
    # Select this year's 2-D slice; the year axis may be first or last.
    map_shape = np.shape(Activity_Map)
    if map_shape[0] == len(year_range):
        year_map = Activity_Map[iyear,:,:]
    elif map_shape[2] == len(year_range):
        year_map = Activity_Map[:,:,iyear]
    else:
        # fail loudly instead of plotting a stale zp left over from an earlier cell
        raise ValueError('Activity_Map must have the year dimension (length len(year_range)) as its first or last axis')
    zp = year_map[43:300,50:632]
    if Plot_Frac ==1:
        # fraction of the full-domain total for this year
        zp = zp/np.sum(year_map)

    fig, ax = plt.subplots(dpi=300)
    m = Basemap(llcrnrlon=xp.min(), llcrnrlat=yp.min(), urcrnrlon=xp.max(),
                urcrnrlat=yp.max(), projection='merc', resolution='h', area_thresh=5000)
    m.drawmapboundary(fill_color='Azure')
    m.fillcontinents(color='FloralWhite', lake_color='Azure',zorder=1)
    m.drawcoastlines(linewidth=0.5,zorder=3)
    m.drawstates(linewidth=0.25,zorder=3)
    m.drawcountries(linewidth=0.5,zorder=3)

    # project lon/lat to map coordinates and draw the gridded values as a heat map
    xpi,ypi = m(xp,yp)
    plot = m.pcolor(xpi,ypi,zp.transpose(), cmap=my_cmap, vmin=10**-15, vmax=scale_max, snap=True,zorder=2)
    cb = m.colorbar(plot, location = "bottom", pad = "1%")
    tick_locator = ticker.MaxNLocator(nbins=5)
    cb.locator = tick_locator
    cb.update_ticks()

    cb.ax.set_xlabel(legend_str,fontsize=10)
    cb.ax.tick_params(labelsize=10)
    Titlestring = str(year_range[iyear])+' '+title_str2
    plt.title(Titlestring, fontsize=14)
    plt.show()

In [None]:
# Report the elapsed run time in hours.
# `it` is presumably the start timestamp (seconds) recorded at the top of the notebook -- TODO confirm.
seconds_per_hour = 60*60
ft = datetime.now().timestamp()
time_elapsed = (ft-it)/seconds_per_hour
print(f'Time to run: {time_elapsed} hours')
print('** GEPA_1B2b_TransmissionStorage: COMPLETE **')