# Check & Format Final Gridded Emission Files
## for Maasakkers, J.D., McDuffie, E. E., et al., 2023
### last updated: September 10, 2023

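Purpose: Compile the individual gridded source-category files into one netCDF file per year (annual emissions, monthly scale factors, and the express extension), adding global and per-variable metadata.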

In [1]:
import time
from netCDF4 import Dataset
import numpy as np
import glob
import datetime

In [2]:
# Years covered by the main inventory files
years = np.arange(2012, 2018 + 1)

# Input file listings (sorted). Monthly files are the ones with "Monthly" in
# their name; the second listing initially holds every file in the directory.
filenames_monthly = np.array(sorted(glob.glob("GEPAv2_Zenodo/GEPAv2/*Monthly*")))
filenames_annual = np.array(sorted(glob.glob("GEPAv2_Zenodo/GEPAv2/*")))

# Flag (1) every entry of the full listing that also appears in the monthly
# listing, then keep only the unflagged (0) entries as the annual files.
filter_monthly = np.array(
    [1.0 if np.sum(filenames_monthly == candidate) > 0 else 0.0
     for candidate in filenames_annual]
)
filenames_annual = filenames_annual[filter_monthly == 0]

# The first annual file provides the lat/lon axes used for all output files
sta_f = Dataset(filenames_annual[0])
reference_vars = sta_f.variables
latitudes_out = np.array(reference_vars['lat'])
longitudes_out = np.array(reference_vars['lon'])
sta_f.close()

In [3]:
# Build the grid-cell area matrix on the output grid
# Load the cell-area map together with its coordinate axes
are_f = Dataset('GEPAv2_Zenodo/Data/Gridded_area_c.nc')
area_vars = are_f.variables
are_map = np.array(area_vars['cell_area'])
nc_lon = np.array(area_vars['lon'])
nc_lat = np.array(area_vars['lat'])
are_f.close()

# Resolution of the output grid
Resolution_01 = 0.1  #deg

# Missing values (anything above 1e14) are zeroed so they do not enter the sums
are_map[are_map > 1.e+14] = 0

# Every output cell is the sum of one 10x10 block of the area map, multiplied
# by 10000 (to cm^2). Block (0, 0) starts at index [0, 0] of the area map.
# NOTE(review): this presumes the area map shares its origin with the output
# grid at ten times the resolution - confirm against the area file.
n_lat = len(latitudes_out)
n_lon = len(longitudes_out)
area_matrix_01 = np.zeros([n_lat, n_lon])
for row in range(n_lat):
    row_block = slice(row * 10, (row + 1) * 10)
    for col in range(n_lon):
        col_block = slice(col * 10, (col + 1) * 10)
        area_matrix_01[row, col] = np.sum(are_map[row_block, col_block]) * 10000

In [4]:
# Write one netCDF file per year holding every annual source-category emission
# grid (one variable per input file) plus the grid-cell areas.
for yeari in years:
    # The context manager closes the output file even if a step below raises
    with Dataset('GEPAv2_Zenodo/Output/Gridded_GHGI_Methane_v2_'+str(yeari)+'.nc', 'w', format='NETCDF4') as nc_out:
        #Global attributes
        nc_out.title = 'Gridded U.S. Greenhouse Gas Inventory (Version 2): Annual methane emissions'
        nc_out.publication = 'A gridded inventory of annual 2012-2018 U.S. anthropogenic methane emissions'
        nc_out.authors = 'Joannes D. Maasakkers, Erin E. McDuffie, Melissa P. Sulprizio, Candice Chen, Maggie Schultz, Lily Brunelle, Ryan Thrush, John Steller, Christopher Sherry, Daniel J. Jacob, Seongeun Jeong, Bill Irving, and Melissa Weitz'
        nc_out.history = 'September 10, 2023'
        # NOTE(review): the CF/COARDS documents spell this attribute
        # "Conventions"; the name is left unchanged so the output metadata
        # stays identical - confirm whether it should be capitalised.
        nc_out.conventions = "COARDS"
        nc_out.version = '1.0 - Publication version (Data equals the preprint version)'
        nc_out.contact = 'J.D.Maasakkers@sron.nl and McDuffie.Erin.E@epa.gov'
        nc_out.year = str(yeari)

        #Create dimensions
        nc_out.createDimension('lat', len(latitudes_out))
        nc_out.createDimension('lon', len(longitudes_out))
        nc_out.createDimension('time', 1)

        #Create coordinate variables. The time handle is called time_var so it
        #does not rebind the imported `time` module; dimensions are passed as
        #real tuples throughout.
        latitudes = nc_out.createVariable('lat', 'f4', ('lat',))
        longitudes = nc_out.createVariable('lon', 'f4', ('lon',))
        time_var = nc_out.createVariable('time', 'f4', ('time',))

        #Properties
        longitudes.standard_name = "longitude"
        longitudes.long_name = "Longitude"
        longitudes.units = "degrees_east"

        latitudes.standard_name = "latitude"
        latitudes.long_name = "Latitude"
        latitudes.units = "degrees_north"

        time_var.long_name = "time"
        time_var.units = "hours since "+str(yeari)+"-01-01 00:00:00"
        time_var.calendar = "standard"
        time_var.axis = "T"

        #Put data into the arrays
        latitudes[:]  = np.round(latitudes_out,2)
        longitudes[:] = np.round(longitudes_out,2)

        #Single time stamp at the reference date given in time.units (offset 0 hours)
        time_var[:] = 0

        for fili in filenames_annual:
            data_out_array = np.zeros([1,len(latitudes_out),len(longitudes_out)])

            #The third axis of emi_ch4 is indexed by the year offset from 2012
            with Dataset(fili) as sta_f:
                data_out_array[0,:,:] = np.array(sta_f.variables['emi_ch4'])[:,:,yeari-2012]

            #Variable name: "emi_ch4_" + file name without path prefix and extension
            outstring = fili
            outstring = outstring.replace('GEPAv2_Zenodo/GEPAv2/EPA_v2_','')
            outstring = outstring.replace('.nc','')
            outstring = "emi_ch4_" + outstring
            name_parts = outstring.split('_')

            #Separate handle so the loop variable (the file name) is not rebound
            emi_var = nc_out.createVariable(outstring, 'f4', ('time','lat', 'lon'), zlib=True)
            emi_var.source_category = name_parts[2]
            emi_var.standard_name = "annual_emissions"
            emi_var.long_name = str(yeari) + ' Methane emissions from IPCC source category ' + ' '.join(name_parts[2:])
            emi_var.units = "moleccm-2s-1"
            emi_var[:,:,:] = data_out_array[:,:,:]

        #Grid-cell areas (area_matrix_01 is built earlier in the notebook)
        grid_cell_area = nc_out.createVariable('grid_cell_area', 'f4', ('time','lat', 'lon'), zlib=True)
        grid_cell_area.standard_name = "grid_cell_area"
        grid_cell_area.long_name = "Grid cell areas to convert to absolute emissions"
        grid_cell_area.units = "cm^2"
        grid_cell_area[0,:,:] = area_matrix_01[:,:]

## Monthly scaling factors

In [5]:
# Write one netCDF file per year holding, for every monthly input file, the
# twelve monthly grids divided by the matching annual grid (scale factors).
for yeari in years:
    # The context manager closes the output file even if a step below raises
    with Dataset('GEPAv2_Zenodo/Output/Gridded_GHGI_Methane_v2_Monthly_Scale_Factors_'+str(yeari)+'.nc', 'w', format='NETCDF4') as nc_out:
        #Global attributes
        nc_out.title = 'Gridded U.S. Greenhouse Gas Inventory (Version 2): Monthly methane sector scaling factors'
        nc_out.how_to_use = 'Sector-specific factors in this file can be multiplied by the annual methane emission flux data to estimate monthly emission fluxes for source sectors with strong monthly variability'
        nc_out.publication = 'A gridded inventory of annual 2012-2018 U.S. anthropogenic methane emissions'
        nc_out.authors = 'Joannes D. Maasakkers, Erin E. McDuffie, Melissa P. Sulprizio, Candice Chen, Maggie Schultz, Lily Brunelle, Ryan Thrush, John Steller, Christopher Sherry, Daniel J. Jacob, Seongeun Jeong, Bill Irving, and Melissa Weitz'
        nc_out.history = 'September 10, 2023'
        # NOTE(review): the CF/COARDS documents spell this attribute
        # "Conventions"; the name is left unchanged so the output metadata
        # stays identical - confirm whether it should be capitalised.
        nc_out.conventions = "COARDS"
        nc_out.version = '1.0 - Publication version (Data equals the preprint version)'
        nc_out.contact = 'J.D.Maasakkers@sron.nl and McDuffie.Erin.E@epa.gov'
        nc_out.year = str(yeari)

        #Create dimensions
        nc_out.createDimension('lat', len(latitudes_out))
        nc_out.createDimension('lon', len(longitudes_out))
        nc_out.createDimension('time', 12)

        #Create coordinate variables. The time handle is called time_var so it
        #does not rebind the imported `time` module; dimensions are passed as
        #real tuples throughout.
        latitudes = nc_out.createVariable('lat', 'f4', ('lat',))
        longitudes = nc_out.createVariable('lon', 'f4', ('lon',))
        time_var = nc_out.createVariable('time', 'f4', ('time',))

        #Properties
        longitudes.standard_name = "longitude"
        longitudes.long_name = "Longitude"
        longitudes.units = "degrees_east"

        latitudes.standard_name = "latitude"
        latitudes.long_name = "Latitude"
        latitudes.units = "degrees_north"

        time_var.long_name = "time"
        time_var.units = "hours since "+str(yeari)+"-01-01 00:00:00"
        time_var.calendar = "standard"
        time_var.axis = "T"

        #Put data into the arrays
        latitudes[:]  = np.round(latitudes_out,2)
        longitudes[:] = np.round(longitudes_out,2)

        #Time stamps: hours from January 1st to the first day of each month
        year_start = datetime.datetime(int(yeari), 1, 1)
        time_array = np.zeros(12)
        for ti in range(12):
            month_start = datetime.datetime(int(yeari), ti+1, 1)
            time_array[ti] = (month_start - year_start).days*24
        time_var[:] = time_array[:]

        for fili in filenames_monthly:
            data_out_array = np.zeros([12,len(latitudes_out),len(longitudes_out)])

            #Convert the variable to an array once (not once per month) and
            #take the first 12 entries of the last axis for this year
            with Dataset(fili) as sta_f:
                monthly_values = np.array(sta_f.variables['emi_ch4'])[:,:,yeari-2012,:12]
            #Move the month axis to the front to match ('time','lat','lon')
            data_out_array[:,:,:] = np.moveaxis(monthly_values, -1, 0)

            #Get annual file
            fili_annual = fili.replace("_Monthly.nc",".nc")
            with Dataset(fili_annual) as sta_f:
                data_out_div = np.array(sta_f.variables['emi_ch4'])[:,:,yeari-2012]

            #Divide by the annual grid only where it is positive. Cells where
            #the annual value is <= 0 keep the raw monthly value.
            #NOTE(review): presumably the monthly data are 0 in those cells - confirm.
            positive = data_out_div > 0
            data_out_array[:,positive] = data_out_array[:,positive]/data_out_div[positive]

            #Variable name: "monthly_scale_factor_" + file name without path prefix and suffix
            outstring = fili
            outstring = outstring.replace('GEPAv2_Zenodo/GEPAv2/EPA_v2_','')
            outstring = outstring.replace('_Monthly.nc','')
            outstring = "monthly_scale_factor_" + outstring
            name_parts = outstring.split('_')

            #Separate handle so the loop variable (the file name) is not rebound
            scale_var = nc_out.createVariable(outstring, 'f4', ('time','lat', 'lon'), zlib=True)
            scale_var.source_category = name_parts[3]
            scale_var.standard_name = "monthly_scaling"
            scale_var.long_name = str(yeari) + ' Monthly scale factors for IPCC source category ' + ' '.join(name_parts[3:])
            scale_var.units = "none"
            scale_var[:,:,:] = data_out_array[:,:,:]

## Extension

In [6]:
# Years covered by the extension files
years_extension = np.arange(2012, 2020 + 1)

# Sorted listing of every file in the extension directory
filenames_extension = np.array(sorted(glob.glob("GEPAv2_Zenodo/GEPAv2_Extension/*")))

# The first extension file provides the lat/lon axes; this replaces the
# latitudes_out / longitudes_out values set earlier in the notebook.
sta_f = Dataset(filenames_extension[0])
reference_vars = sta_f.variables
latitudes_out = np.array(reference_vars['lat'])
longitudes_out = np.array(reference_vars['lon'])
sta_f.close()

In [7]:
# Write one netCDF file per extension year holding every express-extension
# emission grid (one variable per input file) plus the grid-cell areas.
for yeari in years_extension:
    # The context manager closes the output file even if a step below raises
    with Dataset('GEPAv2_Zenodo/Output/Express_Extension_Gridded_GHGI_Methane_v2_'+str(yeari)+'.nc', 'w', format='NETCDF4') as nc_out:
        #Global attributes
        nc_out.title = 'Express Extension to the Gridded U.S. Greenhouse Gas Inventory (Version 2): Annual methane emissions'
        nc_out.publication = 'A gridded inventory of annual 2012-2018 U.S. anthropogenic methane emissions'
        nc_out.authors = 'Joannes D. Maasakkers, Erin E. McDuffie, Melissa P. Sulprizio, Candice Chen, Maggie Schultz, Lily Brunelle, Ryan Thrush, John Steller, Christopher Sherry, Daniel J. Jacob, Seongeun Jeong, Bill Irving, and Melissa Weitz'
        nc_out.history = 'September 10, 2023'
        # NOTE(review): the CF/COARDS documents spell this attribute
        # "Conventions"; the name is left unchanged so the output metadata
        # stays identical - confirm whether it should be capitalised.
        nc_out.conventions = "COARDS"
        nc_out.version = '1.0 - Publication version (Data equals the preprint version)'
        nc_out.contact = 'J.D.Maasakkers@sron.nl and McDuffie.Erin.E@epa.gov'
        nc_out.year = str(yeari)

        #Create dimensions
        nc_out.createDimension('lat', len(latitudes_out))
        nc_out.createDimension('lon', len(longitudes_out))
        nc_out.createDimension('time', 1)

        #Create coordinate variables. The time handle is called time_var so it
        #does not rebind the imported `time` module; dimensions are passed as
        #real tuples throughout.
        latitudes = nc_out.createVariable('lat', 'f4', ('lat',))
        longitudes = nc_out.createVariable('lon', 'f4', ('lon',))
        time_var = nc_out.createVariable('time', 'f4', ('time',))

        #Properties
        longitudes.standard_name = "longitude"
        longitudes.long_name = "Longitude"
        longitudes.units = "degrees_east"

        latitudes.standard_name = "latitude"
        latitudes.long_name = "Latitude"
        latitudes.units = "degrees_north"

        time_var.long_name = "time"
        time_var.units = "hours since "+str(yeari)+"-01-01 00:00:00"
        time_var.calendar = "standard"
        time_var.axis = "T"

        #Put data into the arrays
        latitudes[:]  = np.round(latitudes_out,2)
        longitudes[:] = np.round(longitudes_out,2)

        #Single time stamp at the reference date given in time.units (offset 0 hours)
        time_var[:] = 0

        for fili in filenames_extension:
            data_out_array = np.zeros([1,len(latitudes_out),len(longitudes_out)])

            #The third axis of emi_ch4 is indexed by the year offset from 2012
            with Dataset(fili) as sta_f:
                data_out_array[0,:,:] = np.array(sta_f.variables['emi_ch4'])[:,:,yeari-2012]

            #Variable name: "emi_ch4_" + file name without path prefix and extension
            outstring = fili
            outstring = outstring.replace('GEPAv2_Zenodo/GEPAv2_Extension/EXT_EPA_v2_','')
            outstring = outstring.replace('.nc','')
            outstring = "emi_ch4_" + outstring
            name_parts = outstring.split('_')

            #Separate handle so the loop variable (the file name) is not rebound
            emi_var = nc_out.createVariable(outstring, 'f4', ('time','lat', 'lon'), zlib=True)
            #Supplemental ("Supp") files carry the source type one field later
            if name_parts[2] == "Supp":
                emi_var.source_category = name_parts[3]
            else:
                emi_var.source_category = name_parts[2]
            emi_var.standard_name = "express_emissions"
            emi_var.long_name = str(yeari) + ' Express Extension Methane emissions from IPCC source category ' + ' '.join(name_parts[2:])
            emi_var.units = "moleccm-2s-1"
            emi_var[:,:,:] = data_out_array[:,:,:]

        #Grid-cell areas. area_matrix_01 is built earlier in the notebook.
        #NOTE(review): it must have the same shape as the extension lat/lon
        #grid for this assignment to succeed - confirm the grids match.
        grid_cell_area = nc_out.createVariable('grid_cell_area', 'f4', ('time','lat', 'lon'), zlib=True)
        grid_cell_area.standard_name = "grid_cell_area"
        grid_cell_area.long_name = "Grid cell areas to convert to absolute emissions"
        grid_cell_area.units = "cm^2"
        grid_cell_area[0,:,:] = area_matrix_01[:,:]