# Sanity Check: Yearly SLC totals against other inventories

In [33]:
import xarray as xr
import pandas as pd
import os
import pyproj
import numpy as np
import xesmf as xe
import calendar
import datetime
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import matplotlib.pyplot as plt
import sys
sys.path.append('..')

import noaa_csl_funcs as ncf

In [34]:
#Define classes
class Regridded_CSL_Handler:
    '''A class to handle NOAA CSL inventory data that has been regridded and organized by regrid_data.py

    The folder layout this class builds paths for is
    <regridded_path>/<sector>/<year string>/<month string>/<day_type>/<files>
    where the year and month strings are produced by ncf.yr_to_yrstr and ncf.month_int_to_str.
    '''

    def __init__(self,regridded_path,bau_or_covid='COVID'):
        '''Everything revolves around the "regridded_path", which determines sectors via the filenames contained within

        Args:
        regridded_path (str) : path to the top level folder of the regridded data, whose visible entries are the sector folders
        bau_or_covid (str, optional) : passed through to ncf.yr_to_yrstr when building the year folder name (default 'COVID').
                                       NOTE(review): presumably selects between business-as-usual and COVID versions of a year -- confirm in noaa_csl_funcs
        '''

        self.regridded_path = regridded_path
        self.sectors = self.get_sectors() #reads the directory listing, so the path must exist at construction time
        self.bau_or_covid = bau_or_covid

    def get_sectors(self):
        '''Lists the sectors in the regridded data storage path

        Returns:
        sectors (dict) : {'area':[...],'point':[...]} with each visible entry of regridded_path sorted into one of the two lists

        Raises:
        ValueError : if an entry name contains neither 'area' nor 'point'
        '''

        sectors = {'area':[],'point':[]}
        for sector in ncf.listdir_visible(self.regridded_path):
            #'area' is checked first, so a name containing both substrings is filed under 'area'
            if 'area' in sector:
                sectors['area'].append(sector)
            elif 'point' in sector:
                sectors['point'].append(sector)
            else:
                raise ValueError(f"Unexpected sector type {sector}, not point or area.")
        return sectors

    def get_sector_subset_list(self,sector_subset):
        '''Gets a subset of the sectors which could be one, all, or some
        
        Args:
        sector_subset (str,list): "all" will return all sectors,  'point' will return point sectors, 'area' will return area sectors, a list will just return that list
        
        Returns:
        sector_subset_list (list) : list of sectors in the subset. 

        Raises:
        KeyError : if sector_subset is a string other than 'all', 'area' or 'point'
        '''

        #isinstance (rather than type()==str) so that str subclasses such as numpy.str_ are treated as keywords too,
        #and so that non-string inputs are never compared against 'all'
        if isinstance(sector_subset,str):
            if sector_subset == 'all':
                return [sector for group in self.sectors.values() for sector in group]
            return self.sectors[sector_subset]
        return sector_subset

    def get_days_in_range(self,dt1,dt2,day_types,sector_subset = 'all',add_path=True):
        '''Gets all filepaths to the day_type level that are within a datetime range
        
        Args:
        dt1 (datetime.date) : a date, datetime, etc to start the range (will only use year and month)
        dt2 (datetime.date) : a date, datetime, etc to end the range (will only use year and month)
        day_types (list) : list of day types to include in the list
        sector_subset (str,list, optional) : 'all' (default), 'area', 'point', or an explicit list of sectors; see get_sector_subset_list
        add_path (bool, optional) : if true (default) it will add the regridded path to each element

        Returns:
        days_in_range (list) : list of paths to the day_type folders that are within the date range and sector, day_types, etc. 
                               Ordered by month, then sector, then day type. The paths are built, not checked for existence.
        '''

        #Monthly periods honour the "year and month only" contract: a month-start date_range would silently
        #skip the first month whenever dt1 is not the first day of its month
        months = pd.period_range(start=dt1,end=dt2,freq='M')
        sector_subset_list = self.get_sector_subset_list(sector_subset)
        days_in_range = []
        for month in months:
            month_str = ncf.month_int_to_str(month.month)
            for sector in sector_subset_list:
                year_str = ncf.yr_to_yrstr(sector,month.year,self.bau_or_covid)
                for day_type in day_types:
                    day_path = f'{sector}/{year_str}/{month_str}/{day_type}'
                    if add_path:
                        days_in_range.append(os.path.join(self.regridded_path,day_path))
                    else:
                        days_in_range.append(day_path)
        return days_in_range
    
    def get_files_in_days(self,days_paths):
        '''Gets the files that exist in the paths
        
        Args:
        days_paths (list) : list of paths to the days folders
        
        Returns:
        files (list) : list of files in those days' paths (with the folder path prepended), in the order the folders were given'''

        files = []
        for day_path in days_paths:
            files.extend(ncf.listdir_visible(day_path,add_path=True))
        return files


#Define Functions
def preprocess_regridded(ds,extent,area_point):
    '''Preprocesses the regridded dataset when loaded to add attributes needed for concatenation
    
    The sector, day type and year-month stored in the dataset attributes are promoted to length-1 dimensions
    so that many files can later be combined by coordinates, and the data are cropped to the extent.
    
    Args:
    ds (xr.DataSet) : the dataset to process. Its attrs must contain 'sector_id', 'day_type', 'year' and 'month'
    extent (dict) : bounding box with keys 'lat_low', 'lat_high', 'lon_low', 'lon_high'
    area_point (str) : 'area' to treat ds as gridded area data; any other value is treated as point data
    
    Returns 
    ds (xr.DataSet) : the dataset, with added coordinates taken from the attributes, cropped to the extent, and with
                      the 'nc_fpath' attribute removed if it was present
    '''
    is_area = area_point == 'area'

    #the attribute only stores the bare sector id, so add the 'area_'/'point_' prefix back
    sector_prefix = 'area_' if is_area else 'point_'
    ds = ds.assign_coords(sector = sector_prefix + ds.attrs['sector_id'])
    ds = ds.assign_coords(day_type = ds.attrs['day_type']) 
    #double quotes outside so the nested single quotes also parse on Python versions before 3.12
    ds = ds.assign_coords(yr_mo=f"{ds.attrs['year']}-{ds.attrs['month']}")
    ds = ds.expand_dims(dim=['sector','day_type','yr_mo'])

    if is_area:
        #area data are cropped by label slicing on the lat/lon coordinates
        ds = slice_extent(ds,extent)
    else:
        #point data are cropped with a boolean mask on lat/lon; the mask is computed eagerly before where(drop=True) uses it
        in_extent = ((ds.lat>=extent['lat_low'])&
                     (ds.lat<=extent['lat_high'])&
                     (ds.lon>=extent['lon_low'])&
                     (ds.lon<=extent['lon_high']))
        ds = ds.where(in_extent.compute(),drop=True)

    #best-effort removal of the per-file path attribute; a missing key is fine, anything else should surface
    ds.attrs.pop('nc_fpath',None)
    return ds

def slice_extent(ds,extent):
    '''Crops a dataset to a lat/lon bounding box using label-based slices

    Args:
    ds : object with a .sel method and lat/lon coordinates (an xarray Dataset or DataArray in this notebook)
    extent (dict) : bounding box with keys 'lat_low', 'lat_high', 'lon_low', 'lon_high'

    Returns:
    the result of ds.sel over the lat range [lat_low,lat_high] and lon range [lon_low,lon_high]
    '''
    lat_bounds = slice(extent['lat_low'],extent['lat_high'])
    lon_bounds = slice(extent['lon_low'],extent['lon_high'])
    return ds.sel(lat=lat_bounds,lon=lon_bounds)

def get_satsunwkd(year,month):
    '''Counts the Saturdays, Sundays and weekdays (Monday-Friday) in one calendar month
    
    Args:
    year (int) : the year
    month (int) : the month, as an integer (1-12)
    
    Returns:
    sat_count (int) : number of saturdays
    sun_count (int) : number of sundays
    weekd_count (int) : number of weekdays
    '''

    last_day = calendar.monthrange(year,month)[1]
    mm = f'{month:02d}'

    #pandas weekday numbering: Monday=0 ... Saturday=5, Sunday=6
    weekdays = pd.date_range(start=f'{year}-{mm}-01',end=f'{year}-{mm}-{last_day}').weekday
    sat_count = int((weekdays == 5).sum())
    sun_count = int((weekdays == 6).sum())
    weekd_count = int((weekdays < 5).sum())
    return sat_count,sun_count,weekd_count


## Define Parameters

In [35]:
#Bounding boxes in degrees, keyed the way slice_extent and preprocess_regridded expect
#map_extent is the wider of the two boxes
map_extent = dict(
    lon_low=-112.25,
    lon_high=-111.55,
    lat_low=40.3,
    lat_high=41.1,
)
#dataset_extent is the box the emissions are cropped to before they are summed
dataset_extent = dict(
    lon_low=-112.1,
    lon_high=-111.7,
    lat_low=40.4,
    lat_high=41.0,
)


In [36]:
#Root folder of the regridded inventory; the handler lists this folder on construction to discover the sectors
regridded_path = '/uufs/chpc.utah.edu/common/home/lin-group9/agm/NOAA_CSL_Data/regridded'
RCH = Regridded_CSL_Handler(regridded_path)
dt1  = '2019-01' #start year and month
dt2 = '2019-12' #end year and month
#The monthly totals in the cells below select all three day types, so all three need to be listed here
day_types = ['weekdy','satdy','sundy'] #a list with any or all of 'weekdy','satdy','sundy'
species = 'CO' #name of the data variable pulled out of every file

### Get area sources

In [37]:
#Yearly total for the gridded (area) sectors: load every file, scale day-type totals up to months, sum the months,
#then multiply by the grid cell area and sum over the domain
sectors = 'area'

#Get the paths to the files that match the criteria
days_paths = RCH.get_days_in_range(dt1,dt2,day_types,sectors) 
files = RCH.get_files_in_days(days_paths)

#Load the files with xarray, preprocessing them so they can be combined by coordinates
ds_list = [] #initialize the list of datasets
for file in files:
    ds = preprocess_regridded(xr.open_dataset(file,chunks = {'utc_hour':1}),dataset_extent,sectors)[species] #preprocess the file, open with dask chunking, and only keep the species of interest
    ds_list.append(ds)  
ds_combined = xr.combine_by_coords(ds_list,combine_attrs='drop_conflicts') #this is the combined dataset!
mass_unit = ds_combined[species].attrs['units'].split()[0] #this will either be metric_Ton or moles depending on the species chosen
#Sum over the hours of the day and over all sectors; the units attribute is relabelled as per-day
#Note that ds is reused here: from now on it is the daily-total array, not the last loaded file
ds = ds_combined.sum(dim=['utc_hour','sector'])[species].assign_attrs({'units':f'{mass_unit} day^-1 meters^-2'})
month_sums = [] 
for yr_mo in ds.yr_mo.values:
    #yr_mo labels look like '<year>-<month>' (built in preprocess_regridded), both parts must parse as integers
    yr = int(yr_mo.split('-')[0])
    mo = int(yr_mo.split('-')[1])
    sat,sun,wkdy = get_satsunwkd(yr,mo)
    #Weight the representative day of each type by how many such days the month contains
    sat_sum = (ds.sel(yr_mo=yr_mo,day_type='satdy')*sat).assign_attrs({'units':f'{mass_unit} month^-1 meters^-2'})
    sun_sum = (ds.sel(yr_mo=yr_mo,day_type='sundy')*sun).assign_attrs({'units':f'{mass_unit} month^-1 meters^-2'})
    wkdy_sum = (ds.sel(yr_mo=yr_mo,day_type='weekdy')*wkdy).assign_attrs({'units':f'{mass_unit} month^-1 meters^-2'})
    #Add the three weighted arrays: they become the variables of one dataset, are stacked along "variable" and summed.
    #compat='override' is used because the three inputs carry different scalar day_type coordinates;
    #the leftover day_type coordinate is then dropped
    month_sum = xr.combine_by_coords([sat_sum.to_dataset(name='sat'),
                                    sun_sum.to_dataset(name='sun'),
                                    wkdy_sum.to_dataset(name='wkdy')
                                    ],compat='override').to_array().sum("variable").drop_vars('day_type').assign_attrs({'units':f'{mass_unit} month^-1 meters^-2'})
    month_sums.append(month_sum.to_dataset(name=yr_mo))
#Same stacking trick across months (one variable per yr_mo) to get the total over the whole date range
yr_sum = xr.combine_by_coords(month_sums,compat='override').to_array().sum("variable").drop_vars('yr_mo').assign_attrs({'units':f'{mass_unit} meters^-2'})
#Per-cell areas, cropped to the same extent as the emissions so the two grids line up when multiplied
#The path is relative, so it depends on the notebook's working directory
grid_area = xr.open_dataset('../regridding/grid_area/grid_out_area.nc')
grid_area = slice_extent(grid_area,dataset_extent)
#per-area emissions * cell area -> absolute mass per cell (units relabelled to just the mass unit)
absolute_emissions = (yr_sum * grid_area['cell_area']).assign_attrs({'units':mass_unit})
area_sum = absolute_emissions.sum().values #domain total as a scalar numpy value


### Get point sources

In [26]:
#Yearly total for the point sectors: same steps as the area cell, except that the final sum is taken
#directly over the yearly array without multiplying by a cell area
sectors = 'point'

#Get the paths to the files that match the criteria
days_paths = RCH.get_days_in_range(dt1,dt2,day_types,sectors) 
files = RCH.get_files_in_days(days_paths)

#Load the files with xarray, preprocessing them so they can be combined by coordinates
ds_list = [] #initialize the list of datasets
for file in files:
    ds = preprocess_regridded(xr.open_dataset(file,chunks = {'utc_hour':1}),dataset_extent,sectors)[species] #preprocess the file, open with dask chunking, and only keep the species of interest
    ds_list.append(ds)  
ds_combined = xr.combine_by_coords(ds_list,combine_attrs='drop_conflicts') #this is the combined dataset!
mass_unit = ds_combined[species].attrs['units'].split()[0] #this will either be metric_Ton or moles depending on the species chosen
#Sum over the hours of the day and over all sectors; ds is reused and is now the daily-total array
#NOTE(review): the units labels below still say meters^-2 although no cell area is applied in this cell --
#presumably the point data are already absolute masses; confirm against the regridded point files
ds = ds_combined.sum(dim=['utc_hour','sector'])[species].assign_attrs({'units':f'{mass_unit} day^-1 meters^-2'})
month_sums = [] 
for yr_mo in ds.yr_mo.values:
    #yr_mo labels look like '<year>-<month>' (built in preprocess_regridded), both parts must parse as integers
    yr = int(yr_mo.split('-')[0])
    mo = int(yr_mo.split('-')[1])
    sat,sun,wkdy = get_satsunwkd(yr,mo)
    #Weight the representative day of each type by how many such days the month contains
    sat_sum = (ds.sel(yr_mo=yr_mo,day_type='satdy')*sat).assign_attrs({'units':f'{mass_unit} month^-1 meters^-2'})
    sun_sum = (ds.sel(yr_mo=yr_mo,day_type='sundy')*sun).assign_attrs({'units':f'{mass_unit} month^-1 meters^-2'})
    wkdy_sum = (ds.sel(yr_mo=yr_mo,day_type='weekdy')*wkdy).assign_attrs({'units':f'{mass_unit} month^-1 meters^-2'})
    #Add the three weighted arrays: they become the variables of one dataset, are stacked along "variable" and summed.
    #compat='override' is used because the three inputs carry different scalar day_type coordinates;
    #the leftover day_type coordinate is then dropped
    month_sum = xr.combine_by_coords([sat_sum.to_dataset(name='sat'),
                                    sun_sum.to_dataset(name='sun'),
                                    wkdy_sum.to_dataset(name='wkdy')
                                    ],compat='override').to_array().sum("variable").drop_vars('day_type').assign_attrs({'units':f'{mass_unit} month^-1 meters^-2'})
    month_sums.append(month_sum.to_dataset(name=yr_mo))
#Same stacking trick across months (one variable per yr_mo) to get the total over the whole date range
yr_sum = xr.combine_by_coords(month_sums,compat='override').to_array().sum("variable").drop_vars('yr_mo').assign_attrs({'units':f'{mass_unit} meters^-2'})
point_sum = yr_sum.sum().values #total over all remaining points as a scalar numpy value


### Get total

In [50]:
#Combine the two yearly totals. area_sum and point_sum come from the two cells above, so both must have been run
#for the same species and date range; mass_unit is whatever the most recently run of those cells set it to
total_sum = area_sum + point_sum
print(total_sum,mass_unit,species)

48787.23965782996 metric_Ton CO
