In [1]:
import glob
import os
import re
from datetime import datetime, timedelta, time, date

import pandas as pd
import xarray as xr

# --- Run configuration ---
# Station code that is inserted into the footprint file glob pattern.
stationcode = "CBW"

# Period bounds (YYYY-MM-DD) used for file filtering and the hourly time axis.
mp_start_date = "2021-01-01"
mp_end_date = "2022-01-01"

# Handed to footprint_hours() as `simulation_len`, where it is used as hours.
sim_len = 240

variablelist_vars = ["flux_ff_exchange_prior"]

# NOTE(review): not referenced by any cell visible here.
fluxtype = "PARIS"

# Directory that is globbed for footprint files.
filepath = "/projects/0/ctdas/PARIS/DATA/footprints/wur/PARIS_recompile/"

In [2]:
def filter_files_by_date(file_list, start_date, end_date):
    """Return the files whose embedded timestamp lies within a date range.

    The timestamp is the first ``YYYYxMMxDDxHH`` pattern found in the base
    name of each entry, so the result does not depend on how many underscores
    the directory part of the path contains.

    Args:
        file_list: Iterable of file names or paths.
        start_date: Inclusive lower bound, formatted as ``YYYY-MM-DD``.
        end_date: Inclusive upper bound, formatted as ``YYYY-MM-DD``. The
            bound is midnight at the start of that day, so on the end date
            only a file stamped hour 00 is kept.

    Returns:
        list: The entries of ``file_list`` inside the range, in input order.

    Raises:
        ValueError: If a date bound does not match ``YYYY-MM-DD`` or a file
            name contains no ``YYYYxMMxDDxHH`` timestamp.
    """
    date_format = "%Y-%m-%d"
    start_datetime = datetime.strptime(start_date, date_format)
    end_datetime = datetime.strptime(end_date, date_format)

    # Year, month, day and hour separated by a literal 'x'.
    timestamp_pattern = re.compile(r"\d{4}x\d{2}x\d{2}x\d{2}")

    filtered_files = []
    for file_name in file_list:
        # Only look at the base name: directory names must not influence
        # which part of the path is interpreted as the timestamp.
        match = timestamp_pattern.search(os.path.basename(file_name))
        if match is None:
            raise ValueError(
                "No 'YYYYxMMxDDxHH' timestamp found in file name: %r" % (file_name,)
            )
        file_datetime = datetime.strptime(match.group(0), "%Yx%mx%dx%H")

        if start_datetime <= file_datetime <= end_datetime:
            filtered_files.append(file_name)

    return filtered_files

def footprint_hours(fp_filelist, simulation_len):
    """Build the hourly time axis covered by a list of footprint files.

    The 13-character ``YYYYxMMxDDxHH`` timestamp is read from a fixed position
    counted from the end of the first and last file name. A name with 9
    'x'-separated fields (instead of 8) carries an extra term describing the
    ensemble run number, which shifts the timestamp three characters further
    from the end.

    The axis runs hourly from ``simulation_len`` hours before the first
    file's timestamp up to the last file's timestamp, and keeps only the
    hours of day between the hour of that shifted start and the hour of the
    end (both inclusive). A window that crosses midnight, or ends at hour 23,
    is handled by wrapping around the 24-hour clock.

    Args:
        fp_filelist: Sequence of footprint file names; the first and last
            entries define the covered period.
        simulation_len: Number of hours to extend the axis before the first
            file's timestamp.

    Returns:
        pandas.DatetimeIndex: The selected hourly timestamps.

    Raises:
        ValueError: If ``fp_filelist`` is empty, if the first file name has
            neither 8 nor 9 'x'-separated fields, or if a sliced timestamp
            cannot be parsed.
    """
    if len(fp_filelist) == 0:
        raise ValueError("fp_filelist is empty; cannot derive the footprint hours.")

    # Locate the timestamp; the layout is decided by the first file name only.
    field_count = len(fp_filelist[0].split(sep='x'))
    if field_count == 8:
        stamp = slice(-42, -29)
    elif field_count == 9:
        stamp = slice(-45, -32)
    else:
        raise ValueError(
            "Unrecognised footprint file name (expected 8 or 9 'x'-separated "
            "fields, got %d): %r" % (field_count, fp_filelist[0])
        )

    # Define time range
    time_format = '%Yx%mx%dx%H'
    fp_range_start = (
        datetime.strptime(fp_filelist[0][stamp], time_format)
        - timedelta(hours=simulation_len)
    )
    fp_range_end = datetime.strptime(fp_filelist[-1][stamp], time_format)

    # Hourly axis; the step is given as a Timedelta instead of a frequency
    # alias string so it does not depend on the pandas alias spelling.
    times = pd.date_range(
        start=fp_range_start, end=fp_range_end, freq=pd.Timedelta(hours=1)
    )

    # Keep only the hours of day inside [start hour, end hour]. Measuring the
    # distance from the start hour modulo 24 makes the window wrap correctly
    # across midnight.
    window_len = (fp_range_end.hour - fp_range_start.hour) % 24
    hours_into_window = (times.hour - fp_range_start.hour) % 24

    return times[hours_into_window <= window_len]


In [None]:
# Collect this station's footprint files in sorted order and keep only those
# whose timestamp lies inside the configured period.
sparse_files = filter_files_by_date(
    sorted(glob.glob(filepath + 'footprint_' + stationcode + '*.nc')),
    mp_start_date,
    mp_end_date,
)

In [None]:
# Create timerange of simulation
timerange_sim = pd.date_range(start=mp_start_date, end=mp_end_date, freq='H')
timerange_fp = footprint_hours(fp_filelist=sparse_files, simulation_len=sim_len)

In [None]:
# Display the hourly simulation time axis. (A former `to_pydatetime()` call
# was dropped: its result was neither stored nor shown, since only the last
# expression of a cell is displayed.)
timerange_sim

In [None]:
# Display the footprint time axis returned by footprint_hours().
timerange_fp

In [None]:
from tqdm import tqdm

# Map each footprint timestamp to its first position once, instead of
# rebuilding and linearly searching `timerange_fp.tolist()` on every iteration.
fp_position = {}
for position, fp_time in enumerate(timerange_fp.tolist()):
    fp_position.setdefault(fp_time, position)

# Print, for every simulation hour that has a footprint, its index in timerange_fp.
for simtime in tqdm(timerange_sim):
    if simtime in fp_position:
        print(fp_position[simtime])

In [None]:
import os

outdir = '/projects/0/ctdas/PARIS/DATA/obspacks/BASE/'

# Go one level up from `outdir` (absolute, normalised) and point at the
# BASE_SS directory next to it.
basepath = (
    os.path.abspath(os.path.join(outdir, os.pardir))
    + '/BASE_SS/'
)
basepath

In [None]:
station = 'RGL'

# List the .nc files under `basepath` whose name contains the lower-cased station code.
glob.glob(''.join([basepath, '*', station.lower(), '*.nc']))

In [None]:
sumvar = 'flux_ff_exchange_prior'
variablelist_vars = [
    'flux_ff_exchange_prior',
    'flux_ocean_exchange_prior',
    'flux_fire_exchange_prior',
    'flux_bio_exchange_prior',
]

# Drop the first entry equal to `sumvar` (raises ValueError if it is absent).
del variablelist_vars[variablelist_vars.index(sumvar)]
print(variablelist_vars)


In [None]:
import numpy as np

# Small example mapping: four nanosecond-precision timestamps, listed out of
# chronological order, each paired with its own [1, 2, 3] list.
timedict = {
    np.datetime64(stamp): [1, 2, 3]
    for stamp in (
        '2021-01-22T11:00:00.000000000',
        '2020-12-22T12:00:00.000000000',
        '2020-12-22T13:00:00.000000000',
        '2021-05-22T14:00:00.000000000',
    )
}

In [None]:
# Rebuild the mapping with its keys in ascending order, then show the smallest key.
timedict = {stamp: timedict[stamp] for stamp in sorted(timedict)}
min(timedict)


In [3]:
import xarray as xr

# Variables to select from the file.
sumvars = [
    'flux_ocean_exchange_prior',
    'flux_fire_exchange_prior',
    'flux_bio_exchange_prior',
]

nc_file = '/projects/0/ctdas/PARIS/DATA/obspacks/BASE_SS/pseudo_co2_rgl_tower-insitu_160_allvalid-90magl_2021-07-01-00:00-2021-08-01-00:00.nc'

# Open the file and keep only the variables listed in `sumvars`.
ds = xr.open_dataset(nc_file)[sumvars]
ds

In [15]:

obspack_orig = '/projects/0/ctdas/PARIS/DATA/obs/non_obspacksites/DECC-picarro_HFD_co2-100m-20231116.nc'

# First three characters of the second-to-last '-'-separated piece of the path.
obspack_orig.split('-')[-2][:3]

'100'