In [None]:
import os
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# --- Run configuration -------------------------------------------------------

# Directory holding the forcing files for the run.
# NOTE(review): the value looks like a placeholder - point it at a real directory.
forcing_dir = 'path/to/forcing/inputs/for/run'

# Forcing variable names, exactly as they appear inside the forcing file names.
var_list = [
    'APCP', 'DLWR', 'DSWR', 'Temp',
    'SPFH', 'UGRD', 'VGRD', 'Press',
]

# Number of hours spanned by a single forcing file.
dt = 24

In [None]:
# Scan the forcing directory and record, per variable and per day, whether the
# expected ParFlow-CLM forcing file is present.
#
# Expected file name: CW3E.<var>.<first>_to_<last>.pfb, where <first>/<last> are
# 1-based, zero-padded hour counters and every file spans `dt` hours.
#
# Result: `df_forcing` - one row per day (index of 'YYYY-MM-DD' strings), one
# column per variable; 1 = every file overlapping that day exists and is
# non-empty, 0 = at least one of them is missing or empty.

# Hours 1..24 are attributed to this first day, hours 25..48 to the next, etc.
first_day = pd.Timestamp('2002-10-01')
date_range = pd.date_range(start=first_day, end='2003-9-30', freq='D')

# Derive the hour count from the calendar so it cannot drift from the date range
# (a hard-coded 8760 would silently be wrong for a 366-day period).
n_hours = 24 * len(date_range)

# Start optimistic (1) and knock days down to 0 as bad files are found. This lets
# several short files (dt < 24) share one day without overwriting each other.
df_forcing = pd.DataFrame(1, index=date_range, columns=var_list)

for var in var_list:
    # Upper bound is n_hours + 1 so the final file is still visited when dt == 1.
    for i in range(1, n_hours + 1, dt):
        last_hour = i + dt - 1
        file_name = f'CW3E.{var}.{i:06d}_to_{last_hour:06d}.pfb'
        file_path = os.path.join(forcing_dir, file_name)

        # Calendar days touched by hours i..last_hour (1-based hours, 24 per day).
        start = first_day + pd.Timedelta(days=(i - 1) // 24)
        end = first_day + pd.Timedelta(days=(last_hour - 1) // 24)

        # A zero-byte file is treated the same as a missing one; isfile() also
        # rejects a directory that happens to carry the expected name.
        present = os.path.isfile(file_path) and os.path.getsize(file_path) > 0
        if not present:
            # Label slicing is inclusive on both ends and tolerates an `end`
            # that lies past the last day of the index.
            df_forcing.loc[start:end, var] = 0

# Use plain date strings as row labels so the heatmap axis is readable.
df_forcing.index = df_forcing.index.strftime('%Y-%m-%d')

In [None]:
# Heatmap of forcing-file availability: one row per variable, one column per day.
sns.set(style="whitegrid")
fig, ax = plt.subplots(figsize=(10, 6))

# Pin the colour scale to the 0/1 flag range. Without vmin/vmax the scale is taken
# from the data, so a table that is entirely 1 (or entirely 0) would be drawn in a
# single colour whose meaning differs from the mixed case.
# With coolwarm: blue = 0 (file missing or empty), red = 1 (file present).
sns.heatmap(df_forcing.T, cmap='coolwarm', vmin=0, vmax=1, cbar=False, ax=ax)

ax.set_title('ParFlow-CLM Forcing Files')
ax.set_xlabel('Date')
ax.set_ylabel('Forcing variable')
plt.show()

In [None]:
# Scan the forcing directory and record, per variable and per day, whether the
# distribution (.dist) companion of each ParFlow-CLM forcing file is present.
#
# Expected file name: CW3E.<var>.<first>_to_<last>.pfb.dist, where <first>/<last>
# are 1-based, zero-padded hour counters and every file spans `dt` hours.
#
# Result: `df_forcing_dist` - one row per day (index of 'YYYY-MM-DD' strings), one
# column per variable; 1 = every .dist file overlapping that day exists and is
# non-empty, 0 = at least one of them is missing or empty.

# Hours 1..24 are attributed to this first day, hours 25..48 to the next, etc.
first_day = pd.Timestamp('2002-10-01')
date_range = pd.date_range(start=first_day, end='2003-9-30', freq='D')

# Derive the hour count from the calendar so it cannot drift from the date range
# (a hard-coded 8760 would silently be wrong for a 366-day period).
n_hours = 24 * len(date_range)

# Start optimistic (1) and knock days down to 0 as bad files are found. This lets
# several short files (dt < 24) share one day without overwriting each other.
df_forcing_dist = pd.DataFrame(1, index=date_range, columns=var_list)

for var in var_list:
    # Upper bound is n_hours + 1 so the final file is still visited when dt == 1.
    for i in range(1, n_hours + 1, dt):
        last_hour = i + dt - 1
        file_name = f'CW3E.{var}.{i:06d}_to_{last_hour:06d}.pfb.dist'
        file_path = os.path.join(forcing_dir, file_name)

        # Calendar days touched by hours i..last_hour (1-based hours, 24 per day).
        start = first_day + pd.Timedelta(days=(i - 1) // 24)
        end = first_day + pd.Timedelta(days=(last_hour - 1) // 24)

        # An empty .dist file carries no information, so it is treated like a
        # missing one - the same non-zero-size rule used for the .pfb files.
        # isfile() also rejects a directory that carries the expected name.
        present = os.path.isfile(file_path) and os.path.getsize(file_path) > 0
        if not present:
            # Label slicing is inclusive on both ends and tolerates an `end`
            # that lies past the last day of the index.
            df_forcing_dist.loc[start:end, var] = 0

# Use plain date strings as row labels so the heatmap axis is readable.
df_forcing_dist.index = df_forcing_dist.index.strftime('%Y-%m-%d')

In [None]:
# Heatmap of .dist-file availability: one row per variable, one column per day.
sns.set(style="whitegrid")
fig, ax = plt.subplots(figsize=(10, 6))

# Pin the colour scale to the 0/1 flag range. Without vmin/vmax the scale is taken
# from the data, so a table that is entirely 1 (or entirely 0) would be drawn in a
# single colour whose meaning differs from the mixed case.
# With coolwarm: blue = 0 (file not found), red = 1 (file present).
sns.heatmap(df_forcing_dist.T, cmap='coolwarm', vmin=0, vmax=1, cbar=False, ax=ax)

ax.set_title('ParFlow-CLM Forcing Files distributed')
ax.set_xlabel('Date')
ax.set_ylabel('Forcing variable')
plt.show()