In [None]:
# This file was used to concatenate all of the downloaded ERA5-Land data, which was downloaded
# in smaller sections due to computational constraints.
# The code below combines all of the .nc files in the given directory (e.g. AR-Vir) and converts them all to DataFrames.


In [None]:
import xarray as xr 
import pandas as pd
import netCDF4 as nc 
import glob
import os

In [None]:
def process_nc_file(file_path, filename):
    """Load one NetCDF file into a flat DataFrame tagged with its site id.

    The site id is the second underscore-separated token of ``filename``.
    The ``valid_time`` column is rewritten as ``YYYY-MM-DD`` strings.

    Args:
        file_path: Full path of the .nc file to open.
        filename: Base name of the file; used only to derive the site id.

    Returns:
        A DataFrame with the dataset's index levels turned into ordinary
        columns plus a ``site_id`` column, or ``None`` if any step failed
        (the error is printed rather than raised so callers can skip the file).
    """
    try:
        # extract the site_id from the filename
        site_id = filename.split('_')[1]

        # Open inside a context manager so the underlying file handle is
        # released as soon as the data has been copied into pandas, even when
        # the conversion raises. Many files are processed in a row, so leaked
        # handles would otherwise accumulate.
        with xr.open_dataset(file_path) as ds:
            df = ds.to_dataframe().reset_index()

        # keep only the calendar date, stored as text
        df['valid_time'] = pd.to_datetime(df['valid_time']).dt.strftime('%Y-%m-%d')

        # add site_id column
        df['site_id'] = site_id

        return df
    except Exception as e:
        # Best-effort boundary: report the failure and let the caller skip it.
        print(f"Error processing {file_path}: {e}")
        return None

# Folder holding the downloaded .nc chunks that should be merged.
directory_path = '/Users/abigailbase/PROJECT FILES/SSRD downloads'

if not os.path.isdir(directory_path):
    print(f"Directory does not exist: {directory_path}")
else:
    # empty list to hold dfs
    df_list = []

    # Iterate over files in the directory. os.listdir() returns entries in
    # arbitrary order, so sort them to make the row order of the combined CSV
    # reproducible between runs and machines.
    for filename in sorted(os.listdir(directory_path)):
        if filename.endswith(".nc"):
            file_path = os.path.join(directory_path, filename)
            print(f"Processing file: {file_path}")
            df = process_nc_file(file_path, filename)
            if df is not None:
                df_list.append(df)
            else:
                # process_nc_file already printed the underlying error
                print(f"Skipped {file_path} due to processing error.")

    if df_list:
        # Concatenate all DataFrames in the list into a single DataFrame
        combined_df = pd.concat(df_list, ignore_index=True)

        # save the df to CSV (written relative to the current working directory)
        combined_df.to_csv('SSRD.csv', index=False)
        print("Combined data saved to 'SSRD.csv'.")
    else:
        print("No DataFrames to concatenate.")