In [None]:
# --- Need to run this on CCR to get data ---

In [None]:
from pathlib import Path
import shutil # For potential file copying/moving

# --- Configuration ---
# IMPORTANT: Replace this with the actual path to your 'projects' directory on the CCR system.
# This assumes the CCR directory is mounted locally or you are running this script *on* the CCR system.
CCR_ROOT_DIR = Path("/projects/grid/ghub/ISMIP6/Projections/Full_Cleaned_Projection_Data/GrIS")

# Directory name that must appear in a matched file's path *before* the
# experiment folder; matches that do not satisfy this are skipped.
REQUIRED_DIR_NAME = "GrIS"

# The specific data variable you're looking for
VARIABLE_NAME = "lithk"

# The specific experiment folder you're targeting
EXPERIMENT_NAME = "exp06"

# Directory (relative to the current working directory) that receives the copied files
DOWNLOAD_DIR = Path(f"{VARIABLE_NAME}_{EXPERIMENT_NAME}")


def _is_under_required_dir(path, required_dir, experiment_name):
    """Return True if *required_dir* precedes *experiment_name* in *path*'s parts.

    Only the first occurrence of each name is considered. Returns False when
    either name is not a component of the path.
    """
    parts = path.parts
    try:
        return parts.index(required_dir) < parts.index(experiment_name)
    except ValueError:
        # One of the two names is not a path component at all.
        return False


def copy_matching_files(root_dir, pattern, experiment_name, download_dir, required_dir="GrIS"):
    """Copy every file under *root_dir* that matches *pattern* into *download_dir*.

    A match is only copied when *required_dir* appears among its path
    components before *experiment_name*. Progress is reported with ``print``.

    If two matches share a filename, the later one is stored under a name
    prefixed with its folders relative to *root_dir* (joined by ``_``) instead
    of overwriting the earlier copy.

    Args:
        root_dir: Directory (``Path``) to search.
        pattern: Glob pattern relative to *root_dir*; may contain ``**``.
        experiment_name: Name of the experiment folder expected in the path.
        download_dir: Existing directory (``Path``) that receives the copies.
        required_dir: Folder name that must precede *experiment_name*.

    Returns:
        A ``(copied, failed)`` tuple: ``copied`` lists the destination paths
        written, ``failed`` lists the source paths whose copy raised
        ``OSError``.
    """
    copied = []
    failed = []
    claimed = set()  # destinations already written during this call

    # glob() is used rather than rglob(): rglob() prepends its own "**/",
    # which is redundant for a pattern that already starts with "**/".
    for netcdf_file in root_dir.glob(pattern):
        # Path example: projects/.../GrIS/<ModelGroup>/<ModelName>/exp06/lithk_data.nc
        if not _is_under_required_dir(netcdf_file, required_dir, experiment_name):
            continue

        print(f"Found: {netcdf_file}")
        destination_path = download_dir / netcdf_file.name
        if destination_path in claimed:
            # Same filename from a different folder: keep both copies.
            parent_parts = netcdf_file.relative_to(root_dir).parts[:-1]
            destination_path = download_dir / "_".join((*parent_parts, netcdf_file.name))
            print(f"'{netcdf_file.name}' was already copied from another folder; using a prefixed name.")
        print(f"Copying to: {destination_path}")

        try:
            shutil.copy(netcdf_file, destination_path)
        except OSError as e:
            # Best effort: report the failure and continue with the next file.
            print(f"Error copying {netcdf_file}: {e}")
            failed.append(netcdf_file)
        else:
            claimed.add(destination_path)
            copied.append(destination_path)
        print("-" * 30)

    return copied, failed


# Create the download directory if it doesn't exist
DOWNLOAD_DIR.mkdir(parents=True, exist_ok=True)

print(f"Searching for '{VARIABLE_NAME}' NetCDF files for experiment '{EXPERIMENT_NAME}' under: {CCR_ROOT_DIR}")
print(f"Files will be downloaded to: {DOWNLOAD_DIR}\n")

if not CCR_ROOT_DIR.is_dir():
    # Make an unreachable root distinguishable from "nothing matched".
    print(f"Warning: '{CCR_ROOT_DIR}' is not an accessible directory; is the CCR filesystem mounted?")

# --- Search and Download ---
# Matches any .nc file whose name starts with the variable name and that sits
# directly inside a folder named after the experiment, at any depth below
# CCR_ROOT_DIR (the double asterisk stands for arbitrary directories),
# e.g. .../exp06/lithk*.nc
search_pattern = f"**/{EXPERIMENT_NAME}/{VARIABLE_NAME}*.nc"

copied_files, failed_files = copy_matching_files(
    CCR_ROOT_DIR,
    search_pattern,
    EXPERIMENT_NAME,
    DOWNLOAD_DIR,
    required_dir=REQUIRED_DIR_NAME,
)

# Number of files that were actually copied.
found_files_count = len(copied_files)

if found_files_count:
    print(f"\nSuccessfully downloaded {found_files_count} '{VARIABLE_NAME}' NetCDF files for '{EXPERIMENT_NAME}' to {DOWNLOAD_DIR}")
elif not failed_files:
    print(f"No '{VARIABLE_NAME}' NetCDF files for experiment '{EXPERIMENT_NAME}' found in '{CCR_ROOT_DIR}' matching the criteria.")

if failed_files:
    # Files were found but could not be copied; do not report this as "not found".
    print(f"\nFailed to copy {len(failed_files)} '{VARIABLE_NAME}' NetCDF file(s); see the errors above.")