In [19]:
# Import required libraries
import os
import json
import pandas as pd
import numpy as np
from scipy.interpolate import griddata
from tqdm import tqdm

In [20]:
# Define data directories
# The data root can be overridden without editing the notebook by setting the
# FEFLOW_DATA_DIR environment variable, e.g. on the cluster:
#   export FEFLOW_DATA_DIR=/srv/scratch/z5370003/projects/data/groundwater/FEFLOW/coastal/variable_density/
# When the variable is unset (or empty) the local path below is used.
default_base_data_dir = '/Users/arpitkapoor/Library/CloudStorage/OneDrive-UNSW/Shared/Projects/01_PhD/05_groundwater/data/FEFLOW/variable_density'
base_data_dir = os.environ.get('FEFLOW_DATA_DIR') or default_base_data_dir

# Sub-directories of the data root used by this notebook
raw_data_dir = os.path.join(base_data_dir, 'all')
filtered_all_ts_data_dir = os.path.join(base_data_dir, 'filter_all_ts')

print(f"Base data directory: {base_data_dir}")
print(f"Raw data directory: {raw_data_dir}")
print(f"Filtered all ts data directory: {filtered_all_ts_data_dir}")

Base data directory: /Users/arpitkapoor/Library/CloudStorage/OneDrive-UNSW/Shared/Projects/01_PhD/05_groundwater/data/FEFLOW/variable_density
Raw data directory: /Users/arpitkapoor/Library/CloudStorage/OneDrive-UNSW/Shared/Projects/01_PhD/05_groundwater/data/FEFLOW/variable_density/all
Filtered all ts data directory: /Users/arpitkapoor/Library/CloudStorage/OneDrive-UNSW/Shared/Projects/01_PhD/05_groundwater/data/FEFLOW/variable_density/filter_all_ts


In [21]:
# Get and sort time series files
# Keep only CSV entries: os.listdir also returns hidden files (e.g. .DS_Store
# on macOS) and sub-directories, and every entry of ts_files is later handed
# to pd.read_csv.
ts_files = sorted(
    entry for entry in os.listdir(filtered_all_ts_data_dir)
    if entry.lower().endswith('.csv')
)
print(f"Total number of files: {len(ts_files)}")
print(f"First 3 files: {ts_files[:3]}")
print(f"Last 3 files: {ts_files[-3:]}")

Total number of files: 1909
First 3 files: ['0000.csv', '0001.csv', '0002.csv']
Last 3 files: ['1906.csv', '1907.csv', '1908.csv']


In [22]:
# Location of the patch definition file (patches.json directly under the data root)
patch_config_json = os.path.join(base_data_dir, 'patches.json')

# Parse the JSON document into a Python object
with open(patch_config_json) as patch_file:
    patch_config = json.loads(patch_file.read())

In [23]:
# Output root for the per-patch arrays (one patch_XXX sub-directory per patch)
filter_patch_data_dir = os.path.join(
    base_data_dir,
    'filter_all_ts_patch',
)

In [24]:
target_cols = ['mass_concentration', 'head', 'pressure']
coords_cols = ['X', 'Y', 'Z']

# Not populated in this cell; kept because other cells may reference it -- TODO confirm.
patch_data = {}


def _select_patch_rows(ts_df, node_ids, expected_order, context):
    """Select the rows of one time-step table that belong to a set of nodes.

    Parameters
    ----------
    ts_df : pd.DataFrame
        Table for a single time step; must contain a 'node' column.
    node_ids : list
        Node identifiers to keep.
    expected_order : np.ndarray or None
        Node ids (in row order) selected from an earlier time step, or None
        when this is the first time step processed.
    context : str
        Text inserted into the error message to identify the patch and file.

    Returns
    -------
    tuple
        ``(rows, node_order)`` -- the selected rows in file order and their
        node ids as an array.

    Raises
    ------
    ValueError
        If the selected node ids differ (in content or order) from
        ``expected_order``. Stacking such rows would silently misalign the
        node axis between time steps.
    """
    rows = ts_df.loc[ts_df['node'].isin(node_ids)]
    node_order = rows['node'].to_numpy()
    if expected_order is not None and not np.array_equal(node_order, expected_order):
        raise ValueError(f"Node set/order differs from the first time step ({context})")
    return rows, node_order


# Fail early with a clear message; without any file the coordinate extraction
# below would otherwise hit an undefined (or stale) variable.
if patch_config and not ts_files:
    raise FileNotFoundError(f"No time series files found in {filtered_all_ts_data_dir}")

# Only these columns are ever read from the CSVs, so skip parsing any others.
needed_cols = ['node'] + target_cols + coords_cols

for k, config in patch_config.items():

    # Print patch information
    print(f"\nProcessing patch {k}")
    print(f"Patch {k} has {len(config['core_nodes'])} core nodes and {len(config['ghost_nodes'])} ghost nodes")
    print(f"Patch {k} has {len(config['neighbour_patches'])} neighbour patches")
    print(f"Patch {k} has {config['slice_group']} slice group")

    # Initialize lists to store data (one entry per time-step file)
    core_patch_data = []
    ghost_patch_data = []

    # Node ids in row order from the first file; later files are checked against them
    core_order = None
    ghost_order = None

    # Load the data.
    # NOTE: every time-step file is re-read for each patch, so only one patch's
    # arrays are held in memory at a time, at the cost of repeated I/O.
    for ts_file in ts_files:
        ts_df = pd.read_csv(os.path.join(filtered_all_ts_data_dir, ts_file), usecols=needed_cols)
        core_rows, core_order = _select_patch_rows(
            ts_df, config['core_nodes'], core_order,
            f"patch {k}, core nodes, file {ts_file}",
        )
        ghost_rows, ghost_order = _select_patch_rows(
            ts_df, config['ghost_nodes'], ghost_order,
            f"patch {k}, ghost nodes, file {ts_file}",
        )
        core_patch_data.append(core_rows[target_cols].values)
        ghost_patch_data.append(ghost_rows[target_cols].values)

    # Convert to numpy arrays with axes (time step, node, target column)
    core_patch_data = np.array(core_patch_data)
    ghost_patch_data = np.array(ghost_patch_data)

    # Coordinates come from the last file read; the order check above
    # guarantees they line up with the node axis of the stacked arrays.
    core_coords = core_rows[coords_cols].values
    ghost_coords = ghost_rows[coords_cols].values

    # Create directory for patch data
    patch_dir_path = os.path.join(filter_patch_data_dir, f'patch_{int(k):03d}')
    os.makedirs(patch_dir_path, exist_ok=True)

    # Save the data
    np.save(os.path.join(patch_dir_path, 'core_obs.npy'), core_patch_data)
    np.save(os.path.join(patch_dir_path, 'ghost_obs.npy'), ghost_patch_data)
    np.save(os.path.join(patch_dir_path, 'core_coords.npy'), core_coords)
    np.save(os.path.join(patch_dir_path, 'ghost_coords.npy'), ghost_coords)



Processing patch 1
Patch 1 has 2117 core nodes and 630 ghost nodes
Patch 1 has 6 neighbour patches
Patch 1 has 1 slice group

Processing patch 2
Patch 2 has 3182 core nodes and 954 ghost nodes
Patch 2 has 6 neighbour patches
Patch 2 has 1 slice group

Processing patch 3
Patch 3 has 3202 core nodes and 640 ghost nodes
Patch 3 has 4 neighbour patches
Patch 3 has 1 slice group

Processing patch 4
Patch 4 has 2510 core nodes and 750 ghost nodes
Patch 4 has 6 neighbour patches
Patch 4 has 1 slice group

Processing patch 5
Patch 5 has 5509 core nodes and 825 ghost nodes
Patch 5 has 3 neighbour patches
Patch 5 has 1 slice group

Processing patch 6
Patch 6 has 3263 core nodes and 1793 ghost nodes
Patch 6 has 11 neighbour patches
Patch 6 has 1 slice group

Processing patch 7
Patch 7 has 4516 core nodes and 1125 ghost nodes
Patch 7 has 5 neighbour patches
Patch 7 has 1 slice group

Processing patch 8
Patch 8 has 3594 core nodes and 1074 ghost nodes
Patch 8 has 6 neighbour patches
Patch 8 has 1 