In [None]:
# Import necessary libraries
import os
import sys
import h5py
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path

# Add parent directories to path
sys.path.append(os.path.abspath('../..'))
sys.path.append(os.path.abspath('../../lagrangebench'))

print(f"Current working directory: {os.getcwd()}")
print(f"Python path: {sys.path[:3]}...")  

Current working directory: /home/yusuf/Physics_Emulators_using_Continuous_Convolutions/SFBC/notebooks
Python path: ['/home/yusuf/miniconda3/envs/torch_sfbc/lib/python311.zip', '/home/yusuf/miniconda3/envs/torch_sfbc/lib/python3.11', '/home/yusuf/miniconda3/envs/torch_sfbc/lib/python3.11/lib-dynload']...


In [None]:
base_path = Path("../../datasets")
lagrangebench_path = base_path / "lagrangebench"
sfbc_path = base_path / "SFBC"

print("=== Lagrangebench Datasets ===")
if lagrangebench_path.exists():
    for dataset_dir in sorted(lagrangebench_path.iterdir()):
        if dataset_dir.is_dir():
            print(f"\n{dataset_dir.name}:")
            
            files = list(dataset_dir.glob("*"))[:5]  
            for f in files:
                print(f"  - {f.name}")
            if len(list(dataset_dir.glob("*"))) > 5:
                print(f"  {len(list(dataset_dir.glob('*'))) - 5} more files")
else:
    print(f"Lagrangebench path not found: {lagrangebench_path}")

print("\n\n=== SFBC Datasets ===")
if sfbc_path.exists():
    for dataset_dir in sorted(sfbc_path.iterdir()):
        if dataset_dir.is_dir():
            print(f"\n{dataset_dir.name}:")

            files = list(dataset_dir.glob("*"))[:5]  
            for f in files:
                print(f"  - {f.name}")
            if len(list(dataset_dir.glob("*"))) > 5:
                print(f" {len(list(dataset_dir.glob('*'))) - 5} more files")
else:
    print(f"SFBC path not found: {sfbc_path}")


=== Lagrangebench Datasets ===

2D_DAM_5740_20kevery100:
  - force.py
  - valid.h5
  - train.h5
  - test.h5
  - metadata.json

2D_LDC_2708_10kevery100:
  - valid.h5
  - train.h5
  - test.h5
  - metadata.json

2D_RPF_3200_20kevery100:
  - force.py
  - valid.h5
  - train.h5
  - test.h5
  - metadata.json

2D_TGV_2500_10kevery100:
  - valid.h5
  - train.h5
  - test.h5
  - metadata.json

3D_LDC_8160_10kevery100:
  - valid.h5
  - train.h5
  - test.h5
  - metadata.json

3D_RPF_8000_10kevery100:
  - force.py
  - valid.h5
  - train.h5
  - test.h5
  - metadata.json

3D_TGV_8000_10kevery100:
  - valid.h5
  - train.h5
  - test.h5
  - metadata.json


=== SFBC Datasets ===

SFBC_dataset_I:
  - Visualizer.ipynb
  - .gitattributes
  - example.png
  - .cache
  - data.png
  ... and 2 more files

SFBC_dataset_II:
  - Visualizer.ipynb
  - .gitattributes
  - example.png
  - .cache
  - data.png
  ... and 2 more files

SFBC_dataset_III:
  - Visualizer.ipynb
  - .gitattributes
  - example.png
  - .cache
  - d

In [None]:
sfbc_hdf5_files = list(sfbc_path.rglob("*.hdf5")) if sfbc_path.exists() else []
print(f"\nFound {len(sfbc_hdf5_files)} HDF5 files in SFBC datasets")

if sfbc_hdf5_files:
    # Load the first HDF5 file
    sample_sfbc_file = sfbc_hdf5_files[0]
    print(f"\nInspecting SFBC file: {sample_sfbc_file.relative_to(base_path)}")
    
    with h5py.File(sample_sfbc_file, 'r') as f:
        print("\nHDF5 file structure:")
        def print_structure(name, obj):
            indent = "  " * name.count('/')
            if isinstance(obj, h5py.Dataset):
                print(f"{indent}{name.split('/')[-1]}: {obj.shape} {obj.dtype}")
            else:
                print(f"{indent}{name.split('/')[-1]}/ (Group)")
        
        f.visititems(print_structure)
        
        # Get more detailed info about key datasets
        print("\n\nDetailed inspection:")
        for key in list(f.keys()):
            item = f[key]
            print(f"\n{key}:")
            if isinstance(item, h5py.Group):
                for subkey in list(item.keys()):
                    subitem = item[subkey]
                    if isinstance(subitem, h5py.Dataset):
                        data = np.array(subitem)
                        print(f"  {subkey}: shape={data.shape}, dtype={data.dtype}")
                        if data.size > 0:
                            print(f"    min={np.min(data):.4f}, max={np.max(data):.4f}, mean={np.mean(data):.4f}")
            elif isinstance(item, h5py.Dataset):
                data = np.array(item)
                print(f"  shape={data.shape}, dtype={data.dtype}")
                if data.size > 0:
                    print(f"  min={np.min(data):.4f}, max={np.max(data):.4f}, mean={np.mean(data):.4f}")
                
        # Check for attributes
        print("\n\nFile attributes:")
        for attr_name, attr_value in f.attrs.items():
            print(f"  {attr_name}: {attr_value}")



Found 160 HDF5 files in SFBC datasets

Inspecting SFBC file: SFBC/SFBC_dataset_IV/dataset/medJitter.hdf5

HDF5 file structure:
simulationData/ (Group)
  42307/ (Group)
    368198/ (Group)
      835327/ (Group)
        561777/ (Group)
          1030399/ (Group)
            485012/ (Group)
              837836/ (Group)
                708866/ (Group)
                  512823/ (Group)
                    460326/ (Group)
                      818272/ (Group)
                        768884/ (Group)
                          121621/ (Group)
                            421900/ (Group)
                              989724/ (Group)
                                498402/ (Group)
                                  1038289/ (Group)
                                    429452/ (Group)
                                      210375/ (Group)
                                        57381/ (Group)
                                          1028191/ (Group)
                                            16081

In [None]:
lagrangebench_h5_files = list(lagrangebench_path.rglob("*.h5")) if lagrangebench_path.exists() else []
print(f"Found {len(lagrangebench_h5_files)} H5 files in Lagrangebench datasets")

if lagrangebench_h5_files:
    # Load the first H5 file
    sample_lb_file = lagrangebench_h5_files[0]
    print(f"\nInspecting Lagrangebench file: {sample_lb_file.relative_to(base_path)}")
    
    with h5py.File(sample_lb_file, 'r') as f:
        print("\nH5 file structure:")
        def print_structure(name, obj):
            indent = "  " * name.count('/')
            if isinstance(obj, h5py.Dataset):
                print(f"{indent}{name.split('/')[-1]}: {obj.shape} {obj.dtype}")
            else:
                print(f"{indent}{name.split('/')[-1]}/ (Group)")
        
        f.visititems(print_structure)
        
        # Get more detailed info about datasets
        print("\n\nDetailed inspection:")
        
        # First, let's see what's at the root level
        root_keys = list(f.keys())
        print(f"Root level keys: {root_keys[:5]}...")  # Show first 5
        
        # Lagrangebench seems to have trajectory groups at root level
        if root_keys:
            # Inspect the first trajectory
            first_traj_key = root_keys[0]
            print(f"\nInspecting trajectory: {first_traj_key}")
            
            traj_group = f[first_traj_key]
            if isinstance(traj_group, h5py.Group):
                for key in list(traj_group.keys()):
                    item = traj_group[key]
                    if isinstance(item, h5py.Dataset):
                        data = np.array(item)
                        print(f"\n  {key}:")
                        print(f"    shape: {data.shape}, dtype: {data.dtype}")
                        if data.size > 0 and data.dtype.kind in ['f', 'i']:  # numeric types
                            print(f"    min={np.min(data):.4f}, max={np.max(data):.4f}, mean={np.mean(data):.4f}")
                        # Show first few samples if it's position data
                        if 'position' in key.lower() and len(data.shape) >= 2:
                            print(f"    First 3 samples of first timestep:")
                            print(f"    {data[0, :3]}")
                            
        # Check for attributes
        print("\n\nFile attributes:")
        for attr_name, attr_value in f.attrs.items():
            print(f"  {attr_name}: {attr_value}")
            
        # Check group attributes
        if root_keys and isinstance(f[root_keys[0]], h5py.Group):
            print(f"\nFirst trajectory attributes:")
            for attr_name, attr_value in f[root_keys[0]].attrs.items():
                print(f"  {attr_name}: {attr_value}")


Found 21 H5 files in Lagrangebench datasets

Inspecting Lagrangebench file: lagrangebench/3D_RPF_8000_10kevery100/valid.h5

H5 file structure:
00000/ (Group)
  particle_type: (8000,) int64
  position: (5001, 8000, 3) float32


Detailed inspection:
Root level keys: ['00000']...

Inspecting trajectory: 00000

  particle_type:
    shape: (8000,), dtype: int64
    min=0.0000, max=0.0000, mean=0.0000

  position:
    shape: (5001, 8000, 3), dtype: float32
    min=0.0000, max=2.0000, mean=0.5833
    First 3 samples of first timestep:
    [[0.7867463  0.38414297 0.27533227]
 [0.01887489 0.33585972 0.06739545]
 [0.19989185 1.8191481  0.25313228]]


File attributes:

First trajectory attributes:


In [None]:
format_comparison = {
    "SFBC": {},
    "Lagrangebench": {}
}

# Analyze SFBC format
if sfbc_hdf5_files:
    with h5py.File(sfbc_hdf5_files[0], 'r') as f:
        format_comparison["SFBC"]["file_extension"] = ".hdf5"
        format_comparison["SFBC"]["root_structure"] = list(f.keys())[:5]
        
        # Check for common data structures
        for key in list(f.keys()):
            item = f[key]
            if isinstance(item, h5py.Group):
                format_comparison["SFBC"][f"{key}_contents"] = list(item.keys())
            elif isinstance(item, h5py.Dataset):
                format_comparison["SFBC"][f"{key}_shape"] = item.shape

# Analyze Lagrangebench format
if lagrangebench_h5_files:
    with h5py.File(lagrangebench_h5_files[0], 'r') as f:
        format_comparison["Lagrangebench"]["file_extension"] = ".h5"
        format_comparison["Lagrangebench"]["root_structure"] = list(f.keys())[:5]
        format_comparison["Lagrangebench"]["total_trajectories"] = len(f.keys())
        
        # Analyze first trajectory
        if f.keys():
            first_traj = f[list(f.keys())[0]]
            if isinstance(first_traj, h5py.Group):
                format_comparison["Lagrangebench"]["trajectory_contents"] = list(first_traj.keys())
                
                # Get shapes of key datasets
                for key in list(first_traj.keys()):
                    item = first_traj[key]
                    if isinstance(item, h5py.Dataset):
                        format_comparison["Lagrangebench"][f"{key}_shape"] = item.shape

print("\n3. DATA ORGANIZATION:")
print("-" * 40)
print("SFBC:")
for key, value in format_comparison['SFBC'].items():
    if 'contents' in key or 'shape' in key:
        print(f"  {key}: {value}")

print("\nLagrangebench:")
for key, value in format_comparison['Lagrangebench'].items():
    if 'contents' in key or 'shape' in key:
        print(f"  {key}: {value}")

DATASET FORMAT COMPARISON

1. FILE STRUCTURE:
----------------------------------------
SFBC: .hdf5
Lagrangebench: .h5

2. ROOT LEVEL ORGANIZATION:
----------------------------------------
SFBC root keys: ['simulationData']
Lagrangebench root keys (first 5): ['00000']
Lagrangebench total trajectories: 1

3. DATA ORGANIZATION:
----------------------------------------
SFBC:
  simulationData_contents: ['42307']

Lagrangebench:
  trajectory_contents: ['particle_type', 'position']
  particle_type_shape: (8000,)
  position_shape: (5001, 8000, 3)

4. KEY DIFFERENCES:
----------------------------------------
• SFBC uses .hdf5 extension, Lagrangebench uses .h5
• Lagrangebench organizes data as trajectories at root level
• SFBC appears to have a different organizational structure
• Lagrangebench stores position data with shape (timesteps, particles, dimensions)
• Data needs to be reorganized to match SFBC's expected format


In [10]:
# Import the converter
sys.path.append('../..')  # Add SFBC parent directory to path
from SFBC.lagrangebench_converter import LagrangebenchToSFBCConverter, LagrangebenchSFBCDataset

# Test the converter on a Lagrangebench dataset
converter = LagrangebenchToSFBCConverter(lagrangebench_path)

# Let's try to load one of the Lagrangebench files
if lagrangebench_h5_files:
    print("Testing converter on:", lagrangebench_h5_files[0].relative_to(base_path))
    try:
        lb_data = converter.load_lagrangebench_file(lagrangebench_h5_files[0])
        
        print("\nLoaded Lagrangebench data keys:", list(lb_data.keys()))
        
        if 'position' in lb_data:
            print(f"\nPosition data shape: {lb_data['position'].shape}")
            print("Shape format: (particles, timesteps, dimensions)")
            
        if 'attrs' in lb_data:
            print(f"\nAttributes: {lb_data['attrs']}")
            
        # Try to convert to SFBC format
        print("\n" + "="*40)
        print("Testing conversion to SFBC format...")
        state, config = converter.convert_to_sfbc_format(lb_data, timestep_idx=0)
        
        print("\nConverted state keys:", list(state.keys()))
        print("Fluid state keys:", list(state['fluid'].keys()))
        
        for key, value in state['fluid'].items():
            if hasattr(value, 'shape'):
                print(f"  {key}: shape={value.shape}, dtype={value.dtype}")
                
        print("\nConfiguration:", config)
        
    except Exception as e:
        print(f"Error during conversion: {type(e).__name__}: {e}")
        import traceback
        traceback.print_exc()


Testing converter on: lagrangebench/3D_RPF_8000_10kevery100/valid.h5
Found 1 trajectories in file
Position data shape: (8000, 5001, 3) (particles, timesteps, dims)
Particle types shape: (8000,)

Loaded Lagrangebench data keys: ['position', 'particle_type', 'attrs']

Position data shape: (8000, 5001, 3)
Shape format: (particles, timesteps, dimensions)

Attributes: {'dt': 0.01, 'bounds': [[2.6024049581963027e-10, 1.0], [2.40395547912442e-09, 1.9999998807907104], [1.8729513939774733e-08, 0.5]]}

Testing conversion to SFBC format...

Converted state keys: ['fluid']
Fluid state keys: ['position', 'velocity', 'density', 'pressure', 'particle_type']
  position: shape=(8000, 3), dtype=float32
  velocity: shape=(8000, 3), dtype=float32
  density: shape=(8000,), dtype=float64
  pressure: shape=(8000,), dtype=float64
  particle_type: shape=(8000,), dtype=int64

Configuration: {'n_particles': 8000, 'dimension': 3, 'bounds': [[2.6024049581963027e-10, 1.0], [2.40395547912442e-09, 1.9999998807907104]