# Main NOTEBOOK for Dataset generation

In [1]:

import networkx as nx

import matplotlib.pyplot as plt
import plotly.graph_objects as go

import numpy as np
from math import cos, sin, radians
import torch

import os
import shutil
import subprocess
import struct
from glob import glob

import multiprocessing
from functools import partial
from concurrent.futures import ProcessPoolExecutor
import concurrent.futures
import subprocess

from sklearn.neighbors import NearestNeighbors


## Extract Mesh as NX graph

In [2]:

def load_points_vtk(file_path):
    points = []
    try:
        with open(file_path, 'rb') as file:
            header = []
            for _ in range(5): # header is 5 lines
                line = file.readline().decode('utf-8').strip()
                header.append(line)
            
            if not header[0].startswith('# vtk DataFile Version'):
                raise ValueError("Non è un file VTK valido")
            
            points_line = header[4]
            if not points_line.startswith('POINTS'):
                raise ValueError("Formato POINTS non trovato")
            
            num_points = int(points_line.split()[1])
            
            for _ in range(num_points):
                point_data = file.read(12)
                x, y, z = struct.unpack('>fff', point_data)
                points.append((x, y, z))
                
    except Exception as e:
        print(f"Errore durante la lettura del file: {str(e)}")
        return []
        
    return points

# file_path = "../.data/referenceCase/constant/polyMesh/points"
# file_path = "../.data/referenceCase/constant/triSurface/rocket.eMesh"
file_path = "../.data/referenceCase/VTK/rocket/rocket_0.VTK"
points = load_points_vtk(file_path)
print(len(points))
# print(points)


848


In [3]:

def plot_3d_graph(points, connectivity=None):

    points_trace = go.Scatter3d(
        x=[point[0] for point in points],
        y=[point[1] for point in points],
        z=[point[2] for point in points],
        mode='markers',
        marker=dict(
            size=2,
            color='blue',
            opacity=0.8
        ),
        name='Nodes'
    )
    
    edge_x = []
    edge_y = []
    edge_z = []
    
    if connectivity is not None:
        for i in range(connectivity.shape[1]):
            start_node = connectivity[0, i]
            end_node = connectivity[1, i]
            
            edge_x.extend([points[start_node][0], points[end_node][0], None])
            edge_y.extend([points[start_node][1], points[end_node][1], None])
            edge_z.extend([points[start_node][2], points[end_node][2], None])
        
        edges_trace = go.Scatter3d(
            x=edge_x,
            y=edge_y,
            z=edge_z,
            mode='lines',
            line=dict(
                color='gray',
                width=1
            ),
            opacity=0.3,
            name='Edges'
        )
    
        fig = go.Figure(data=[points_trace, edges_trace])
    else:
        fig = go.Figure(data=[points_trace])
    
    fig.update_layout(
        scene=dict(
            xaxis_title='X',
            yaxis_title='Y',
            zaxis_title='Z'
        ),
        width=1500,
        height=900,
        showlegend=True
    )
    
    fig.show()

plot_3d_graph(points)


## Create nodes connectivity

In [4]:

def create_connectivity(points, n_neighbors):
    
    nbrs = NearestNeighbors(n_neighbors=n_neighbors+1, algorithm='ball_tree')
    nbrs.fit(points)
    distances, indices = nbrs.kneighbors(points)
    
    sources = []
    destinations = []
    
    for i in range(len(points)):
        for j in indices[i][1:]:
            sources.append(i)
            destinations.append(j)
            
    connectivity = torch.tensor([sources, destinations])
    
    return connectivity

n_neighbors = 10
connectivity = create_connectivity(points, n_neighbors)

print(connectivity)
print(connectivity.shape)
plot_3d_graph(points, connectivity)


tensor([[  0,   0,   0,  ..., 847, 847, 847],
        [401,   1, 307,  ..., 800, 719, 194]])
torch.Size([2, 8480])


## Simulate Data

In [5]:

# def ignore_vtk(dirname, filenames):

#     if os.path.basename(dirname) == 'VTK':
#         return filenames
#     return []

# def create_case_directory(base_case, case_dir):

#     if os.path.exists(case_dir):
#         shutil.rmtree(case_dir)
    
#     shutil.copytree(base_case, case_dir, ignore=ignore_vtk)

# def modify_U_file(case_dir, velocity, angle):
    
#     vz = velocity * cos(radians(angle))
#     vy = velocity * sin(radians(angle))
    
#     U_file = os.path.join(case_dir, "0", "U")
#     with open(U_file, 'r') as f:
#         lines = f.readlines()
    
#     new_lines = []
#     i = 0
#     while i < len(lines):
#         line = lines[i]
#         if "internalField" in line:
#             new_lines.append(line)
#             new_lines.append(f"uniform (0 {vy} {vz});\n")
#             i += 2  # ignore old line
#         elif "inlet" in line and "value" in lines[i+2]:
#             new_lines.extend(lines[i:i+2])  # add 'inlet {' e 'type fixedValue;'
#             new_lines.append(f"        value           uniform (0 {vy} {vz});\n")
#             i += 4  # jump to the inlet end
#         else:
#             new_lines.append(line)
#             i += 1
    
#     with open(U_file, 'w') as f:
#         f.writelines(new_lines)

# def run_simulation(case_dir):

#     abs_path = os.path.abspath(case_dir)
#     abs_path = abs_path.replace('C:\\', '').replace('c:\\', '')
    
#     wsl_path = '/mnt/c/' + abs_path.replace('\\', '/')
    
#     # print(f"Percorso WSL: {wsl_path}")
    
#     wsl_command = f"""
#     . $HOME/OpenFOAM-12/etc/bashrc && \
#     cd "{wsl_path}" && \
#     rhoCentralFoam
#     """
    
#     try:
#         result = subprocess.run(
#             ['wsl', 'bash', '-l', '-c', wsl_command],
#             capture_output=True,
#             text=True,
#             check=True
#         )
#         print("Output:", result.stdout)
#     except subprocess.CalledProcessError as e:
#         print("Errore nell'esecuzione:")
#         print("Output:", e.output)
#         print("Error:", e.stderr)
#         raise


# n_simulations = 10
# base_case = "../.data/referenceCase"

# velocity_range = (20, 40)  # m/s
# angle_range = (-5, 5)    # gradi

# velocities = np.random.uniform(velocity_range[0], velocity_range[1], n_simulations)
# angles = np.random.uniform(angle_range[0], angle_range[1], n_simulations)

# simulation_parameter_dir = "simulation_parameters.csv"
# with open(simulation_parameter_dir, "w") as f:
#     f.write("case,velocity,angle\n")

# for i in range(n_simulations):
#     case_name = f"case_{i:03d}"
#     case_dir = os.path.join("../.data", case_name)
    
#     with open(simulation_parameter_dir, "a") as f:
#         f.write(f"{case_name},{velocities[i]:.2f},{angles[i]:.2f}\n")
    
#     print(f"\nRunning simulation ---> {i+1}/{n_simulations} - [velocity {velocities[i]:.2f} m/s, angle {angles[i]:.2f}]" )
    
#     create_case_directory(base_case, case_dir)
#     modify_U_file(case_dir, velocities[i], angles[i])
    
#     try:
#         run_simulation(case_dir)
#     except subprocess.CalledProcessError as e:
#         print(f"Errore nella simulazione {case_dir}: {e}")
#         continue


### multiprocessed

In [None]:
import threading

def ignore_vtk(dirname, filenames):

    if os.path.basename(dirname) == 'VTK':
        return filenames
    return []

def create_case_directory(base_case, case_dir):

    if os.path.exists(case_dir):
        shutil.rmtree(case_dir)
    
    shutil.copytree(base_case, case_dir, ignore=ignore_vtk)

def modify_U_file(case_dir, velocity, angle):
    
    vz = velocity * cos(radians(angle))
    vy = velocity * sin(radians(angle))
    
    U_file = os.path.join(case_dir, "0", "U")
    with open(U_file, 'r') as f:
        lines = f.readlines()
    
    new_lines = []
    i = 0
    while i < len(lines):
        line = lines[i]
        if "internalField" in line:
            new_lines.append(line)
            new_lines.append(f"uniform (0 {vy} {vz});\n")
            i += 2  # ignore old line
        elif "inlet" in line and "value" in lines[i+2]:
            new_lines.extend(lines[i:i+2])  # add 'inlet {' e 'type fixedValue;'
            new_lines.append(f"        value           uniform (0 {vy} {vz});\n")
            i += 4  # jump to the inlet end
        else:
            new_lines.append(line)
            i += 1
    
    with open(U_file, 'w') as f:
        f.writelines(new_lines)

def run_simulation(case_dir):

    abs_path = os.path.abspath(case_dir)
    abs_path = abs_path.replace('C:\\', '').replace('c:\\', '')
    
    wsl_path = '/mnt/c/' + abs_path.replace('\\', '/')
    
    # print(f"Percorso WSL: {wsl_path}")
    
    wsl_command = f"""
    . $HOME/OpenFOAM-12/etc/bashrc && \
    cd "{wsl_path}" && \
    rhoCentralFoam
    """
    
    try:
        result = subprocess.run(
            ['wsl', 'bash', '-l', '-c', wsl_command],
            capture_output=True,
            text=True,
            check=True
        )
        print("Output:", result.stdout)
    except subprocess.CalledProcessError as e:
        print("Errore nell'esecuzione:")
        print("Output:", e.output)
        print("Error:", e.stderr)
        raise

def run_parallel_simulations(velocities, angles, n_simulations, base_case, simulation_parameter_dir, max_concurrent=4):
    def run_batch(start_idx, end_idx):
        for i in range(start_idx, end_idx):
            case_name = f"case_{i:03d}"
            case_dir = os.path.join("../.data", case_name)
            
            print(f"\nRunning simulation ---> {i+1}/{n_simulations} - [velocity {velocities[i]:.2f} m/s, angle {angles[i]:.2f}]")
            
            with open(simulation_parameter_dir, "a") as f:
                f.write(f"{case_name},{velocities[i]:.2f},{angles[i]:.2f}\n")
            
            create_case_directory(base_case, case_dir)
            modify_U_file(case_dir, velocities[i], angles[i])
            
            try:
                run_simulation(case_dir)
                results[i] = True
            except subprocess.CalledProcessError as e:
                print(f"Errore nella simulazione {case_dir}: {e}")
                results[i] = False

    results = [False] * n_simulations
    
    batch_size = n_simulations // max_concurrent + (1 if n_simulations % max_concurrent else 0)
    threads = []
    
    for i in range(0, n_simulations, batch_size):
        end_idx = min(i + batch_size, n_simulations)
        thread = threading.Thread(target=run_batch, args=(i, end_idx))
        threads.append(thread)
        thread.start()
    
    for thread in threads:
        thread.join()
    
    return results
    
n_simulations = 10
base_case = "../.data/referenceCase"

velocity_range = (320, 360)  # m/s
angle_range = (-10, 10)    # gradi

velocities = np.random.uniform(velocity_range[0], velocity_range[1], n_simulations)
angles = np.random.uniform(angle_range[0], angle_range[1], n_simulations)

simulation_parameter_dir = "simulation_parameters.csv"
with open(simulation_parameter_dir, "w") as f:
    f.write("case,velocity,angle\n")

n_workers = 6

sim_args = [(velocities[i], angles[i], i, n_simulations, base_case, simulation_parameter_dir) 
            for i in range(n_simulations)]

results = []

with open(simulation_parameter_dir, "w") as f:
    f.write("case,velocity,angle\n")

results = run_parallel_simulations(velocities, angles, n_simulations, base_case, 
                                    simulation_parameter_dir, max_concurrent=4)

successful = sum(results)
print(f"\nSimulazioni completate con successo: {successful}/{n_simulations}")


Running simulation ---> 1/10 - [velocity 326.83 m/s, angle 4.54]

Running simulation ---> 4/10 - [velocity 339.24 m/s, angle -0.01]

Running simulation ---> 7/10 - [velocity 324.76 m/s, angle 8.58]

Running simulation ---> 10/10 - [velocity 331.04 m/s, angle 5.20]


## Normalize data

In [None]:
def normalize_data(data, method='minmax'):

    if method == 'minmax':
        data_min = np.min(data)
        data_max = np.max(data)
        if data_max - data_min != 0:
            normalized = (data - data_min) / (data_max - data_min)
        else:
            normalized = data
        params = {'min': data_min, 'max': data_max}
        
    elif method == 'standard':
        mean = np.mean(data)
        std = np.std(data)
        if std != 0:
            normalized = (data - mean) / std
        else:
            normalized = data
        params = {'mean': mean, 'std': std}
        
    return normalized, params

def normalize_data_global(data, field_name, global_mins, global_maxs):
    """
    Normalizza i dati usando min/max globali
    """
    data_min = global_mins[field_name]
    data_max = global_maxs[field_name]
    if data_max - data_min != 0:
        normalized = (data - data_min) / (data_max - data_min)
    else:
        normalized = data
    return normalized, {'min': data_min, 'max': data_max}

## Extract data from simulations

In [None]:

def find_matching_indices(mesh_points, surface_points, field_size, tolerance=1e-6):
    """
    field_size: dimensione del campo più piccolo (p, U, T, etc)
    """
    indices = []
    mesh_points = np.array(mesh_points)[:field_size]
    surface_points = np.array(surface_points)
    
    print(f"Mesh points shape: {mesh_points.shape}")
    print(f"Surface points shape: {surface_points.shape}")
    
    for i, surface_point in enumerate(surface_points):
        distances = np.linalg.norm(mesh_points - surface_point, axis=1)
        matching = np.where(distances < tolerance)[0]

        if len(matching) > 0:
            indices.append(matching[0])
        else:
            closest_idx = np.argmin(distances)
            min_distance = distances[closest_idx]
            # print(f"Warning: No matching point found for surface point {i}, using closest at distance {min_distance}")
            indices.append(closest_idx)
    
    indices = np.array(indices)
    # print(f"Found {len(indices)} matching points")
    return indices

def get_latest_time(case_dir):

    time_dirs = [d for d in os.listdir(case_dir) 
                if d.replace('.','').isdigit() and os.path.isdir(os.path.join(case_dir, d))]
    if not time_dirs:
        return None
    
    latest_time = max(float(t) for t in time_dirs)
    if latest_time == 1.0:
        latest_time = int(latest_time)
    return str(latest_time)

def read_openfoam_points(filename):

    points = []
    with open(filename, 'r') as f:
        lines = f.readlines()
        start_idx = 0
        for i, line in enumerate(lines):
            if line.strip() == '(':
                start_idx = i + 1
                break
        
        for line in lines[start_idx:]:
            line = line.strip()
            if line == ')':
                break
            
            if line.startswith('(') and line.endswith(')'):
                values = line.strip('()').split()
                values = [float(x) for x in values]
                points.append(values)
    
    return np.array(points)

def read_openfoam_field(filename):

    data = []
    with open(filename, 'r') as f:
        lines = f.readlines()
        start_idx = 0
        for i, line in enumerate(lines):
            if line.strip() == '(':
                start_idx = i + 1
                break
        
        for line in lines[start_idx:]:
            line = line.strip()
            if line == ')':
                break
            
            if line.startswith('(') and line.endswith(')'):

                values = line.strip('()').split()

                values = [float(x) for x in values]
                data.append(values)

            elif line:
                try:
                    value = float(line)
                    data.append(value)
                except ValueError as e: # for some reason it often breaks here
                    print(f"Parsing error at line: {line}")
                    raise e
    
    return np.array(data)

def extract_case_data(case_dir, points):
    latest_time = get_latest_time(case_dir)
    # print(latest_time)
    if latest_time is None:
        print(f"No data in {case_dir}")
        return None

    time_dir = os.path.join(case_dir, latest_time)
    data = {}

    mesh_points_file = os.path.join(case_dir, 'constant/polyMesh/points')
    if os.path.exists(mesh_points_file):

        mesh_points = read_openfoam_points(mesh_points_file)
        print(f"Total mesh points: {len(mesh_points)}")
        surface_indices = find_matching_indices(mesh_points, points, field_size=len(mesh_points))

    else:
        print(f"Mesh points file not found in {case_dir}")
        return None

    fields = {
        'p': 'p',
        'U': 'U',
        'T': 'T',
        'rho': 'rho',
        'phi': 'phi'
    }

    fields_data = {}
    for field_name, file_name in fields.items():
        file_path = os.path.join(time_dir, file_name)
        if not os.path.exists(file_path):
            print(f"{field_name} not found in {case_dir}")
            return None
        fields_data[field_name] = read_openfoam_field(file_path)
        print(f"Field {field_name} shape: {fields_data[field_name].shape}")

    min_field_size = min(len(field) for field in fields_data.values())
    print(f"Minimum field size: {min_field_size}")

    surface_indices = find_matching_indices(mesh_points, points, min_field_size)

    data = {}
    for field_name in fields:
        data[field_name] = fields_data[field_name][surface_indices]
        print(f"Extracted {field_name} values: {len(data[field_name])}")

        assert len(data[field_name]) == len(points)

    return data

base_dir = '.'
output_dir_probes = '../.data/Extracted_data/probes'
output_dir_features = '../.data/Extracted_data/fields'
checkpoints_dir_features = '../.data/Extracted_data/checkpoints'
os.makedirs(output_dir_probes, exist_ok=True)
os.makedirs(output_dir_features, exist_ok=True)
os.makedirs(checkpoints_dir_features, exist_ok=True)

case_dirs = sorted(glob('../.data/case_[0-9][0-9][0-9]'))

for case_dir in case_dirs:
    case_num = case_dir[-3:]
    print(f"Processing {case_dir}...")

    data = extract_case_data(case_dir, points)
    if data is None:
        continue

    p = data['p']
    U = data['U']
    T = data['T']
    rho = data['rho']
    phi = data['phi']

    zero = np.zeros((len(points)))
    zeros = np.zeros((len(points), 3))

    output_file = os.path.join(output_dir_features, f'SIM{case_num}_features.npz')
    np.savez(output_file,
             p=p,
             U_x=U[:, 0],
             U_y=U[:, 1],
             U_z=U[:, 2],
             T=T,
             rho=rho,
             phi=phi,
             zero1=zero, # <--------------
             zero2=zero, # <-------------- TEMPORARY: i need 10 columns to match save.ipynb
             zero3=zero) # <-------------- 
    
    points_output_file = os.path.join(output_dir_probes, f'SIM{case_num}_probepos.npy')
    points_with_zeros = np.hstack((points, zeros))
    np.save(points_output_file, points_with_zeros) # <-------------- TEMPORARY: i need 6 columns to match save.ipynb


# save previously computed connectivity
np.save('../.data/Extracted_data/connectivity.npy', connectivity)


Processing ../.data\case_000...
Total mesh points: 34476
Mesh points shape: (34476, 3)
Surface points shape: (848, 3)


Field p shape: (31250,)
Field U shape: (31250, 3)
Field T shape: (31250,)
Field rho shape: (31250,)
Field phi shape: (90625,)
Minimum field size: 31250
Mesh points shape: (31250, 3)
Surface points shape: (848, 3)
Extracted p values: 848
Extracted U values: 848
Extracted T values: 848
Extracted rho values: 848
Extracted phi values: 848
Processing ../.data\case_001...
Total mesh points: 34476
Mesh points shape: (34476, 3)
Surface points shape: (848, 3)
Field p shape: (31250,)
Field U shape: (31250, 3)
Field T shape: (31250,)
Field rho shape: (31250,)
Field phi shape: (90625,)
Minimum field size: 31250
Mesh points shape: (31250, 3)
Surface points shape: (848, 3)
Extracted p values: 848
Extracted U values: 848
Extracted T values: 848
Extracted rho values: 848
Extracted phi values: 848
Processing ../.data\case_002...
Total mesh points: 34476
Mesh points shape: (34476, 3)
Surface points shape: (848, 3)
Field p shape: (31250,)
Field U shape: (31250, 3)
Field T shape: (31250,)
Field rho shape:

### normalized

In [None]:

# def find_matching_indices(mesh_points, surface_points, field_size, tolerance=1e-6):
#     """
#     field_size: dimensione del campo più piccolo (p, U, T, etc)
#     """
#     indices = []
#     mesh_points = np.array(mesh_points)[:field_size]
#     surface_points = np.array(surface_points)
    
#     print(f"Mesh points shape: {mesh_points.shape}")
#     print(f"Surface points shape: {surface_points.shape}")
    
#     for i, surface_point in enumerate(surface_points):
#         distances = np.linalg.norm(mesh_points - surface_point, axis=1)
#         matching = np.where(distances < tolerance)[0]

#         if len(matching) > 0:
#             indices.append(matching[0])
#         else:
#             closest_idx = np.argmin(distances)
#             min_distance = distances[closest_idx]
#             # print(f"Warning: No matching point found for surface point {i}, using closest at distance {min_distance}")
#             indices.append(closest_idx)
    
#     indices = np.array(indices)
#     # print(f"Found {len(indices)} matching points")
#     return indices

# def get_latest_time(case_dir):

#     time_dirs = [d for d in os.listdir(case_dir) 
#                 if d.replace('.','').isdigit() and os.path.isdir(os.path.join(case_dir, d))]
#     if not time_dirs:
#         return None
    
#     latest_time = max(float(t) for t in time_dirs)
#     if latest_time == 1.0:
#         latest_time = int(latest_time)
#     return str(latest_time)

# def read_openfoam_points(filename):

#     points = []
#     with open(filename, 'r') as f:
#         lines = f.readlines()
#         start_idx = 0
#         for i, line in enumerate(lines):
#             if line.strip() == '(':
#                 start_idx = i + 1
#                 break
        
#         for line in lines[start_idx:]:
#             line = line.strip()
#             if line == ')':
#                 break
            
#             if line.startswith('(') and line.endswith(')'):
#                 values = line.strip('()').split()
#                 values = [float(x) for x in values]
#                 points.append(values)
    
#     return np.array(points)

# def read_openfoam_field(filename):

#     data = []
#     with open(filename, 'r') as f:
#         lines = f.readlines()
#         start_idx = 0
#         for i, line in enumerate(lines):
#             if line.strip() == '(':
#                 start_idx = i + 1
#                 break
        
#         for line in lines[start_idx:]:
#             line = line.strip()
#             if line == ')':
#                 break
            
#             if line.startswith('(') and line.endswith(')'):

#                 values = line.strip('()').split()

#                 values = [float(x) for x in values]
#                 data.append(values)

#             elif line:
#                 try:
#                     value = float(line)
#                     data.append(value)
#                 except ValueError as e: # for some reason it often breaks here
#                     print(f"Parsing error at line: {line}")
#                     raise e
    
#     return np.array(data)

# def extract_case_data(case_dir, points):
#     latest_time = get_latest_time(case_dir)
#     # print(latest_time)
#     if latest_time is None:
#         print(f"No data in {case_dir}")
#         return None

#     time_dir = os.path.join(case_dir, latest_time)
#     data = {}

#     mesh_points_file = os.path.join(case_dir, 'constant/polyMesh/points')
#     if os.path.exists(mesh_points_file):

#         mesh_points = read_openfoam_points(mesh_points_file)
#         print(f"Total mesh points: {len(mesh_points)}")
#         surface_indices = find_matching_indices(mesh_points, points, field_size=len(mesh_points))

#     else:
#         print(f"Mesh points file not found in {case_dir}")
#         return None

#     fields = {
#         'p': 'p',
#         'U': 'U',
#         'T': 'T',
#         'rho': 'rho',
#         'phi': 'phi'
#     }

#     fields_data = {}
#     for field_name, file_name in fields.items():
#         file_path = os.path.join(time_dir, file_name)
#         if not os.path.exists(file_path):
#             print(f"{field_name} not found in {case_dir}")
#             return None
#         fields_data[field_name] = read_openfoam_field(file_path)
#         print(f"Field {field_name} shape: {fields_data[field_name].shape}")

#     min_field_size = min(len(field) for field in fields_data.values())
#     print(f"Minimum field size: {min_field_size}")

#     surface_indices = find_matching_indices(mesh_points, points, min_field_size)

#     data = {}
#     for field_name in fields:
#         data[field_name] = fields_data[field_name][surface_indices]
#         print(f"Extracted {field_name} values: {len(data[field_name])}")

#         assert len(data[field_name]) == len(points)

#     return data

# base_dir = '.'
# output_dir_probes = '../.data/Extracted_data/probes'
# output_dir_features = '../.data/Extracted_data/fields'
# checkpoints_dir_features = '../.data/Extracted_data/checkpoints'
# os.makedirs(output_dir_probes, exist_ok=True)
# os.makedirs(output_dir_features, exist_ok=True)
# os.makedirs(checkpoints_dir_features, exist_ok=True)

# case_dirs = sorted(glob('../.data/case_[0-9][0-9][0-9]'))


# ### FIND GLOBAL MAX AND MIN

# global_mins = {}
# global_maxs = {}

# for case_dir in case_dirs:
#     case_num = case_dir[-3:]
#     print(f"Finding global min/max in {case_dir}...")
    
#     data = extract_case_data(case_dir, points)
#     if data is None:
#         continue
        
#     fields_to_check = {
#         'p': data['p'],
#         'U_x': data['U'][:, 0],
#         'U_y': data['U'][:, 1],
#         'U_z': data['U'][:, 2],
#         'T': data['T'],
#         'rho': data['rho'],
#         'phi': data['phi']
#     }
    
#     for field_name, field_data in fields_to_check.items():
#         if field_name not in global_mins:
#             global_mins[field_name] = np.min(field_data)
#             global_maxs[field_name] = np.max(field_data)
#         else:
#             global_mins[field_name] = min(global_mins[field_name], np.min(field_data))
#             global_maxs[field_name] = max(global_maxs[field_name], np.max(field_data))


# ### SAVE DATA

# for case_dir in case_dirs:
#     case_num = case_dir[-3:]
#     print(f"Processing {case_dir}...")
    
#     data = extract_case_data(case_dir, points)
#     if data is None:
#         continue
        
#     normalized_data = {}
#     normalization_params = {}
    
#     fields_to_normalize = {
#         'p': data['p'],
#         'U_x': data['U'][:, 0],
#         'U_y': data['U'][:, 1],
#         'U_z': data['U'][:, 2],
#         'T': data['T'],
#         'rho': data['rho'],
#         'phi': data['phi']
#     }
    
#     for field_name, field_data in fields_to_normalize.items():
#         normalized_data[field_name], normalization_params[field_name] = normalize_data_global(field_data, field_name, global_mins, global_maxs)

#     zero = np.zeros((len(points)))
#     zeros = np.zeros((len(points), 3))

#     output_file = os.path.join(output_dir_features, f'SIM{case_num}_features.npz')
#     np.savez(output_file,
#             p=normalized_data['p'],
#             U_x=normalized_data['U_x'],
#             U_y=normalized_data['U_y'],
#             U_z=normalized_data['U_z'],
#             T=normalized_data['T'],
#             rho=normalized_data['rho'],
#             phi=normalized_data['phi'],
#             zero1=zero, # <--------------
#             zero2=zero, # <-------------- TEMPORARY: i need 10 columns to match save.ipynb
#             zero3=zero) # <--------------
    
#     points_output_file = os.path.join(output_dir_probes, f'SIM{case_num}_probepos.npy')
#     points_with_zeros = np.hstack((points, zeros))
#     np.save(points_output_file, points_with_zeros) # <-------------- TEMPORARY: i need 6 columns to match save.ipynb


# # save previously computed connectivity
# np.save('../.data/Extracted_data/connectivity.npy', connectivity)


# # also save global params for normalization
# global_params = {
#     'mins': global_mins,
#     'maxs': global_maxs
# }
# np.savez('../.data/Extracted_data/global_norm_params.npz', **global_params)
