Data generator from the Uncompressed mesh

In [None]:
# Setting the Working Directory to the Notebook's Path

import os
from pathlib import Path

# Path(".") is the relative current directory, so resolving it yields the
# absolute path of the directory the kernel is already running in; the chdir
# below therefore keeps the working directory where it is.
notebook_path = Path(".").resolve()
os.chdir(notebook_path)

# Show the directory that the relative paths used below are resolved against
print("Current working directory:", os.getcwd())

GNNs-BreastCompression

Read the mesh file

In [2]:
import vtk
import numpy as np
from collections import defaultdict, Counter 
import pandas as pd
import math
import csv

file_path = "uncompressed_nrrd/volmesh3.vtk"  # relative to the working directory; make sure it is the intended mesh

# NOTE(review): VTK readers report a missing/unreadable file through VTK's own
# error log rather than a Python exception, which would only surface later as an
# unrelated AttributeError. Fail fast with a clear message instead.
if not os.path.isfile(file_path):
    raise FileNotFoundError(
        f"Mesh file not found: {file_path!r} (working directory: {os.getcwd()})"
    )

reader = vtk.vtkUnstructuredGridReader()
reader.SetFileName(file_path)
reader.Update()

unstructuredGrid = reader.GetOutput()
num_cells = unstructuredGrid.GetNumberOfCells()
num_points = unstructuredGrid.GetNumberOfPoints()
if num_points == 0 or num_cells == 0:
    # Everything below indexes points/cells, so an empty grid is unusable.
    raise RuntimeError(
        f"No points or cells were read from {file_path!r}; "
        "is it a legacy-format VTK unstructured grid?"
    )

points = unstructuredGrid.GetPoints()  # points (coordinates) of the grid
cells = unstructuredGrid.GetCells()  # cells of the grid
cell_types = unstructuredGrid.GetCellTypesArray()  # cell types of the grid
cell_data = unstructuredGrid.GetCellData()  # cell data associated with the grid
point_data = unstructuredGrid.GetPointData()  # point data associated with the grid

# Bounding box of the grid, in the form (xmin, xmax, ymin, ymax, zmin, zmax)
bbox = points.GetBounds()
print("Bounding Box Coordinates (xmin, xmax, ymin, ymax, zmin, zmax):", bbox)

Bounding Box Coordinates (xmin, xmax, ymin, ymax, zmin, zmax): (31.819000244140625, 153.58299255371094, 39.951900482177734, 166.66000366210938, -0.06298580020666122, 93.52210235595703)


Elements and their node indices

In [6]:
def get_tetrahedron_elements(unstructuredGrid):
    """
    Collect the connectivity of every tetrahedral cell of a grid.

    Parameters:
    unstructuredGrid (vtkUnstructuredGrid): Grid whose cells are scanned in cell-id order.

    Returns:
    list: One inner list per tetrahedron, holding that cell's point indices
          shifted by one (0-based ids become 1-based). Cells of any other
          type are skipped.
    """
    elements = []

    for idx in range(unstructuredGrid.GetNumberOfCells()):
        current = unstructuredGrid.GetCell(idx)

        # Only tetrahedra are exported
        if current.GetCellType() != vtk.VTK_TETRA:
            continue

        ids = current.GetPointIds()
        # Shift every point id by one to obtain 1-based indices
        elements.append([ids.GetId(k) + 1 for k in range(ids.GetNumberOfIds())])

    return elements

# Connectivity rows: one list of 1-based point indices per tetrahedron
tetrahedron_elements = get_tetrahedron_elements(unstructuredGrid)

# Dump the rows as a header-less CSV, one tetrahedron per line
with open("Data_Generator/input/elements.csv", "w", newline="") as csvfile:
    csv_writer = csv.writer(csvfile)
    csv_writer.writerows(tetrahedron_elements)

print("CSV file 'elements.csv' has been created.")

CSV file 'elements.csv' has been created.


Material IDs

In [8]:
# Get the number of cells in the unstructured grid
num_cells = unstructuredGrid.GetNumberOfCells()

# Retrieve the per-cell "materials" scalar array
material_scalars = unstructuredGrid.GetCellData().GetScalars("materials")
if material_scalars is None:
    # Without this guard a missing array surfaces as an opaque AttributeError below.
    raise RuntimeError("Cell data array 'materials' not found in the dataset.")

# elements.csv lists tetrahedra only, so keep the material ID of tetrahedral
# cells only: row i of element_ID.csv must describe row i of elements.csv.
# (For a mesh made purely of tetrahedra this selects every cell.)
material_ids = [
    material_scalars.GetTuple1(cell_id)  # single-component value of this cell
    for cell_id in range(num_cells)
    if unstructuredGrid.GetCellType(cell_id) == vtk.VTK_TETRA
]

# Save to CSV (no header, one integer material ID per line)
with open("Data_Generator/input/element_ID.csv", "w", newline="") as csvfile:
    csv_writer = csv.writer(csvfile)
    # GetTuple1 returns a float; the file stores the ID as an integer
    csv_writer.writerows([int(material_id)] for material_id in material_ids)

print("CSV file 'element_ID.csv' has been created.")

CSV file 'element_ID.csv' has been created.


Nodes Coordinates - xyz

In [9]:
# Number of mesh nodes (points) in the unstructured grid
num_points = unstructuredGrid.GetNumberOfPoints()

# Coordinates of every node in point-id order; the id itself is not stored
node_coordinates = [unstructuredGrid.GetPoint(pid) for pid in range(num_points)]

# Header-less CSV with one "x,y,z" row per node
with open("Data_Generator/input/xyz.csv", "w", newline="") as csvfile:
    csv_writer = csv.writer(csvfile)
    csv_writer.writerows(node_coordinates)

print("CSV file 'xyz.csv' has been created.")

CSV file 'xyz.csv' has been created.


Rigid ID

In [10]:
# Per-point "boundaryConditions" scalar array
boundary_scalars = unstructuredGrid.GetPointData().GetScalars("boundaryConditions")

# One [1-based node id, boundary-condition value] pair per point, in point-id
# order. GetTuple1 reads the value as a single-component tuple.
node_boundary_conditions = [
    [pid + 1, boundary_scalars.GetTuple1(pid)] for pid in range(num_points)
]

# CSV with a header row followed by integer "Node ID,Rigid_ID" rows
with open("Data_Generator/input/bcSupportList.csv", "w", newline="") as csvfile:
    csv_writer = csv.writer(csvfile)
    csv_writer.writerow(["Node ID", "Rigid_ID"])  # Custom header for boundary conditions
    csv_writer.writerows(
        [int(node_id), int(flag)] for node_id, flag in node_boundary_conditions
    )

print("CSV file 'bcSupportList.csv' has been created.")

CSV file 'bcSupportList.csv' has been created.


Find surface nodes

In [4]:
def compute_normals(unstructured_grid):
    """
    Attach a per-point gradient vector to the grid.

    A vtkCellDerivatives filter is run in "compute gradient" vector mode to get
    one vector per cell; each point then receives the plain average of the
    vectors of the cells that use it. No geometric surface normal is computed
    here: the "normals" of this notebook are these averaged gradients.
    NOTE(review): per the VTK documentation this mode differentiates the input's
    active point scalars -- confirm which scalar array is active on the grid.

    Parameters:
    unstructured_grid (vtkUnstructuredGrid): Grid to process. It is modified in
        place: its point-data vectors are replaced by the "Gradients" array.

    Returns:
    vtkUnstructuredGrid: The same grid object that was passed in.

    Raises:
    RuntimeError: If the filter output carries no cell vectors.
    """
    # Compute gradients (which can be used to derive normals)
    gradient_filter = vtk.vtkCellDerivatives()
    gradient_filter.SetInputData(unstructured_grid)
    gradient_filter.SetVectorModeToComputeGradient()
    gradient_filter.Update()

    # Access the computed gradients
    gradients = gradient_filter.GetOutput().GetCellData().GetVectors()
    if gradients is None:
        raise RuntimeError("Gradients not found in the dataset.")

    # Average the cell gradients to point gradients
    point_gradients = vtk.vtkFloatArray()
    point_gradients.SetNumberOfComponents(3)
    point_gradients.SetName("Gradients")

    for point_id in range(unstructured_grid.GetNumberOfPoints()):
        cell_ids = vtk.vtkIdList()
        unstructured_grid.GetPointCells(point_id, cell_ids)
        cell_count = cell_ids.GetNumberOfIds()

        gradient_sum = [0.0, 0.0, 0.0]
        for j in range(cell_count):
            cell_gradient = gradients.GetTuple(cell_ids.GetId(j))
            gradient_sum = [acc + comp for acc, comp in zip(gradient_sum, cell_gradient)]

        # A point used by no cell keeps a zero vector instead of triggering a
        # division by zero; zero vectors are filtered out downstream.
        if cell_count:
            gradient_sum = [comp / cell_count for comp in gradient_sum]
        point_gradients.InsertNextTuple(gradient_sum)

    unstructured_grid.GetPointData().SetVectors(point_gradients)
    return unstructured_grid

def extract_normals(unstructured_grid):
    """
    Read the grid's point-data vectors and scale each one to unit length.

    Parameters:
    unstructured_grid: Grid whose point data carries a vectors array.

    Returns:
    numpy.ndarray: Array of shape (number of points, 3). Row i is vector i
        divided by its Euclidean norm; a vector whose norm is 0 is copied
        unchanged.

    Raises:
    RuntimeError: If the point data has no vectors array.
    """
    point_vectors = unstructured_grid.GetPointData().GetVectors()
    if point_vectors is None:
        raise RuntimeError("Gradients not found in the dataset.")

    def to_unit(vec):
        # Leave zero-length vectors untouched to avoid dividing by zero
        length = np.linalg.norm(vec)
        return vec if length == 0 else vec / length

    count = unstructured_grid.GetNumberOfPoints()
    normals_array = np.zeros((count, 3))
    for idx in range(count):
        normals_array[idx] = to_unit(np.array(point_vectors.GetTuple(idx)))

    return normals_array

def save_normals_to_csv(filename, normals_array):
    """
    Write the normals to a CSV file, one row per node.

    Parameters:
    filename: Path of the CSV file to (over)write.
    normals_array: Sequence of 3-component vectors, in node order.

    The file starts with a header row; every following row holds the 1-based
    node index and the three components of that node's vector.
    """
    header = ['Node Index', 'Normal X', 'Normal Y', 'Normal Z']
    with open(filename, mode='w', newline='') as handle:
        out = csv.writer(handle)
        out.writerow(header)
        out.writerows(
            [node_index, vec[0], vec[1], vec[2]]
            for node_index, vec in enumerate(normals_array, start=1)
        )

def main(unstructured_grid, output_csv_filename):
    """
    Run the three pipeline steps for one grid and write the result to disk.

    Parameters:
    unstructured_grid: Grid handed to compute_normals.
    output_csv_filename: Path of the CSV file written by save_normals_to_csv.
    """
    # Step 1: attach the per-point gradients to the grid
    grid_with_gradients = compute_normals(unstructured_grid)

    # Steps 2 and 3: normalise the gradients and store them as CSV
    save_normals_to_csv(output_csv_filename, extract_normals(grid_with_gradients))

# Example usage
output_csv_filename = 'Data_Generator/input/normals.csv'
main(unstructuredGrid, output_csv_filename)
print(f"Normals saved to {output_csv_filename}")

#########################################################
# Build the prescribed-node list: nodes with a non-zero normal that are not
# rigidly supported (Rigid_ID == 1)
#########################################################
support_csv = "Data_Generator/input/bcSupportList.csv"

# Load the normals that were just written
normals_df = pd.read_csv(output_csv_filename)

# Filter nodes with non-zero normals (at least one non-zero component)
normal_columns = ['Normal X', 'Normal Y', 'Normal Z']
non_zero_normals = normals_df[(normals_df[normal_columns] != 0).any(axis=1)]

# 1-based indices of those nodes
node_indices_non_zero_normals = non_zero_normals['Node Index']

# Load the support list. The end of this cell rewrites the file as a header-less
# Rigid_ID column, so on a re-run the "Node ID" column is gone; rebuild it from
# the row order (the file is written in point-id order with 1-based ids) so
# that the cell stays re-runnable.
bc_support_list = pd.read_csv(support_csv)
if "Node ID" not in bc_support_list.columns:
    bc_support_list = pd.read_csv(support_csv, header=None, names=["Rigid_ID"])
    bc_support_list.insert(0, "Node ID", range(1, len(bc_support_list) + 1))

# Drop every candidate node that is rigidly supported. A vectorised membership
# test replaces the former row-by-row loop that dropped rows while iterating.
rigid_node_ids = bc_support_list.loc[bc_support_list["Rigid_ID"] == 1, "Node ID"]
is_rigid = node_indices_non_zero_normals.isin(rigid_node_ids)
node_indices_non_zero_normals_df = node_indices_non_zero_normals[~is_rigid].to_frame(name="Node Index")

# Save the filtered 1-based indices (with header)
node_indices_non_zero_normals_df.to_csv("Data_Generator/input/bcPrescribeList.csv", index=False)

# Same list converted from 1-based to 0-based indices, written without header
bc_prescribe_list = (node_indices_non_zero_normals_df - 1).reset_index(drop=True)
bc_prescribe_list.to_csv("Data_Generator/input/bcPrescribeList_0_based.csv", index=False, header=False)

# Final form of bcSupportList.csv: only the Rigid_ID column, no header
bc_support_list = bc_support_list[["Rigid_ID"]]
bc_support_list.to_csv(support_csv, index=False, header=False)

Normals saved to input/normals.csv


In [5]:
# Keep the normals of exactly those nodes that are still in the prescribed list
# (inner join on the shared 'Node Index' column)
matching_df = non_zero_normals.merge(node_indices_non_zero_normals_df, on='Node Index')

# Check if merge is successful
print(f"Matching DataFrame:\n{matching_df}")

# Columns 1..3 hold the x, y and z components of the normals
directions = matching_df.iloc[:, 1:4]
x_direction, y_direction, z_direction = (directions.iloc[:, k] for k in range(3))

# One header-less, single-column CSV per component
component_files = {
    'Data_Generator/input/x_direction.csv': x_direction,
    'Data_Generator/input/y_direction.csv': y_direction,
    'Data_Generator/input/z_direction.csv': z_direction,
}
for target_path, component in component_files.items():
    component.to_csv(target_path, index=False, header=False)

Matching DataFrame:
      Node Index  Normal X  Normal Y  Normal Z
0             85 -0.079704 -0.066261 -0.994614
1             89  0.389122 -0.392274 -0.833490
2            102  0.135919  0.069030 -0.988312
3            104  0.172890 -0.005404 -0.984926
4            108  0.001642  0.233260 -0.972413
...          ...       ...       ...       ...
1124       17534  0.008986  0.056693 -0.998351
1125       17540 -0.027402  0.046397 -0.998547
1126       17543 -0.027241 -0.059276 -0.997870
1127       17564 -0.094505  0.072314 -0.992895
1128       17577  0.149883 -0.104682 -0.983146

[1129 rows x 4 columns]


Generate 10 distinct batches (a, b, c, ..., j), each containing the normal direction plus 3 random directions
```
│
├── Hold-out
│   └── dataset
│       ├── a
│       │   ├── force_dir_x_a.csv
│       │   ├── force_dir_y_a.csv
│       │   └── force_dir_z_a.csv
│       ├── b
│       │   ├── force_dir_x_b.csv
│       │   ├── force_dir_y_b.csv
│       │   └── force_dir_z_b.csv
│       ├── c
│       │   └── ...
│       └── j
│           ├── force_dir_x_j.csv
│           ├── force_dir_y_j.csv
│           └── force_dir_z_j.csv
│
└── LODO
    ├── dataset
    │   ├── a
    │   │   ├── force_dir_x_a.csv
    │   │   ├── force_dir_y_a.csv
    │   │   └── force_dir_z_a.csv
    │   ├── b
    │   │   ├── force_dir_x_b.csv
    │   │   ├── force_dir_y_b.csv
    │   │   └── force_dir_z_b.csv
    │   ├── c
    │   │   └── ...
    │   └── j
    │       ├── force_dir_x_j.csv
    │       ├── force_dir_y_j.csv
    │       └── force_dir_z_j.csv
    │
    └── final_step
        ├── a
        │   ├── force_dir_x_a.csv (one direction only)
        │   ├── force_dir_y_a.csv (one direction only)
        │   └── force_dir_z_a.csv (one direction only)
```

In [6]:
def sample_hemisphere_x(n, rng=None):
    """
    Generate `n` random unit directions in the hemisphere around the positive x-axis.

    The polar angle (measured from the +x axis) and the azimuth are each drawn
    uniformly, so the samples are denser near the +x pole than an area-uniform
    sampling of the hemisphere would be.

    :param n: Number of samples to generate.
    :param rng: Optional random source exposing ``uniform(low, high, size)``, e.g.
        ``np.random.default_rng(seed)``; pass one to make the draw reproducible.
        Defaults to NumPy's global ``np.random`` state.
    :return: Three separate lists containing x, y, and z components of the directions.
    """
    source = np.random if rng is None else rng

    theta = source.uniform(0, np.pi / 2, n)  # Polar angle: 0 to pi/2 for hemisphere along x-axis
    phi = source.uniform(0, 2 * np.pi, n)  # Azimuthal angle: 0 to 2pi for full rotation

    x = np.cos(theta)
    y = np.sin(theta) * np.cos(phi)
    z = np.sin(theta) * np.sin(phi)

    return x.tolist(), y.tolist(), z.tolist()

def sample_hemisphere_y(n, rng=None):
    """
    Generate `n` random unit directions in the hemisphere around the positive y-axis.

    The polar angle (measured from the +y axis) and the azimuth are each drawn
    uniformly, so the samples are denser near the +y pole than an area-uniform
    sampling of the hemisphere would be.

    :param n: Number of samples to generate.
    :param rng: Optional random source exposing ``uniform(low, high, size)``, e.g.
        ``np.random.default_rng(seed)``; pass one to make the draw reproducible.
        Defaults to NumPy's global ``np.random`` state.
    :return: Three separate lists containing x, y, and z components of the directions.
    """
    source = np.random if rng is None else rng

    theta = source.uniform(0, np.pi / 2, n)  # Polar angle: 0 to pi/2 for hemisphere along y-axis
    phi = source.uniform(0, 2 * np.pi, n)  # Azimuthal angle: 0 to 2pi for full rotation

    x = np.sin(theta) * np.cos(phi)
    y = np.cos(theta)
    z = np.sin(theta) * np.sin(phi)

    return x.tolist(), y.tolist(), z.tolist()

def sample_hemisphere_z(n, rng=None):
    """
    Generate `n` random unit directions in the hemisphere around the positive z-axis.

    The polar angle (measured from the +z axis) and the azimuth are each drawn
    uniformly, so the samples are denser near the +z pole than an area-uniform
    sampling of the hemisphere would be.

    :param n: Number of samples to generate.
    :param rng: Optional random source exposing ``uniform(low, high, size)``, e.g.
        ``np.random.default_rng(seed)``; pass one to make the draw reproducible.
        Defaults to NumPy's global ``np.random`` state.
    :return: Three separate lists containing x, y, and z components of the directions.
    """
    source = np.random if rng is None else rng

    theta = source.uniform(0, np.pi / 2, n)  # Polar angle: 0 to pi/2 for hemisphere along z-axis
    phi = source.uniform(0, 2 * np.pi, n)  # Azimuthal angle: 0 to 2pi for full rotation

    x = np.sin(theta) * np.cos(phi)
    y = np.sin(theta) * np.sin(phi)
    z = np.cos(theta)

    return x.tolist(), y.tolist(), z.tolist()

import string

# Read the normal direction of every prescribed node (one component per file)
existing_data_x = pd.read_csv('Data_Generator/input/x_direction.csv', header=None)
existing_data_y = pd.read_csv('Data_Generator/input/y_direction.csv', header=None)
existing_data_z = pd.read_csv('Data_Generator/input/z_direction.csv', header=None)

# One random direction of each kind is needed per row of the existing data
n_samples = existing_data_x.shape[0]

letters = list(string.ascii_lowercase[:10])  # This will give you 'a' to 'j'

# NOTE(review): no random seed is set, so every run produces a different dataset.
for i in letters:
    # Draw a fresh set of random directions for every batch; sampling once
    # outside this loop would write the very same data to all ten batches.
    # Entry k is the (x_list, y_list, z_list) triple of random direction k:
    # one direction from each of the +x, +y and +z hemispheres.
    hemisphere_samples = [
        sample_hemisphere_x(n_samples),
        sample_hemisphere_y(n_samples),
        sample_hemisphere_z(n_samples),
    ]

    # force_dir_x holds the x components, force_dir_y the y components and
    # force_dir_z the z components, one column per direction, so that column k
    # read across the three files is direction k (column 0 being the normal).
    # The previous version put the (x, y, z) of a single sample into one file,
    # which scattered the components of each sampled vector across columns.
    new_x_df = pd.DataFrame({k: sample[0] for k, sample in enumerate(hemisphere_samples)})
    new_y_df = pd.DataFrame({k: sample[1] for k, sample in enumerate(hemisphere_samples)})
    new_z_df = pd.DataFrame({k: sample[2] for k, sample in enumerate(hemisphere_samples)})

    # Normal direction first, followed by the three random directions
    combined_data_x = pd.concat([existing_data_x, new_x_df], axis=1)
    combined_data_y = pd.concat([existing_data_y, new_y_df], axis=1)
    combined_data_z = pd.concat([existing_data_z, new_z_df], axis=1)

    # Ensure that directories exist for Hold-out
    holdout_dir = f'Hold-out/dataset/{i}'
    os.makedirs(holdout_dir, exist_ok=True)

    # Ensure that directories exist for LODO
    lodo_dir = f'LODO/dataset/{i}'
    os.makedirs(lodo_dir, exist_ok=True)

    # The same batch is stored under both evaluation layouts
    for target_dir in (holdout_dir, lodo_dir):
        combined_data_x.to_csv(f'{target_dir}/force_dir_x_{i}.csv', header=False, index=False)
        combined_data_y.to_csv(f'{target_dir}/force_dir_y_{i}.csv', header=False, index=False)
        combined_data_z.to_csv(f'{target_dir}/force_dir_z_{i}.csv', header=False, index=False)


print(f"Saved {len(letters)} direction batches to Hold-out/dataset and LODO/dataset.")

Appended new directions to x_direction.csv, y_direction.csv, and z_direction.csv.


In [8]:
# Step 1: one random direction per row of the existing direction data
num_rows = len(existing_data_x)

# Step 2: draw every component uniformly from [-1, 1), giving a (num_rows, 3) array
random_directions = np.random.uniform(-1, 1, (num_rows, 3))

# Step 3: scale every row to unit length; a zero-length row gets a divisor of
# 1 (and so stays a zero vector) to avoid dividing by zero
norms = np.linalg.norm(random_directions, axis=1, keepdims=True)
norms = np.where(norms == 0, 1, norms)
normalized_directions = random_directions / norms

# Step 4: split the unit vectors into their x, y, z components
random_x, random_y, random_z = normalized_directions.T

# Step 5: write each component to its own header-less CSV file
output_folder = "LODO/final_step/a"
os.makedirs(output_folder, exist_ok=True)
for target_path, component in (
    (f'{output_folder}/force_dir_x_a.csv', random_x),
    (f'{output_folder}/force_dir_y_a.csv', random_y),
    (f'{output_folder}/force_dir_z_a.csv', random_z),
):
    pd.DataFrame(component).to_csv(target_path, header=False, index=False)

print(f"Random directions saved to CSV files in {output_folder}/")

Random directions saved to CSV files in /home/hadeel/backup/rackmic/LODO/final_step/a/
