In [1]:
import keras
from keras.models import Model
from keras import backend as K
from keras.layers import Input, concatenate, Conv2D, MaxPooling2D, Conv2DTranspose, ZeroPadding2D
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Reshape, Dense, Flatten
import vtk
from vtkmodules.util.numpy_support import vtk_to_numpy
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
import os

import mlflow
import mlflow.keras
from mlflow.models.signature import infer_signature

mlflow.set_experiment('steady-flow-prediction')

2025-01-14 20:11:11.017445: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-01-14 20:11:11.031496: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1736885471.046107    5138 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1736885471.050577    5138 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-01-14 20:11:11.065481: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instr

<Experiment: artifact_location='mlflow-artifacts:/mlflow', creation_time=1736885099345, experiment_id='1209', last_update_time=1736885099345, lifecycle_stage='active', name='steady-flow-prediction', tags={'mlflow.domino.dataset_info': '6785663fba6ede24a00ec8b9-6785663fba6ede24a00ec8b8',
 'mlflow.domino.environment_id': '67857df19cb041514b92b5ca',
 'mlflow.domino.environment_revision_id': '678674659cb041514b92b847',
 'mlflow.domino.hardware_tier': 'large-k8s',
 'mlflow.domino.project_id': '6785663eba6ede24a00ec8b4',
 'mlflow.domino.project_name': 'cfd-ml-examples',
 'mlflow.domino.run_id': '6786b7a5a12d5146bf75972a',
 'mlflow.domino.run_number': '6',
 'mlflow.domino.user': 'dmennell',
 'mlflow.domino.user_id': '64da8c4df9b96f5a85ea9e51',
 'mlflow.source.type': 'NOTEBOOK',
 'mlflow.user': 'dmennell'}>

In [2]:
batch_size = 32
epochs = 3
learning_rate = 1e-4

In [3]:
class VTK_data:
    def __init__(self, base_path, split_ratio=0.8):
        self.base_path = Path(base_path)
        self.geometries = []
        self.steady_flows = []
        self.split_ratio = split_ratio
        self.split_line = 0

    def load_data(self):
        for dirpath, dirnames, filenames in os.walk(self.base_path):
            for filename in filenames:
                if filename.endswith('.vtm'):
                    full_path = Path(dirpath) / filename
                    try:
                        data = self._load_single_file(full_path)
                        if data is not None:
                            print(f"Processing: {filename}")
                            if 'geometry' in filename:
                                self.geometries.append(data)
                            elif 'cylinder2d_iT' in filename:
                                self.steady_flows.append(data)
                    except Exception as e:
                        print(f"Error loading {full_path}: {str(e)}")
        
        self.split_line = int(len(self.geometries) * self.split_ratio)
        return self.geometries, self.steady_flows

    def _load_single_file(self, file_path):
        reader = vtk.vtkXMLMultiBlockDataReader()
        reader.SetFileName(str(file_path))
        reader.Update()
        
        data = reader.GetOutput()
        if not data:
            return None
            
        data_iterator = data.NewIterator()
        img_data = data_iterator.GetCurrentDataObject()
        if not img_data:
            return None
        
        if hasattr(img_data, 'GetProducerPort'):
            producer = img_data.GetProducerPort()
            if producer:
                producer.Update()
        elif hasattr(img_data, 'GetSource'):
            source = img_data.GetSource()
            if source:
                source.Update()
                
        point_data = img_data.GetPointData()
        array_data = point_data.GetArray(0)
        return vtk_to_numpy(array_data)

In [4]:
dataset = VTK_data("../data")
geometries, steady_flows = dataset.load_data()

if len(dataset.geometries) > 0 and len(dataset.steady_flows) > 0:
    train_geometries = np.stack(dataset.geometries[:dataset.split_line])
    train_steady_flows = np.stack(dataset.steady_flows[:dataset.split_line])
    test_geometries = np.stack(dataset.geometries[dataset.split_line:])
    test_steady_flows = np.stack(dataset.steady_flows[dataset.split_line:])
    
    print(f"Training samples: {len(train_geometries)}")
    print(f"Testing samples: {len(test_geometries)}")
    print(f"\nGeometry shape: {train_geometries[0].shape}")
    print(f"Flow shape: {train_steady_flows[0].shape}")

Processing: geometry_iT0000000.vtm
Processing: rank_iT0000000.vtm
Processing: cuboid_iT0000000.vtm
Processing: cylinder2d_iT0021720.vtm
Processing: geometry_iT0000000.vtm
Processing: rank_iT0000000.vtm
Processing: cuboid_iT0000000.vtm
Processing: cylinder2d_iT0041196.vtm
Processing: geometry_iT0000000.vtm
Processing: rank_iT0000000.vtm
Processing: cuboid_iT0000000.vtm
Processing: cylinder2d_iT0024829.vtm
Processing: geometry_iT0000000.vtm
Processing: rank_iT0000000.vtm
Processing: cuboid_iT0000000.vtm
Processing: cylinder2d_iT0039865.vtm
Processing: geometry_iT0000000.vtm
Processing: rank_iT0000000.vtm
Processing: cuboid_iT0000000.vtm
Processing: cylinder2d_iT0042205.vtm
Processing: geometry_iT0000000.vtm
Processing: rank_iT0000000.vtm
Processing: cuboid_iT0000000.vtm
Processing: cylinder2d_iT0023989.vtm
Processing: geometry_iT0000000.vtm
Processing: rank_iT0000000.vtm
Processing: cuboid_iT0000000.vtm
Processing: cylinder2d_iT0023187.vtm
Processing: geometry_iT0000000.vtm
Processing: r

In [5]:
with mlflow.start_run(run_name='steady-flow-unet') as run:
    mlflow.log_params({
        'batch_size': batch_size,
        'epochs': epochs,
        'learning_rate': learning_rate,
        'architecture': 'UNet'
    })
    
    inputs = Input(shape=(9812,))
    x = Reshape((44, 223, 1))(inputs)
    
    # Encoder
    conv1 = Conv2D(32, 3, activation='relu', padding='same')(x)
    conv1 = Conv2D(32, 3, activation='relu', padding='same')(conv1)
    pool1 = MaxPooling2D()(conv1)
    
    conv2 = Conv2D(64, 3, activation='relu', padding='same')(pool1)
    conv2 = Conv2D(64, 3, activation='relu', padding='same')(conv2)
    pool2 = MaxPooling2D()(conv2)
    
    # Bridge
    conv3 = Conv2D(128, 3, activation='relu', padding='same')(pool2)
    conv3 = Conv2D(128, 3, activation='relu', padding='same')(conv3)
    
    # Decoder
    up1 = concatenate([Conv2DTranspose(64, 2, strides=2, padding='same')(conv3), conv2])
    conv4 = Conv2D(64, 3, activation='relu', padding='same')(up1)
    conv4 = Conv2D(64, 3, activation='relu', padding='same')(conv4)
    
    up2 = concatenate([Conv2DTranspose(32, 2, strides=2, padding='same')(conv4), conv1])
    conv5 = Conv2D(32, 3, activation='relu', padding='same')(up2)
    conv5 = Conv2D(32, 3, activation='relu', padding='same')(conv5)
    
    conv6 = Conv2D(2, 1, activation='linear')(conv5)
    outputs = Reshape((9812, 2))(conv6)
    
    model = Model(inputs, outputs)
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate),
        loss='mse',
        metrics=['mse']
    )
    
    class MLflowCallback(keras.callbacks.Callback):
        def on_epoch_end(self, epoch, logs=None):
            for name, value in logs.items():
                mlflow.log_metric(f'epoch_{name}', value, step=epoch)
    
    history = model.fit(
        train_geometries,
        train_steady_flows,
        batch_size=batch_size,
        epochs=epochs,
        validation_data=(test_geometries, test_steady_flows),
        callbacks=[MLflowCallback()]
    )
    
    test_loss = model.evaluate(test_geometries, test_steady_flows, verbose=0)
    mlflow.log_metric('test_mse', test_loss[0])
    
    signature = infer_signature(
        train_geometries[0:1],
        model.predict(train_geometries[0:1])
    )
    
    mlflow.keras.log_model(
        model,
        'model',
        registered_model_name='steady_flow_predictor',
        signature=signature
    )

2025-01-14 20:12:00.164409: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:152] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)


🏃 View run steady-flow-unet at: http://127.0.0.1:8768/#/experiments/1209/runs/efb69be7fa23494e8a2a974e62668e0c
🧪 View experiment at: http://127.0.0.1:8768/#/experiments/1209


ValueError: A `Concatenate` layer requires inputs with matching shapes except for the concatenation axis. Received: input_shape=[(None, 22, 110, 64), (None, 22, 111, 64)]

In [None]:
with mlflow.start_run(run_name='visualization'):
    predictions = model.predict(test_geometries[:3])
    
    for i in range(3):
        pred = predictions[i].reshape(44, 223, 2)
        true = test_steady_flows[i].reshape(44, 223, 2)
        geom = test_geometries[i].reshape(44, 223)
        
        plt.figure(figsize=(15, 5))
        comparison = np.concatenate([
            pred[:,:,0],
            true[:,:,0],
            geom/10.0
        ], axis=1)
        
        plt.imshow(comparison)
        plt.title(f'Sample {i+1}: Predicted vs True Flow vs Geometry')
        plt.colorbar()
        
        plt.savefig(f'prediction_{i}.png')
        mlflow.log_artifact(f'prediction_{i}.png')
        plt.close()
        
        mse = np.mean((pred - true) ** 2)
        mlflow.log_metric(f'sample_{i}_mse', mse)