In [6]:
# Imports
import keras
from keras.models import Model
from keras import backend as K
from keras.layers import Input, concatenate, Conv2D, MaxPooling2D, Conv2DTranspose, ZeroPadding2D
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Reshape, Dense, Flatten
import vtk
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
import os

# MLflow imports
import mlflow
import mlflow.keras
from mlflow.models.signature import infer_signature

# Set MLflow tracking URI
mlflow.set_experiment('steady-flow-prediction')

<Experiment: artifact_location='mlflow-artifacts:/mlflow', creation_time=1736885099345, experiment_id='1209', last_update_time=1736885099345, lifecycle_stage='active', name='steady-flow-prediction', tags={'mlflow.domino.dataset_info': '6785663fba6ede24a00ec8b9-6785663fba6ede24a00ec8b8',
 'mlflow.domino.environment_id': '67857df19cb041514b92b5ca',
 'mlflow.domino.environment_revision_id': '678674659cb041514b92b847',
 'mlflow.domino.hardware_tier': 'large-k8s',
 'mlflow.domino.project_id': '6785663eba6ede24a00ec8b4',
 'mlflow.domino.project_name': 'cfd-ml-examples',
 'mlflow.domino.run_id': '6786b7a5a12d5146bf75972a',
 'mlflow.domino.run_number': '6',
 'mlflow.domino.user': 'dmennell',
 'mlflow.domino.user_id': '64da8c4df9b96f5a85ea9e51',
 'mlflow.source.type': 'NOTEBOOK',
 'mlflow.user': 'dmennell'}>

In [3]:
# Training params
batch_size = 32
epochs = 2
learning_rate = 1e-4

In [4]:
class VTK_data:
    def __init__(self, base_path, split_ratio=0.8):
        self.base_path = Path(base_path)
        self.data = []
        self.geometries = []
        self.steady_flows = []
        self.split_ratio = split_ratio
        self.split_line = 0
        
    def load_data(self):
        for dirpath, dirnames, filenames in os.walk(self.base_path):
            for filename in filenames:
                if filename.endswith('.vtm'):
                    full_path = Path(dirpath) / filename
                    try:
                        data = self._load_single_file(full_path)
                        if data is not None:
                            if 'geometry' in filename:
                                self.geometries.append(data)
                            elif 'cylinder2d_iT' in filename:
                                self.steady_flows.append(data)
                    except Exception as e:
                        print(f"Error loading {full_path}: {str(e)}")
        
        self.split_line = int(len(self.geometries) * self.split_ratio)
        return self.geometries, self.steady_flows
    
    def _load_single_file(self, file_path):
        reader = vtk.vtkXMLMultiBlockDataReader()
        reader.SetFileName(str(file_path))
        reader.Update()
        
        data = reader.GetOutput()
        data_iterator = data.NewIterator()
        img_data = data_iterator.GetCurrentDataObject()
        
        if hasattr(img_data, 'GetProducerPort'):
            producer = img_data.GetProducerPort()
            if producer:
                producer.Update()
        elif hasattr(img_data, 'GetSource'):
            source = img_data.GetSource()
            if source:
                source.Update()
                
        point_data = img_data.GetPointData()
        array_data = point_data.GetArray(0)
        return vtk.util.numpy_support.vtk_to_numpy(array_data)

In [5]:
# Load and prepare data
base_directory = "../data"
dataset = VTK_data(base_directory)
geometries, steady_flows = dataset.load_data()

if len(dataset.geometries) > 0 and len(dataset.steady_flows) > 0:
    # Split and stack data
    train_geometries = np.stack(dataset.geometries[:dataset.split_line])
    train_steady_flows = np.stack(dataset.steady_flows[:dataset.split_line])
    test_geometries = np.stack(dataset.geometries[dataset.split_line:])
    test_steady_flows = np.stack(dataset.steady_flows[dataset.split_line:])
    
    print(f"Train samples: {len(train_geometries)}")
    print(f"Test samples: {len(test_geometries)}")

Error loading ../data/simulation_data/runlog_01302/vtkData/geometry_iT0000000.vtm: module 'vtk' has no attribute 'util'
Error loading ../data/simulation_data/runlog_01302/vtkData/rank_iT0000000.vtm: module 'vtk' has no attribute 'util'
Error loading ../data/simulation_data/runlog_01302/vtkData/cuboid_iT0000000.vtm: module 'vtk' has no attribute 'util'
Error loading ../data/simulation_data/runlog_01302/vtkData/data/cylinder2d_iT0021720.vtm: module 'vtk' has no attribute 'util'
Error loading ../data/simulation_data/runlog_01213/vtkData/geometry_iT0000000.vtm: module 'vtk' has no attribute 'util'
Error loading ../data/simulation_data/runlog_01213/vtkData/rank_iT0000000.vtm: module 'vtk' has no attribute 'util'
Error loading ../data/simulation_data/runlog_01213/vtkData/cuboid_iT0000000.vtm: module 'vtk' has no attribute 'util'
Error loading ../data/simulation_data/runlog_01213/vtkData/data/cylinder2d_iT0041196.vtm: module 'vtk' has no attribute 'util'
Error loading ../data/simulation_data/

In [None]:
with mlflow.start_run(run_name='steady-flow-unet') as run:
    # Log parameters
    mlflow.log_params({
        'batch_size': batch_size,
        'epochs': epochs,
        'learning_rate': learning_rate,
        'architecture': 'UNet'
    })
    
    # Model architecture
    inputs = Input(shape=(9812,))
    reshaped = Reshape((44, 223, 1))(inputs)
    
    # Encoder
    conv1 = Conv2D(32, (3, 3), activation='relu', padding='same')(reshaped)
    conv1 = Conv2D(32, (3, 3), activation='relu', padding='same')(conv1)
    pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)
    
    conv2 = Conv2D(64, (3, 3), activation='relu', padding='same')(pool1)
    conv2 = Conv2D(64, (3, 3), activation='relu', padding='same')(conv2)
    pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)
    
    # Bridge
    conv3 = Conv2D(128, (3, 3), activation='relu', padding='same')(pool2)
    
    # Decoder
    up1 = concatenate([Conv2DTranspose(64, (2, 2), strides=(2, 2), padding='same')(conv3), conv2], axis=3)
    conv4 = Conv2D(64, (3, 3), activation='relu', padding='same')(up1)
    
    up2 = concatenate([Conv2DTranspose(32, (2, 2), strides=(2, 2), padding='same')(conv4), conv1], axis=3)
    conv5 = Conv2D(32, (3, 3), activation='relu', padding='same')(up2)
    
    # Output
    conv6 = Conv2D(2, (1, 1), activation='linear')(conv5)
    final_output = Reshape((9812, 2))(conv6)
    
    model = Model(inputs=inputs, outputs=final_output)
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
                 loss='mse',
                 metrics=['MSE'])
    
    # MLflow callback
    class MLflowCallback(keras.callbacks.Callback):
        def on_epoch_end(self, epoch, logs=None):
            logs = logs or {}
            for name, value in logs.items():
                mlflow.log_metric(f'epoch_{name}', value, step=epoch)
    
    # Train
    history = model.fit(train_geometries, train_steady_flows,
                       batch_size=batch_size,
                       epochs=epochs,
                       validation_data=(test_geometries, test_steady_flows),
                       callbacks=[MLflowCallback()])
    
    # Log metrics and model
    test_loss = model.evaluate(test_geometries, test_steady_flows, verbose=0)
    mlflow.log_metric('test_mse', test_loss[0])
    
    signature = infer_signature(train_geometries[0:1], model.predict(train_geometries[0:1]))
    mlflow.keras.log_model(model, 'model',
                          registered_model_name='steady_flow_predictor',
                          signature=signature)

In [None]:
with mlflow.start_run(run_name='prediction-visualization'):
    predicted_flows = model.predict(test_geometries, batch_size=batch_size)
    
    for i in range(min(3, len(test_geometries))):
        pred_flow = predicted_flows[i].reshape(44, 223, 2)
        true_flow = test_steady_flows[i].reshape(44, 223, 2)
        geometry = test_geometries[i].reshape(44, 223)
        
        plt.figure(figsize=(15, 5))
        comparison = np.concatenate([pred_flow[:,:,0], 
                                    true_flow[:,:,0], 
                                    geometry/10.0], axis=1)
        
        plt.imshow(comparison)
        plt.title(f'Sample {i+1}: Predicted vs True Flow vs Geometry')
        plt.colorbar()
        
        # Log to MLflow
        plt.savefig(f'prediction_{i}.png')
        mlflow.log_artifact(f'prediction_{i}.png')
        plt.close()
        
        mse = np.mean((pred_flow - true_flow) ** 2)
        mlflow.log_metric(f'prediction_{i}_mse', mse)