In [None]:
import keras
from keras.models import Model
from keras import backend as K
from keras.layers import Input, concatenate, Conv2D, MaxPooling2D, Conv2DTranspose, ZeroPadding2D
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Reshape, Dense, Flatten
import vtk
from vtkmodules.util.numpy_support import vtk_to_numpy
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
import os

import mlflow
import mlflow.keras
from mlflow.models.signature import infer_signature

# MLflow connection settings. The literal values below are only fallbacks:
# export MLFLOW_TRACKING_URI / MLFLOW_EXPERIMENT_NAME to point the notebook at
# a real server without editing the code.
mlflow.set_tracking_uri(os.environ.get('MLFLOW_TRACKING_URI', 'http://your-mlflow-server:5000'))
mlflow.set_experiment(os.environ.get('MLFLOW_EXPERIMENT_NAME', 'steady-flow-prediction'))

In [None]:
# Training hyperparameters; all three are logged to MLflow and passed to the
# optimizer / model.fit in the training cell.
batch_size, epochs = 32, 5  # samples per gradient step, passes over the training set
learning_rate = 1e-4        # step size handed to the Adam optimizer

In [None]:
class VTK_data:
    """Collects `.vtm` (VTK multi-block) samples found under a directory tree.

    Files whose name contains ``'geometry'`` are stored in ``geometries`` and
    files whose name contains ``'cylinder2d_iT'`` are stored in
    ``steady_flows``. Each stored entry is the NumPy array obtained from the
    first point-data array of the file.

    Attributes:
        base_path: Root directory that is walked recursively.
        geometries: Arrays loaded from geometry files, in traversal order.
        steady_flows: Arrays loaded from flow files, in traversal order.
        split_ratio: Fraction of the geometries assigned to the training split.
        split_line: Index separating training from test samples; computed by
            ``load_data`` (0 until then).
    """

    def __init__(self, base_path, split_ratio=0.8):
        """Store the search root and split ratio; no files are read here."""
        self.base_path = Path(base_path)
        self.geometries = []
        self.steady_flows = []
        self.split_ratio = split_ratio
        self.split_line = 0

    def load_data(self):
        """Walk ``base_path`` and load every ``.vtm`` file.

        Directories and files are visited in sorted order so that the sample
        order (and therefore the train/test split) is the same on every run
        and every filesystem. Files that fail to load are reported on stdout
        and skipped.

        Returns:
            Tuple ``(geometries, steady_flows)`` of the two populated lists.
        """
        # Start from empty lists so that re-running the loading cell does not
        # append a second copy of every sample.
        self.geometries = []
        self.steady_flows = []

        for dirpath, dirnames, filenames in os.walk(self.base_path):
            # Sorting in place steers os.walk's (top-down) descent order.
            dirnames.sort()
            for filename in sorted(filenames):
                if not filename.endswith('.vtm'):
                    continue
                full_path = Path(dirpath) / filename
                try:
                    data = self._load_single_file(full_path)
                except Exception as e:
                    print(f"Error loading {full_path}: {str(e)}")
                    continue
                if data is None:
                    continue
                print(f"Processing: {filename}")
                if 'geometry' in filename:
                    self.geometries.append(data)
                elif 'cylinder2d_iT' in filename:
                    self.steady_flows.append(data)

        if len(self.geometries) != len(self.steady_flows):
            # Samples are paired purely by list position, so unequal counts
            # mean at least one geometry/flow pair is incomplete.
            print(
                f"Warning: loaded {len(self.geometries)} geometries but "
                f"{len(self.steady_flows)} steady flows; pairs may be misaligned"
            )

        self.split_line = int(len(self.geometries) * self.split_ratio)
        return self.geometries, self.steady_flows

    def _load_single_file(self, file_path):
        """Read one ``.vtm`` file and return its first point-data array.

        Args:
            file_path: Path of the multi-block file to read.

        Returns:
            NumPy array converted from point-data array 0 of the data object
            the multi-block iterator currently points at, or ``None`` when the
            reader yields no output or no current data object.

        Raises:
            ValueError: If the data object carries no point-data array.
        """
        reader = vtk.vtkXMLMultiBlockDataReader()
        reader.SetFileName(str(file_path))
        reader.Update()

        data = reader.GetOutput()
        if not data:
            return None

        data_iterator = data.NewIterator()
        img_data = data_iterator.GetCurrentDataObject()
        if not img_data:
            return None

        # Only taken when the installed VTK build exposes these methods;
        # otherwise both branches are skipped.
        if hasattr(img_data, 'GetProducerPort'):
            producer = img_data.GetProducerPort()
            if producer:
                producer.Update()
        elif hasattr(img_data, 'GetSource'):
            source = img_data.GetSource()
            if source:
                source.Update()

        point_data = img_data.GetPointData()
        array_data = point_data.GetArray(0) if point_data is not None else None
        if array_data is None:
            # Fail with a message naming the file instead of an opaque error
            # from vtk_to_numpy; load_data reports it and skips the file.
            raise ValueError(f"No point-data array found in {file_path}")
        return vtk_to_numpy(array_data)

In [None]:
# Load every sample below ../data and split it into train / test arrays.
dataset = VTK_data("../data")
geometries, steady_flows = dataset.load_data()

n_geometries = len(dataset.geometries)
n_flows = len(dataset.steady_flows)

# Fail here with a clear message; otherwise the later cells fail with a
# NameError (nothing loaded) or an obscure stacking/shape error.
if n_geometries == 0 or n_flows == 0:
    raise RuntimeError(
        f"No usable samples under {dataset.base_path}: "
        f"{n_geometries} geometries, {n_flows} steady flows"
    )
if n_geometries != n_flows:
    # Samples are paired by position, so the two lists must be equally long.
    raise RuntimeError(
        f"Geometry/flow count mismatch: {n_geometries} geometries vs {n_flows} steady flows"
    )
if not 0 < dataset.split_line < n_geometries:
    # np.stack cannot stack an empty list, so both splits need >= 1 sample.
    raise RuntimeError(
        f"Split index {dataset.split_line} leaves an empty train or test set "
        f"for {n_geometries} samples"
    )

train_geometries = np.stack(dataset.geometries[:dataset.split_line])
train_steady_flows = np.stack(dataset.steady_flows[:dataset.split_line])
test_geometries = np.stack(dataset.geometries[dataset.split_line:])
test_steady_flows = np.stack(dataset.steady_flows[dataset.split_line:])

print(f"Training samples: {len(train_geometries)}")
print(f"Testing samples: {len(test_geometries)}")
print(f"\nGeometry shape: {train_geometries[0].shape}")
print(f"Flow shape: {train_steady_flows[0].shape}")

In [None]:
# Build, train, evaluate and register the model inside a single MLflow run.
with mlflow.start_run(run_name='steady-flow-unet') as run:
    mlflow.log_params({
        'batch_size': batch_size,
        'epochs': epochs,
        'learning_rate': learning_rate,
        'architecture': 'UNet'
    })
    
    # Each sample is a flat vector of 9812 = 44 * 223 values, viewed as a
    # single-channel 44 x 223 image.
    inputs = Input(shape=(9812,))
    x = Reshape((44, 223, 1))(inputs)
    
    # Encoder (spatial sizes noted as height x width)
    conv1 = Conv2D(32, (3, 3), activation='relu', padding='same')(x)
    conv1 = Conv2D(32, (3, 3), activation='relu', padding='same')(conv1)      # 44 x 223
    pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)                             # 22 x 111

    conv2 = Conv2D(64, (3, 3), activation='relu', padding='same')(pool1)
    conv2 = Conv2D(64, (3, 3), activation='relu', padding='same')(conv2)      # 22 x 111
    pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)                             # 11 x 55

    # Bridge
    conv3 = Conv2D(128, (3, 3), activation='relu', padding='same')(pool2)
    conv3 = Conv2D(128, (3, 3), activation='relu', padding='same')(conv3)     # 11 x 55

    # Decoder. A stride-2 transposed convolution doubles each spatial size, so
    # the odd widths lost by pooling (111, 223) must be restored with one
    # column of zero padding before concatenating with the skip tensor.
    # 11 x 55 -> 22 x 110; conv2 is 22 x 111, so only the width is padded.
    # (Padding the height as well would give 23 x 111 and break concatenate.)
    up6 = concatenate(
        [
            ZeroPadding2D(((0, 0), (1, 0)))(
                Conv2DTranspose(64, (2, 2), strides=(2, 2), padding='same')(conv3)
            ),
            conv2,
        ],
        axis=3,
    )
    conv6 = Conv2D(64, (3, 3), activation='relu', padding='same')(up6)
    conv6 = Conv2D(64, (3, 3), activation='relu', padding='same')(conv6)      # 22 x 111

    # 22 x 111 -> 44 x 222; conv1 is 44 x 223, so again pad the width by one.
    up7 = concatenate(
        [
            ZeroPadding2D(((0, 0), (1, 0)))(
                Conv2DTranspose(32, (2, 2), strides=(2, 2), padding='same')(conv6)
            ),
            conv1,
        ],
        axis=3,
    )
    conv7 = Conv2D(32, (3, 3), activation='relu', padding='same')(up7)
    conv7 = Conv2D(32, (3, 3), activation='relu', padding='same')(conv7)      # 44 x 223
    
    # Two linear output channels per grid point, flattened back to (9812, 2).
    # NOTE(review): presumably the two flow-velocity components; confirm
    # against the data files.
    conv8 = Conv2D(2, (1, 1), activation='linear')(conv7)
    outputs = Reshape((9812, 2))(conv8)
    
    model = Model(inputs, outputs)
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate),
        loss='mse',
        metrics=['mse']
    )
    
    class MLflowCallback(keras.callbacks.Callback):
        """Forwards every per-epoch Keras metric to MLflow as `epoch_<name>`."""

        def on_epoch_end(self, epoch, logs=None):
            # `logs` defaults to None, so guard before iterating.
            for name, value in (logs or {}).items():
                mlflow.log_metric(f'epoch_{name}', value, step=epoch)
    
    # NOTE(review): the test split doubles as validation data here, so the
    # reported test metric is not from data unseen during training.
    history = model.fit(
        train_geometries,
        train_steady_flows,
        batch_size=batch_size,
        epochs=epochs,
        validation_data=(test_geometries, test_steady_flows),
        callbacks=[MLflowCallback()]
    )
    
    # evaluate() returns [loss, mse]; index 0 is the MSE loss.
    test_loss = model.evaluate(test_geometries, test_steady_flows, verbose=0)
    mlflow.log_metric('test_mse', test_loss[0])
    
    # Derive the model signature from one real input/output pair.
    signature = infer_signature(
        train_geometries[0:1],
        model.predict(train_geometries[0:1])
    )
    
    mlflow.keras.log_model(
        model,
        'model',
        registered_model_name='steady_flow_predictor',
        signature=signature
    )

In [None]:
# Log side-by-side prediction plots and per-sample errors in their own MLflow
# run (separate from the training run).
with mlflow.start_run(run_name='visualization'):
    predictions = model.predict(test_geometries[:3])
    
    # Iterate over the predictions actually returned: the test split may hold
    # fewer than three samples, in which case a fixed range(3) would raise
    # IndexError.
    for i in range(len(predictions)):
        pred = predictions[i].reshape(44, 223, 2)
        true = test_steady_flows[i].reshape(44, 223, 2)
        geom = test_geometries[i].reshape(44, 223)
        
        plt.figure(figsize=(15, 5))
        # Panels left to right: predicted channel 0, true channel 0, geometry.
        # NOTE(review): geometry is divided by 10, presumably so it fits the
        # shared colour scale; confirm the intended scaling.
        comparison = np.concatenate([
            pred[:,:,0],
            true[:,:,0],
            geom/10.0
        ], axis=1)
        
        plt.imshow(comparison)
        plt.title(f'Sample {i+1}: Predicted vs True Flow vs Geometry')
        plt.colorbar()
        
        plt.savefig(f'prediction_{i}.png')
        mlflow.log_artifact(f'prediction_{i}.png')
        # Close the figure so figures do not accumulate across iterations.
        plt.close()
        
        # Mean squared error over both output channels of this sample.
        mse = np.mean((pred - true) ** 2)
        mlflow.log_metric(f'sample_{i}_mse', mse)