In [2]:
import time as clock
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import matplotlib.pyplot as plt
from tqdm.notebook import trange
from IPython.display import clear_output, display
from torch.utils.data import TensorDataset, DataLoader
import numpy as np
import matplotlib.pyplot as plt
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C, WhiteKernel
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import xgboost as xgb
from sklearn.multioutput import MultiOutputRegressor

In [3]:
# configuration of simulator
# and parametrs of reservouir
perm = np.load('perm_3sigma.npy')
nx0, nx1 = perm.shape
nx2 = 1
perm = np.reshape(perm, (nx0, nx1, nx2))
poro = 0.1 + np.zeros((nx0, nx1, nx2))

dx0 = 1.0 / nx0
dx1 = 1.0 / nx1
dx2 = 1.0 / nx2

pwat = 2.0
poil = 4.0
vr = 0.3
kwat = 1.0
koil = 0.3

pmin = 0.0
pmax = 1.0

niter = 5000

t_final = 3.0
dt = t_final / niter

In [3]:
nx0, nx1

(64, 64)

# Read from file

In [6]:
# sim_200 = np.random.randint(0, 5000, 200)
# sim_500 = np.random.randint(0, 5000, 500)
# sim_1000 = np.random.randint(0, 5000, 1000)
# sim_2000 = np.random.randint(0, 5000, 2000)
# np.save('sim_200.npy', sim_200)
# np.save('sim_500.npy', sim_500)
# np.save('sim_1000.npy', sim_1000)
# np.save('sim_2000.npy', sim_2000)

In [4]:
# sim_data: pres, swat, soil
n_points = 500
sim_r = np.load(f'sim_{n_points}.npy')#np.random.randint(0, 5000, n_points)

# 5000
sim_data = np.loadtxt("data_5k_3sigma/sim_5000.txt", dtype=np.float32)[sim_r]
x_list = np.loadtxt("data_5k_3sigma/x_5000.txt", dtype=np.float32)[sim_r]
y_list = np.loadtxt("data_5k_3sigma/y_5000.txt", dtype=np.float32)[sim_r]
t_list = np.loadtxt("data_5k_3sigma/t_5000.txt", dtype=np.float32)[sim_r]

# Convert to torch tensor

In [5]:
simulation_data = torch.tensor(sim_data).requires_grad_(True)

x = torch.tensor(x_list * dx0).requires_grad_(True)
y = torch.tensor(y_list * dx1).requires_grad_(True)
t = torch.tensor(t_list * dt).requires_grad_(True)

points = torch.stack((t, x, y), -1).requires_grad_(True).to('mps')
# points, simulation_data

In [6]:
DEVICE = 'cpu'

In [8]:
class PINN(nn.Module):
    def __init__(self, input_layer = 3, hidden_layer = 64, number_layers = 16, output_layer = 7):
        super().__init__()
        
        activation2 = nn.Sigmoid
        activation1 = nn.ReLU
        
        self.fcs = nn.Sequential(*
            [
                nn.Linear(input_layer, hidden_layer),
                activation1()
            ]
        )
        
        self.fch = nn.Sequential(*
            [
                nn.Sequential(*
                    [
                        nn.Linear(hidden_layer, hidden_layer),
                        activation1()
                    ]
                ) for _ in range(number_layers - 1)
            ]
        )
        
#         resnet = models.resnet18()
        
#         self.resnet_features = nn.Sequential(
#             resnet.layer1,   # First residual block
#             resnet.layer2,   # Second residual block
#             resnet.layer3,   # Third residual block
#             resnet.layer4    # Fourth residual block
#         )
        
#         self.adaptive_pool = nn.AdaptiveAvgPool2d((1, 1))
        
#         self.fce = nn.Linear(512, output_layer)
        
        self.act = nn.ReLU()
        self.fce = nn.Linear(hidden_layer, output_layer)
        
        
    def forward(self, x):
        """
        function for updating neural network
        
        return:
            x - vector (pres
                        soil
                        swat
                        uoil_x,  
                        uoil_y,
                        uwat_x,  
                        uwat_y)
        """
        
        x = self.fcs(x)
        x = self.fch(x)
        
#         x = x.view(x.size(0), 64, 1, 1)
#         x = self.resnet_features(x)
        
#         x = self.adaptive_pool(x)
        
#         x = x.view(x.size(0), -1)        
        x = self.fce(x)
#         x = self.act(x)
        
        return x

# SVM

In [9]:
def check_current_situation_others(svm_model, scaler, t):
    x = np.linspace(0, 1, 100)
    time = t * np.ones(100)
    real_temp = thermal_conductivity_equation([torch.tensor(time), torch.tensor(x)])

    test_points = np.stack((time, x), axis = -1)

    svm_pred = svm_model.predict(scaler.transform(test_points))

    plt.plot(x, svm_pred, label=f'SVM prediction with t = {t}')
    plt.plot(x, real_temp, label=f'Analytical solution with t = {t}')
    plt.grid()
    plt.xlabel('x')
    plt.ylabel(f'T(t={t}, x)')

    plt.plot()
    plt.legend()
    plt.show()
    
def check_current_situation_xgb(xgb, t):
    x = np.linspace(0, 1, 100)
    time = t * np.ones(100)
    real_temp = thermal_conductivity_equation([torch.tensor(time), torch.tensor(x)])

    test_points = np.stack((time, x), axis = -1)

    svm_pred = xgb.predict(test_points)

    plt.plot(x, svm_pred, label=f'SVM prediction with t = {t}')
    plt.plot(x, real_temp, label=f'Analytical solution with t = {t}')
    plt.grid()
    plt.xlabel('x')
    plt.ylabel(f'T(t={t}, x)')

    plt.plot()
    plt.legend()
    plt.show()

In [10]:
X_train = points.cpu().detach().numpy()
y_train = simulation_data.cpu().detach().numpy()

# X_train, X_test, y_train, y_test = train_test_split(
#     X, y, test_size=0.2, random_state=42
# )

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
# X_test_scaled = scaler.transform(X_test)

svr = SVR(kernel="rbf", C=1.0, epsilon=0.1, gamma="scale")
model_svr = MultiOutputRegressor(svr)
model_svr.fit(X_train_scaled, y_train)
# y_pred = svr.predict(X_test_scaled)
# print("MSE:", mean_squared_error(y_test, y_pred))
# print("R^2:", r2_score(y_test, y_pred))

In [11]:
# check_current_situation_others(svr, scaler, t=0.0)
# check_current_situation_others(svr, scaler, t=0.01)
# check_current_situation_others(svr, scaler, t=0.02)
# check_current_situation_others(svr, scaler, t=0.03)
# check_current_situation_others(svr, scaler, t=0.04)
# check_current_situation_others(svr, scaler, t=0.05)

# XGBoost

In [12]:
# Example data
xgb_regressor = xgb.XGBRegressor(tree_method='hist', verbosity=2)
xgb_regressor.fit(X_train, y_train)

[02:18:02] INFO: /Users/runner/work/xgboost/xgboost/src/data/iterative_dmatrix.cc:53: Finished constructing the `IterativeDMatrix`: (500, 3, 1500).


In [13]:
# check_current_situation_xgb(xgb_regressor, t=0)
# check_current_situation_xgb(xgb_regressor, t=0.01)
# check_current_situation_xgb(xgb_regressor, t=0.02)
# check_current_situation_xgb(xgb_regressor, t=0.03)
# check_current_situation_xgb(xgb_regressor, t=0.04)
# check_current_situation_xgb(xgb_regressor, t=0.05)

# Gaussian Processes Regression

In [14]:
kernel = C(1.0, (1e-3, 1e3)) * RBF(length_scale=1.0, length_scale_bounds=(1e-2, 1e2)) \
         + WhiteKernel(noise_level=1, noise_level_bounds=(1e-10, 1e+1))

gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=10, normalize_y=True)
gp.fit(X_train, y_train)
# y_pred, sigma = gp.predict(X_test, return_std=True)

ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


In [15]:
# check_current_situation_xgb(gp, t=0)
# check_current_situation_xgb(gp, t=0.01)
# check_current_situation_xgb(gp, t=0.02)
# check_current_situation_xgb(gp, t=0.03)
# check_current_situation_xgb(gp, t=0.04)
# check_current_situation_xgb(gp, t=0.05)

# Metrics Computation

In [16]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from sklearn.metrics import (
    mean_squared_error, 
    mean_absolute_error, 
    r2_score, 
    median_absolute_error, 
    mean_absolute_percentage_error
)

def evaluate_multidim_models(models_dict, X_test, y_test, device='cpu'):
    """
    Evaluates models on multi-output regression (Target dim: [N, 3]).
    Output Columns: Pressure, Soil (Oil), Swat (Water).
    
    Parameters:
    - models_dict: Dictionary {'ModelName': model_object}
    - X_test: Test features
    - y_test: Test targets (Must be shape [N, 3])
    - device: 'cpu' or 'cuda' (for PyTorch model)
    
    Returns:
    - pd.DataFrame: Rows are (Variable_Metric), Columns are model names.
    """
    results = {}
    
    # 1. Define the specific output names
    target_names = ["Pressure", "Soil", "Swat"]
    
    # 2. Prepare Ground Truth (y_true) as numpy [N, 3]
    if torch.is_tensor(y_test):
        y_true = y_test.detach().cpu().numpy()
    elif isinstance(y_test, (pd.DataFrame, pd.Series)):
        y_true = y_test.to_numpy()
    else:
        y_true = np.array(y_test)
        
    # Basic shape validation
    if y_true.ndim != 2 or y_true.shape[1] != 3:
        raise ValueError(f"y_test must be shape [N, 3], got {y_true.shape}")

    # 3. Evaluate each model
    for name, model in models_dict.items():
        y_pred = None
        
        # --- Generate Predictions [N, 3] ---
        
        # PyTorch Logic
        if isinstance(model, nn.Module):
            model.eval()
            
            # Prepare input tensor
            if torch.is_tensor(X_test):
                X_tensor = X_test.to(device)
            elif isinstance(X_test, (pd.DataFrame, pd.Series)):
                X_tensor = torch.tensor(X_test.values, dtype=torch.float32).to(device)
            else:
                X_tensor = torch.tensor(X_test, dtype=torch.float32).to(device)
            
            with torch.no_grad():
                raw_pred = model(X_tensor)[:, [0, 2, 1]]
                y_pred = raw_pred.cpu().numpy() # Keep shape [N, 3]
                
        # Scikit-Learn Logic
        else:
            if torch.is_tensor(X_test):
                X_input = X_test.cpu().numpy()
            else:
                X_input = X_test
                
            y_pred = model.predict(X_input) # Expecting [N, 3] output

        # Ensure prediction shape matches truth
        if y_pred.shape != y_true.shape:
             raise ValueError(f"Model {name} output shape {y_pred.shape} mismatch with y_test {y_true.shape}")

        # --- Calculate Metrics per Column ---
        model_metrics = {}
        
        # Loop over the 3 output dimensions
        for i, label in enumerate(target_names):
            # Extract single column (1D arrays)
            y_t = y_true[:, i]
            y_p = y_pred[:, i]
            
            # Calculate Standard Metrics
            mse = mean_squared_error(y_t, y_p)
            rmse = np.sqrt(mse)
            mae = mean_absolute_error(y_t, y_p)
            r2 = r2_score(y_t, y_p)
            medae = median_absolute_error(y_t, y_p)
            mape = mean_absolute_percentage_error(y_t, y_p)
            
            # Store with prefixed keys (e.g., "Pressure_MSE")
            model_metrics[f"{label}_MSE"] = mse
            model_metrics[f"{label}_RMSE"] = rmse
            model_metrics[f"{label}_MAE"] = mae
            model_metrics[f"{label}_R2"] = r2
            model_metrics[f"{label}_MedAE"] = medae
            model_metrics[f"{label}_MAPE"] = mape

        results[name] = model_metrics

    # Create DataFrame
    df_results = pd.DataFrame(results)
    
    return df_results

In [17]:
pinn = PINN()
checkpoint = torch.load("pinn_model_weights_500.pth", map_location='cpu')
pinn.load_state_dict(checkpoint)

<All keys matched successfully>

In [18]:
# sim_data: pres, swat, soil
n_points = 2000
sim_r = np.load(f'sim_{n_points}.npy')#np.random.randint(0, 5000, n_points)

# 5000
sim_data = np.loadtxt("data_5k_3sigma/sim_5000.txt", dtype=np.float32)[sim_r]
x_list = np.loadtxt("data_5k_3sigma/x_5000.txt", dtype=np.float32)[sim_r]
y_list = np.loadtxt("data_5k_3sigma/y_5000.txt", dtype=np.float32)[sim_r]
t_list = np.loadtxt("data_5k_3sigma/t_5000.txt", dtype=np.float32)[sim_r]

In [19]:
simulation_data = torch.tensor(sim_data).requires_grad_(True)

x = torch.tensor(x_list * dx0).requires_grad_(True)
y = torch.tensor(y_list * dx1).requires_grad_(True)
t = torch.tensor(t_list * dt).requires_grad_(True)

points = torch.stack((t, x, y), -1).requires_grad_(True).to('mps')
# points, simulation_data

In [21]:
X_test = points.cpu().detach().numpy()
y_test = simulation_data.cpu().detach().numpy()[:, [0, 1, 2]]

In [22]:
my_models = {
    'PINN': pinn,
    'XGB': xgb_regressor,
    'SVM': model_svr,
    'GP': gp
}

df_metrics = evaluate_multidim_models(my_models, X_test, y_test)
df_metrics

Unnamed: 0,PINN,XGB,SVM,GP
Pressure_MSE,0.00124708,0.0009199103,0.1059988,0.005976473
Pressure_RMSE,0.03531403,0.03033002,0.3255745,0.07730765
Pressure_MAE,0.02521433,0.01864464,0.2384688,0.04207838
Pressure_R2,0.9840842,0.9882597,-0.3528011,0.9237257
Pressure_MedAE,0.01785316,0.01044016,0.1436751,0.01987128
Pressure_MAPE,0.1970084,0.2662002,3.246511,1.241065
Soil_MSE,0.0009005318,0.002242123,0.02288298,0.003834241
Soil_RMSE,0.03000886,0.04735106,0.1512712,0.06192125
Soil_MAE,0.01466957,0.02585122,0.1043529,0.03570496
Soil_R2,0.9584786,0.8966211,-0.05507918,0.8232124


In [24]:
import matplotlib.pyplot as plt
from pandas.plotting import table

def save_df_as_png(df, filename="metrics.png"):
    # Create a figure. Adjust figsize width/height as needed
    fig, ax = plt.subplots(figsize=(10, 4)) 
    
    # Hide the axes (we only want the table)
    ax.xaxis.set_visible(False) 
    ax.yaxis.set_visible(False)
    ax.set_frame_on(False)

    # Create the table
    # loc='center' centers it
    tab = table(ax, df, loc='center', cellLoc='center', rowLoc='center')

    # Style: Increase font size and scale
    tab.auto_set_font_size(False)
    tab.set_fontsize(12)
    tab.scale(1.2, 1.2) # Scale width and height

    # Save
    plt.savefig(filename, bbox_inches='tight', dpi=300)
    plt.close()

# Usage with your metrics dataframe
# It's usually better to round numbers before plotting with matplotlib

In [25]:
save_df_as_png(df_metrics, filename='metrics_500.png')

In [23]:
df_metrics.values,

(array([[ 0.01553177,  0.01861234,  0.16224783,  0.00163693],
        [ 0.12462652,  0.13642706,  0.40279998,  0.04045901],
        [ 0.08724123,  0.05774736,  0.32621013,  0.02069011],
        [ 0.89996213,  0.88012058, -0.04501502,  0.98945676],
        [ 0.05388868,  0.01841325,  0.27887026,  0.01128623],
        [ 0.04579656,  0.05363928,  0.20882493,  0.01316636]]),)