In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import torch
import torch.nn as nn
from torch.optim.lr_scheduler import StepLR
from tqdm import tqdm
import sys
import random
from sklearn.preprocessing import StandardScaler
from IPython.display import display, Math
from torch.utils.data import DataLoader, Dataset
import time
import json

from utils import *
from model_utils import *

# Machine epsilon of Python floats: the gap between 1.0 and the next representable float.
# NOTE(review): not referenced by any cell visible here — presumably consumed by later
# cells or by the star-imported helpers; confirm before removing.
sys_epsilon = sys.float_info.epsilon

# Data Preprocessing

In this section, we preprocess the CFD data collected from large-eddy simulations (LES) with the dynamic Smagorinsky model (DSM) of the flow past a sphere at $\mathcal{R}e=10^3$, $5\times 10^3$, and $10^4$. For each Reynolds number, the data are collected from $t=300$ to $t=320$: from $t=300$ to $310$, a snapshot is stored every ten (variable) time steps, whereas from $t=310$ to $320$, snapshots are stored at a fixed interval of $\Delta t = 1$.

In [2]:
# Suffix appended to each dataset name; position k belongs to paths[k].
names = ['_R3', '_R53', '_R4']

# NOTE(review): presumably the number of columns per raw record (it equals the
# number of column headers used when loading the processed files) — it is not
# read by any cell visible here; confirm.
nCols = 26

# Raw field-data files, one per simulation case, in the same order as `names`.
paths = [
    "/home/hmarefat/scratch/torchFOAM/Case_dS/postProcessing/fieldData.dat",
    "/home/hmarefat/scratch/torchFOAM/Case_dS_R53/postProcessing/fieldData.dat",
    "/home/hmarefat/scratch/torchFOAM/Case_dS_R4/postProcessing/fieldData.dat",
]

In [None]:
# Seed Python's built-in RNG before drawing the index samples.
# NOTE(review): only the `random` module is seeded here; whether returnRandmIndx
# draws from it (rather than from NumPy) cannot be seen from this notebook.
random.seed(35)

# Second argument passed to every returnRandmIndx call below.
count = 1000000

# One index set per case, drawn in the order R3 -> R53 -> R4.
# NOTE(review): the first argument is presumably the total row count of the
# corresponding raw file — hard-coded, confirm against the data.
Indx_R3, Indx_R53, Indx_R4 = (
    returnRandmIndx(total_rows, count)
    for total_rows in (4956526, 5900626, 4012425)
)

In [None]:
# Show the first ten entries of the first case's index set (before the shuffle cell below).
Indx_R3[0:10]

In [None]:
# Shuffle the index set in place with Python's `random` module.
# NOTE(review): only Indx_R3 is shuffled (Indx_R53 / Indx_R4 are left as drawn) and
# the cell is not idempotent — every re-run reshuffles again. This looks like a
# leftover experiment; confirm whether it is actually required.
random.shuffle(Indx_R3)

In [None]:
# Show the first ten entries again to see the effect of the in-place shuffle.
Indx_R3[0:10]

In [None]:
# Split every case's index set into a (seen, unseen) pair.
#
# `index_array[k, 0]` holds the seen indices and `index_array[k, 1]` the unseen
# indices of case k (R3, R53, R4 in that order). The object array is allocated
# with an explicit (n_cases, 2) shape and filled element by element: building it
# with np.array(list_of_tuples, dtype=object) depends on NumPy's shape inference
# and silently becomes a 3-D array of Python ints whenever the seen and unseen
# parts happen to have equal length, which can no longer be used as an index.
index_sets = (Indx_R3, Indx_R53, Indx_R4)

index_data = []
index_array = np.empty((len(index_sets), 2), dtype=object)

for row, indx in enumerate(index_sets):
    seen, unseen = splitterIndx(indx)
    index_data.append((seen, unseen))
    index_array[row, 0] = seen
    index_array[row, 1] = unseen

In [None]:
# Sanity check: sizes of the seen (column 0) and unseen (column 1) index sets of the third case.
index_array[2,0].shape, index_array[2,1].shape

In [None]:
# Parse every raw field-data file, split its rows into the seen / unseen subsets
# selected by `index_array`, and pass both subsets to `scaler`.
#
# Lines that cannot be converted to floats, blank lines, and lines whose column
# count differs from the first valid row are skipped and counted; without the
# width check a single short line would make `np.array(data)` ragged.
data = []

for i, path in enumerate(paths):
    # File stem plus case suffix, e.g. "fieldData" + "_R3".
    name = path.split('/')[-1][:-4]+names[i]
    print(f'Starting to preprocess dataset {name}')
    data.clear()

    expected_cols = None   # width of the first successfully parsed row
    n_skipped = 0

    with open(path, "r") as f:
        next(f, None)  # skip the first line (presumably a header) without failing on an empty file
        for line in f:
            fields = line.split()
            try:
                row = [float(x) for x in fields]
            except ValueError as e:
                print(f"Error converting line to float: {line.strip()} - {e}")
                n_skipped += 1
                continue

            if expected_cols is None and row:
                expected_cols = len(row)
            if len(row) != expected_cols:
                print(f"Skipping line with {len(row)} columns (expected {expected_cols}): {line.strip()}")
                n_skipped += 1
                continue

            data.append(row)

    print('Reading raw file is done!')
    if n_skipped:
        # NOTE(review): the index sets were drawn from hard-coded totals, so every
        # skipped line shifts which physical record a given index selects.
        print(f'Skipped {n_skipped} malformed line(s) in {path}')
    if not data:
        raise ValueError(f"No valid data rows could be parsed from {path}")

    ds = np.array(data)

    ds_seen = ds[index_array[i,0]]
    ds_unseen = ds[index_array[i,1]]

    # NOTE(review): the seen and unseen subsets are passed to `scaler` separately.
    # If `scaler` normalises each call with that subset's own statistics, the
    # unseen data is not scaled with the training statistics — confirm intent.
    scaler(name+'_seen', ds_seen)
    scaler(name+'_unseen', ds_unseen)

    print('\n')

# Data Preparation

In [2]:
# Column names applied to every processed dataset file, built group by group.
headers = (
    ["t"]                                                 # time
    + ["X", "Y", "Z"]                                     # spatial coordinates
    + ["Ux", "Uy", "Uz"]                                  # velocity components
    + ["G1", "G2", "G3", "G4", "G5", "G6"]                # velocity gradient tensor components
    + ["S1", "S2", "S3", "S4", "S5", "S6"]                # strain rate tensor components
    + ["UUp1", "UUp2", "UUp3", "UUp4", "UUp5", "UUp6"]    # resolved Reynolds stress tensor components
    + ["Cs"]                                              # Smagorinsky coefficient
)

### Dataset Loading

#### $\mathcal Re = 10^3$

In [3]:
# Re = 10^3 seen
dSn_R103_seen_means = pd.read_csv('../processedDatasets/fieldData_R3_seen_means.txt', sep=' ', names=headers) 
dSn_R103_seen_scales = pd.read_csv('../processedDatasets/fieldData_R3_seen_scales.txt', sep=' ', names=headers) 
dSn_R103_seen = pd.read_csv('../processedDatasets/fieldData_R3_seen_norm.txt', sep=' ', names=headers)
dS_R103_seen = pd.read_csv('../processedDatasets/fieldData_R3_seen.txt', sep=' ', names=headers)

# Re = 10^3 unseen
dSn_R103_unseen_means = pd.read_csv('../processedDatasets/fieldData_R3_unseen_means.txt', sep=' ', names=headers) 
dSn_R103_unseen_scales = pd.read_csv('../processedDatasets/fieldData_R3_unseen_scales.txt', sep=' ', names=headers) 
dSn_R103_unseen = pd.read_csv('../processedDatasets/fieldData_R3_unseen_norm.txt', sep=' ', names=headers)
dS_R103_unseen = pd.read_csv('../processedDatasets/fieldData_R3_unseen.txt', sep=' ', names=headers)

#### $\mathcal Re = 5\times 10^3$

In [4]:
# Re = 5 x 10^3 seen
dSn_R503_seen_means = pd.read_csv('../processedDatasets/fieldData_R53_seen_means.txt', sep=' ', names=headers) 
dSn_R503_seen_scales = pd.read_csv('../processedDatasets/fieldData_R53_seen_scales.txt', sep=' ', names=headers) 
dSn_R503_seen = pd.read_csv('../processedDatasets/fieldData_R53_seen_norm.txt', sep=' ', names=headers)
dS_R503_seen = pd.read_csv('../processedDatasets/fieldData_R53_seen.txt', sep=' ', names=headers)

# Re = 5 x 10^3 unseen
dSn_R503_unseen_means = pd.read_csv('../processedDatasets/fieldData_R53_unseen_means.txt', sep=' ', names=headers) 
dSn_R503_unseen_scales = pd.read_csv('../processedDatasets/fieldData_R53_unseen_scales.txt', sep=' ', names=headers) 
dSn_R503_unseen = pd.read_csv('../processedDatasets/fieldData_R53_unseen_norm.txt', sep=' ', names=headers)
dS_R503_unseen = pd.read_csv('../processedDatasets/fieldData_R53_unseen.txt', sep=' ', names=headers)

#### $\mathcal Re = 10^4$

In [5]:
# Re = 10^4 seen
dSn_R104_seen_means = pd.read_csv('../processedDatasets/fieldData_R4_seen_means.txt', sep=' ', names=headers) 
dSn_R104_seen_scales = pd.read_csv('../processedDatasets/fieldData_R4_seen_scales.txt', sep=' ', names=headers) 
dSn_R104_seen = pd.read_csv('../processedDatasets/fieldData_R4_seen_norm.txt', sep=' ', names=headers)
dS_R104_seen = pd.read_csv('../processedDatasets/fieldData_R4_seen.txt', sep=' ', names=headers)

# Re = 10^4 unseen
dSn_R104_unseen_means = pd.read_csv('../processedDatasets/fieldData_R4_unseen_means.txt', sep=' ', names=headers) 
dSn_R104_unseen_scales = pd.read_csv('../processedDatasets/fieldData_R4_unseen_scales.txt', sep=' ', names=headers) 
dSn_R104_unseen = pd.read_csv('../processedDatasets/fieldData_R4_unseen_norm.txt', sep=' ', names=headers)
dS_R104_unseen = pd.read_csv('../processedDatasets/fieldData_R4_unseen.txt', sep=' ', names=headers)

# Model Configuration Setup

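In this section, we define the four model configurations (M1–M4) listed in the table below. Each configuration uses a different combination of input features to predict the Smagorinsky coefficient $c_s$.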

$$
\begin{array}{|l|c|c|c|c|}
    \hline
    \textbf{Model} & \textbf{Inputs} & \textbf{No. of Inputs} & \textbf{Outputs} & \textbf{No. of Outputs} \\
    \hline
    \mathbf{M1} & u_i \, \text{and} \, \mathcal{S}_{ij} & 9 & c_s & 1\\
    \mathbf{M2} & \mathcal{G}_{ij} \, \text{and} \, \mathcal{S}_{ij} & 12 & c_s & 1 \\
    \mathbf{M3} & u_i \, \text{and} \, \tau^{'}_{ij} & 9 & c_s & 1 \\
    \mathbf{M4} & \mathcal{G}_{ij} \, \text{and} \, \tau^{'}_{ij} & 12 & c_s & 1 \\
    \hline
\end{array}
$$

In [6]:
# Short aliases for the `*_norm` frames of the "seen" subsets, one per Reynolds number.
# These are plain references to the loaded frames, not copies.
R103, R503, R104 = dSn_R103_seen, dSn_R503_seen, dSn_R104_seen

# Matching aliases for the "unseen" subsets (used for the *_test frames further down).
R103_un, R503_un, R104_un = dSn_R103_unseen, dSn_R503_unseen, dSn_R104_unseen

In [7]:
# Column groups shared by the four input configurations.
_velocity_cols = ['Ux', 'Uy', 'Uz']
_gradient_cols = ['G1', 'G2', 'G3', 'G4', 'G5', 'G6']
_strain_cols = ['S1',  'S2', 'S3', 'S4', 'S5', 'S6']
_stress_cols = ['UUp1',  'UUp2', 'UUp3', 'UUp4', 'UUp5', 'UUp6']
_target_cols = ['Cs']

# Inputs followed by the target column (always last) for each configuration.
M1_headers = _velocity_cols + _strain_cols + _target_cols
M2_headers = _gradient_cols + _strain_cols + _target_cols
M3_headers = _velocity_cols + _stress_cols + _target_cols
M4_headers = _gradient_cols + _stress_cols + _target_cols


def _select_per_model(frame):
    """Return the M1, M2, M3 and M4 column subsets of `frame`, in that order."""
    return tuple(
        frame.filter(columns, axis=1)
        for columns in (M1_headers, M2_headers, M3_headers, M4_headers)
    )


# Subsets of the "seen" frames.
M1_103, M2_103, M3_103, M4_103 = _select_per_model(R103)
M1_503, M2_503, M3_503, M4_503 = _select_per_model(R503)
M1_104, M2_104, M3_104, M4_104 = _select_per_model(R104)

# Subsets of the "unseen" frames.
M1_103_test, M2_103_test, M3_103_test, M4_103_test = _select_per_model(R103_un)
M1_503_test, M2_503_test, M3_503_test, M4_503_test = _select_per_model(R503_un)
M1_104_test, M2_104_test, M3_104_test, M4_104_test = _select_per_model(R104_un)

# Model Training

In [8]:
# Choose the dataset to train on by NAME and look the frame up from it.
#
# The previous form derived the name from the object via
# `namestr(M3_503, globals())[0]`. NOTE(review): `namestr` lives in utils and is
# not visible here, but taking element [0] of a globals() lookup presumably
# depends on which global names reference the same frame (`dt` itself, or
# IPython's `_` / `Out` after displaying it), so the result could change with
# execution history. Setting the name explicitly keeps the output file names
# (best_model_*, training_history_*, traced_model_*) deterministic.
dt_name = "M3_503"
dt = globals()[dt_name]

In [9]:
# Network dimensions: the last column of `dt` is the target, the rest are inputs.
output_size = 1
input_size = dt.shape[1] - output_size
neurons_per_layer = [60, 60, 60, 60, 60]
hidden_layers = len(neurons_per_layer)

# Random train / validation split. Each row goes to `train` with probability
# `split_sz`, so the realised ratio is only approximately 80 / 20.
# A dedicated seeded generator makes the split identical on every
# Restart-and-Run-All; the unseeded np.random.rand used before did not.
split_sz = 0.8
split_seed = 35
mask = np.random.default_rng(split_seed).random(len(dt)) < split_sz
train = dt[mask].reset_index(drop=True)
val = dt[~mask].reset_index(drop=True)

# Every row must land in exactly one of the two subsets.
assert len(train) + len(val) == len(dt)

In [10]:
# Display the training frame (pandas truncates the rendering to the first and last rows).
train

Unnamed: 0,Ux,Uy,Uz,UUp1,UUp2,UUp3,UUp4,UUp5,UUp6,Cs
0,0.387845,-0.088340,-0.157123,-0.393564,-0.150992,-0.382543,-0.485826,0.958239,-0.345330,0.365609
1,0.124171,1.153330,0.859819,-0.608379,0.006559,-0.006036,-0.726676,0.058893,-0.734172,0.811766
2,0.375118,0.015067,-0.251317,-0.482886,0.183596,-0.185654,-0.515060,-0.765348,-0.496401,-0.605361
3,0.382038,1.159673,0.899886,-0.414113,0.268310,-0.154491,-0.449333,-0.884296,-0.512653,0.102909
4,0.290710,0.444422,-0.126638,-0.254986,-0.083773,0.312756,0.046591,-0.094354,0.089267,0.147141
...,...,...,...,...,...,...,...,...,...,...
559875,-0.773576,-1.884674,0.984644,-0.215752,0.240980,0.479887,-0.172087,1.307309,0.027008,0.202450
559876,0.652647,-0.182953,0.105359,-0.596335,0.000273,-0.007337,-0.656253,-0.080959,-0.726984,-0.381208
559877,0.051503,0.966842,1.274566,-0.248935,0.239108,-0.252350,0.055365,-0.607500,0.116716,-0.048293
559878,0.198302,1.299420,0.399603,0.170223,1.043766,0.222861,0.293603,0.824015,-0.067349,-0.666026


In [11]:
# Display the validation frame (pandas truncates the rendering to the first and last rows).
val

Unnamed: 0,Ux,Uy,Uz,UUp1,UUp2,UUp3,UUp4,UUp5,UUp6,Cs
0,-0.806951,4.583876,-1.041129,0.565466,-1.395509,0.858814,0.483890,-2.105549,0.361092,-0.355598
1,0.331122,-0.098936,0.339499,-0.136983,-0.108289,0.656239,-0.326573,-0.459737,-0.058654,0.073648
2,0.257969,0.999813,-1.619224,0.049720,-0.484466,-0.677437,0.611332,1.317376,0.661558,0.043604
3,0.348334,1.064006,0.259524,-0.248101,-0.369744,0.290407,-0.051329,-1.120842,-0.137919,0.925802
4,0.277944,-0.780229,-0.668735,-0.035403,-0.249287,-0.668084,0.388385,1.016835,0.540469,0.264835
...,...,...,...,...,...,...,...,...,...,...
140115,0.154721,-0.166833,-0.130907,-0.574310,-0.011494,0.051275,-0.683490,-0.323395,-0.552799,-0.050861
140116,0.412532,0.118743,-0.169684,-0.391341,0.261071,-0.287378,-0.421448,-0.916625,-0.403891,0.464232
140117,0.212563,0.219343,0.084996,0.070092,0.067583,-0.982883,0.118424,0.007932,0.506940,-0.431113
140118,0.436170,0.250175,1.235964,-0.496356,0.017950,0.227348,-0.531077,0.189836,-0.309316,-0.219622


In [12]:
# Batch sizes: validation uses a quarter of the training batch size.
batch_sz_trn = 4096
batch_sz_val = batch_sz_trn // 4

# Wrap the frames in the project's Dataset class.
# NOTE(review): MyDataset is defined in the star-imported helpers; judging from
# how batches are sliced during training, each item is presumably one row with
# the target in the last position — confirm.
train_dataset = MyDataset(train)
val_dataset = MyDataset(val)

# Training batches are reshuffled every epoch. Validation batches are NOT
# shuffled: the validation metrics are averaged per batch, so a fixed batch
# composition keeps them (and the early-stopping decision) deterministic.
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_sz_trn, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=batch_sz_val, shuffle=False)

In [13]:
# Draw one training batch and show its first row as a quick look at what the loader yields.
# NOTE(review): this advances the loader's shuffling RNG, so it slightly changes
# the batch order seen by later cells compared with skipping this cell.
data_iter = iter(train_loader)
next(data_iter)[0]

tensor([ 0.4308,  0.4105, -0.4838, -0.5531,  0.0401, -0.0802, -0.6151, -0.6242,
        -0.5653,  0.3543], dtype=torch.float64)

In [14]:
# Seed torch so the weight initialisation of the Linear layers is repeatable.
torch.manual_seed(35)

# Use the GPU when one is available, otherwise fall back to the CPU instead of
# crashing on machines without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = MLPModel(input_size=input_size,
                 output_size=output_size,
                 hidden_layers=hidden_layers,
                 neurons_per_layer=neurons_per_layer)

# Move and cast the model BEFORE creating the optimizer. The batches produced by
# the loader are float64, so the parameters are promoted to double to match.
model.to(device)
model.double()

optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
# NOTE(review): StepLR multiplies the learning rate by 0.2 every 3 epochs, i.e.
# 1e-3 drops below 1e-9 after about 27 epochs, which effectively freezes
# training long before the configured epoch budget — confirm this schedule is
# intended.
scheduler = StepLR(optimizer, step_size=3, gamma=0.2)

# Last expression: show the architecture.
model

MLPModel(
  (block): Sequential(
    (0): Linear(in_features=9, out_features=60, bias=True)
    (1): ReLU()
    (2): Linear(in_features=60, out_features=60, bias=True)
    (3): ReLU()
    (4): Linear(in_features=60, out_features=60, bias=True)
    (5): ReLU()
    (6): Linear(in_features=60, out_features=60, bias=True)
    (7): ReLU()
    (8): Linear(in_features=60, out_features=60, bias=True)
    (9): ReLU()
    (10): Linear(in_features=60, out_features=1, bias=True)
  )
)

In [None]:
# Training configuration.
epochs = 6000
best_loss = float('inf')  # NOTE(review): never read or updated in this cell; kept in case later cells use it.
PATH = f"./best_model_{dt_name}.pt"

# NOTE(review): EarlyStopper comes from the star-imported helpers; it presumably
# checkpoints the best state_dict to PATH and signals a stop after `patience`
# epochs without improvement — confirm.
early_stopper = EarlyStopper(patience=300, path=PATH)

# Per-epoch history; every list receives exactly one entry per completed epoch.
history = {
    "train_loss": [],
    "val_loss": [],
    "train_coefficient": [],
    "val_coefficient": [],
    "learning_rates": [],
    "epoch_times": []
}


def _run_epoch(loader, description, training):
    """Run one full pass over `loader` and return (mean loss, mean coefficient).

    Both values are averages of the per-batch values, so batches of different
    size (typically the last one) carry the same weight as full batches.

    Args:
        loader: DataLoader yielding 2-D batches whose last column is the target.
        description: Label shown in front of the tqdm progress bar.
        training: If True the model is put in train mode and the optimizer is
            stepped on every batch; if False the model is put in eval mode and
            autograd is disabled, so no graph is built during validation.
    """
    model.train(training)
    loss_sum = 0.0
    coef_sum = 0.0

    with torch.set_grad_enabled(training):
        with tqdm(loader, unit="batch", desc=description) as progress:
            for batch in progress:
                features = batch[:, 0:-1].to(device)
                labels = batch[:, -1].to(device)
                # squeeze(-1) drops only the output dimension, so a batch that
                # contains a single sample keeps shape (1,) and still matches
                # `labels` (a bare squeeze() would collapse it to a scalar).
                preds = model(features).squeeze(-1)
                loss = torch.nn.functional.mse_loss(preds, labels)
                coef = coeff_determination(preds, labels)

                if training:
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()

                loss_sum += loss.item()
                coef_sum += coef.item()

    n_batches = len(loader)
    return loss_sum / n_batches, coef_sum / n_batches


for epoch in range(epochs):
    start_time = time.time()

    Loss_train, coeff_train = _run_epoch(train_loader, "Train", training=True)
    Loss_val, coeff_val = _run_epoch(val_loader, "Valid", training=False)

    scheduler.step()
    # Learning rate after this epoch's scheduler step. It is recorded ONCE per
    # epoch so that all history lists stay the same length.
    current_lr = optimizer.param_groups[0]['lr']

    epoch_duration = time.time() - start_time

    history["train_loss"].append(Loss_train)
    history["val_loss"].append(Loss_val)
    history["train_coefficient"].append(coeff_train)
    history["val_coefficient"].append(coeff_val)
    history["learning_rates"].append(current_lr)
    history["epoch_times"].append(epoch_duration)

    # Report before the early-stopping check so the final epoch is logged too.
    print(f"Epoch {epoch + 1}/{epochs}, Train Loss: {Loss_train:.4f}, Train Coeff: {coeff_train:.4f}, "
          f"Val Loss: {Loss_val:.4f}, Val Coeff: {coeff_val:.4f}, Time: {epoch_duration:.2f} sec")

    if early_stopper.early_stop(model.state_dict(), Loss_val):
        print("Early stopping...")
        break


with open(f"training_history_{dt_name}.json", "w") as f:
    json.dump(history, f)

print(f"Training history saved to 'training_history_{dt_name}.json'")

# Export a TorchScript trace using one batch of input features as the example.
# NOTE(review): this traces the weights of the LAST epoch; the best checkpoint
# that EarlyStopper presumably wrote to PATH is not reloaded first — confirm
# which weights the exported model should carry.
data_iter = iter(train_loader)
example_inputs = next(data_iter)[:, 0:-1].to(device)

model.eval()
traced_script_module = torch.jit.trace(model, example_inputs)
traced_script_module.save(f"traced_model_{dt_name}.pt")

print(f"Traced model saved to 'traced_model_{dt_name}.pt'")

Train: 100%|██████████| 137/137 [00:21<00:00,  6.29batch/s]
Valid: 100%|██████████| 137/137 [00:05<00:00, 25.35batch/s]


Epoch 1/6000, Train Loss: 0.9887, Train Coeff: -2662.8802, Val Loss: 0.9593, Val Coeff: -40.6907, Time: 27.19 sec


Train: 100%|██████████| 137/137 [00:20<00:00,  6.63batch/s]
Valid: 100%|██████████| 137/137 [00:05<00:00, 25.00batch/s]


Epoch 2/6000, Train Loss: 0.9347, Train Coeff: -20.4934, Val Loss: 0.9003, Val Coeff: -9.3260, Time: 26.14 sec


Train: 100%|██████████| 137/137 [00:20<00:00,  6.66batch/s]
Valid: 100%|██████████| 137/137 [00:05<00:00, 24.82batch/s]


Epoch 3/6000, Train Loss: 0.8893, Train Coeff: -8.9598, Val Loss: 0.8805, Val Coeff: -10.3717, Time: 26.11 sec


Train: 100%|██████████| 137/137 [00:20<00:00,  6.63batch/s]
Valid: 100%|██████████| 137/137 [00:05<00:00, 24.91batch/s]


Epoch 4/6000, Train Loss: 0.8583, Train Coeff: -6.4126, Val Loss: 0.8503, Val Coeff: -7.2742, Time: 26.17 sec


Train: 100%|██████████| 137/137 [00:20<00:00,  6.63batch/s]
Valid: 100%|██████████| 137/137 [00:05<00:00, 24.97batch/s]


Epoch 5/6000, Train Loss: 0.8466, Train Coeff: -5.6285, Val Loss: 0.8420, Val Coeff: -6.8638, Time: 26.14 sec


Train: 100%|██████████| 137/137 [00:20<00:00,  6.60batch/s]
Valid: 100%|██████████| 137/137 [00:05<00:00, 24.69batch/s]


Epoch 6/6000, Train Loss: 0.8401, Train Coeff: -5.2105, Val Loss: 0.8329, Val Coeff: -4.9219, Time: 26.31 sec


Train: 100%|██████████| 137/137 [00:20<00:00,  6.61batch/s]
Valid: 100%|██████████| 137/137 [00:05<00:00, 24.96batch/s]


Epoch 7/6000, Train Loss: 0.8325, Train Coeff: -4.8139, Val Loss: 0.8298, Val Coeff: -5.1102, Time: 26.22 sec


Train: 100%|██████████| 137/137 [00:20<00:00,  6.64batch/s]
Valid: 100%|██████████| 137/137 [00:05<00:00, 24.82batch/s]


Epoch 8/6000, Train Loss: 0.8288, Train Coeff: -4.7220, Val Loss: 0.8282, Val Coeff: -5.6228, Time: 26.15 sec


Train: 100%|██████████| 137/137 [00:20<00:00,  6.66batch/s]
Valid: 100%|██████████| 137/137 [00:05<00:00, 24.89batch/s]


Epoch 9/6000, Train Loss: 0.8280, Train Coeff: -4.7550, Val Loss: 0.8271, Val Coeff: -4.7903, Time: 26.10 sec


Train: 100%|██████████| 137/137 [00:20<00:00,  6.65batch/s]
Valid: 100%|██████████| 137/137 [00:05<00:00, 25.09batch/s]


Epoch 10/6000, Train Loss: 0.8268, Train Coeff: -4.6018, Val Loss: 0.8267, Val Coeff: -5.0656, Time: 26.07 sec


Train: 100%|██████████| 137/137 [00:20<00:00,  6.61batch/s]
Valid: 100%|██████████| 137/137 [00:05<00:00, 24.79batch/s]


Epoch 11/6000, Train Loss: 0.8259, Train Coeff: -4.6120, Val Loss: 0.8259, Val Coeff: -4.9267, Time: 26.27 sec


Train: 100%|██████████| 137/137 [00:20<00:00,  6.62batch/s]
Valid: 100%|██████████| 137/137 [00:05<00:00, 24.85batch/s]


Epoch 12/6000, Train Loss: 0.8256, Train Coeff: -4.6346, Val Loss: 0.8260, Val Coeff: -4.7161, Time: 26.20 sec


Train: 100%|██████████| 137/137 [00:20<00:00,  6.64batch/s]
Valid: 100%|██████████| 137/137 [00:05<00:00, 24.95batch/s]


Epoch 13/6000, Train Loss: 0.8253, Train Coeff: -4.5220, Val Loss: 0.8257, Val Coeff: -4.7412, Time: 26.14 sec


Train: 100%|██████████| 137/137 [00:20<00:00,  6.65batch/s]
Valid: 100%|██████████| 137/137 [00:05<00:00, 24.89batch/s]


Epoch 14/6000, Train Loss: 0.8253, Train Coeff: -4.5000, Val Loss: 0.8254, Val Coeff: -4.8191, Time: 26.12 sec


Train: 100%|██████████| 137/137 [00:20<00:00,  6.64batch/s]
Valid: 100%|██████████| 137/137 [00:05<00:00, 24.68batch/s]


Epoch 15/6000, Train Loss: 0.8254, Train Coeff: -4.5711, Val Loss: 0.8257, Val Coeff: -4.8541, Time: 26.19 sec


Train: 100%|██████████| 137/137 [00:20<00:00,  6.65batch/s]
Valid: 100%|██████████| 137/137 [00:05<00:00, 24.88batch/s]


Epoch 16/6000, Train Loss: 0.8256, Train Coeff: -4.5676, Val Loss: 0.8254, Val Coeff: -4.8459, Time: 26.13 sec


Train: 100%|██████████| 137/137 [00:20<00:00,  6.66batch/s]
Valid:  11%|█         | 15/137 [00:00<00:04, 25.22batch/s]