diff --git a/Allwmake b/Allwmake
index cbed1083..20ee38f5 100755
--- a/Allwmake
+++ b/Allwmake
@@ -19,8 +19,8 @@ wmake src/dynamicMesh
 wmake src/dynamicFvMesh
 
 wmake applications/solvers/df0DFoam
-wmake applications/solvers/dfLowMachFoam
-wmake applications/solvers/dfHighSpeedFoam
-wmake applications/solvers/dfSprayFoam
+# wmake applications/solvers/dfLowMachFoam
+# wmake applications/solvers/dfHighSpeedFoam
+# wmake applications/solvers/dfSprayFoam
 
 wmake applications/utilities/flameSpeed
diff --git a/applications/solvers/df0DFoam/Make/options b/applications/solvers/df0DFoam/Make/options
index a3fc8987..d689953e 100644
--- a/applications/solvers/df0DFoam/Make/options
+++ b/applications/solvers/df0DFoam/Make/options
@@ -1,4 +1,12 @@
+-include $(GENERAL_RULES)/mplibType
+PYTHON_INC_DIR := $(shell python3 -m pybind11 --includes)
+PYTHON_LIB_DIR := $(shell python3 -c "from distutils import sysconfig; \
+    import os.path as op; v = sysconfig.get_config_vars(); \
+    fpaths = [op.join(v[pv], v['LDLIBRARY']) for pv in ('LIBDIR', 'LIBPL')]; \
+    print(list(filter(op.exists, fpaths))[0])" | xargs dirname)
+
 EXE_INC = -std=c++14 \
+    $(PFLAGS) $(PINC) \
     -I$(LIB_SRC)/transportModels/compressible/lnInclude \
     -I$(LIB_SRC)/thermophysicalModels/basic/lnInclude \
     -I$(LIB_SRC)/TurbulenceModels/turbulenceModels/lnInclude \
@@ -10,9 +18,9 @@ EXE_INC = -std=c++14 \
     -I$(LIB_SRC)/dynamicFvMesh/lnInclude \
     -I$(DF_SRC)/CanteraMixture/lnInclude \
     -I$(DF_SRC)/dfChemistryModel/lnInclude \
+    -I$(LIB_SRC)/Pstream/mpi \
     -I$(CANTERA_ROOT)/include \
-    -I$(TORCH_ROOT)/include \
-    -I$(TORCH_ROOT)/include/torch/csrc/api/include
+    $(PYTHON_INC_DIR)
 
 EXE_LIBS = \
     -lcompressibleTransportModels \
@@ -23,12 +31,13 @@ EXE_LIBS = \
     -lmeshTools \
     -lsampling \
     -L$(FOAM_USER_LIBBIN) \
+    -L$(PYTHON_LIB_DIR) \
+    -L$(DF_SRC)/dfChemistryModel \
     -ldfFluidThermophysicalModels \
     -ldfCompressibleTurbulenceModels \
     -lCanteraMixture \
     -ldfChemistryModel \
     $(CANTERA_ROOT)/lib/libcantera.so \
-    $(TORCH_ROOT)/lib/libtorch.so \
-    $(TORCH_ROOT)/lib/libc10.so \
     -rdynamic \
-    -lpthread
+    -lpthread \
+    -lpython3.8
diff --git a/applications/solvers/df0DFoam/df0DFoam.C b/applications/solvers/df0DFoam/df0DFoam.C
index ac3df5c8..db36004a 100644
--- a/applications/solvers/df0DFoam/df0DFoam.C
+++ b/applications/solvers/df0DFoam/df0DFoam.C
@@ -7,27 +7,37 @@
 -------------------------------------------------------------------------------
 License
     This file is part of OpenFOAM.
+
     OpenFOAM is free software: you can redistribute it and/or modify it
     under the terms of the GNU General Public License as published by
     the Free Software Foundation, either version 3 of the License, or
     (at your option) any later version.
+
     OpenFOAM is distributed in the hope that it will be useful, but WITHOUT
     ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
     FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     for more details.
+
     You should have received a copy of the GNU General Public License
     along with OpenFOAM.  If not, see <http://www.gnu.org/licenses/>.
+
 Application
     rhoPimpleFoam
+
 Description
     Transient solver for turbulent flow of compressible fluids for HVAC and
     similar applications, with optional mesh motion and mesh topology changes.
+
     Uses the flexible PIMPLE (PISO-SIMPLE) solution for time-resolved and
     pseudo-transient simulations.
+
 \*---------------------------------------------------------------------------*/
 #include "dfChemistryModel.H"
 #include "CanteraMixture.H"
 #include "hePsiThermo.H"
+#include <pybind11/embed.h>
+#include <pybind11/numpy.h>
+#include <pybind11/stl.h> //used to convert
 
 #include "fvCFD.H"
 #include "dynamicFvMesh.H"
@@ -44,9 +54,15 @@ Description
 
 int main(int argc, char *argv[])
 {
+    // initialize pybind
+    pybind11::scoped_interpreter guard{}; //start python interpreter
+
     #include "postProcess.H"
 
-    #include "setRootCaseLists.H"
+    // #include "setRootCaseLists.H"
+    #include "listOptions.H"
+    #include "setRootCase2.H"
+    #include "listOutput.H"
     #include "createTime.H"
     #include "createDynamicFvMesh.H"
     #include "createDyMControls.H"
@@ -99,4 +115,4 @@ int main(int argc, char *argv[])
 }
 
 
-// ************************************************************************* //
\ No newline at end of file
+// ************************************************************************* //
diff --git a/applications/solvers/df0DFoam/setRootCase2.H b/applications/solvers/df0DFoam/setRootCase2.H
new file mode 100644
index 00000000..45d966e6
--- /dev/null
+++ b/applications/solvers/df0DFoam/setRootCase2.H
@@ -0,0 +1,5 @@
+Foam::argList args(argc,argv,true,true,/*initialise=*/false);
+if (!args.checkRootCase())
+{
+    Foam::FatalError.exit();
+}
\ No newline at end of file
diff --git a/applications/solvers/dfSprayFoam/EEqn.H b/applications/solvers/dfSprayFoam/EEqn.H
index c014e7dc..6b82915e 100644
--- a/applications/solvers/dfSprayFoam/EEqn.H
+++ b/applications/solvers/dfSprayFoam/EEqn.H
@@ -1,14 +1,6 @@
 {
     volScalarField& he = thermo.he();
 
-
-    tmp<fvScalarMatrix> thcSource(new fvScalarMatrix(he, dimEnergy/dimTime));
-    fvScalarMatrix& hcSource = thcSource.ref();
-    forAll(Y, i)
-    {
-        hcSource.source() -= parcels.rhoTrans(i)*chemistry->mixture().Hc(i)/runTime.deltaT();
-    }
-    //hSource.source() -= parcels.hsTrans()/runTime.deltaT();// equivalent to parcels.Sh(he)
     fvScalarMatrix EEqn
     (
         fvm::ddt(rho, he) + mvConvection->fvmDiv(phi, he)
@@ -19,7 +11,6 @@
      ==
         rho*(U&g)
       + parcels.Sh(he)
-      + hcSource
       + fvc::div(hDiffCorrFlux)
       //+ radiation->Sh(thermo, he)
       //+ fvOptions(rho, he)
diff --git a/bashrc.raw b/bashrc.raw
index 3226621a..be308676 100644
--- a/bashrc.raw
+++ b/bashrc.raw
@@ -1,9 +1,7 @@
 export DF_ROOT=pwd
 export DF_SRC=pwd/src
 export SRC_ORIG=pwd/src_orig
-export TORCH_ROOT=TORCH_DIR
 export CANTERA_ROOT=CONDA_PREFIX
-export TORCH_ROOT=TORCH_DIR
 export CANTERA_DATA=$CANTERA_ROOT/share/cantera/data
 export LD_LIBRARY_PATH=$CANTERA_ROOT/lib:$LD_LIBRARY_PATH
-export LD_LIBRARY_PATH=$TORCH_ROOT/lib:$LD_LIBRARY_PATH
\ No newline at end of file
+export LD_LIBRARY_PATH=$TORCH_ROOT/lib:$LD_LIBRARY_PATH
diff --git a/examples/df0DFoam/zeroD_cubicReactor/CH4/torchSolver/constant/CanteraTorchProperties b/examples/df0DFoam/zeroD_cubicReactor/CH4/torchSolver/constant/CanteraTorchProperties
index 5b963032..fc0aecdd 100644
--- a/examples/df0DFoam/zeroD_cubicReactor/CH4/torchSolver/constant/CanteraTorchProperties
+++ b/examples/df0DFoam/zeroD_cubicReactor/CH4/torchSolver/constant/CanteraTorchProperties
@@ -31,7 +31,7 @@ zeroDReactor
 
 torch on;
 torchModel "new_modle.pt"; 
-torchParameters  
+torchParameters1  
 {
     Xmu  
     (
@@ -140,6 +140,18 @@ torchParameters
         3.357557946945996e-05
         0.0
     );
+    Tact 0  ;
+    Qdotact  0 ;
+}
+torchParameters2
+{
+    Tact 1700;
+    Qdotact  1e5;
+}
+torchParameters3
+{
+    Tact 2500;
+    Qdotact  5e5;
 }
 loadbalancing
 {
diff --git a/examples/df0DFoam/zeroD_cubicReactor/CH4/torchSolver/inference2.py b/examples/df0DFoam/zeroD_cubicReactor/CH4/torchSolver/inference2.py
new file mode 100644
index 00000000..bf0bd7b7
--- /dev/null
+++ b/examples/df0DFoam/zeroD_cubicReactor/CH4/torchSolver/inference2.py
@@ -0,0 +1,173 @@
+from builtins import Exception, print
+from calendar import prcal
+import torch
+import numpy as np
+import math
+import time
+import json
+import os
+from easydict import EasyDict as edict
+import torch.profiler
+import os
+
+torch.set_printoptions(precision=10)
+print('position 0 in inference.py')
+device = torch.device("cuda")
+device_ids = range(torch.cuda.device_count())
+
+
+
+class MyGELU(torch.nn.Module):
+    def __init__(self):
+        super(MyGELU, self).__init__()
+        self.torch_PI = 3.1415926536
+
+    def forward(self, x):
+        return 0.5 * x * (1 + torch.tanh(
+            math.sqrt(2 / self.torch_PI) * (x + 0.044715 * torch.pow(x, 3))))
+
+
+def json2Parser(json_path):
+    """load json and return parser-like object"""
+    with open(json_path, 'r') as f:
+        args = json.load(f)
+    return edict(args)
+
+
+class Net(torch.nn.Module):
+    def __init__(self):
+        super(Net, self).__init__()
+        neurons = layers
+        self.depth = len(neurons) - 1
+        self.actfun = MyGELU()
+        self.layers = []
+        for i in range(self.depth - 1):
+            self.layers.append(torch.nn.Linear(neurons[i], neurons[i + 1]))
+            self.layers.append(self.actfun)
+        self.layers.append(torch.nn.Linear(neurons[-2], neurons[-1]))  # last layer
+        self.fc = torch.nn.Sequential(*self.layers)
+
+    def forward(self, x):
+        x = self.fc(x)
+        return x
+try:
+    #glbal variable will only init once when called interperter
+    #load parameters from json
+    setting0 = json2Parser('settingsdrm19_0.json')
+    setting1 = json2Parser('settingsdrm19_1.json')
+    setting2 = json2Parser('settingsdrm19_2.json')
+    
+    lamda = setting0.power_transform
+    delta_t = setting0.delta_t
+    dim = setting0.dim
+    layers = setting0.layers
+
+    Xmu0 = torch.tensor(setting0.Xmu).unsqueeze(0).to(device)
+    Xstd0 = torch.tensor(setting0.Xstd).unsqueeze(0).to(device=device)
+    Ymu0 = torch.tensor(setting0.Ymu).unsqueeze(0).to(device=device)
+    Ystd0 = torch.tensor(setting0.Ystd).unsqueeze(0).to(device=device)
+
+    Xmu1 = torch.tensor(setting1.Xmu).unsqueeze(0).to(device=device)
+    Xstd1 = torch.tensor(setting1.Xstd).unsqueeze(0).to(device=device)
+    Ymu1 = torch.tensor(setting1.Ymu).unsqueeze(0).to(device=device)
+    Ystd1 = torch.tensor(setting1.Ystd).unsqueeze(0).to(device=device)
+
+    Xmu2 = torch.tensor(setting2.Xmu).unsqueeze(0).to(device=device)
+    Xstd2 = torch.tensor(setting2.Xstd).unsqueeze(0).to(device=device)
+    Ymu2 = torch.tensor(setting2.Ymu).unsqueeze(0).to(device=device)
+    Ystd2 = torch.tensor(setting2.Ystd).unsqueeze(0).to(device=device)
+    print('position 1 in inference.py')
+
+    #load module  
+    model0 = Net()
+    model1 = Net()
+    model2 = Net()
+    check_point0 = torch.load('modeldrm19_0.pt')
+    check_point1 = torch.load('modeldrm19_1.pt')
+    check_point2 = torch.load('modeldrm19_2.pt')
+    model0.load_state_dict(check_point0)
+    model1.load_state_dict(check_point1)
+    model2.load_state_dict(check_point2)
+    model0.to(device=device)
+    model1.to(device=device)
+    model2.to(device=device)
+    if len(device_ids) > 1:
+        model0 = torch.nn.DataParallel(model0, device_ids=device_ids)
+        model1 = torch.nn.DataParallel(model1, device_ids=device_ids)
+        model2 = torch.nn.DataParallel(model2, device_ids=device_ids)
+    print('call init')
+except Exception as e:
+    print(e.args)
+
+
+def inference(vec0, vec1, vec2):
+    '''
+    use model to inference
+    '''
+    #args = np.reshape(args, (-1, 9)) #reshape to formed size
+    vec0 = np.reshape(vec0, (-1, 24))
+    vec1 = np.reshape(vec1, (-1, 24))
+    vec2 = np.reshape(vec2, (-1, 24))
+
+    try:
+        with torch.no_grad():
+            input0_ = torch.from_numpy(vec0).double().to(device=device) #cast ndarray to torch tensor
+            input1_ = torch.from_numpy(vec1).double().to(device=device) #cast ndarray to torch tensor
+            input2_ = torch.from_numpy(vec2).double().to(device=device) #cast ndarray to torch tensor
+
+            # pre_processing
+            rho0 = input0_[:, 0].unsqueeze(1)
+            input0_Y = input0_[:, 3:].clone()
+            input0_bct = input0_[:, 1:]
+            input0_bct[:, 2:] = (input0_bct[:, 2:]**(lamda) - 1) / lamda #BCT
+            input0_normalized = (input0_bct - Xmu0) / Xstd0
+            input0_normalized[:, -1] = 0 #set Y_AR to 0
+            input0_normalized = input0_normalized.float()
+
+            rho1 = input1_[:, 0].unsqueeze(1)
+            input1_Y = input1_[:, 3:].clone()
+            input1_bct = input1_[:, 1:]
+            input1_bct[:, 2:] = (input1_bct[:, 2:]**(lamda) - 1) / lamda #BCT
+            input1_normalized = (input1_bct - Xmu1) / Xstd1
+            input1_normalized[:, -1] = 0 #set Y_AR to 0
+            input1_normalized = input1_normalized.float()
+
+
+            rho2 = input2_[:, 0].unsqueeze(1)
+            input2_Y = input2_[:, 3:].clone()
+            input2_bct = input2_[:, 1:]
+            input2_bct[:, 2:] = (input2_bct[:, 2:]**(lamda) - 1) / lamda #BCT
+            input2_normalized = (input2_bct - Xmu2) / Xstd2
+            input2_normalized[:, -1] = 0 #set Y_AR to 0
+            input2_normalized = input2_normalized.float()
+
+            #inference
+            output0_normalized = model0(input0_normalized)
+            output1_normalized = model1(input1_normalized)
+            output2_normalized = model2(input2_normalized)
+
+
+            # post_processing
+            output0_bct = (output0_normalized * Ystd0 + Ymu0) * delta_t + input0_bct
+            output0_Y = (lamda * output0_bct[:, 2:] + 1)**(1 / lamda)
+            output0_Y = output0_Y / torch.sum(input=output0_Y, dim=1, keepdim=True)
+            output0 = (output0_Y - input0_Y) * rho0 / delta_t   
+            output0 = output0.cpu().numpy()
+
+            output1_bct = (output1_normalized * Ystd1 + Ymu1) * delta_t + input1_bct
+            output1_Y = (lamda * output1_bct[:, 2:] + 1)**(1 / lamda)
+            output1_Y = output1_Y / torch.sum(input=output1_Y, dim=1, keepdim=True)
+            output1 = (output1_Y - input1_Y) * rho1 / delta_t
+            output1 = output1.cpu().numpy()
+
+            output2_bct = (output2_normalized * Ystd2 + Ymu2) * delta_t + input2_bct
+            output2_Y = (lamda * output2_bct[:, 2:] + 1)**(1 / lamda)
+            output2_Y = output2_Y / torch.sum(input=output2_Y, dim=1, keepdim=True)
+            output2 = (output2_Y - input2_Y) * rho2 / delta_t
+            output2 = output2.cpu().numpy()
+
+            result = np.append(output0, output1, axis=0)
+            result = np.append(result, output2, axis=0)
+            return result
+    except Exception as e:
+        print(e.args)
diff --git a/examples/df0DFoam/zeroD_cubicReactor/CH4/torchSolver/system/blockMeshDict b/examples/df0DFoam/zeroD_cubicReactor/CH4/torchSolver/system/blockMeshDict
index 35edbf14..55fcf734 100644
--- a/examples/df0DFoam/zeroD_cubicReactor/CH4/torchSolver/system/blockMeshDict
+++ b/examples/df0DFoam/zeroD_cubicReactor/CH4/torchSolver/system/blockMeshDict
@@ -31,7 +31,7 @@ vertices
 
 blocks
 (
-    hex (0 1 2 3 4 5 6 7) (1 1 1) simpleGrading (1 1 1)
+    hex (0 1 2 3 4 5 6 7) (50 50 50) simpleGrading (1 1 1)
 );
 
 edges
diff --git a/examples/df0DFoam/zeroD_cubicReactor/CH4/torchSolver/system/controlDict b/examples/df0DFoam/zeroD_cubicReactor/CH4/torchSolver/system/controlDict
index 04266ca8..c5e77aed 100644
--- a/examples/df0DFoam/zeroD_cubicReactor/CH4/torchSolver/system/controlDict
+++ b/examples/df0DFoam/zeroD_cubicReactor/CH4/torchSolver/system/controlDict
@@ -23,7 +23,7 @@ startTime       0;
 
 stopAt          endTime;
 
-endTime         0.001;
+endTime         1e-3;
 
 deltaT          1e-06;
 
@@ -33,7 +33,7 @@ adjustTimeStep  off;
 
 writeControl    runTime;
 
-writeInterval   0.01
+writeInterval   0.01;
 
 purgeWrite      0;
 
diff --git a/examples/df0DFoam/zeroD_cubicReactor/CH4/torchSolver/system/decomposeParDict b/examples/df0DFoam/zeroD_cubicReactor/CH4/torchSolver/system/decomposeParDict
new file mode 100644
index 00000000..b4aa51b8
--- /dev/null
+++ b/examples/df0DFoam/zeroD_cubicReactor/CH4/torchSolver/system/decomposeParDict
@@ -0,0 +1,45 @@
+/*--------------------------------*- C++ -*----------------------------------*\
+  =========                 |
+  \\      /  F ield         | OpenFOAM: The Open Source CFD Toolbox
+   \\    /   O peration     | Website:  https://openfoam.org
+    \\  /    A nd           | Version:  7
+     \\/     M anipulation  |
+\*---------------------------------------------------------------------------*/
+FoamFile
+{
+    version     2.0;
+    format      ascii;
+    class       dictionary;
+    location    "system";
+    object      decomposeParDict;
+}
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
+
+numberOfSubdomains 8;
+
+method          scotch;
+
+simpleCoeffs
+{
+    n               (4 1 1);
+    delta           0.001;
+}
+
+hierarchicalCoeffs
+{
+    n               (1 1 1);
+    delta           0.001;
+    order           xyz;
+}
+
+manualCoeffs
+{
+    dataFile        "";
+}
+
+distributed     no;
+
+roots           ( );
+
+
+// ************************************************************************* //
diff --git a/examples/df0DFoam/zeroD_cubicReactor/CH4/torchSolver/system/probes b/examples/df0DFoam/zeroD_cubicReactor/CH4/torchSolver/system/probes
index 69419544..e327da35 100644
--- a/examples/df0DFoam/zeroD_cubicReactor/CH4/torchSolver/system/probes
+++ b/examples/df0DFoam/zeroD_cubicReactor/CH4/torchSolver/system/probes
@@ -12,7 +12,7 @@ Description
 
 #includeEtc "caseDicts/postProcessing/probes/probes.cfg"
 
-fields (T);// Writes out T
+fields (T Qdot);// Writes out T
 probeLocations
 (
     (0.0025 0.0025 0.0025)
diff --git a/install.sh b/install.sh
index 5158d135..ce4b25bd 100755
--- a/install.sh
+++ b/install.sh
@@ -1,25 +1,6 @@
-#!/bin/sh
-
-if [ -d "thirdParty/libtorch" ]; then
-    echo "libtorch exist."
-else
-
-    if [ -e libtorch-cxx11-abi-shared-with-deps-1.11.0+cpu.zip ]
-    then
-        echo "libtorch.zip exist."
-    else
-        wget https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-1.11.0%2Bcpu.zip
-    fi
-
-    unzip libtorch-cxx11-abi-shared-with-deps-1.11.0+cpu.zip -d thirdParty
-fi
-
-
 cp bashrc.raw bashrc
 sed -i s#pwd#$PWD#g ./bashrc
 sed -i s#CONDA_PREFIX#$CONDA_PREFIX#g ./bashrc
-TORCH_DIR=$PWD/thirdParty/libtorch
-sed -i s#TORCH_DIR#$TORCH_DIR#g ./bashrc
 
 
 
@@ -42,4 +23,4 @@ fi
 
 
 source ./bashrc
-./Allwmake -j
\ No newline at end of file
+./Allwmake -j
diff --git a/src/dfChemistryModel/GpuInference/DynamicBuffer.C b/src/dfChemistryModel/GpuInference/DynamicBuffer.C
new file mode 100644
index 00000000..67aa4764
--- /dev/null
+++ b/src/dfChemistryModel/GpuInference/DynamicBuffer.C
@@ -0,0 +1,28 @@
+/*---------------------------------------------------------------------------*\
+  =========                 |
+  \\      /  F ield         | DLBFoam: Dynamic Load Balancing 
+   \\    /   O peration     | for fast reactive simulations
+    \\  /    A nd           | 
+     \\/     M anipulation  | 2020, Aalto University, Finland
+-------------------------------------------------------------------------------
+License
+    This file is part of DLBFoam library, derived from OpenFOAM.
+
+    https://github.com/blttkgl/DLBFoam
+
+    OpenFOAM is free software: you can redistribute it and/or modify it
+    under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+    OpenFOAM is distributed in the hope that it will be useful, but WITHOUT
+    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+    FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+    for more details.
+    You should have received a copy of the GNU General Public License
+    along with OpenFOAM.  If not, see <http://www.gnu.org/licenses/>.
+    
+\*---------------------------------------------------------------------------*/
+#include "DynamicBuffer.H"
+namespace Foam{
+
+}
\ No newline at end of file
diff --git a/src/dfChemistryModel/GpuInference/DynamicBuffer.H b/src/dfChemistryModel/GpuInference/DynamicBuffer.H
new file mode 100644
index 00000000..9c2d9cd2
--- /dev/null
+++ b/src/dfChemistryModel/GpuInference/DynamicBuffer.H
@@ -0,0 +1,48 @@
+/*---------------------------------------------------------------------------*\
+  =========                 |
+  \\      /  F ield         | DLBFoam: Dynamic Load Balancing 
+   \\    /   O peration     | for fast reactive simulations
+    \\  /    A nd           | 
+     \\/     M anipulation  | 2020, Aalto University, Finland
+-------------------------------------------------------------------------------
+License
+    This file is part of DLBFoam library, derived from OpenFOAM.
+
+    https://github.com/blttkgl/DLBFoam
+
+    OpenFOAM is free software: you can redistribute it and/or modify it
+    under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    OpenFOAM is distributed in the hope that it will be useful, but WITHOUT
+    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+    FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+    for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with OpenFOAM.  If not, see <http://www.gnu.org/licenses/>.
+
+Class
+    Foam::DynamicBuffer
+
+Description
+    Currently just a typedef to DynamicList<DynamicList>. Could possibly be made
+    constant size at some point to avoid allocations during runtime.
+
+\*---------------------------------------------------------------------------*/
+
+#ifndef DynamicBuffer_H
+#define DynamicBuffer_H
+
+#include "DynamicList.H"
+
+namespace Foam
+{
+
+template <class T>
+using DynamicBuffer = DynamicList<DynamicList<T>>;
+
+}
+
+#endif
\ No newline at end of file
diff --git a/src/dfChemistryModel/GpuInference/GpuProblem.C b/src/dfChemistryModel/GpuInference/GpuProblem.C
new file mode 100644
index 00000000..49dd9ccd
--- /dev/null
+++ b/src/dfChemistryModel/GpuInference/GpuProblem.C
@@ -0,0 +1,29 @@
+/*---------------------------------------------------------------------------*\
+  =========                 |
+  \\      /  F ield         | OpenFOAM: The Open Source CFD Toolbox
+   \\    /   O peration     |
+    \\  /    A nd           | Copyright (C) 2011-2018 OpenFOAM Foundation
+     \\/     M anipulation  |
+-------------------------------------------------------------------------------
+License
+    This file is part of OpenFOAM.
+
+    OpenFOAM is free software: you can redistribute it and/or modify it
+    under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    OpenFOAM is distributed in the hope that it will be useful, but WITHOUT
+    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+    FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+    for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with OpenFOAM.  If not, see <http://www.gnu.org/licenses/>.
+
+\*---------------------------------------------------------------------------*/
+
+#include "GpuProblem.H"
+namespace Foam{
+
+}
\ No newline at end of file
diff --git a/src/dfChemistryModel/GpuInference/GpuProblem.H b/src/dfChemistryModel/GpuInference/GpuProblem.H
new file mode 100644
index 00000000..7fcbe2d5
--- /dev/null
+++ b/src/dfChemistryModel/GpuInference/GpuProblem.H
@@ -0,0 +1,99 @@
+/*---------------------------------------------------------------------------*\
+  =========                 |
+  \\      /  F ield         | OpenFOAM: The Open Source CFD Toolbox
+   \\    /   O peration     |
+    \\  /    A nd           | Copyright (C) 2011-2018 OpenFOAM Foundation
+     \\/     M anipulation  |
+-------------------------------------------------------------------------------
+License
+    This file is part of OpenFOAM.
+
+    OpenFOAM is free software: you can redistribute it and/or modify it
+    under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    OpenFOAM is distributed in the hope that it will be useful, but WITHOUT
+    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+    FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+    for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with OpenFOAM.  If not, see <http://www.gnu.org/licenses/>.
+
+Class
+    Foam::GpuProblem
+
+Description
+    A small object containing everything required for solving the reaction rate
+    using the ODE solver. These are passed around in the load balancer.
+
+\*---------------------------------------------------------------------------*/
+
+#ifndef GpuProblem_H
+#define GpuProblem_H
+
+#include "volFields.H"
+
+namespace Foam
+{
+
+struct GpuProblem
+{
+
+    GpuProblem() = default;
+    GpuProblem(label nSpecie)
+        : Y(nSpecie), Ti(0), pi(0), rhoi(0), DNNid(0), cellid(0)
+    {
+    }
+
+    scalarList Y;
+    scalar Ti;
+    scalar pi;
+    scalar rhoi;
+    label DNNid;
+    label cellid;
+
+    // TODO: implement!
+    bool operator==(const GpuProblem& rhs) const
+    {
+        return false;
+    }
+
+    bool operator!=(const GpuProblem& rhs) const
+    {
+        return !(*this == rhs);
+    }
+};
+
+//- Serialization for send
+static inline Ostream& operator<<(Ostream& os, const GpuProblem& p)
+{
+
+    os << p.Y;
+    os << p.Ti;
+    os << p.pi;
+    os << p.rhoi;
+    os << p.DNNid;
+    os << p.cellid;
+
+    return os;
+}
+
+//- Get a serialized problem from IStream
+static inline Istream& operator>>(Istream& is, GpuProblem& p)
+{
+
+    is >> p.Y;
+    is >> p.Ti;
+    is >> p.pi;
+    is >> p.rhoi;
+    is >> p.DNNid;
+    is >> p.cellid;
+
+    return is;
+}
+
+} // namespace Foam
+
+#endif
\ No newline at end of file
diff --git a/src/dfChemistryModel/GpuInference/GpuSolution.C b/src/dfChemistryModel/GpuInference/GpuSolution.C
new file mode 100644
index 00000000..a5ef62c6
--- /dev/null
+++ b/src/dfChemistryModel/GpuInference/GpuSolution.C
@@ -0,0 +1,36 @@
+/*---------------------------------------------------------------------------*\
+  =========                 |
+  \\      /  F ield         | OpenFOAM: The Open Source CFD Toolbox
+   \\    /   O peration     |
+    \\  /    A nd           | Copyright (C) 2011-2018 OpenFOAM Foundation
+     \\/     M anipulation  |
+-------------------------------------------------------------------------------
+License
+    This file is part of OpenFOAM.
+
+    OpenFOAM is free software: you can redistribute it and/or modify it
+    under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    OpenFOAM is distributed in the hope that it will be useful, but WITHOUT
+    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+    FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+    for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with OpenFOAM.  If not, see <http://www.gnu.org/licenses/>.
+
+Class
+    Foam::GpuProblem
+
+Description
+    A small object containing everything required for solving the reaction rate
+    using the ODE solver. These are passed around in the load balancer.
+
+\*---------------------------------------------------------------------------*/
+
+#include "GpuSolution.H"
+namespace Foam{
+
+}
\ No newline at end of file
diff --git a/src/dfChemistryModel/GpuInference/GpuSolution.H b/src/dfChemistryModel/GpuInference/GpuSolution.H
new file mode 100644
index 00000000..fa88a674
--- /dev/null
+++ b/src/dfChemistryModel/GpuInference/GpuSolution.H
@@ -0,0 +1,86 @@
+/*---------------------------------------------------------------------------*\
+  =========                 |
+  \\      /  F ield         | DLBFoam: Dynamic Load Balancing 
+   \\    /   O peration     | for fast reactive simulations
+    \\  /    A nd           | 
+     \\/     M anipulation  | 2020, Aalto University, Finland
+-------------------------------------------------------------------------------
+License
+    This file is part of DLBFoam library, derived from OpenFOAM.
+
+    https://github.com/blttkgl/DLBFoam
+
+    OpenFOAM is free software: you can redistribute it and/or modify it
+    under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+    OpenFOAM is distributed in the hope that it will be useful, but WITHOUT
+    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+    FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+    for more details.
+    You should have received a copy of the GNU General Public License
+    along with OpenFOAM.  If not, see <http://www.gnu.org/licenses/>.
+
+Class
+    Foam::GpuSolution
+    
+Description
+    A small object containing everything required for updating the reaction rate
+    and the chemistry time step. These are passed around in the load balancer.
+
+\*---------------------------------------------------------------------------*/
+
+#ifndef GpuSolution_H
+#define GpuSolution_H
+
+#include "volFields.H"
+
+namespace Foam
+{
+
+struct GpuSolution
+{
+
+    GpuSolution() = default;
+
+    GpuSolution(label nspecie)
+        : Ti(0), RRi(nspecie, 0.0), cellid(0)
+    {
+    }
+
+    bool operator==(const GpuSolution& rhs) const
+    {
+        return false;
+    }
+
+    bool operator!=(const GpuSolution& rhs) const
+    {
+        return !(*this == rhs);
+    }
+
+    scalar Ti;
+    scalarList RRi;
+    label cellid;
+};
+
+//- Serialization for send
+static inline Ostream& operator<<(Ostream& os, const GpuSolution& s)
+{
+    os << s.Ti;
+    os << s.RRi;
+    os << s.cellid;
+    return os;
+}
+
+//- Get a serialized solution from IStream
+static inline Istream& operator>>(Istream& is, GpuSolution& s)
+{
+    is >> s.Ti;
+    is >> s.RRi;
+    is >> s.cellid;
+    return is;
+}
+
+} // namespace Foam
+
+#endif
\ No newline at end of file
diff --git a/src/dfChemistryModel/Make/files b/src/dfChemistryModel/Make/files
index f130bf76..7046caa9 100644
--- a/src/dfChemistryModel/Make/files
+++ b/src/dfChemistryModel/Make/files
@@ -8,6 +8,9 @@ loadBalancing/algorithms_DLB.C
 loadBalancing/runtime_assert.C
 loadBalancing/LoadBalancer.C
 
+GpuInference/GpuProblem.C
+GpuInference/GpuSolution.C
+
 makeDfChemistryModels.C
 
 LIB = $(FOAM_USER_LIBBIN)/libdfChemistryModel
\ No newline at end of file
diff --git a/src/dfChemistryModel/Make/options b/src/dfChemistryModel/Make/options
index 9a248630..9efe95b0 100644
--- a/src/dfChemistryModel/Make/options
+++ b/src/dfChemistryModel/Make/options
@@ -1,4 +1,12 @@
+-include $(GENERAL_RULES)/mplibType
+PYTHON_INC_DIR := $(shell python3 -m pybind11 --includes)
+PYTHON_LIB_DIR := $(shell python3 -c "from distutils import sysconfig; \
+    import os.path as op; v = sysconfig.get_config_vars(); \
+    fpaths = [op.join(v[pv], v['LDLIBRARY']) for pv in ('LIBDIR', 'LIBPL')]; \
+    print(list(filter(op.exists, fpaths))[0])" | xargs dirname)
+
 EXE_INC = -std=c++14 \
+    $(PFLAGS) $(PINC) \
     -I$(LIB_SRC)/transportModels/compressible/lnInclude \
     -I$(LIB_SRC)/thermophysicalModels/basic/lnInclude \
     -I$(LIB_SRC)/TurbulenceModels/turbulenceModels/lnInclude \
@@ -6,18 +14,17 @@ EXE_INC = -std=c++14 \
     -I$(LIB_SRC)/finiteVolume/lnInclude \
     -I$(DF_SRC)/CanteraMixture/lnInclude \
     -I$(CANTERA_ROOT)/include \
-    -I$(TORCH_ROOT)/include \
-    -I$(TORCH_ROOT)/include/torch/csrc/api/include
+    -I$(LIB_SRC)/Pstream/mpi \
+    $(PYTHON_INC_DIR)
 
 EXE_LIBS = \
     -lcompressibleTransportModels \
     -lturbulenceModels \
     -L$(FOAM_USER_LIBBIN) \
+    -L$(PYTHON_LIB_DIR) \
     -ldfFluidThermophysicalModels \
     -ldfCompressibleTurbulenceModels \
     -lCanteraMixture \
     $(CANTERA_ROOT)/lib/libcantera.so \
-    $(TORCH_ROOT)/lib/libtorch.so \
-    $(TORCH_ROOT)/lib/libc10.so \
     -rdynamic \
     -lpthread
diff --git a/src/dfChemistryModel/dfChemistryModel.C b/src/dfChemistryModel/dfChemistryModel.C
index e80e1a7d..57119df9 100644
--- a/src/dfChemistryModel/dfChemistryModel.C
+++ b/src/dfChemistryModel/dfChemistryModel.C
@@ -27,6 +27,7 @@ License
 #include "UniformField.H"
 #include "clockTime.H"
 #include "runtime_assert.H"
+#include <unistd.h>
 
 
 // * * * * * * * * * * * * * * * * Constructors  * * * * * * * * * * * * * * //
@@ -102,12 +103,17 @@ Foam::dfChemistryModel<ThermoType>::dfChemistryModel
     if(torchSwitch_)
     {
         torchModelName_ = this->lookupOrDefault("torchModel", word(""));
-        Xmu_ = scalarList(this->subDict("torchParameters").lookup("Xmu"));
-        Xstd_ = scalarList(this->subDict("torchParameters").lookup("Xstd"));
-        Ymu_ = scalarList(this->subDict("torchParameters").lookup("Ymu"));
-        Ystd_ = scalarList(this->subDict("torchParameters").lookup("Ystd"));
-        Tact_ = this->subDict("torchParameters").lookupOrDefault("Tact", 700);
-        Qdotact_ = this->subDict("torchParameters").lookupOrDefault("Qdotact", 1e9);
+
+        Tact1_ = this->subDict("torchParameters1").lookupOrDefault("Tact", 700);
+        Qdotact1_ = this->subDict("torchParameters1").lookupOrDefault("Qdotact", 1e9);
+
+        Tact2_ = this->subDict("torchParameters2").lookupOrDefault("Tact", 700);
+        Qdotact2_ = this->subDict("torchParameters2").lookupOrDefault("Qdotact", 1e9);
+
+        Tact3_ = this->subDict("torchParameters3").lookupOrDefault("Tact", 700);
+        Qdotact3_ = this->subDict("torchParameters3").lookupOrDefault("Qdotact", 1e9);
+        
+        coresPerGPU = this->subDict("torchParameters1").lookupOrDefault("coresPerGPU", 8);
     }
 
     for(const auto& name : CanteraGas_->speciesNames())
@@ -180,7 +186,7 @@ Foam::dfChemistryModel<ThermoType>::dfChemistryModel
     {
         cpuSolveFile_ = logFile("cpu_solve.out");
         cpuSolveFile_() << "                  time" << tab
-                        << "           getProblems" << tab
+                        << "           getProblems" << tab  
                         << "           updateState" << tab
                         << "               balance" << tab
                         << "           solveBuffer" << tab
@@ -198,7 +204,6 @@ Foam::dfChemistryModel<ThermoType>::dfChemistryModel
     }
 }
 
-
 // * * * * * * * * * * * * * * * * Destructor  * * * * * * * * * * * * * * * //
 
 template<class ThermoType>
@@ -220,11 +225,11 @@ Foam::scalar Foam::dfChemistryModel<ThermoType>::solve
     scalar result = 0;
     if(torchSwitch_)
     {
-        result = torchSolve(deltaT);
+        result = torchDCUSolve(deltaT);
     }
     else
     {
-        result = solve_loadBalance(deltaT);
+        result = canteraSolve(deltaT);
     }
     return result;
 }
@@ -278,6 +283,8 @@ Foam::scalar Foam::dfChemistryModel<ThermoType>::canteraSolve
 
     Qdot_ = Zero;
 
+    Info << "Y_Chem" << Y_[0][826]<<endl;
+
     forAll(T_, cellI)
     {
         scalar Ti = T_[cellI];
@@ -338,13 +345,10 @@ void Foam::dfChemistryModel<ThermoType>::setNumerics(Cantera::ReactorNet &sim)
     sim.setTolerances(relTol_,absTol_);
 }
 
-
-template<class ThermoType>
-template<class DeltaTType>
-Foam::scalar Foam::dfChemistryModel<ThermoType>::torchSolve
-(
-    const DeltaTType& deltaT
-)
+template <class ThermoType>
+template <class DeltaTType>
+Foam::scalar Foam::dfChemistryModel<ThermoType>::torchCUDAoneCoreSolve(
+    const DeltaTType &deltaT)
 {
     scalar deltaTMin = great;
 
@@ -352,84 +356,56 @@ Foam::scalar Foam::dfChemistryModel<ThermoType>::torchSolve
     {
         return deltaTMin;
     }
-
-    Info<<"=== begin torch&ode-solve === "<<endl;
+    std::chrono::steady_clock::time_point start = std::chrono::steady_clock::now();
+    Info << "=== begin torchCUDAoneCore-solve === " << endl;
 
     // set variables
     scalarList yPre_(mixture_.nSpecies());
     scalarList yBCT_(mixture_.nSpecies());
-    scalarList u_(mixture_.nSpecies()+2); //plus T and p
+    scalarList u_(mixture_.nSpecies() + 2);
     Cantera::Reactor react;
     double lambda = 0.1;
 
-    // load model
-    torch::DeviceType device_type;
-    torch::DeviceIndex index = -1;
-    if (0)
-    {
-        device_type = torch::kCUDA;
-        index = 0;
-    }
-    else
-    {
-        device_type = torch::kCPU;
-        index = 0;
-    }
-    torch::Device device(device_type, index);
-    if (!isFile(torchModelName_))
-    {
-        FatalErrorInFunction
-            << torchModelName_
-            << " doesn't exist!"
-            << exit(FatalError);
-    }
-    torch::jit::script::Module torchModel_ = torch::jit::load(torchModelName_, device);
-
-    std::vector<label> torch_cell;
-    label torch_cellname= 0;
+    std::vector<size_t> torch_cell;
+    label torch_cellname = 0;
 
     // obtain the number of DNN cells
+    std::chrono::steady_clock::time_point start_0 = std::chrono::steady_clock::now();
     forAll(T_, cellI)
     {
-        if ((Qdot_[cellI] >= Qdotact_) && (T_[cellI] >= Tact_))
+        if (T_[cellI] >= Tact1_)
         {
             torch_cell.push_back(cellI);
         }
     }
 
-    torch::Tensor inputs(torch::zeros({torch_cell.size(),mixture_.nSpecies()+2}));
+    // generate GPU inputs and solve CVODE cells
+    std::vector<double> inputs_;
+    inputs_.reserve(torch_cell.size() * (CanteraGas_->nSpecies() + 3));
+
     forAll(T_, cellI)
     {
         scalar Ti = T_[cellI];
         scalar pi = p_[cellI];
         scalar rhoi = rho_[cellI];
 
-        if ((Qdot_[cellI] >= Qdotact_) && (Ti >= Tact_))
+        if (Ti >= Tact1_)
         {
             Qdot_[cellI] = 0.0;
-            // Info<<"Now is DNN "<<endl;
 
             // set inputs
-            std::vector<double> inputs_;
-            inputs_.push_back((Ti - Xmu_[0])/Xstd_[0]);
-            inputs_.push_back((pi / 101325 - Xmu_[1])/Xstd_[1]);
-            for (size_t i=0; i<CanteraGas_->nSpecies(); ++i)
-            {
-                yPre_[i] = Y_[i][cellI];
-                yBCT_[i] = (pow(yPre_[i],lambda) - 1) / lambda; // function BCT
-            }
-            for (size_t i=0; i<CanteraGas_->nSpecies(); ++i)
+            inputs_.push_back(rhoi);
+            inputs_.push_back(Ti);
+            inputs_.push_back(pi/101325);
+            for (size_t i = 0; i < CanteraGas_->nSpecies(); i++)
             {
-                inputs_.push_back((yBCT_[i] - Xmu_[i+2]) / Xstd_[i+2]);
+                inputs_.push_back(Y_[i][cellI]);
             }
-            inputs[torch_cellname] = torch::tensor(inputs_);
-            torch_cellname ++;
-            // Info << "cell_name = "<< cellI <<endl;
         }
         else
         {
             Qdot_[cellI] = 0.0;
-            for (size_t i=0; i<CanteraGas_->nSpecies(); ++i)
+            for (size_t i = 0; i < CanteraGas_->nSpecies(); i++)
             {
                 yPre_[i] = Y_[i][cellI];
             }
@@ -441,50 +417,367 @@ Foam::scalar Foam::dfChemistryModel<ThermoType>::torchSolve
             Cantera::ReactorNet sim;
             sim.addReactor(react);
             setNumerics(sim);
-            sim.advance(deltaT[cellI]);
+            sim.advance(deltaT);
 
             CanteraGas_->getMassFractions(yTemp_.begin());
 
-            for (size_t i=0; i<CanteraGas_->nSpecies(); ++i)
+            for (size_t i = 0; i < CanteraGas_->nSpecies(); i++)
             {
-                RR_[i][cellI] = (yTemp_[i] - yPre_[i])*rhoi/deltaT[cellI];
-                Qdot_[cellI] -= hc_[i]*RR_[i][cellI];
+                RR_[i][cellI] = (yTemp_[i] - yPre_[i]) * rhoi / deltaT;
+                Qdot_[cellI] -= hc_[i] * RR_[i][cellI];
             }
         }
     }
+    std::chrono::steady_clock::time_point stop_0 = std::chrono::steady_clock::now();
+    std::chrono::duration<double> processingTime_0 = std::chrono::duration_cast<std::chrono::duration<double>>(stop_0 - start_0);
+    std::cout << "beforeCUDATime = " << processingTime_0.count() << std::endl;
+
     // DNN
-    std::vector<torch::jit::IValue> INPUTS{inputs};
-    auto outputs_raw = torchModel_.forward(INPUTS);
-    auto outputs = outputs_raw.toTensor();
 
-    for(size_t cellI = 0; cellI<torch_cell.size(); cellI ++)
+    std::chrono::steady_clock::time_point start_3 = std::chrono::steady_clock::now();
+
+    pybind11::array_t<double> vec = pybind11::cast(inputs_);
+    pybind11::module_ call_torch = pybind11::module_::import("inference_H2"); // import python file
+    pybind11::object result = call_torch.attr("inference")(vec); // call function
+    const double* star = result.cast<pybind11::array_t<double>>().data();
+
+    std::vector<double> outputsVec(star, star + torch_cell.size() * 7);
+
+    std::chrono::steady_clock::time_point stop_3 = std::chrono::steady_clock::now();
+    std::chrono::duration<double> processingTime_3 = std::chrono::duration_cast<std::chrono::duration<double>>(stop_3 - start_3);
+    std::cout << "CUDATime = " << processingTime_3.count() << std::endl;
+
+    std::chrono::steady_clock::time_point start_2 = std::chrono::steady_clock::now();
+    for (size_t cellI = 0; cellI < torch_cell.size(); cellI++)
     {
         // update y
-        scalar Yt = 0;
-        for (size_t i=0; i<CanteraGas_->nSpecies(); ++i)
+        for (size_t i = 0; i < CanteraGas_->nSpecies(); i++)
+        {
+            RR_[i][torch_cell[cellI]] = outputsVec[cellI * 7 + i];
+            Qdot_[cellI] -= hc_[i] * RR_[i][cellI];
+        }
+    }
+    std::chrono::steady_clock::time_point stop_2 = std::chrono::steady_clock::now();
+    std::chrono::duration<double> processingTime_2 = std::chrono::duration_cast<std::chrono::duration<double>>(stop_2 - start_2);
+    std::cout << "afterCUDATime = " << processingTime_2.count() << std::endl;
+
+    Info << "=== end torch&ode-solve === " << endl;
+    std::chrono::steady_clock::time_point stop = std::chrono::steady_clock::now();
+    std::chrono::duration<double> processingTime = std::chrono::duration_cast<std::chrono::duration<double>>(stop - start);
+    std::cout << "allSolveTime = " << processingTime.count() << std::endl;
+
+    return deltaTMin;
+}
+
+template <class ThermoType>
+template<class DeltaTType>
+Foam::DynamicList<Foam::GpuProblem>
+Foam::dfChemistryModel<ThermoType>::getGPUProblems
+(
+    const DeltaTType& deltaT
+)
+{
+    DynamicList<GpuProblem> problemList; //single core TODO:rename it
+
+    // get cuda problemList, for all cell
+    // each get problem
+    forAll(T_, cellI)
+    {
+        scalar Ti = T_[cellI];
+        scalar pi = p_[cellI];
+        scalar rhoi = rho_[cellI];
+
+        // set problems
+        GpuProblem problem(CanteraGas_->nSpecies());
+        problem.cellid = cellI;
+        problem.Ti = Ti;
+        problem.pi = pi/101325;
+        for (size_t i = 0; i < CanteraGas_->nSpecies(); i++)
         {
-            yPre_[i] = Y_[i][torch_cell[cellI]];
-            yTemp_[i] = Y_[i][torch_cell[cellI]];
-            yBCT_[i] = (pow(yPre_[i],lambda) - 1) / lambda; // function BCT
+            problem.Y[i] = Y_[i][cellI];
         }
-        for (size_t i=0; i<(CanteraGas_->nSpecies()); ++i)//
+        // choose DNN module
+        if ((Qdot_[cellI] < Qdotact2_) && (T_[cellI] <= Tact2_)  && ( T_[cellI] >= Tact1_))//choose1
         {
-            u_[i+2] = outputs[cellI][i+2].item().to<double>()*Ystd_[i+2]+Ymu_[i+2];
-            yTemp_[i] = pow((yBCT_[i] + u_[i+2]*deltaT[cellI])*lambda+1,1/lambda);
-            Yt += yTemp_[i];
+            problem.DNNid = 0;
         }
-        for (size_t i=0; i<CanteraGas_->nSpecies(); ++i)
+        if(((Qdot_[cellI] <= Qdotact3_)&&(Qdot_[cellI] >= Qdotact2_) && (Tact3_ > T_[cellI])&&(T_[cellI] > Tact2_))||(Qdot_[cellI] > Qdotact3_))  //choose2
         {
-            yTemp_[i] = yTemp_[i] / Yt;
-            RR_[i][torch_cell[cellI]] = (yTemp_[i] - Y_[i][torch_cell[cellI]])*rho_[torch_cell[cellI]]/deltaT[cellI];
-            Qdot_[torch_cell[cellI]] -= hc_[i]*RR_[i][torch_cell[cellI]];
+            problem.DNNid = 1;
         }
+        if  ((Qdot_[cellI] <= Qdotact3_) && (T_[cellI] >= Tact3_))//if(Ti >= Tact_))//choose3
+        {
+            problem.DNNid = 2;
+        }
+        problem.rhoi = rhoi;
+        problemList.append(problem);
+        Qdot_[cellI] = 0.0;
     }
 
-    Info<<"=== end torch&ode-solve === "<<endl;
+    return problemList; 
+}
+
+template <class ThermoType>
+void Foam::dfChemistryModel<ThermoType>::getDNNinputs
+(
+    const Foam::DynamicBuffer<GpuProblem>& problemBuffer, 
+    std::vector<Foam::label>& outputLength,
+    std::vector<std::vector<double>>& DNNinputs, 
+    std::vector<Foam::DynamicBuffer<label>>& cellIDBuffer,
+    std::vector<std::vector<label>>& problemCounter
+)
+{
+    std::vector<label> problemCounter0;     // evaluate the number of the problems of each subslave for DNN0
+    std::vector<label> problemCounter1;     // evaluate the number of the problems of each subslave for DNN1
+    std::vector<label> problemCounter2;     // evaluate the number of the problems of each subslave for DNN2
+    std::vector<double> inputsDNN0;         // the vector constructed for inference via DNN0
+    std::vector<double> inputsDNN1;         // the vector constructed for inference via DNN1
+    std::vector<double> inputsDNN2;         // the vector constructed for inference via DNN2
+    DynamicList<label> cellIDList0;         // store the cellID of each problem in each subslave for DNN0
+    DynamicList<label> cellIDList1;         // store the cellID of each problem in each subslave for DNN1
+    DynamicList<label> cellIDList2;         // store the cellID of each problem in each subslave for DNN2
+    DynamicBuffer<label> cellIDList0Buffer; // store the cellIDList0 of each subslave
+    DynamicBuffer<label> cellIDList1Buffer; // store the cellIDList1 of each subslave
+    DynamicBuffer<label> cellIDList2Buffer; // store the cellIDList2 of each subslave
+
+    for (label i = 0; i < coresPerGPU; i++) // for all local core TODO: i may cause misleading
+    {
+        label counter0 = 0;
+        label counter1 = 0;
+        label counter2 = 0;
+        //TODO: parallel the loop
+        for (label cellI = 0; cellI < problemBuffer[i].size(); cellI++) // loop coresPerGPU*problemBuffer[i].size() times
+        {
+            switch (problemBuffer[i][cellI].DNNid) //divide by Dnn id
+            {
+            case 0:
+                inputsDNN0.push_back(problemBuffer[i][cellI].rhoi);
+                inputsDNN0.push_back(problemBuffer[i][cellI].Ti);
+                inputsDNN0.push_back(problemBuffer[i][cellI].pi);
+                for (size_t speciID = 0; speciID < CanteraGas_->nSpecies(); speciID++)
+                {
+                    inputsDNN0.push_back(problemBuffer[i][cellI].Y[speciID]);
+                }
+                counter0++;
+                cellIDList0.append(problemBuffer[i][cellI].cellid); // store cellid for further send back
+                break;
+
+            case 1:
+                inputsDNN1.push_back(problemBuffer[i][cellI].rhoi);
+                inputsDNN1.push_back(problemBuffer[i][cellI].Ti);
+                inputsDNN1.push_back(problemBuffer[i][cellI].pi);
+                for (size_t speciID = 0; speciID < CanteraGas_->nSpecies(); speciID++)
+                {
+                    inputsDNN1.push_back(problemBuffer[i][cellI].Y[speciID]);
+                }
+                counter1++;
+                cellIDList1.append(problemBuffer[i][cellI].cellid);
+                break;
+
+            case 2:
+                inputsDNN2.push_back(problemBuffer[i][cellI].rhoi);
+                inputsDNN2.push_back(problemBuffer[i][cellI].Ti);
+                inputsDNN2.push_back(problemBuffer[i][cellI].pi);
+                for (size_t speciID = 0; speciID < CanteraGas_->nSpecies(); speciID++)
+                {
+                    inputsDNN2.push_back(problemBuffer[i][cellI].Y[speciID]);
+                }
+                counter2++;
+                cellIDList2.append(problemBuffer[i][cellI].cellid);
+                break;
+            
+            default:
+                Info<<"invalid input"<<endl;
+                break;
+            }
+        }
+        problemCounter0.push_back(counter0); //count number of inputs mapped to each dnn
+        problemCounter1.push_back(counter1);
+        problemCounter2.push_back(counter2);
+        cellIDList0Buffer.append(cellIDList0);
+        cellIDList1Buffer.append(cellIDList1);
+        cellIDList2Buffer.append(cellIDList2);
+        cellIDList0.clear();
+        cellIDList1.clear();
+        cellIDList2.clear();
+    }
+
+    // get cellNumbers for each model
+    label length0 = std::accumulate(problemCounter0.begin(), problemCounter0.end(), 0);
+    label length1 = length0 + std::accumulate(problemCounter1.begin(), problemCounter1.end(), 0);
+    label length2 = length1 + std::accumulate(problemCounter2.begin(), problemCounter2.end(), 0);
+
+    // set output
+    outputLength = {length0, length1, length2};
+    DNNinputs = {inputsDNN0, inputsDNN1, inputsDNN2};
+    cellIDBuffer = {cellIDList0Buffer, cellIDList1Buffer, cellIDList2Buffer};
+    problemCounter = {problemCounter0, problemCounter1, problemCounter2};
+
+    Info<<"get inputs successfully"<<endl;
+
+    return;
+}
+
+template <class ThermoType>
+void Foam::dfChemistryModel<ThermoType>::updateSolutionBuffer
+(
+    Foam::DynamicBuffer<Foam::GpuSolution>& solutionBuffer, 
+    const double* star,
+    const std::vector<Foam::label>& outputLength,
+    const std::vector<Foam::DynamicBuffer<Foam::label>>& cellIDBuffer,
+    std::vector<std::vector<Foam::label>>& problemCounter
+)
+{
+    std::vector<double> outputsVec0(star, star+outputLength[0] * 21); //the float number is sample_length*sample_number
+    std::vector<double> outputsVec1(star+outputLength[0] * 21, star+outputLength[1] * 21);
+    std::vector<double> outputsVec2(star+outputLength[1] * 21, star+outputLength[2] * 21);
+
+    GpuSolution solution(CanteraGas_->nSpecies());
+    DynamicList<GpuSolution> solutionList; //TODO: rename
+
+    label outputCounter0 = 0;
+    label outputCounter1 = 0;
+    label outputCounter2 = 0;
+
+    for (label i = 0; i < coresPerGPU; i++) //TODO: i may cause misleading
+    {
+        for (size_t cellI = 0; cellI < problemCounter[0][i]; cellI++)
+        {
+            for (size_t speciID = 0; speciID < CanteraGas_->nSpecies(); speciID++)
+            {
+                solution.RRi[speciID] = outputsVec0[outputCounter0 * mixture_.nSpecies() + speciID];
+            }
+            solution.cellid = cellIDBuffer[0][i][cellI]; //cellid are sequential so that's fine
+            solutionList.append(solution);
+            outputCounter0++;
+        }
+        for (size_t cellI = 0; cellI < problemCounter[1][i]; cellI++)
+        {
+            for (size_t speciID = 0; speciID < CanteraGas_->nSpecies(); speciID++)
+            {
+                solution.RRi[speciID] = outputsVec1[outputCounter1 * mixture_.nSpecies() + speciID];
+            }
+            solution.cellid = cellIDBuffer[1][i][cellI];
+            solutionList.append(solution);
+            outputCounter1++;
+        }
+        for (size_t cellI = 0; cellI < problemCounter[2][i]; cellI++)
+        {
+            for (size_t speciID = 0; speciID < CanteraGas_->nSpecies(); speciID++)
+            {
+                solution.RRi[speciID] = outputsVec2[outputCounter2 * mixture_.nSpecies() + speciID];
+            }
+            solution.cellid = cellIDBuffer[2][i][cellI];
+            solutionList.append(solution);
+            outputCounter2++;
+        }
+    solutionBuffer.append(solutionList);
+    solutionList.clear();
+    }
+    return;
+}
+
+template <class ThermoType>
+template <class DeltaTType>
+Foam::scalar Foam::dfChemistryModel<ThermoType>::torchDCUSolve(
+    const DeltaTType &deltaT)
+{
+    scalar deltaTMin = great;
+    // set the cores slaved by a DCU
+    if (!this->chemistry_)
+    {
+        return deltaTMin;
+    }
+
+    Info << "=== begin torchDCUsolve === " << endl;
+
+    /*=============================gather problems=============================*/
+    DynamicList<GpuProblem> problemList = getGPUProblems(deltaT);
+
+    /*==============================send problems==============================*/
+    PstreamBuffers pBufs(Pstream::commsTypes::nonBlocking); 
+    if (Pstream::myProcNo() % coresPerGPU) //for slave 
+    {
+        UOPstream send((Pstream::myProcNo()/coresPerGPU)*coresPerGPU, pBufs);// sending problem to master
+        send << problemList;
+    }
+    pBufs.finishedSends();
+
+    DynamicBuffer<GpuSolution> solutionBuffer;
+
+    /*=============================submaster work start=============================*/
+    if (!(Pstream::myProcNo() % coresPerGPU))
+    {
+        label problemSize = 0; // problemSize is defined to debug
+        DynamicBuffer<GpuProblem> problemBuffer(coresPerGPU);//each submaster init a local problemBuffer TODO:rename it
+
+        /*==============================gather problems==============================*/
+        problemBuffer[0] = problemList; //problemList of submaster get index 0
+        problemSize += problemBuffer[0].size();
+
+        for (label i = 1; i < coresPerGPU; i++)
+        {
+            UIPstream recv(i + Pstream::myProcNo(), pBufs);
+            recv >> problemBuffer[i];  //recv previous send problem and append to problemList
+            problemSize += problemBuffer[i].size();
+        }
+        Info << "problemSize = " << problemSize << endl;
+
+        /*==============================construct DNN inputs==============================*/
+        std::vector<label> outputLength;
+        std::vector<std::vector<double>> DNNinputs;     // vectors for the inference of DNN
+        std::vector<DynamicBuffer<label>> cellIDBuffer; // Buffer contains the cell numbers
+        std::vector<std::vector<label>> problemCounter; // evaluate the number of the problems of each subslave
+
+        getDNNinputs(problemBuffer, outputLength, DNNinputs, cellIDBuffer, problemCounter);
+
+        /*=============================inference via pybind11=============================*/
+        pybind11::array_t<double> vec0 = pybind11::cast(DNNinputs[0]); // cast vector to np.array
+        pybind11::array_t<double> vec1 = pybind11::cast(DNNinputs[1]);
+        pybind11::array_t<double> vec2 = pybind11::cast(DNNinputs[2]);
+        pybind11::module_ call_torch = pybind11::module_::import("inference2"); // import python file
+        pybind11::object result = call_torch.attr("inference")(vec0, vec1, vec2); // call function
+        const double* star = result.cast<pybind11::array_t<double>>().data();
+
+        /*=============================construct solutions=============================*/
+        updateSolutionBuffer(solutionBuffer, star, outputLength, cellIDBuffer, problemCounter);
+        
+    }
+
+    /*=============================send and recv solutions=============================*/
+    DynamicList<GpuSolution> finalList;
+    PstreamBuffers pBufs2(Pstream::commsTypes::nonBlocking);
+    if (!(Pstream::myProcNo() % coresPerGPU))
+    {
+        finalList = solutionBuffer[0];
+        for (label i = 1; i < coresPerGPU; i++)
+        {
+            UOPstream send(i + Pstream::myProcNo(), pBufs2);
+            send << solutionBuffer[i];
+        }
+    }
+    pBufs2.finishedSends();
+    if (Pstream::myProcNo() % coresPerGPU)
+    {
+        UIPstream recv((Pstream::myProcNo()/coresPerGPU)*coresPerGPU, pBufs2);
+        recv >> finalList;
+    }
+
+    /*=============================update RR fields=============================*/
+    for (size_t cellI = 0; cellI < finalList.size(); cellI++)
+    {
+        for (size_t speciID = 0; speciID < CanteraGas_->nSpecies(); speciID++)
+        {
+            RR_[speciID][finalList[cellI].cellid] = finalList[cellI].RRi[speciID];
+            Qdot_[finalList[cellI].cellid] -= hc_[speciID] * RR_[speciID][finalList[cellI].cellid];
+        }
+    }
+
+    Info << "=== end torch&ode-CUDAsolve === " << endl;
     return deltaTMin;
 }
 
+
 template<class ThermoType>
 void Foam::dfChemistryModel<ThermoType>::correctThermo()
 {
@@ -656,7 +949,6 @@ void Foam::dfChemistryModel<ThermoType>::solveSingle
     const scalar pi = problem.pi;
     const scalar rhoi = problem.rhoi;
     const scalarList yPre_ = problem.Y;
-    scalar Qdoti_ = 0;
 
     CanteraGas_->setState_TPY(Ti, pi, yPre_.begin());
 
@@ -673,12 +965,10 @@ void Foam::dfChemistryModel<ThermoType>::solveSingle
     for (size_t i=0; i<CanteraGas_->nSpecies(); i++)
     {
         solution.RRi[i] = (yTemp_[i] - yPre_[i]) / problem.deltaT * rhoi;
-        Qdoti_ -= hc_[i]*solution.RRi[i];
     }
 
     // Timer ends
     solution.cpuTime = time.timeIncrement();
-    solution.Qdoti = Qdoti_;
 
     solution.cellid = problem.cellid;
 }
@@ -722,7 +1012,7 @@ Foam::dfChemistryModel<ThermoType>::getProblems
 
             solved_problems[celli] = problem;
         }
-
+        
     }
 
     return solved_problems;
@@ -784,7 +1074,6 @@ Foam::dfChemistryModel<ThermoType>::updateReactionRates
             {
                 this->RR_[j][solution.cellid] = solution.RRi[j];
             }
-            this->Qdot_[solution.cellid] = solution.Qdoti;
 
             cpuTimes_[solution.cellid] = solution.cpuTime;
         }
diff --git a/src/dfChemistryModel/dfChemistryModel.H b/src/dfChemistryModel/dfChemistryModel.H
index 7bd6b7cb..6fedc9f2 100644
--- a/src/dfChemistryModel/dfChemistryModel.H
+++ b/src/dfChemistryModel/dfChemistryModel.H
@@ -38,7 +38,10 @@ SourceFiles
 #ifndef dfChemistryModel_H
 #define dfChemistryModel_H
 
-#include <torch/script.h>
+// #include <torch/script.h>
+#include <pybind11/embed.h>
+#include <pybind11/numpy.h>
+#include <pybind11/stl.h> //used to convert
 #include "CanteraMixture.H"
 #include "IOdictionary.H"
 #include "Switch.H"
@@ -52,8 +55,15 @@ SourceFiles
 #include "RecvBuffer.H"
 #include "SendBuffer.H"
 #include "LoadBalancer.H"
+#include "GpuProblem.H"
+#include "GpuSolution.H"
+#include "DynamicBuffer.H"
 #include "OFstream.H"
 #include "IOmanip.H"
+#include "PstreamGlobals.H"
+
+// extern pybind11::module_ call_torch;
+
 // * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
 
 namespace Foam
@@ -115,8 +125,35 @@ public IOdictionary
     scalarList Xstd_;
     scalarList Ymu_;
     scalarList Ystd_;
-    scalar Tact_;
+    scalar Tact_low;
+    scalar Tact_high;
     scalar Qdotact_;
+    // GpuInference CUDA_;
+    int coresPerGPU;
+
+    word torchModelName1_;
+    scalarList Xmu1_;  //member function
+    scalarList Xstd1_;
+    scalarList Ymu1_;
+    scalarList Ystd1_;
+    scalar Tact1_;
+    scalar Qdotact1_;
+    
+    word torchModelName2_;
+    scalarList Xmu2_;  //member function
+    scalarList Xstd2_;
+    scalarList Ymu2_;
+    scalarList Ystd2_;
+    scalar Tact2_;
+    scalar Qdotact2_;
+    
+    word torchModelName3_;  
+    scalarList Xmu3_;  //member function
+    scalarList Xstd3_;
+    scalarList Ymu3_;
+    scalarList Ystd3_;
+    scalar Tact3_;
+    scalar Qdotact3_;
 
     // Load balancing object
     LoadBalancer balancer_;
@@ -135,49 +172,42 @@ public IOdictionary
         //- Disallow default bitwise assignment
         void operator=(const dfChemistryModel&);
 
-        //- override for Cantera
+        // override for Cantera
         template<class DeltaTType>
         scalar solve(const DeltaTType& deltaT);
 
         template<class DeltaTType>
         scalar canteraSolve(const DeltaTType& deltaT);
 
-        //- Solve the reaction system with Deep Neural Networks
         template<class DeltaTType>
         scalar torchSolve(const DeltaTType& deltaT);
 
-        //- Solve a single ChemistryProblem and put the solution to ChemistrySolution
-        void solveSingle(ChemistryProblem& problem, ChemistrySolution& solution);
-
-        //- Get the list of problems to be solved
         template<class DeltaTType>
-        DynamicList<ChemistryProblem> getProblems(const DeltaTType& deltaT);
-
-        //- Solve a list of chemistry problems and return a list of solutions
-        DynamicList<ChemistrySolution>
-        solveList(UList<ChemistryProblem>& problems);
+        scalar torchMultiDNNSolve(const DeltaTType& deltaT);
 
-        //- Solve the problem buffer coming from the balancer
-        RecvBuffer<ChemistrySolution>
-        solveBuffer(RecvBuffer<ChemistryProblem>& problems);
-
-        //- Update the reaction rates and heat release rates from a list of solutions
-        scalar updateReactionRates(const RecvBuffer<ChemistrySolution>& solutions);
+        template<class DeltaTType>
+        scalar torchCUDAoneCoreSolve(const DeltaTType& deltaT);
+        
+        template<class DeltaTType>
+        scalar torchDCUSolve(const DeltaTType& deltaT);
 
-        //- Create a load balancer object
-        LoadBalancer createBalancer();
+        template<class DeltaTType>
+        scalar torchCUDA0DSolve(const DeltaTType& deltaT);
 
-        //- Solve the reaction system with DLB algorithm
+        /*=======================private methods for DCUSolve=======================*/
+        //- get a list of GPU problems to be solved
         template<class DeltaTType>
-        scalar solve_loadBalance(const DeltaTType& deltaT);
+        DynamicList<GpuProblem> getGPUProblems(const DeltaTType& deltaT);
 
-        //- Output logFiles
-        Foam::autoPtr<Foam::OFstream> logFile(const word& name) const
-        {
-            mkDir(mesh_.time().path() / "loadBal" / this->group());
-            return autoPtr<OFstream>(new OFstream(
-                mesh_.time().path() / "loadBal" / this->group() / name));
-        }
+        //- get the input for DNN inference
+        void getDNNinputs(const DynamicBuffer<GpuProblem>& problemBuffer, std::vector<label>& outputlength,
+        std::vector<std::vector<double>>& DNNinputs, std::vector<DynamicBuffer<label>>& cellIDBuffer, 
+        std::vector<std::vector<label>>& problemCounter);
+
+        //- construct the output
+        void updateSolutionBuffer(DynamicBuffer<GpuSolution>& solutionBuffer, const double* star,
+        const std::vector<label>& outputLength, const std::vector<DynamicBuffer<label>>& cellIDBuffer,
+        std::vector<std::vector<label>>& problemCounter);
 
 public:
 
@@ -222,7 +252,7 @@ public:
 
         const volScalarField& rhoD(const label i) const {return rhoD_[i];}
 
-        const volScalarField& hai(const label i) {return hai_[i];}
+        volScalarField& hai(const label i) {return hai_[i];}
 
         // update T, psi, mu, alpha, rhoD, hai (if needed)
         void correctThermo();
@@ -230,6 +260,35 @@ public:
         ThermoType& thermo() {return thermo_;}
 
         const CanteraMixture& mixture() {return mixture_;}
+
+        void solveSingle(ChemistryProblem& problem, ChemistrySolution& solution);
+
+        template<class DeltaTType>
+        DynamicList<ChemistryProblem> getProblems(const DeltaTType& deltaT);
+
+        //- Solve a list of chemistry problems and return a list of solutions
+        DynamicList<ChemistrySolution>
+        solveList(UList<ChemistryProblem>& problems);
+
+        //- Solve the problem buffer coming from the balancer
+        RecvBuffer<ChemistrySolution>
+        solveBuffer(RecvBuffer<ChemistryProblem>& problems);
+
+        //- Update the reaction rates from a list of solutions
+        scalar updateReactionRates(const RecvBuffer<ChemistrySolution>& solutions);
+
+        //- Create a load balancer object
+        LoadBalancer createBalancer();
+
+        template<class DeltaTType>
+        scalar solve_loadBalance(const DeltaTType& deltaT);
+
+        Foam::autoPtr<Foam::OFstream> logFile(const word& name) const
+        {
+            mkDir(mesh_.time().path() / "loadBal" / this->group());
+            return autoPtr<OFstream>(new OFstream(
+                mesh_.time().path() / "loadBal" / this->group() / name));
+        }
 };
 
 
diff --git a/src/dfCombustionModels/Make/options b/src/dfCombustionModels/Make/options
index 78b4e8ea..f060546e 100644
--- a/src/dfCombustionModels/Make/options
+++ b/src/dfCombustionModels/Make/options
@@ -1,4 +1,12 @@
+-include $(GENERAL_RULES)/mplibType
+PYTHON_INC_DIR := $(shell python3 -m pybind11 --includes)
+PYTHON_LIB_DIR := $(shell python3 -c "from distutils import sysconfig; \
+    import os.path as op; v = sysconfig.get_config_vars(); \
+    fpaths = [op.join(v[pv], v['LDLIBRARY']) for pv in ('LIBDIR', 'LIBPL')]; \
+    print(list(filter(op.exists, fpaths))[0])" | xargs dirname)
+
 EXE_INC = -std=c++14 \
+    $(PFLAGS) $(PINC) \
     -I$(LIB_SRC)/transportModels/compressible/lnInclude \
     -I$(LIB_SRC)/thermophysicalModels/basic/lnInclude \
     -I$(LIB_SRC)/TurbulenceModels/turbulenceModels/lnInclude \
@@ -8,8 +16,8 @@ EXE_INC = -std=c++14 \
     -I$(DF_SRC)/CanteraMixture/lnInclude \
     -I$(DF_SRC)/dfChemistryModel/lnInclude \
     -I$(CANTERA_ROOT)/include \
-    -I$(TORCH_ROOT)/include \
-    -I$(TORCH_ROOT)/include/torch/csrc/api/include
+    -I$(LIB_SRC)/Pstream/mpi \
+    $(PYTHON_INC_DIR)
 
 LIB_LIBS = \
     -lcompressibleTransportModels \
@@ -17,12 +25,11 @@ LIB_LIBS = \
     -lfiniteVolume \
     -lmeshTools \
     -L$(FOAM_USER_LIBBIN) \
+    -L$(PYTHON_LIB_DIR) \
     -ldfCompressibleTurbulenceModels \
     -lCanteraMixture \
     -ldfChemistryModel \
     $(CANTERA_ROOT)/lib/libcantera.so \
-    $(TORCH_ROOT)/lib/libtorch.so \
-    $(TORCH_ROOT)/lib/libc10.so \
     -rdynamic \
     -lpthread