In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file in it
for root, _, names in os.walk('/kaggle/input'):
    for full_path in (os.path.join(root, name) for name in names):
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/critical-heat-flux-prediction/Data_CHF_Zhao_2020_ATE.csv


In [2]:
import tensorflow as tf
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error, r2_score
import joblib



In [3]:
# Location of the dataset CSV inside the Kaggle input directory
csv_path = "/kaggle/input/critical-heat-flux-prediction/Data_CHF_Zhao_2020_ATE.csv"

# Read the raw table into a DataFrame
df = pd.read_csv(csv_path)

# (rows, columns) of the raw table
df.shape

(1865, 10)

In [4]:
# Preview the first three rows of the raw data
df.head(3)

Unnamed: 0,id,author,geometry,pressure [MPa],mass_flux [kg/m2-s],x_e_out [-],D_e [mm],D_h [mm],length [mm],chf_exp [MW/m2]
0,1,Inasaka,tube,0.39,5600,-0.1041,3.0,3.0,100,11.3
1,2,Inasaka,tube,0.31,6700,-0.0596,3.0,3.0,100,10.6
2,3,Inasaka,tube,0.33,4300,-0.0395,3.0,3.0,100,7.3


In [5]:
# Column names, non-null counts and dtypes of the raw data
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1865 entries, 0 to 1864
Data columns (total 10 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   id                   1865 non-null   int64  
 1   author               1865 non-null   object 
 2   geometry             1865 non-null   object 
 3   pressure [MPa]       1865 non-null   float64
 4   mass_flux [kg/m2-s]  1865 non-null   int64  
 5   x_e_out [-]          1865 non-null   float64
 6   D_e [mm]             1865 non-null   float64
 7   D_h [mm]             1865 non-null   float64
 8   length [mm]          1865 non-null   int64  
 9   chf_exp [MW/m2]      1865 non-null   float64
dtypes: float64(5), int64(3), object(2)
memory usage: 145.8+ KB


In [6]:
# Count the missing values in each column
df.isna().sum()

id                     0
author                 0
geometry               0
pressure [MPa]         0
mass_flux [kg/m2-s]    0
x_e_out [-]            0
D_e [mm]               0
D_h [mm]               0
length [mm]            0
chf_exp [MW/m2]        0
dtype: int64

## Preprocessing 

In [7]:
def preprocess_inputs(df):
    """Clean the raw table and split it into features and target.

    The input frame is not modified. The steps are:

    1. drop the "id" and "author" columns,
    2. drop rows that are exact duplicates on the remaining columns
       (the original index labels of the surviving rows are kept),
    3. rename the unit-annotated columns to short snake_case names.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw data containing at least "id", "author" and "x_e_out [-]".

    Returns
    -------
    X : pandas.DataFrame
        Every remaining column except "xe_out".
    y : pandas.Series
        The "xe_out" column.
    """
    # Original (unit-annotated) header -> short name used in the rest of the notebook
    short_names = {
        'pressure [MPa]': 'pressure_mp',
        'mass_flux [kg/m2-s]': 'mass_flux',
        'x_e_out [-]': 'xe_out',
        'D_e [mm]': 'de_mm',
        'D_h [mm]': 'dh_mm',
        'length [mm]': 'length',
        'chf_exp [MW/m2]': 'chf_exp'
    }

    cleaned = (
        df.drop(columns=["id", "author"])
        .drop_duplicates()
        .rename(columns=short_names)
    )

    # Separate the target from the predictors
    target = cleaned["xe_out"]
    features = cleaned.drop(columns=["xe_out"])

    return features, target

In [8]:
# Build the feature table X and the target series y from the raw DataFrame
X, y = preprocess_inputs(df)

In [9]:
# Preview the first three rows of the feature table
X.head(3)

Unnamed: 0,geometry,pressure_mp,mass_flux,de_mm,dh_mm,length,chf_exp
0,tube,0.39,5600,3.0,3.0,100,11.3
1,tube,0.31,6700,3.0,3.0,100,10.6
2,tube,0.33,4300,3.0,3.0,100,7.3


In [10]:
# Display the target series (xe_out)
y

0      -0.1041
1      -0.0596
2      -0.0395
3      -0.1460
4      -0.0849
         ...  
1860   -0.0218
1861   -0.0434
1862   -0.0109
1863   -0.0218
1864   -0.0434
Name: xe_out, Length: 1723, dtype: float64

In [11]:
# Columns that need one-hot encoding
categorical_columns = ['geometry']

# Every other feature column is treated as numeric (original column order kept)
numeric_columns = X.columns[~X.columns.isin(categorical_columns)].tolist()

## Split the data into training and test sets

In [12]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [13]:
# (rows, columns) of the training feature table
X_train.shape

(1378, 7)

## Define Pipeline

In [14]:
# Numeric features: standardise with statistics learned from the training data
numeric_transformer = Pipeline(steps=[
    ('scaler', StandardScaler())
])

# Categorical features: one-hot encode.
# handle_unknown='ignore' makes a category that was not present when the
# encoder was fitted encode as an all-zero row instead of raising at
# transform time. This matters for the test split and for new data passed
# to the saved preprocessor later on.
categorical_transformer = Pipeline(steps=[
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])

# Route each group of columns to its transformer
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_columns),
        ('cat', categorical_transformer, categorical_columns)
    ])

# Fit on the training data only, so no statistics leak from the test set
X_train_transformed = preprocessor.fit_transform(X_train)

# Apply the already-fitted transformation to the test data
X_test_transformed = preprocessor.transform(X_test)

## Build the ANN Model using Keras

In [15]:
# Seed the Python, NumPy and TensorFlow random number generators before any
# weights are created, so that weight initialisation and batch shuffling (and
# therefore the reported metrics) are repeatable across "Restart & Run All",
# as far as the backend allows.
tf.keras.utils.set_random_seed(42)

# Fully connected regression network: two ReLU hidden layers followed by a
# single output unit with no activation. The input width is taken from the
# transformed training matrix.
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(X_train_transformed.shape[1],)),
    tf.keras.layers.Dense(100, activation='relu'),
    tf.keras.layers.Dense(50, activation='relu'),
    tf.keras.layers.Dense(1) # Output layer
])

model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 100)               1000      
                                                                 
 dense_1 (Dense)             (None, 50)                5050      
                                                                 
 dense_2 (Dense)             (None, 1)                 51        
                                                                 
Total params: 6,101
Trainable params: 6,101
Non-trainable params: 0
_________________________________________________________________


## Compile the Model

In [16]:
# Configure training: Adam optimizer, mean squared error as the loss
model.compile(optimizer='adam', loss='mean_squared_error')

## Fit the Model

In [17]:
# Train on the transformed training data for 50 epochs with batches of 32 samples
model.fit(X_train_transformed, y_train, epochs=50, batch_size=32)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x78e52dc1bf40>

## Model Evaluation

In [18]:
# Predict on the held-out features, then score against the true targets
y_pred = model.predict(X_test_transformed)

test_mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(test_mse)  # square root puts the error back in the target's units
r2 = r2_score(y_test, y_pred)



In [19]:
# Report both test-set metrics, one per line
print(
    f"Root Mean Squared Error: {rmse}",
    f"R-squared (R2) Score: {r2}",
    sep="\n",
)

Root Mean Squared Error: 0.04686166811249565
R-squared (R2) Score: 0.8597119574940745


## Save the Model

In [20]:
# Save the trained model using TensorFlow's SavedModel format
model.save('ANN_model')

# Save the preprocessor using joblib (for later preprocessing of new data)
joblib.dump(preprocessor, 'preprocessor.pkl')

['preprocessor.pkl']

## Load the Model

In [21]:
import joblib
import numpy as np
import tensorflow as tf

# Load the preprocessor
# NOTE: joblib.load unpickles the file, which can execute arbitrary code;
# only load files from a trusted source.
# This rebinds the name `preprocessor` to the object read from disk.
preprocessor = joblib.load('preprocessor.pkl')

# Load the trained model
# This rebinds the name `model` to the model read from disk.
model = tf.keras.models.load_model('ANN_model')

# Display the loaded model object
model

<keras.engine.sequential.Sequential at 0x78e52c1a7940>