In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found
for root, _, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import tensorflow as tf
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error, r2_score
import joblib

In [None]:
# Location of the raw CHF dataset inside the Kaggle input mount
DATA_PATH = "/kaggle/input/critical-heat-flux-prediction/Data_CHF_Zhao_2020_ATE.csv"
df = pd.read_csv(DATA_PATH)

# (rows, columns) of the raw table
df.shape

In [None]:
# Peek at the first three raw rows
df.head(n=3)

In [None]:
# Column names, dtypes and non-null counts of the raw table
df.info()

In [None]:
# Count missing values per column
df.isna().sum()

## Preprocessing 

In [None]:
def preprocess_inputs(df, target="xe_out"):
    """Clean the raw CHF table and split it into features and target.

    Steps, in order: drop the "id" and "author" columns, drop duplicate
    rows, rename the unit-suffixed columns to short snake_case names, then
    separate the target column from the remaining columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw table. Must contain the "id" and "author" columns; columns listed
        in the rename map are renamed when present.
    target : str, default "xe_out"
        Name (after renaming) of the column to predict.

    Returns
    -------
    X : pandas.DataFrame
        Every remaining column except ``target``.
    y : pandas.Series
        The ``target`` column.

    Raises
    ------
    KeyError
        If "id", "author" or ``target`` is not a column of the table.

    Notes
    -----
    The input frame is not modified. The original row index is kept, so it
    has gaps wherever duplicate rows were removed.
    """
    column_names = {
        'pressure [MPa]': 'pressure_mp',
        'mass_flux [kg/m2-s]': 'mass_flux',
        'x_e_out [-]': 'xe_out',
        'D_e [mm]': 'de_mm',
        'D_h [mm]': 'dh_mm',
        'length [mm]': 'length',
        'chf_exp [MW/m2]': 'chf_exp'
    }

    cleaned = (
        df
        # Identifier / provenance columns carry no predictive signal
        .drop(columns=["id", "author"])
        # Duplicates are detected after the id is gone, i.e. on the measurements only
        .drop_duplicates()
        .rename(columns=column_names)
    )

    # Split the dataset into X and y
    X = cleaned.drop(columns=[target])
    y = cleaned[target]

    return X, y

In [None]:
# Build the feature table X and the target y from the raw frame
X, y = preprocess_inputs(df=df)

In [None]:
# First three rows of the feature table
X.head(n=3)

In [None]:
# Display the target returned by preprocess_inputs
y

In [None]:
# Column roles: 'geometry' is the categorical feature,
# every other column of X is treated as numeric.
categorical_columns = ['geometry']

is_categorical = X.columns.isin(categorical_columns)
numeric_columns = X.columns[~is_categorical].tolist()

## Split the data into training and test sets

In [None]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# (rows, columns) of the training features
X_train.shape

## Define Pipeline

In [None]:
# Define transformers for numeric and categorical features
numeric_transformer = Pipeline(steps=[
    ('scaler', StandardScaler())
])

# handle_unknown='ignore' encodes a category that was not seen during fit as
# an all-zero row instead of raising when the test set or new data is transformed.
categorical_transformer = Pipeline(steps=[
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])

# Create a column transformer.
# sparse_threshold=0 forces a dense array however sparse the one-hot block
# is, since the result is passed straight to the Keras model.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_columns),
        ('cat', categorical_transformer, categorical_columns)
    ],
    sparse_threshold=0,
)

# Fit the preprocessor on the training data only, then transform it
X_train_transformed = preprocessor.fit_transform(X_train)

# Transform the testing data with the statistics learned from the training data
X_test_transformed = preprocessor.transform(X_test)

## Build the ANN Model using Keras

In [None]:
# Seed Python, NumPy and TensorFlow before any weights are created so that
# the initialisation is repeatable after Restart & Run All (same seed value
# as the train/test split).
tf.keras.utils.set_random_seed(42)

# Fully connected regressor: two hidden ReLU layers and one linear output unit
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(X_train_transformed.shape[1],)),
    tf.keras.layers.Dense(100, activation='relu'),
    tf.keras.layers.Dense(50, activation='relu'),
    tf.keras.layers.Dense(1) # Output layer
])

model.summary()

## Compile the Model

In [None]:
# Regression setup: minimise mean squared error with the Adam optimizer
model.compile(
    loss='mean_squared_error',
    optimizer='adam',
)

## Fit the Model

In [None]:
# Train for 50 epochs in mini-batches of 32 rows
model.fit(
    x=X_train_transformed,
    y=y_train,
    batch_size=32,
    epochs=50,
)

## Model Evaluation

In [None]:
# Score the model on the held-out test set
y_pred = model.predict(X_test_transformed)

mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

In [None]:
# Report both test-set metrics, one per line
print(
    f"Root Mean Squared Error: {rmse}",
    f"R-squared (R2) Score: {r2}",
    sep="\n",
)

## Save the Model

In [None]:
# Save the trained model using TensorFlow's SavedModel format
model.save('ANN_model')

# Save the preprocessor using joblib (for later preprocessing of new data)
joblib.dump(preprocessor, 'preprocessor.pkl')

## Load the Model

In [None]:
import joblib
import numpy as np
import tensorflow as tf

# Load the preprocessor
preprocessor = joblib.load('preprocessor.pkl')

# Load the trained model
model = tf.keras.models.load_model('ANN_model')

model