<a href="https://colab.research.google.com/github/ap15032005/final-project-tc-vc/blob/main/thermal_conductivity_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>


**DATA SET**

In [1]:
from google.colab import files

uploaded = files.upload()


Saving thermal_conductivity_final dataset.xlsm to thermal_conductivity_final dataset.xlsm


In [31]:
import os
os.listdir()


['.config', 'thermal_conductivity_final dataset.xlsm', 'sample_data']

In [3]:
import pandas as pd

tc_data = pd.read_excel("thermal_conductivity_final dataset.xlsm")
tc_data.head()


Unnamed: 0,Y_thermal_conductivity,X1_Temperature_C,X2_Concentration,X3_BaseFluid
0,0.2582,30,0.5,0.251
1,0.2632,35,0.5,0.253
2,0.2676,40,0.5,0.255
3,0.271,45,0.5,0.257
4,0.275,50,0.5,0.259


**MULTIPLE LINEAR REGRESSION MODEL**

Y_thermal_conductivity   ‚Üí Output (y)
X1_Temperature_C         ‚Üí Input 1 (T)
X2_Concentration         ‚Üí Input 2 (œÜ)
X3_BaseFluid             ‚Üí ‚ùå NOT USED for MLR (constant / base fluid)


In [4]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LinearRegression

from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score


In [5]:
# Independent variables (as per paper)
X = tc_data[['X1_Temperature_C', 'X2_Concentration']]

# Dependent variable
y = tc_data['Y_thermal_conductivity']


In [6]:
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)


In [7]:
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42
)


In [8]:
mlr_model = LinearRegression()
mlr_model.fit(X_train, y_train)


In [9]:
y_train_pred = mlr_model.predict(X_train)
y_test_pred = mlr_model.predict(X_test)


In [10]:
def evaluate(y_true, y_pred):
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)
    return mse, rmse, mae, r2

print("TRAINING RESULTS")
print("MSE, RMSE, MAE, R2 =", evaluate(y_train, y_train_pred))

print("\nTESTING RESULTS")
print("MSE, RMSE, MAE, R2 =", evaluate(y_test, y_test_pred))


TRAINING RESULTS
MSE, RMSE, MAE, R2 = (0.00043030429788947725, np.float64(0.020743777329345715), 0.014119376201651691, 0.2573464328697954)

TESTING RESULTS
MSE, RMSE, MAE, R2 = (0.00012422948350999008, np.float64(0.011145828076459375), 0.009028764349423627, 0.42857567063440316)


In [11]:
a = mlr_model.coef_[0]   # Temperature coefficient
b = mlr_model.coef_[1]   # Concentration coefficient
c = mlr_model.intercept_

print(f"Final MLR Equation:")
print(f"k = {a:.6f} * T + {b:.6f} * œÜ + {c:.6f}")


Final MLR Equation:
k = 0.053319 * T + 0.002954 * œÜ + 0.253817


**Multilayer feedforward artificial neural network "BP"**

In [12]:
from sklearn.neural_network import MLPRegressor


In [13]:
X_scaled
X_train, X_test, y_train, y_test


(array([[1.        , 0.09090909],
        [0.13888889, 0.05050505],
        [0.02777778, 1.        ],
        [0.48611111, 0.24242424],
        [0.58333333, 0.5959596 ],
        [0.27777778, 0.38525253],
        [0.20833333, 0.06060606],
        [0.27777778, 0.22080808],
        [0.27777778, 0.        ],
        [0.        , 0.03030303],
        [0.72222222, 0.5959596 ],
        [0.        , 0.49494949],
        [0.20833333, 0.        ],
        [0.06944444, 0.27565657],
        [0.06944444, 0.38525253],
        [0.34722222, 0.02020202],
        [0.41666667, 0.04040404],
        [0.20833333, 0.24242424],
        [0.48611111, 0.04040404],
        [0.41666667, 0.09090909],
        [0.20833333, 0.19191919],
        [0.        , 0.        ],
        [0.86111111, 0.09090909],
        [0.13888889, 0.19191919],
        [1.        , 0.7979798 ],
        [0.41666667, 0.02020202],
        [0.48611111, 0.14141414],
        [0.06944444, 0.09090909],
        [0.34722222, 0.49494949],
        [0.277

In [14]:
bp_model = MLPRegressor(
    hidden_layer_sizes=(20,),
    activation='tanh',   # üî¥ important change
    solver='adam',
    max_iter=5000,
    random_state=42
)

bp_model.fit(X_train, y_train)



In [15]:
y_train_pred_bp = bp_model.predict(X_train)
y_test_pred_bp = bp_model.predict(X_test)


In [16]:
print("TRAINING RESULTS (BP-ANN)")
print("MSE, RMSE, MAE, R2 =", evaluate(y_train, y_train_pred_bp))

print("\nTESTING RESULTS (BP-ANN)")
print("MSE, RMSE, MAE, R2 =", evaluate(y_test, y_test_pred_bp))


TRAINING RESULTS (BP-ANN)
MSE, RMSE, MAE, R2 = (0.0021996473573473447, np.float64(0.046900398264272176), 0.03132005276996915, -2.7963273069192396)

TESTING RESULTS (BP-ANN)
MSE, RMSE, MAE, R2 = (0.0012843998984392586, np.float64(0.03583852533851328), 0.02619868220561162, -4.907915978286051)


**LEAST SQUARES SUPPORT VECTOR MACHINE (LS-SVM)**

‚ö†Ô∏è Important note (very important):
sklearn does NOT directly provide LS-SVM.
So in research papers (including your reference), LS-SVM is implemented using SVR with RBF kernel + proper regularization, which is accepted in journals.

We will:

Use RBF kernel

Tune C (regularization) and gamma (œÉ¬≤ equivalent)

Use GridSearchCV (as in the paper)


In [17]:
# Inputs
X = tc_data[['X1_Temperature_C', 'X2_Concentration']]

# Output
y = tc_data['Y_thermal_conductivity'].values.reshape(-1, 1)


In [18]:
#SCALE BOTH X AND y (CRITICAL)
from sklearn.preprocessing import MinMaxScaler

X_scaler = MinMaxScaler()
y_scaler = MinMaxScaler()

X_scaled = X_scaler.fit_transform(X)
y_scaled = y_scaler.fit_transform(y).ravel()


In [19]:
#TRAIN‚ÄìTEST SPLIT
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y_scaled, test_size=0.2, random_state=42
)

In [20]:
#LS-SVM WITH GRID SEARCH (RBF KERNEL)
from sklearn.svm import SVR
from sklearn.model_selection import GridSearchCV

svr = SVR(kernel='rbf')

param_grid = {
    'C': [10, 50, 100, 500, 1000],
    'gamma': [0.01, 0.05, 0.1, 0.2]
}

grid = GridSearchCV(
    svr,
    param_grid,
    cv=5,
    scoring='neg_mean_squared_error',
    n_jobs=-1
)

grid.fit(X_train, y_train)


In [21]:
best_lssvm = grid.best_estimator_

print("Best LS-SVM parameters:")
print(grid.best_params_)


Best LS-SVM parameters:
{'C': 500, 'gamma': 0.2}


In [22]:
#PREDICT (INVERSE TRANSFORM)
# Scaled predictions
y_train_pred_scaled = best_lssvm.predict(X_train)
y_test_pred_scaled = best_lssvm.predict(X_test)

# Convert back to original scale
y_train_pred = y_scaler.inverse_transform(
    y_train_pred_scaled.reshape(-1, 1)
).ravel()

y_test_pred = y_scaler.inverse_transform(
    y_test_pred_scaled.reshape(-1, 1)
).ravel()

# True values (original scale)
y_train_true = y_scaler.inverse_transform(
    y_train.reshape(-1, 1)
).ravel()

y_test_true = y_scaler.inverse_transform(
    y_test.reshape(-1, 1)
).ravel()


In [23]:
print("TRAINING RESULTS (LS-SVM)")
print("MSE, RMSE, MAE, R2 =", evaluate(y_train_true, y_train_pred))

print("\nTESTING RESULTS (LS-SVM)")
print("MSE, RMSE, MAE, R2 =", evaluate(y_test_true, y_test_pred))


TRAINING RESULTS (LS-SVM)
MSE, RMSE, MAE, R2 = (0.00040552745606829406, np.float64(0.020137712284872232), 0.013556435148114061, 0.30010828779658183)

TESTING RESULTS (LS-SVM)
MSE, RMSE, MAE, R2 = (0.00010635773597565568, np.float64(0.010312988702391546), 0.008446627203463527, 0.5107812072015504)


RANDOM FOREST REGRESSION ‚Äî THERMAL CONDUCTIVITY


In [24]:
# Inputs
X = tc_data[['X1_Temperature_C', 'X2_Concentration']]

# Output
y = tc_data['Y_thermal_conductivity']


In [25]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)


In [26]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV

rf = RandomForestRegressor(
    random_state=42
)

param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [None, 5, 10, 20],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

grid = GridSearchCV(
    rf,
    param_grid,
    cv=5,
    scoring='r2',
    n_jobs=-1
)

grid.fit(X_train, y_train)


In [27]:
print("Best Random Forest parameters (TC):")
print(grid.best_params_)

best_rf_tc = grid.best_estimator_


Best Random Forest parameters (TC):
{'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 5, 'n_estimators': 300}


In [28]:
y_train_pred_rf = best_rf_tc.predict(X_train)
y_test_pred_rf = best_rf_tc.predict(X_test)


In [29]:
print("TRAINING RESULTS (RF ‚Äì TC)")
print("MSE, RMSE, MAE, R2 =", evaluate(y_train, y_train_pred_rf))

print("\nTESTING RESULTS (RF ‚Äì TC)")
print("MSE, RMSE, MAE, R2 =", evaluate(y_test, y_test_pred_rf))


TRAINING RESULTS (RF ‚Äì TC)
MSE, RMSE, MAE, R2 = (0.00013473149774117274, np.float64(0.011607389790179907), 0.007472817540882793, 0.7674696072220573)

TESTING RESULTS (RF ‚Äì TC)
MSE, RMSE, MAE, R2 = (0.00020765747227489018, np.float64(0.014410325196708441), 0.00979596275407283, 0.04482793874859925)


In [30]:
import pandas as pd

feature_importance = pd.DataFrame({
    'Feature': X.columns,
    'Importance': best_rf_tc.feature_importances_
})

feature_importance


Unnamed: 0,Feature,Importance
0,X1_Temperature_C,0.639308
1,X2_Concentration,0.360692
