<a href="https://colab.research.google.com/github/ap15032005/final-project-tc-vc/blob/main/viscosity_model%204%20models.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
from google.colab import files

uploaded = files.upload()


Saving viscosity_final dataset.xlsm to viscosity_final dataset.xlsm


In [5]:
import os
os.listdir()


['.config', 'viscosity_final dataset.xlsm', 'sample_data']

In [6]:
import pandas as pd

vis_data = pd.read_excel("viscosity_final dataset.xlsm")
vis_data.head()


Unnamed: 0,Y_viscosity,X1_Temperature_C,X2_Concentration,X3_BaseFluid_viscosity
0,0.0169,25,0.0,0.0169
1,0.01792,25,0.5,0.0169
2,0.01904,25,1.0,0.0169
3,0.02056,25,1.5,0.0169
4,0.02279,25,2.0,0.0169


GAUSSIAN PROCESS REGRESSION (GPR)

In [29]:
import numpy as np
import pandas as pd

from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C, WhiteKernel
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score


In [30]:
X = vis_data[['X1_Temperature_C',
              'X2_Concentration',
              'X3_BaseFluid_viscosity']]

y = vis_data['Y_viscosity'].values.reshape(-1, 1)


In [31]:
# STEP 3: Initialize scalers
X_scaler = MinMaxScaler()
y_scaler = MinMaxScaler()

# STEP 3: Scale input and output data
X_scaled = X_scaler.fit_transform(X)
y_scaled = y_scaler.fit_transform(y).ravel()


In [32]:
# STEP 4: Split dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y_scaled, test_size=0.2, random_state=42
)


In [33]:
# STEP 5: Define kernel (Constant × RBF + Noise)
kernel = C(1.0, (1e-3, 1e3)) * \
         RBF(length_scale=[1.0, 1.0, 1.0],
             length_scale_bounds=(1e-2, 1e2)) + \
         WhiteKernel(noise_level=1e-6)


In [34]:
# STEP 6: Initialize Gaussian Process Regressor
gpr = GaussianProcessRegressor(
    kernel=kernel,
    n_restarts_optimizer=10,
    random_state=42
)

# STEP 6: Train the model
gpr.fit(X_train, y_train)




In [35]:
# STEP 7: Predict on training and testing data (scaled)
y_train_pred_scaled = gpr.predict(X_train)
y_test_pred_scaled = gpr.predict(X_test)

# STEP 7: Convert predictions back to original scale
y_train_pred = y_scaler.inverse_transform(
    y_train_pred_scaled.reshape(-1, 1)
).ravel()

y_test_pred = y_scaler.inverse_transform(
    y_test_pred_scaled.reshape(-1, 1)
).ravel()

# STEP 7: Convert true y values back to original scale
y_train_true = y_scaler.inverse_transform(
    y_train.reshape(-1, 1)
).ravel()

y_test_true = y_scaler.inverse_transform(
    y_test.reshape(-1, 1)
).ravel()


In [36]:
# STEP 8: Define evaluation function
def evaluate(y_true, y_pred):
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)
    return mse, rmse, mae, r2


In [37]:
print("TRAINING RESULTS (GPR – 3 FEATURES)")
print("MSE, RMSE, MAE, R2 =", evaluate(y_train_true, y_train_pred))

print("\nTESTING RESULTS (GPR – 3 FEATURES)")
print("MSE, RMSE, MAE, R2 =", evaluate(y_test_true, y_test_pred))


TRAINING RESULTS (GPR – 3 FEATURES)
MSE, RMSE, MAE, R2 = (3.3102800718380573e-09, np.float64(5.7535033430407054e-05), 1.313617089080111e-05, 0.9999222835190547)

TESTING RESULTS (GPR – 3 FEATURES)
MSE, RMSE, MAE, R2 = (6.18448399068529e-11, np.float64(7.864149026236272e-06), 6.3930594344006685e-06, 0.9999987368235108)


MLR FOR VISCOSITY (VC)

In [38]:
# STEP 1: Import core libraries
import numpy as np
import pandas as pd

# STEP 1: Import ML utilities
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score


# STEP 2: Define a reusable evaluation function
def evaluate(y_true, y_pred):
    mse = mean_squared_error(y_true, y_pred)   # Mean Squared Error
    rmse = np.sqrt(mse)                        # Root Mean Squared Error
    mae = mean_absolute_error(y_true, y_pred)  # Mean Absolute Error
    r2 = r2_score(y_true, y_pred)              # Coefficient of Determination
    return mse, rmse, mae, r2


In [39]:
# STEP 3B: Select input features for viscosity prediction
X_vc = vis_data[['X1_Temperature_C',
                 'X2_Concentration',
                 'X3_BaseFluid_viscosity']]

# STEP 3B: Select target variable (Viscosity)
y_vc = vis_data['Y_viscosity']


In [40]:
# STEP 4B: Initialize scaler
scaler_vc = MinMaxScaler()

# STEP 4B: Scale viscosity input features
X_vc_scaled = scaler_vc.fit_transform(X_vc)


In [41]:
# STEP 5B: Split viscosity data into training and testing sets
X_train_vc, X_test_vc, y_train_vc, y_test_vc = train_test_split(
    X_vc_scaled, y_vc, test_size=0.2, random_state=42
)


In [42]:
# STEP 6B: Initialize MLR model for viscosity
mlr_vc = LinearRegression()

# STEP 6B: Fit model on viscosity training data
mlr_vc.fit(X_train_vc, y_train_vc)


In [43]:
# STEP 7B: Predict viscosity for training data
y_train_pred_vc = mlr_vc.predict(X_train_vc)

# STEP 7B: Predict viscosity for testing data
y_test_pred_vc = mlr_vc.predict(X_test_vc)


In [44]:
# STEP 8B: Display training performance
print("\nTRAINING RESULTS (MLR – VISCOSITY, 3 FEATURES)")
print("MSE, RMSE, MAE, R2 =", evaluate(y_train_vc, y_train_pred_vc))

# STEP 8B: Display testing performance
print("\nTESTING RESULTS (MLR – VISCOSITY, 3 FEATURES)")
print("MSE, RMSE, MAE, R2 =", evaluate(y_test_vc, y_test_pred_vc))



TRAINING RESULTS (MLR – VISCOSITY, 3 FEATURES)
MSE, RMSE, MAE, R2 = (3.3648114860781e-07, np.float64(0.0005800699514781041), 0.0002569828367753102, 0.9921003267981161)

TESTING RESULTS (MLR – VISCOSITY, 3 FEATURES)
MSE, RMSE, MAE, R2 = (9.788300497796331e-08, np.float64(0.0003128625976015083), 0.00023893944393286898, 0.9980007465333085)


In [45]:
# STEP 9B: Extract regression coefficients
a, b, d = mlr_vc.coef_
c = mlr_vc.intercept_

# STEP 9B: Print final MLR equation for viscosity
print("\nFinal MLR Equation (Viscosity):")
print(f"μ = {a:.6f}*T + {b:.6f}*φ + {d:.6f}*BF + {c:.6f}")



Final MLR Equation (Viscosity):
μ = 0.000534*T + 0.001336*φ + 0.020785*BF + -0.000330


Random Forest R

In [46]:
# STEP 1: Import core libraries
import numpy as np
import pandas as pd

# STEP 1: Import Random Forest and utilities
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score


# STEP 2: Define evaluation metrics
def evaluate(y_true, y_pred):
    mse = mean_squared_error(y_true, y_pred)     # Mean Squared Error
    rmse = np.sqrt(mse)                          # Root Mean Squared Error
    mae = mean_absolute_error(y_true, y_pred)    # Mean Absolute Error
    r2 = r2_score(y_true, y_pred)                # R-squared
    return mse, rmse, mae, r2


In [47]:
# STEP 3B: Select input features
X_vc = vis_data[['X1_Temperature_C',
                 'X2_Concentration',
                 'X3_BaseFluid_viscosity']]

# STEP 3B: Select target variable
y_vc = vis_data['Y_viscosity']


In [48]:
# STEP 4B: Split VC data
X_train_vc, X_test_vc, y_train_vc, y_test_vc = train_test_split(
    X_vc, y_vc, test_size=0.2, random_state=42
)


In [49]:
# STEP 6: Initialize Random Forest model
rf_vc = RandomForestRegressor(random_state=42)

# STEP 6: Define hyperparameter grid
rf_param_grid = {
    'n_estimators': [200, 300, 500],
    'max_depth': [None, 10, 20],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}


In [50]:
# STEP 7: Perform GridSearchCV
grid_vc = GridSearchCV(
    rf_vc,
    rf_param_grid,
    cv=5,
    scoring='r2',
    n_jobs=-1
)

# STEP 7: Train the model
grid_vc.fit(X_train_vc, y_train_vc)


In [51]:
# STEP 8: Extract best RF model
best_rf_vc = grid_vc.best_estimator_

# STEP 8: Display best parameters
print("Best RF parameters (Viscosity):")
print(grid_vc.best_params_)


Best RF parameters (Viscosity):
{'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 300}


In [52]:
# STEP 9: Predict viscosity values
y_train_pred_vc = best_rf_vc.predict(X_train_vc)
y_test_pred_vc = best_rf_vc.predict(X_test_vc)


In [53]:
# STEP 10: Display training results
print("TRAINING RESULTS (RF – VISCOSITY, 3 FEATURES)")
print("MSE, RMSE, MAE, R2 =", evaluate(y_train_vc, y_train_pred_vc))

# STEP 10: Display testing results
print("\nTESTING RESULTS (RF – VISCOSITY, 3 FEATURES)")
print("MSE, RMSE, MAE, R2 =", evaluate(y_test_vc, y_test_pred_vc))


TRAINING RESULTS (RF – VISCOSITY, 3 FEATURES)
MSE, RMSE, MAE, R2 = (1.270797833530576e-08, np.float64(0.00011272966927701757), 4.311523707098843e-05, 0.999701650816633)

TESTING RESULTS (RF – VISCOSITY, 3 FEATURES)
MSE, RMSE, MAE, R2 = (9.214026739947623e-09, np.float64(9.598972205370543e-05), 6.507191821908249e-05, 0.9998118041542945)


In [54]:
# STEP 11: Display feature importance
feature_importance_vc = pd.DataFrame({
    'Feature': X_vc.columns,
    'Importance': best_rf_vc.feature_importances_
})

feature_importance_vc


Unnamed: 0,Feature,Importance
0,X1_Temperature_C,0.308954
1,X2_Concentration,0.007014
2,X3_BaseFluid_viscosity,0.684032


XG BOOST


In [55]:
# STEP 1: Import core libraries
import numpy as np
import pandas as pd

# STEP 1: Import XGBoost regressor
from xgboost import XGBRegressor

# STEP 1: Import utilities
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score


# STEP 2: Define evaluation metrics
def evaluate(y_true, y_pred):
    mse = mean_squared_error(y_true, y_pred)     # Mean Squared Error
    rmse = np.sqrt(mse)                          # Root Mean Squared Error
    mae = mean_absolute_error(y_true, y_pred)    # Mean Absolute Error
    r2 = r2_score(y_true, y_pred)                # R-squared
    return mse, rmse, mae, r2


In [56]:

# STEP 3B: Input features
X_vc = vis_data[['X1_Temperature_C',
                 'X2_Concentration',
                 'X3_BaseFluid_viscosity']]

# STEP 3B: Target variable
y_vc = vis_data['Y_viscosity']


In [57]:
# STEP 4B: Split VC dataset
X_train_vc, X_test_vc, y_train_vc, y_test_vc = train_test_split(
    X_vc, y_vc, test_size=0.2, random_state=42
)


In [59]:
# STEP 5B: Define XGBoost hyperparameter grid (VC notebook)
param_grid_xgb = {
    'n_estimators': [200, 400],
    'max_depth': [3, 5, 7],
    'learning_rate': [0.05, 0.1],
    'subsample': [0.8, 1.0],
    'colsample_bytree': [0.8, 1.0]
}


In [60]:
# STEP 5B: Initialize XGBoost regressor
xgb_vc = XGBRegressor(
    objective='reg:squarederror',
    random_state=42
)

# STEP 6B: Grid search with same parameter grid
grid_vc = GridSearchCV(
    xgb_vc,
    param_grid_xgb,
    cv=5,
    scoring='r2',
    n_jobs=-1
)

grid_vc.fit(X_train_vc, y_train_vc)


In [61]:
# STEP 7B: Best model
best_xgb_vc = grid_vc.best_estimator_

print("Best XGBoost parameters (VC):")
print(grid_vc.best_params_)

# STEP 7B: Predictions
y_train_pred_vc = best_xgb_vc.predict(X_train_vc)
y_test_pred_vc = best_xgb_vc.predict(X_test_vc)


Best XGBoost parameters (VC):
{'colsample_bytree': 1.0, 'learning_rate': 0.1, 'max_depth': 5, 'n_estimators': 400, 'subsample': 0.8}


In [62]:
print("\nTRAINING RESULTS (XGBOOST – VISCOSITY)")
print("MSE, RMSE, MAE, R2 =", evaluate(y_train_vc, y_train_pred_vc))

print("\nTESTING RESULTS (XGBOOST – VISCOSITY)")
print("MSE, RMSE, MAE, R2 =", evaluate(y_test_vc, y_test_pred_vc))



TRAINING RESULTS (XGBOOST – VISCOSITY)
MSE, RMSE, MAE, R2 = (5.120206212074236e-08, np.float64(0.00022627872661994182), 0.00014040180554812282, 0.9987979131678254)

TESTING RESULTS (XGBOOST – VISCOSITY)
MSE, RMSE, MAE, R2 = (5.2180723744783514e-08, np.float64(0.00022843100434219413), 0.00017815828420372713, 0.9989342124011751)


polynomial R

In [67]:
# STEP 1: Import core libraries
import numpy as np
import pandas as pd

# STEP 1: Import preprocessing and models
from sklearn.preprocessing import PolynomialFeatures, MinMaxScaler
from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# STEP 2: Define evaluation metrics
def evaluate(y_true, y_pred):
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)
    return mse, rmse, mae, r2


In [68]:
# STEP 4: Input features (3 features)
X_vc = vis_data[['X1_Temperature_C',
                 'X2_Concentration',
                 'X3_BaseFluid_viscosity']]

# STEP 4: Target variable
y_vc = vis_data['Y_viscosity']


In [69]:
# STEP 5: Train–test split
X_train_vc, X_test_vc, y_train_vc, y_test_vc = train_test_split(
    X_vc, y_vc, test_size=0.2, random_state=42
)


In [70]:
# STEP 6: Scale features using training data only
scaler_vc = MinMaxScaler()
X_train_vc_scaled = scaler_vc.fit_transform(X_train_vc)
X_test_vc_scaled = scaler_vc.transform(X_test_vc)


In [72]:
# STEP 7: Polynomial feature generation
poly = PolynomialFeatures(degree=2, include_bias=False)

X_train_vc_poly = poly.fit_transform(X_train_vc_scaled)
X_test_vc_poly = poly.transform(X_test_vc_scaled)

# STEP 8: Train polynomial regression using Ridge (regularization)
poly_vc_model = Ridge(alpha=1.0)
poly_vc_model.fit(X_train_vc_poly, y_train_vc)


In [73]:
# STEP 9: Predictions
y_train_pred_vc = poly_vc_model.predict(X_train_vc_poly)
y_test_pred_vc = poly_vc_model.predict(X_test_vc_poly)

# STEP 9: Evaluation
print("TRAINING RESULTS (Polynomial – VISCOSITY)")
print("MSE, RMSE, MAE, R2 =", evaluate(y_train_vc, y_train_pred_vc))

print("\nTESTING RESULTS (Polynomial – VISCOSITY)")
print("MSE, RMSE, MAE, R2 =", evaluate(y_test_vc, y_test_pred_vc))


TRAINING RESULTS (Polynomial – VISCOSITY)
MSE, RMSE, MAE, R2 = (4.3127747380154074e-07, np.float64(0.0006567171946900284), 0.00034131523864740535, 0.9898747638122893)

TESTING RESULTS (Polynomial – VISCOSITY)
MSE, RMSE, MAE, R2 = (1.0254697098677624e-07, np.float64(0.00032022955982665975), 0.00027377584815315743, 0.9979054853568279)


In [74]:
# STEP 10: Extract polynomial feature names
feature_names = poly.get_feature_names_out(X_vc.columns)

# STEP 10: Get coefficients and intercept
coefficients = poly_vc_model.coef_
intercept = poly_vc_model.intercept_

# STEP 10: Create equation table
poly_equation_vc = pd.DataFrame({
    'Feature': feature_names,
    'Coefficient': coefficients
})

print("Polynomial Regression Equation (VISCOSITY)")
print(f"Intercept = {intercept:.6f}")
poly_equation_vc


Polynomial Regression Equation (VISCOSITY)
Intercept = 0.004901


Unnamed: 0,Feature,Coefficient
0,X1_Temperature_C,-0.003859
1,X2_Concentration,0.00032
2,X3_BaseFluid_viscosity,0.008246
3,X1_Temperature_C^2,-0.000867
4,X1_Temperature_C X2_Concentration,-0.000603
5,X1_Temperature_C X3_BaseFluid_viscosity,0.000442
6,X2_Concentration^2,-9.2e-05
7,X2_Concentration X3_BaseFluid_viscosity,0.002542
8,X3_BaseFluid_viscosity^2,0.006791
