# MODEL TRAINING - SET 1

### 1.1 Import Data and Required Packages
#### Importing Pandas, Numpy, Matplotlib, Seaborn and Warings Library.

In [194]:
#Importing recquired packages
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
import seaborn as sns

import tensorflow as tf
import warnings
warnings.filterwarnings("ignore")

##### Importing Modelling Libraries

In [195]:
#Importing ML Packages
from sklearn.model_selection import train_test_split, LeaveOneOut
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.decomposition import PCA

from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.linear_model import LinearRegression, Ridge, ElasticNet
from sklearn.cross_decomposition import PLSRegression 
from sklearn.svm import SVR                          
from sklearn.tree import DecisionTreeRegressor 
from sklearn.neighbors import KNeighborsRegressor   

from sklearn.ensemble import RandomForestRegressor
from sklearn.multioutput import MultiOutputRegressor

from xgboost import XGBRegressor

from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.callbacks import EarlyStopping, TensorBoard
from keras.optimizers import Adam
import datetime

#### Importing CSV data as Pandas dataframe

In [196]:
#Importing CSV data as Pandas dataframe
df=pd.read_csv('dataset/Extracted_CrudeData.csv')

#Show top 5 rows of the dataframe
df.head()

Unnamed: 0,Crude Name,StdLiquidDensity (kg/m3),SulfurByWt (%),ConradsonCarbonByWt (%),NitrogenByWt (%),Distillation Mass @ X Pct (C)@ 1 (%) - TBP,Distillation Mass @ X Pct (C)@ 5 (%) - TBP,Distillation Mass @ X Pct (C)@ 10 (%) - TBP,Distillation Mass @ X Pct (C)@ 30 (%) - TBP,Distillation Mass @ X Pct (C)@ 50 (%) - TBP,...,AromByWt (%),NaphthenesByWt (%),ParaffinsByWt (%),KinematicViscosity (cSt)@ 37.78 (C),KinematicViscosity (cSt)@ 98.89 (C),KinematicViscosity (cSt)@ 20 (C),KinematicViscosity (cSt)@ 40 (C),KinematicViscosity (cSt)@ 50 (C),KinematicViscosity (cSt)@ 100 (C),KinematicViscosity (cSt)@ 150 (C)
0,Akpo-2014,794.689603,0.070591,0.732083,0.062825,-16.244551,24.679712,74.031996,147.692242,231.447841,...,24.571141,41.011754,34.417105,1.7573028543796,0.833438,2.091216,1.700584,1.443025,0.824673,
1,AlJurf-2014,874.431106,1.826957,5.241924,0.168953,26.236785,98.856864,138.763063,251.728374,354.117493,...,42.157942,36.772998,21.06906,7.05914911333451,2.025492,11.19054,6.703264,5.225721,1.99183,
2,AlJurf-2015,872.972557,1.840926,5.365742,0.146255,26.408162,96.128866,135.041586,248.512058,351.41208,...,39.122494,39.730427,21.147079,6.77137102006056,2.01033,10.544402,6.437194,5.049799,1.977528,
3,Ashtart-2015,870.86026,1.002298,5.194113,0.233022,17.461417,84.608073,121.721566,248.007854,358.275744,...,40.484153,44.017708,15.498139,6.6515298282988,1.99058,10.208774,6.317605,4.927973,1.959541,
4,Azeri-Light-Supsa-2019,846.458826,0.147956,0.735866,0.113251,4.265112,81.831627,120.9554,229.274959,320.215604,...,21.364167,58.922758,19.713075,3.93702798465944,1.841686,5.11742,3.810679,3.255558,1.818677,


In [197]:
#Shape of dataset
df.shape

(53, 24)

In [198]:
#Renaming columns for better readability
df.rename(columns={
    'Crude Name': 'Crude_Name',
    'StdLiquidDensity (kg/m3)': 'StdLiqDensity_kgm3',
    'SulfurByWt (%)': 'SulfurWt_pct',
    'ConradsonCarbonByWt (%)': 'ConradsonCarbon_wt_pct',
    'NitrogenByWt (%)': 'NitrogenWt_pct',
    
    'Distillation Mass @ X Pct (C)@ 1 (%) - TBP': 'TBP_TempAt_1pct',
    'Distillation Mass @ X Pct (C)@ 5 (%) - TBP': 'TBP_TempAt_5pct',
    'Distillation Mass @ X Pct (C)@ 10 (%) - TBP': 'TBP_TempAt_10pct',
    'Distillation Mass @ X Pct (C)@ 30 (%) - TBP': 'TBP_TempAt_30pct',
    'Distillation Mass @ X Pct (C)@ 50 (%) - TBP': 'TBP_TempAt_50pct',
    'Distillation Mass @ X Pct (C)@ 70 (%) - TBP': 'TBP_TempAt_70pct',
    'Distillation Mass @ X Pct (C)@ 90 (%) - TBP': 'TBP_TempAt_90pct',
    'Distillation Mass @ X Pct (C)@ 95 (%) - TBP': 'TBP_TempAt_95pct',
    'Distillation Mass @ X Pct (C)@ 99 (%) - TBP': 'TBP_TempAt_99pct',

    'AromByWt (%)': 'AromWt_pct',
    'NaphthenesByWt (%)': 'NaphWt_pct',
    'ParaffinsByWt (%)': 'ParaWt_pct',

    'KinematicViscosity (cSt)@ 37.78 (C)': 'KV_37.78c_cSt',
    'KinematicViscosity (cSt)@ 98.89 (C)': 'KV_98.89c_cSt',
    'KinematicViscosity (cSt)@ 20 (C)': 'KV_20c_cSt',
    'KinematicViscosity (cSt)@ 40 (C)': 'KV_40c_cSt',
    'KinematicViscosity (cSt)@ 50 (C)': 'KV_50c_cSt',
    'KinematicViscosity (cSt)@ 100 (C)': 'KV_100c_cSt',
    'KinematicViscosity (cSt)@ 150 (C)': 'KV_150c_cSt'
}, inplace=True)
df.head()


Unnamed: 0,Crude_Name,StdLiqDensity_kgm3,SulfurWt_pct,ConradsonCarbon_wt_pct,NitrogenWt_pct,TBP_TempAt_1pct,TBP_TempAt_5pct,TBP_TempAt_10pct,TBP_TempAt_30pct,TBP_TempAt_50pct,...,AromWt_pct,NaphWt_pct,ParaWt_pct,KV_37.78c_cSt,KV_98.89c_cSt,KV_20c_cSt,KV_40c_cSt,KV_50c_cSt,KV_100c_cSt,KV_150c_cSt
0,Akpo-2014,794.689603,0.070591,0.732083,0.062825,-16.244551,24.679712,74.031996,147.692242,231.447841,...,24.571141,41.011754,34.417105,1.7573028543796,0.833438,2.091216,1.700584,1.443025,0.824673,
1,AlJurf-2014,874.431106,1.826957,5.241924,0.168953,26.236785,98.856864,138.763063,251.728374,354.117493,...,42.157942,36.772998,21.06906,7.05914911333451,2.025492,11.19054,6.703264,5.225721,1.99183,
2,AlJurf-2015,872.972557,1.840926,5.365742,0.146255,26.408162,96.128866,135.041586,248.512058,351.41208,...,39.122494,39.730427,21.147079,6.77137102006056,2.01033,10.544402,6.437194,5.049799,1.977528,
3,Ashtart-2015,870.86026,1.002298,5.194113,0.233022,17.461417,84.608073,121.721566,248.007854,358.275744,...,40.484153,44.017708,15.498139,6.6515298282988,1.99058,10.208774,6.317605,4.927973,1.959541,
4,Azeri-Light-Supsa-2019,846.458826,0.147956,0.735866,0.113251,4.265112,81.831627,120.9554,229.274959,320.215604,...,21.364167,58.922758,19.713075,3.93702798465944,1.841686,5.11742,3.810679,3.255558,1.818677,


#### Duplicate Values, Missing Values & Data Types

In [199]:
#Checks for duplicate values
df.duplicated().sum()

np.int64(0)

In [200]:
#Checks for missing values
df.isna().sum()

Crude_Name                 0
StdLiqDensity_kgm3         0
SulfurWt_pct               0
ConradsonCarbon_wt_pct     0
NitrogenWt_pct             0
TBP_TempAt_1pct            0
TBP_TempAt_5pct            0
TBP_TempAt_10pct           0
TBP_TempAt_30pct           0
TBP_TempAt_50pct           0
TBP_TempAt_70pct           0
TBP_TempAt_90pct           0
TBP_TempAt_95pct           0
TBP_TempAt_99pct           0
AromWt_pct                 0
NaphWt_pct                 0
ParaWt_pct                 0
KV_37.78c_cSt              0
KV_98.89c_cSt              0
KV_20c_cSt                22
KV_40c_cSt                16
KV_50c_cSt                11
KV_100c_cSt               15
KV_150c_cSt               47
dtype: int64

In [201]:
#Deleting columns with more than 50% missing values
df.drop(columns='KV_150c_cSt', inplace=True)
df.shape

(53, 23)

In [202]:
#Exploring data types
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 53 entries, 0 to 52
Data columns (total 23 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   Crude_Name              53 non-null     object 
 1   StdLiqDensity_kgm3      53 non-null     float64
 2   SulfurWt_pct            53 non-null     float64
 3   ConradsonCarbon_wt_pct  53 non-null     float64
 4   NitrogenWt_pct          53 non-null     float64
 5   TBP_TempAt_1pct         53 non-null     float64
 6   TBP_TempAt_5pct         53 non-null     float64
 7   TBP_TempAt_10pct        53 non-null     float64
 8   TBP_TempAt_30pct        53 non-null     float64
 9   TBP_TempAt_50pct        53 non-null     float64
 10  TBP_TempAt_70pct        53 non-null     float64
 11  TBP_TempAt_90pct        53 non-null     float64
 12  TBP_TempAt_95pct        53 non-null     float64
 13  TBP_TempAt_99pct        53 non-null     float64
 14  AromWt_pct              53 non-null     floa

In [203]:
#Changing data types
df["KV_37.78c_cSt"] = pd.to_numeric(df["KV_37.78c_cSt"], errors="coerce")

#### ML Model Training

In [204]:
#Independent Features
X = df.drop(columns=['Crude_Name', 'AromWt_pct', 'NaphWt_pct', 'ParaWt_pct', 'KV_37.78c_cSt', 'KV_98.89c_cSt', 
                    'KV_20c_cSt', 'KV_40c_cSt', 'KV_50c_cSt', 'KV_100c_cSt'])
X.head()

Unnamed: 0,StdLiqDensity_kgm3,SulfurWt_pct,ConradsonCarbon_wt_pct,NitrogenWt_pct,TBP_TempAt_1pct,TBP_TempAt_5pct,TBP_TempAt_10pct,TBP_TempAt_30pct,TBP_TempAt_50pct,TBP_TempAt_70pct,TBP_TempAt_90pct,TBP_TempAt_95pct,TBP_TempAt_99pct
0,794.689603,0.070591,0.732083,0.062825,-16.244551,24.679712,74.031996,147.692242,231.447841,311.635864,465.560748,539.770951,687.654783
1,874.431106,1.826957,5.241924,0.168953,26.236785,98.856864,138.763063,251.728374,354.117493,468.885623,655.500149,748.288326,914.614065
2,872.972557,1.840926,5.365742,0.146255,26.408162,96.128866,135.041586,248.512058,351.41208,481.574377,655.699458,736.138972,885.381839
3,870.86026,1.002298,5.194113,0.233022,17.461417,84.608073,121.721566,248.007854,358.275744,468.225815,781.519485,949.416468,1161.723971
4,846.458826,0.147956,0.735866,0.113251,4.265112,81.831627,120.9554,229.274959,320.215604,423.105585,575.274504,642.15643,753.098296


In [205]:
#Dependent Features
y = df[['AromWt_pct', 'NaphWt_pct', 'ParaWt_pct']]

In [206]:
#Split dataset into train and test
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2,random_state=42)
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((42, 13), (11, 13), (42, 3), (11, 3))

In [207]:
#Define categorical and numerical feature columns
num_features = [col for col in X.columns]

#Define transformations for each feature
numeric_transformer = StandardScaler()                    

#Apply scaling to numeric features
preprocessor = ColumnTransformer([("StandardScaler", numeric_transformer, num_features)])


In [208]:
#Machine Learning Algorithms
models = {
    "Linear Regression": Pipeline([
        ('pre', preprocessor),
        ('pca', PCA(n_components=5)),
        ('linreg', MultiOutputRegressor(LinearRegression()))
    ]),
    "Ridge Regression": Pipeline([
        ('pre', preprocessor),
        ('ridge', MultiOutputRegressor(Ridge(alpha=1.0)))
    ]),
    "ElasticNet": Pipeline([
        ('pre', preprocessor),
        ('pca', PCA(n_components=3)),
        ('elasticnet', MultiOutputRegressor(ElasticNet(alpha=1.0)))
    ]),
    "SVR": Pipeline([
        ('pre', preprocessor),
        ('pca', PCA(n_components=5)),
        ('svr', MultiOutputRegressor(SVR(kernel='rbf', C=100, epsilon=0.1)))
    ]),
    "KNN": Pipeline([
        ('pre', preprocessor), 
        ('knn', MultiOutputRegressor(KNeighborsRegressor(n_neighbors=3)))
    ]),
    "PLS Regression": Pipeline([
        ('pre', preprocessor),
        ('pls', PLSRegression(n_components=3))
    ]),
    "DecisionTreeRegressor": Pipeline([
        ('pre', preprocessor),  
        ('dt', MultiOutputRegressor(DecisionTreeRegressor(random_state=42)))
    ]),
    "Random Forest": Pipeline([
        ('pre', preprocessor),
        ('rf', MultiOutputRegressor(RandomForestRegressor(n_estimators=200, random_state=42)))
    ]),
    "XGBoost": Pipeline([
        ('pre', preprocessor),
        ('xgb', MultiOutputRegressor(XGBRegressor(n_estimators=50, max_depth=3, random_state=42)))
    ]),
    
}

In [209]:
#Train and evaluate all models
print("Training ML Models...")

results = {}
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    results[name] = {
        'MAE': mean_absolute_error(y_test, y_pred),
        'RMSE': np.sqrt(mean_squared_error(y_test, y_pred)),
        'R2': r2_score(y_test, y_pred)
    }
results_df = pd.DataFrame(results).T
print(results_df.sort_values('R2', ascending=False))


Training ML Models...
                            MAE       RMSE        R2
SVR                    4.349985   5.212477  0.612552
PLS Regression         5.151014   7.510719  0.568989
Ridge Regression       4.964183   6.456447  0.564125
Linear Regression      5.413777   7.577290  0.558218
KNN                    6.412719   7.875427  0.441627
ElasticNet             6.410438   8.829062  0.417243
Random Forest          6.204917   8.564954  0.352601
XGBoost                6.907509   9.184071  0.263646
DecisionTreeRegressor  9.178900  14.495270 -0.684231


#### ANN Model Training

In [210]:
#Independent Features
X = df.drop(columns=['Crude_Name', 'AromWt_pct', 'NaphWt_pct', 'ParaWt_pct', 'KV_37.78c_cSt', 'KV_98.89c_cSt', 
                    'KV_20c_cSt', 'KV_40c_cSt', 'KV_50c_cSt', 'KV_100c_cSt'])
X.head()

Unnamed: 0,StdLiqDensity_kgm3,SulfurWt_pct,ConradsonCarbon_wt_pct,NitrogenWt_pct,TBP_TempAt_1pct,TBP_TempAt_5pct,TBP_TempAt_10pct,TBP_TempAt_30pct,TBP_TempAt_50pct,TBP_TempAt_70pct,TBP_TempAt_90pct,TBP_TempAt_95pct,TBP_TempAt_99pct
0,794.689603,0.070591,0.732083,0.062825,-16.244551,24.679712,74.031996,147.692242,231.447841,311.635864,465.560748,539.770951,687.654783
1,874.431106,1.826957,5.241924,0.168953,26.236785,98.856864,138.763063,251.728374,354.117493,468.885623,655.500149,748.288326,914.614065
2,872.972557,1.840926,5.365742,0.146255,26.408162,96.128866,135.041586,248.512058,351.41208,481.574377,655.699458,736.138972,885.381839
3,870.86026,1.002298,5.194113,0.233022,17.461417,84.608073,121.721566,248.007854,358.275744,468.225815,781.519485,949.416468,1161.723971
4,846.458826,0.147956,0.735866,0.113251,4.265112,81.831627,120.9554,229.274959,320.215604,423.105585,575.274504,642.15643,753.098296


In [211]:
#Dependent Features
y = df[['AromWt_pct', 'NaphWt_pct', 'ParaWt_pct']]

In [212]:
#Cross-validation using Leave One Out
loo = LeaveOneOut()

y_true_all = []
y_pred_all = []

print(f"Staring LOOCV on {len(df)} samples....")

for i, (train_index, test_index) in enumerate(loo.split(X)):
    
    # Progress Monitor (Optional)
    if i % 10 == 0: print(f"Processing Fold {i}/{len(df)}...")

    # A. Split Data
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    scaler_X = StandardScaler()
    X_train_scaled = scaler_X.fit_transform(X_train)
    X_test_scaled = scaler_X.transform(X_test)
    
    # B. Scaling (Fit ONLY on Train)
    scaler_X = StandardScaler()
    X_train_scaled = scaler_X.fit_transform(X_train)
    X_test_scaled = scaler_X.transform(X_test)
    
    scaler_y = StandardScaler()
    y_train_scaled = scaler_y.fit_transform(y_train)
    # Note: We don't scale y_test here because we want to compare against real values later
    
    # C. PCA (Fit ONLY on Train)
    pca = PCA(n_components=0.95)
    X_train_pca = pca.fit_transform(X_train_scaled)
    X_test_pca = pca.transform(X_test_scaled)
    
    # D. Re-Define & Compile ANN Model (Must be inside loop!)
    ann_model = tf.keras.models.Sequential([
        tf.keras.layers.Dense(64, activation='relu', input_shape=(X_train_pca.shape[1],)),
        tf.keras.layers.Dropout(0.2), # Good for small data
        tf.keras.layers.Dense(32, activation='relu'),
        # Output layer with 3 neurons (Arom, Naph, Para)
        tf.keras.layers.Dense(3, activation='linear') 
    ])
    
    ann_model.compile(optimizer='adam', loss='mse')
    
    # E. Train (Use verbose=0 to silence the 53 training bars)
    ann_model.fit(X_train_pca, y_train_scaled, epochs=100, batch_size=8, verbose=0)
    
    # F. Predict
    pred_scaled = ann_model.predict(X_test_pca, verbose=0)
    
    # G. Inverse Transform
    pred_real = scaler_y.inverse_transform(pred_scaled)
    
    # H. Force Sum to 100% (Physics Constraint)
    # This formula normalizes the 3 values so they sum to exactly 100
    pred_real = 100 * pred_real / np.sum(pred_real)
    
    # I. Store
    y_true_all.append(y_test.values[0])
    y_pred_all.append(pred_real[0])

# 4. Final Evaluation
y_true_all = np.array(y_true_all)
y_pred_all = np.array(y_pred_all)

# Calculate Metrics
r2 = r2_score(y_true_all, y_pred_all) # Default averages all outputs
mae = mean_absolute_error(y_true_all, y_pred_all)
rmse = np.sqrt(mean_squared_error(y_true_all, y_pred_all))

print("\n" + "="*30)
print("FINAL LOOCV RESULTS (ANN)")
print("="*30)
print(f"R2 Score: {r2:.4f}")
print(f"MAE:      {mae:.4f} %")
print(f"RMSE:     {rmse:.4f} %")

Staring LOOCV on 53 samples....
Processing Fold 0/53...
Processing Fold 10/53...
Processing Fold 20/53...
Processing Fold 30/53...
Processing Fold 40/53...
Processing Fold 50/53...

FINAL LOOCV RESULTS (ANN)
R2 Score: 0.3429
MAE:      7.5947 %
RMSE:     10.5753 %


In [213]:
#Split dataset into train and test
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2,random_state=42)
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((42, 13), (11, 13), (42, 3), (11, 3))

In [214]:
scaler_X = StandardScaler()
scaler_y = StandardScaler() 

#Fit Train & transform Test
X_train_scaled = scaler_X.fit_transform(X_train)
X_test_scaled  = scaler_X.transform(X_test)

y_train_scaled = scaler_y.fit_transform(y_train)
y_test_scaled  = scaler_y.transform(y_test)

In [215]:
pca=PCA(n_components=0.95)
X_train_pca=pca.fit_transform(X_train_scaled)
X_test_pca=pca.transform(X_test_scaled)

In [216]:
ann_model = Sequential([Dense(64, activation='relu', input_shape=(X_train_pca.shape[1],)),
                    
                    Dense(32, activation='relu'),
                    Dense(3, activation='linear')])
ann_model.summary()

In [217]:
##Compile the model
ann_model.compile(optimizer=Adam(learning_rate=0.001), loss='mse', metrics=['mae'])

In [218]:
##Setup Tensorboard
log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

In [219]:
##Setup Early Stopping
early_stopping = EarlyStopping(monitor='val_loss', patience=100, restore_best_weights=True)

In [220]:
print("Training Optimized ANN...")
history = ann_model.fit(X_train_pca, y_train_scaled, validation_data=(X_test_pca, y_test_scaled), epochs=500, batch_size=8,
                  callbacks=[early_stopping, tensorboard_callback])

Training Optimized ANN...
Epoch 1/500
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 149ms/step - loss: 0.9506 - mae: 0.7498 - val_loss: 0.5356 - val_mae: 0.5732
Epoch 2/500
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 67ms/step - loss: 0.7706 - mae: 0.6774 - val_loss: 0.4877 - val_mae: 0.5491
Epoch 3/500
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 68ms/step - loss: 0.6755 - mae: 0.6322 - val_loss: 0.4565 - val_mae: 0.5371
Epoch 4/500
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 57ms/step - loss: 0.6152 - mae: 0.6037 - val_loss: 0.4386 - val_mae: 0.5310
Epoch 5/500
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step - loss: 0.5651 - mae: 0.5736 - val_loss: 0.4256 - val_mae: 0.5226
Epoch 6/500
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step - loss: 0.5425 - mae: 0.5586 - val_loss: 0.4188 - val_mae: 0.5170
Epoch 7/500
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m

In [221]:
print("Predicting on Test Set...")
y_pred_scaled = ann_model.predict(X_test_pca)

#Convert back to real percentages
y_pred_real = scaler_y.inverse_transform(y_pred_scaled)
y_test_real = scaler_y.inverse_transform(y_test_scaled)

y_pred_real = 100 * y_pred_real / np.sum(y_pred_real, axis=1, keepdims=True)

#Calculate Metrics on the Real Data
r2 = r2_score(y_test_real, y_pred_real, multioutput='uniform_average')
mae = mean_absolute_error(y_test_real, y_pred_real)
rmse = np.sqrt(mean_squared_error(y_test_real, y_pred_real))

print("ANN PERFORMANCE REPORT")
print(f"R2 Score: {r2:.4f}")
print(f"MAE:      {mae:.4f} %")
print(f"RMSE:     {rmse:.4f} %")
print("-" * 100)

results_df = pd.DataFrame(
    data=np.hstack((y_test_real, y_pred_real)),
    columns=['Actual_Arom', 'Actual_Naph', 'Actual_Para', 'Pred_Arom', 'Pred_Naph', 'Pred_Para']
)
print("\nSample Predictions (First 5 Rows):")
display(results_df.head())

Predicting on Test Set...
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 152ms/step
ANN PERFORMANCE REPORT
R2 Score: 0.4551
MAE:      5.0794 %
RMSE:     6.1720 %
----------------------------------------------------------------------------------------------------

Sample Predictions (First 5 Rows):


Unnamed: 0,Actual_Arom,Actual_Naph,Actual_Para,Pred_Arom,Pred_Naph,Pred_Para
0,62.85622,19.805446,17.338334,54.475349,25.819147,19.705503
1,26.798845,42.786668,30.414487,25.572268,51.162609,23.265121
2,20.906927,54.520801,24.572272,32.985569,37.182423,29.832008
3,42.706787,41.868136,15.425077,43.289238,36.653114,20.05765
4,35.467642,45.089065,19.443293,44.659328,39.382744,15.957932
