In [None]:
import numpy as np
import matplotlib.pyplot as plt
import os
import pandas as pd
import warnings
import tensorflow
from tensorflow.keras.utils import Sequence
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.preprocessing.image import ImageDataGenerator   # ==== NEW ====
from sklearn.preprocessing import StandardScaler
import seaborn as sns
import keras_tuner as kt
from tqdm import tqdm
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Dropout, Flatten, Input, Concatenate
from tensorflow.keras.optimizers import Adam
import tensorflow as tf
from sklearn import metrics
import joblib

# One seed for every RNG the notebook draws from. NumPy's global RNG must be
# seeded as well: the data generator shuffles with np.random and the image
# augmentation samples its random transforms from NumPy.
SEED = 20
np.random.seed(SEED)
tensorflow.random.set_seed(SEED)
# NOTE(review): TensorFlow is already imported above, so setting the log level
# here likely comes too late to silence its C++ start-up messages — confirm, and
# move this assignment ahead of the TensorFlow import if that matters.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
warnings.filterwarnings("ignore")

In [None]:
# --- Paths -------------------------------------------------------------------
CURVES_DIR = './curves'                  # one curve table per file
IMAGES_INDEX_XLSX = 'images_index.xlsx'  # index table merged with the curves on 'name'
IMAGES_ROOT = './images'                 # folder the data generators read images from
TRAIN_IMG_DIR = './train_images'         # file names found here form the training split
TEST_IMG_DIR  = './test_images'          # file names found here form the test split

# --- Curve tables -> one row per sample ('name', y0 ... y199) ------------------
y_cols = ['y' + str(i) for i in range(200)]
curve_cols = ['name'] + y_cols

curve_frames = []
for file in tqdm(os.listdir(CURVES_DIR)):
    lower_name = file.lower()
    if not lower_name.endswith(('.xlsx', '.xls', '.csv')):
        continue
    curve_path = os.path.join(CURVES_DIR, file)
    # .csv files pass the extension filter but cannot be parsed by read_excel,
    # so they get their own reader.
    if lower_name.endswith('.csv'):
        raw = pd.read_csv(curve_path, index_col=0)
    else:
        raw = pd.read_excel(curve_path, index_col=0)
    # Drop the first data row, then transpose so that each original column
    # becomes one row whose first field is the column header (the sample name).
    tmp = raw.iloc[1:, ].T.reset_index()
    tmp.columns = curve_cols
    curve_frames.append(tmp)

# Concatenate once instead of growing the frame inside the loop.
if curve_frames:
    labels = pd.concat(curve_frames, axis=0, ignore_index=True)
else:
    labels = pd.DataFrame(columns=curve_cols)

labels2 = pd.read_excel(IMAGES_INDEX_XLSX)
labels = pd.merge(labels2, labels, how='inner', on='name')

# --- Target standardisation --------------------------------------------------
# Fit the scaler on training rows only so that test-set statistics do not leak
# into the target normalisation; every row is then transformed with it.
train_files = {f for f in os.listdir(TRAIN_IMG_DIR)
               if os.path.isfile(os.path.join(TRAIN_IMG_DIR, f))}
is_train_row = labels['name'].isin(train_files)

ss = StandardScaler()
# Fall back to all rows when no row matches a training file name.
fit_rows = labels.loc[is_train_row, y_cols] if is_train_row.any() else labels[y_cols]
ss.fit(fit_rows)
labels[y_cols] = ss.transform(labels[y_cols])
joblib.dump(ss, 'ss.pkl')


def list_names_from_dir(d):
    """Return the set of names of the regular files located directly inside ``d``.

    Entries that are not files (for example sub-directories) are left out.
    """
    names = set()
    for entry in os.listdir(d):
        if os.path.isfile(os.path.join(d, entry)):
            names.add(entry)
    return names

# The file names present in each split folder decide which labelled rows
# belong to the training set and which to the test set.
train_names = list_names_from_dir(TRAIN_IMG_DIR)
test_names  = list_names_from_dir(TEST_IMG_DIR)

in_train = labels['name'].isin(train_names)
in_test = labels['name'].isin(test_names)
train_csv = labels.loc[in_train].reset_index(drop=True)
test_csv  = labels.loc[in_test].reset_index(drop=True)

print(f"Train samples: {len(train_csv)}, Test samples: {len(test_csv)}")


# Preview: overlay up to the first ten label curves (columns y0..y199 as
# currently stored in `labels`) on a single figure.
plt.figure(figsize=(12, 4), dpi=120)
first_y_pos = labels.columns.get_loc('y0')
last_y_pos = labels.columns.get_loc('y199')
x_axis = range(200)
for row in range(min(10, len(labels))):
    curve = labels.iloc[row, first_y_pos:last_y_pos + 1].values
    sns.lineplot(x=x_axis, y=curve)
plt.show()

In [None]:
class CustomDataGenerator(Sequence):
    """Keras ``Sequence`` yielding ``([images, scalar_input], targets)`` batches.

    Every row of ``csv_file`` provides an image file name (column ``'name'``),
    one scalar model input (column ``label_list[0]``) and the regression
    targets (columns ``label_list[1:]``).

    Args:
        csv_file: DataFrame with a ``'name'`` column plus every column named
            in ``label_list``.
        directory: Folder the image file names are resolved against.
        batch_size: Number of rows per batch.
        target_size: Image size handed to ``load_img``.
        label_list: Column names; the first is the scalar input, the rest
            are the targets.
        shuffle: Reshuffle the row order whenever ``on_epoch_end`` runs.
        augment: Apply a random ``ImageDataGenerator`` transform to each image.
        drop_remainder: If true, a trailing batch smaller than ``batch_size``
            is discarded. The default keeps it, so every row is served once
            per epoch and outputs line up one-to-one with ``csv_file``.
    """

    def __init__(self, csv_file, directory, batch_size, target_size, label_list,
                 shuffle=True, augment=False, drop_remainder=False):
        # Let the Keras base class set up its own state (newer Keras versions
        # warn when a dataset subclass skips this call).
        super().__init__()
        self.csv_file = csv_file
        self.directory = directory
        self.batch_size = batch_size
        self.target_size = target_size
        self.label_list = label_list
        self.shuffle = shuffle
        self.augment = augment
        self.drop_remainder = drop_remainder

        if self.augment:
            self.idg = ImageDataGenerator(
                rotation_range=10,
                width_shift_range=0.1,
                height_shift_range=0.1,
                shear_range=0.05,
                zoom_range=0.1,
                horizontal_flip=True,
                fill_mode='nearest'
            )
        else:
            self.idg = None

        # Build the initial (optionally shuffled) row order.
        self.on_epoch_end()

    def __len__(self):
        """Number of batches per epoch."""
        n_rows = len(self.csv_file)
        if self.drop_remainder:
            return n_rows // self.batch_size
        # Ceiling division: the last batch may hold fewer than batch_size rows.
        return (n_rows + self.batch_size - 1) // self.batch_size

    def __getitem__(self, index):
        """Return batch number ``index`` as ``([images, scalar_input], targets)``."""
        indexes = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]
        batch = [self.csv_file.iloc[k] for k in indexes]
        X, y = self.__data_generation(batch)
        return X, y

    def on_epoch_end(self):
        """Reset the row order, shuffling it when ``shuffle`` is enabled."""
        self.indexes = np.arange(len(self.csv_file))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __data_generation(self, batch):
        """Load, scale and optionally augment the images of ``batch``."""
        # Size the arrays by the actual number of rows: the final batch can be
        # shorter, and np.empty would otherwise leave uninitialised rows behind.
        n_rows = len(batch)
        X1 = np.empty((n_rows, *self.target_size, 3), dtype=np.float32)
        X2 = np.empty((n_rows, 1), dtype=np.float32)
        y  = np.empty((n_rows, len(self.label_list) - 1), dtype=np.float32)

        for i, data in enumerate(batch):
            img_path = os.path.join(self.directory, data['name'])
            image = load_img(img_path, target_size=self.target_size)
            arr = img_to_array(image) / 255.0  # divide pixel values by 255

            if self.augment and self.idg is not None:
                arr = self.idg.random_transform(arr)

            X1[i] = arr
            X2[i, 0] = data[self.label_list[0]]
            y[i] = data[self.label_list[1:]]

        return [X1, X2], y


# Column layout consumed by the generators: the scalar model input first,
# followed by the target columns.
label_list = ['porosity', *y_cols]
batch_size = 16
target_size = (256, 256)

# Training batches are shuffled and augmented; test batches keep the row order
# of `test_csv` and use the un-augmented images.
train_generator = CustomDataGenerator(
    csv_file=train_csv,
    directory=IMAGES_ROOT,
    batch_size=batch_size,
    target_size=target_size,
    label_list=label_list,
    shuffle=True,
    augment=True,
)
test_generator = CustomDataGenerator(
    csv_file=test_csv,
    directory=IMAGES_ROOT,
    batch_size=batch_size,
    target_size=target_size,
    label_list=label_list,
    shuffle=False,
    augment=False,
)

In [None]:
def stack_y(gen):
    """Join the target arrays of every batch in ``gen`` along axis 0.

    ``gen`` must support ``len()`` and integer indexing, and each item must be
    an ``(inputs, targets)`` pair; the inputs are ignored.
    """
    def _batch_targets():
        for batch_idx in range(len(gen)):
            _inputs, batch_y = gen[batch_idx]
            yield batch_y

    return np.concatenate(list(_batch_targets()), axis=0)

# Materialise the targets served by both generators as plain arrays.
y_train, y_test = stack_y(train_generator), stack_y(test_generator)
print("y_train.shape, y_test.shape =", y_train.shape, y_test.shape)


def plot_model_history(model_history):
    """Plot train/validation MSE and loss per epoch, save the figure and show it.

    Args:
        model_history: Object whose ``history`` mapping holds the per-epoch
            sequences ``'mse'``, ``'val_mse'``, ``'loss'`` and ``'val_loss'``.

    The figure is written to ``curve.jpg`` at 600 dpi before it is displayed.
    """
    records = model_history.history
    fig, axs = plt.subplots(1, 2, figsize=(10, 3), dpi=120)

    # (axes, history key, panel title, y-axis label) for each of the two panels.
    panels = (
        (axs[0], 'mse', 'Model MSE', 'mse'),
        (axs[1], 'loss', 'Model Loss', 'Loss'),
    )
    for ax, key, title, ylabel in panels:
        for series in (records[key], records['val_' + key]):
            ax.plot(range(1, len(series) + 1), series)  # epochs are numbered from 1
        ax.set_title(title)
        ax.set_ylabel(ylabel)
        ax.set_xlabel('Epoch')
        ax.legend(['train', 'val'], loc='best')

    fig.savefig('curve.jpg', dpi=600, bbox_inches='tight')
    plt.show()

In [None]:
def build_model(hp):
    """Build and compile the two-input regression model for one tuner trial.

    An ImageNet-initialised Xception backbone (without its top) processes the
    256x256x3 image; its flattened output is concatenated with the scalar
    porosity input and passed through two tunable ``Dense`` layers before a
    200-unit linear output.

    Args:
        hp: Keras Tuner hyperparameter container. Registers ``units_{i}`` and
            ``activation_{i}`` for ``i`` in ``{0, 1}`` plus ``learning_rate``.

    Returns:
        The model compiled with Adam, MSE loss and an MSE metric.
    """
    image_in = Input(shape=(256, 256, 3))
    porosity_in = Input(shape=(1,))  # porosity

    backbone = tf.keras.applications.Xception(
        weights='imagenet', include_top=False, input_shape=(256, 256, 3)
    )
    image_features = backbone(image_in)
    image_features = Flatten()(image_features)
    hidden = Concatenate()([image_features, porosity_in])

    # Two fully connected layers whose width and activation are searched.
    for layer_idx in range(2):
        n_units = hp.Int(f'units_{layer_idx}', min_value=128, max_value=512, step=16)
        act_name = hp.Choice(f'activation_{layer_idx}', values=['relu', 'tanh'])
        hidden = Dense(units=n_units, activation=act_name)(hidden)

    prediction = Dense(200)(hidden)
    model = Model(inputs=[image_in, porosity_in], outputs=prediction)

    lr = hp.Float('learning_rate', min_value=1e-4, max_value=1e-3, sampling='LOG')
    model.compile(optimizer=Adam(learning_rate=lr), loss='mse', metrics=['mse'])
    return model


# Random search over the hyperparameters registered by build_model; trials are
# ranked by validation loss and stored under my_dir/helloworld.
tuner = kt.RandomSearch(
    hypermodel=build_model,
    objective='val_loss',
    max_trials=100,
    executions_per_trial=1,
    directory='my_dir',
    project_name='helloworld',
)

# Stop a run after 3 epochs without val_loss improvement and roll back to the
# best weights seen.
stop_early = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True,
)

# NOTE(review): the test generator doubles as validation data here, so the
# selected hyperparameters are tuned against the test set — confirm this is
# intended or hold out a separate validation split.
tuner.search(
    train_generator,
    epochs=10,
    validation_data=test_generator,
    callbacks=[stop_early],
)

best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
print(f"Best learning rate: {best_hps.get('learning_rate')}")
for i in range(2):
    layer_units = best_hps.get(f'units_{i}')
    layer_act = best_hps.get(f'activation_{i}')
    print(f"Layer {i+1}: units={layer_units}, act={layer_act}")


In [None]:
# Rebuild the model with the best hyperparameters and train it from scratch.
model = tuner.hypermodel.build(best_hps)

# Keep only the weights of the epoch with the lowest validation loss on disk.
checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(
    "./best_model.h5", save_best_only=True, monitor='val_loss'
)

history = model.fit(
    train_generator,
    steps_per_epoch=len(train_generator),
    epochs=100,
    validation_data=test_generator,
    validation_steps=len(test_generator),
    callbacks=[checkpoint_cb, stop_early],
)
plot_model_history(history)

In [None]:
# One row per epoch, one column per recorded metric, with a 1-based epoch
# number as the leading column; written out as a spreadsheet.
hist_df = pd.DataFrame(history.history)
n_epochs = hist_df.shape[0]
hist_df.insert(loc=0, column='epoch', value=range(1, n_epochs + 1))

hist_df.to_excel('history_epoch_metrics.xlsx', index=False)

In [None]:
from tensorflow.keras.models import load_model

# Reload the checkpoint written during training; it is only used for
# inference, so it is loaded without compiling.
model = load_model('best_model.h5', compile=False)

y_test_pred = model.predict(test_generator, verbose=0)
# NOTE(review): train_generator is created with shuffle=True and augment=True,
# so these predictions are for shuffled, randomly transformed images — confirm
# that is what is wanted before comparing them with y_train.
y_train_pred = model.predict(train_generator, verbose=0)

# Map standardised targets and predictions back to the original curve scale.
y_test_true, y_test_pred_is = (
    ss.inverse_transform(scaled) for scaled in (y_test, y_test_pred)
)

In [None]:
# Number of test rows that actually went through the generator. This can be
# smaller than len(test_csv) when the generator drops a trailing partial batch;
# the test generator is unshuffled, so the evaluated rows are the leading ones.
n_eval = y_test_true.shape[0]
eval_names = test_csv['name'].to_numpy()[:n_eval]

# Per-sample R² between the true and the predicted curve.
r2_list = [
    round(metrics.r2_score(true_row, pred_row), 4)
    for true_row, pred_row in zip(y_test_true, y_test_pred_is)
]

# Per-sample ratio of the areas under the predicted and the true curve
# (trapezoidal rule, unit spacing); NaN where the true area is exactly zero.
# np.trapz is deprecated in NumPy 2 in favour of np.trapezoid.
_trapezoid = np.trapezoid if hasattr(np, 'trapezoid') else np.trapz
area_true = np.asarray(_trapezoid(y_test_true, dx=1.0, axis=1), dtype=float)
area_pred = np.asarray(_trapezoid(y_test_pred_is, dx=1.0, axis=1), dtype=float)
area_ratio = np.divide(
    area_pred, area_true,
    out=np.full(area_true.shape, np.nan, dtype=float),
    where=area_true != 0,
)
area_list = area_ratio.tolist()

# Relative RMSE per sample; |y_true| is clipped at eps to avoid dividing by zero.
eps = 1e-8
denom = np.maximum(np.abs(y_test_true), eps)              # shape: (N,200)
rel_rmse_vec = np.sqrt(np.mean(((y_test_pred_is - y_test_true) / denom) ** 2, axis=1))
rel_rmse_mean = float(np.nanmean(rel_rmse_vec))

df_metrics = pd.DataFrame({
    'name': eval_names,
    'R2': r2_list,
    'AreaRatio': area_list,
    'RelRMSE': rel_rmse_vec
})
df_metrics.to_excel('test_metrics_R2_Area_RelRMSE.xlsx', index=False)

print("Test R² mean:", np.nanmean(r2_list))
print("Test AreaRatio mean:", np.nanmean(area_list))
print("Test RelRMSE mean:", rel_rmse_mean)

# Raw true and predicted curves, one row per evaluated sample, tagged by name.
pd.DataFrame(y_test_true).assign(name=eval_names).to_excel('Test-test.xlsx', index=False)
pd.DataFrame(y_test_pred_is).assign(name=eval_names).to_excel('Pred-test.xlsx', index=False)


In [None]:
import seaborn as sns

# One figure per sample (at most ten): true curve against predicted curve,
# with the sample's R² and relative RMSE in the title.
k = min(10, y_test_true.shape[0])
x_axis = range(200)
for i in range(k):
    fig, ax = plt.subplots(figsize=(8, 3), dpi=120)
    sns.lineplot(x=x_axis, y=y_test_true[i, :], label='True', ax=ax)
    sns.lineplot(x=x_axis, y=y_test_pred_is[i, :], label='Pred', ax=ax)
    ax.set_title(f"{test_csv.iloc[i]['name']}  |  R²={r2_list[i]:.3f}, RelRMSE={rel_rmse_vec[i]:.3f}")
    ax.legend()
    fig.tight_layout()
    plt.show()