In [None]:
# https://www.kaggle.com/code/tanh2k2k/vietnam-history-image-colorization

In [None]:
import os
# Set in its own cell so the variable is already in the environment when
# TensorFlow is imported in the next cell.
# NOTE(review): presumably this makes tf.keras resolve to the legacy (Keras 2)
# implementation so the saved models loaded later deserialize — confirm
# against the TensorFlow version in use.
os.environ["TF_USE_LEGACY_KERAS"] = "1"

In [None]:
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications import VGG16, ResNet50
from tensorflow.keras.models import Model
from tqdm import tqdm
import pandas as pd
from skimage.metrics import peak_signal_noise_ratio, structural_similarity

In [None]:
# Restrict this process to CUDA device 0.
os.environ['CUDA_VISIBLE_DEVICES'] = "0"

gpus = tf.config.list_physical_devices('GPU')
try:
    if gpus:
        # Replace the first GPU with a single logical device whose memory is
        # capped (memory_limit=3500; megabytes per the TensorFlow docs — confirm).
        tf.config.set_logical_device_configuration(
            gpus[0],
            [tf.config.LogicalDeviceConfiguration(memory_limit=3500)],
        )
except RuntimeError as gpu_config_error:
    # Best-effort: report the problem and continue with default settings.
    print(gpu_config_error)

In [None]:
# Module-level placeholders, initially empty.
# NOTE(review): the same two names are rebound further down the notebook to the
# loaded colorization models, so from that point on they no longer hold None.
vgg_model = None
resnet_model = None

def mse_loss(y_true, y_pred):
    """Mean squared error between ``y_true`` and ``y_pred``.

    The squared element-wise difference is averaged over every element, so the
    result is a single scalar tensor.
    """
    residual = y_true - y_pred
    squared_residual = tf.square(residual)
    return tf.reduce_mean(squared_residual)

def build_vgg_model(layer_names):
    """Return a frozen VGG16 feature extractor exposing the requested layers.

    The extractor is built once per distinct set of layer names and reused on
    later calls. The cache is stored on the function itself rather than in the
    module-level ``vgg_model`` name, because that name is rebound elsewhere in
    the notebook to a loaded colorization model; sharing it would make this
    function hand back the wrong network.

    Args:
        layer_names: Iterable of VGG16 layer names whose outputs the returned
            model should produce, in order.

    Returns:
        A Keras ``Model`` mapping the VGG16 input to the list of outputs of the
        named layers. The underlying VGG16 has ``trainable = False``.

    Raises:
        ValueError: Propagated from ``get_layer`` if a name does not exist.
    """
    cache = build_vgg_model.__dict__.setdefault("_cache", {})
    key = tuple(layer_names)
    if key not in cache:
        vgg = VGG16(weights="imagenet", include_top=False)
        vgg.trainable = False
        outputs = [vgg.get_layer(name).output for name in key]
        cache[key] = Model(inputs=vgg.input, outputs=outputs)
    return cache[key]

def build_resnet_model(layer_names):
    """Return a frozen ResNet50 feature extractor exposing the requested layers.

    The extractor is built once per distinct set of layer names and reused on
    later calls. The cache is stored on the function itself rather than in the
    module-level ``resnet_model`` name, because that name is rebound elsewhere
    in the notebook to a loaded colorization model; sharing it would make this
    function hand back the wrong network.

    Args:
        layer_names: Iterable of ResNet50 layer names whose outputs the
            returned model should produce, in order.

    Returns:
        A Keras ``Model`` mapping the ResNet50 input to the list of outputs of
        the named layers. The underlying ResNet50 has ``trainable = False``.

    Raises:
        ValueError: Propagated from ``get_layer`` if a name does not exist.
    """
    cache = build_resnet_model.__dict__.setdefault("_cache", {})
    key = tuple(layer_names)
    if key not in cache:
        resnet = ResNet50(weights="imagenet", include_top=False)
        resnet.trainable = False
        outputs = [resnet.get_layer(name).output for name in key]
        cache[key] = Model(inputs=resnet.input, outputs=outputs)
    return cache[key]

def perceptual_loss_vgg(y_true, y_pred):
    """Perceptual loss measured in VGG16 feature space.

    Both inputs get an all-zero channel prepended on the last axis, are passed
    through the VGG16 feature extractor, and the mean squared feature
    difference of each tapped layer is summed into one scalar.
    """
    feature_extractor = build_vgg_model(
        ["block1_conv2", "block2_conv2", "block3_conv3"]
    )

    def _prepend_zero_channel(batch):
        # Presumably pads the 2-channel ab map up to the 3 channels VGG16
        # expects — confirm against the training pipeline.
        blank = tf.zeros_like(batch[:, :, :, :1])
        return tf.concat([blank, batch], axis=-1)

    true_feats = feature_extractor(_prepend_zero_channel(y_true))
    pred_feats = feature_extractor(_prepend_zero_channel(y_pred))

    per_layer_mse = [
        tf.reduce_mean(tf.square(feat_true - feat_pred))
        for feat_true, feat_pred in zip(true_feats, pred_feats)
    ]
    return tf.reduce_sum(per_layer_mse)

def perceptual_loss_resnet(y_true, y_pred):
    """Perceptual loss measured in ResNet50 feature space.

    Both inputs get an all-zero channel prepended on the last axis, are passed
    through the ResNet50 feature extractor, and the mean squared feature
    difference of each tapped layer is summed into one scalar.
    """
    feature_extractor = build_resnet_model(
        ["conv1_relu", "conv2_block3_out", "conv3_block4_out"]
    )

    def _prepend_zero_channel(batch):
        # Presumably pads the 2-channel ab map up to the 3 channels ResNet50
        # expects — confirm against the training pipeline.
        blank = tf.zeros_like(batch[:, :, :, :1])
        return tf.concat([blank, batch], axis=-1)

    true_feats = feature_extractor(_prepend_zero_channel(y_true))
    pred_feats = feature_extractor(_prepend_zero_channel(y_pred))

    per_layer_mse = [
        tf.reduce_mean(tf.square(feat_true - feat_pred))
        for feat_true, feat_pred in zip(true_feats, pred_feats)
    ]
    return tf.reduce_sum(per_layer_mse)

def preprocess_image(image_path, target_size=(224, 224)):
    """Load an image and split it into normalised LAB inputs/targets.

    Args:
        image_path: Path of the image to read with OpenCV.
        target_size: ``(width, height)`` passed to ``cv2.resize`` for the
            network-sized channels.

    Returns:
        A 4-tuple ``(L_resized, ab_resized, L, original_size)``:
        * ``L_resized`` — resized L channel as float32 divided by 255, with a
          trailing channel axis added.
        * ``ab_resized`` — resized A and B channels as float32, shifted by 128
          and divided by 128, stacked on the last axis.
        * ``L`` — the full-resolution L channel exactly as OpenCV produced it.
        * ``original_size`` — ``(height, width)`` of the source image.

    Raises:
        FileNotFoundError: If OpenCV could not read or decode ``image_path``.
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(f"Could not read image: {image_path!r}")

    img = cv2.cvtColor(img, cv2.COLOR_BGR2LAB)
    original_size = img.shape[:2]
    L, A, B = cv2.split(img)

    L_resized = cv2.resize(L, target_size).astype("float32") / 255.0
    A_resized = (cv2.resize(A, target_size).astype("float32") - 128) / 128.0
    B_resized = (cv2.resize(B, target_size).astype("float32") - 128) / 128.0

    # Add the channel axis from the actual array shape so that any target_size
    # works, not only the 224x224 default.
    return L_resized[..., np.newaxis], np.stack([A_resized, B_resized], axis=-1), L, original_size

def colorize_image(model, image_path):
    """Colorize the image at ``image_path`` with ``model`` and return it as RGB.

    The image's L channel is resized and fed to ``model.predict``; the
    predicted ab channels are de-normalised, resized back to the source
    resolution and merged with the original full-resolution L channel.

    Args:
        model: Object whose ``predict`` maps a ``(1, H, W, 1)`` L batch to a
            batch of two-channel ab maps normalised as ``(x - 128) / 128``.
        image_path: Path of the image to colorize.

    Returns:
        The colorized image as a uint8 RGB array at the source resolution.
    """
    L_resized, _, L_original, original_size = preprocess_image(image_path)
    L_input = np.expand_dims(L_resized, axis=0)

    ab_pred = model.predict(L_input)[0]
    # Undo the (x - 128) / 128 normalisation. Clip before casting: a prediction
    # of 1.0 maps to 256 (and anything below -1.0 maps under 0), which would
    # otherwise wrap around in uint8 and produce wrong colours.
    ab_pred = np.clip(ab_pred * 128 + 128, 0, 255).astype("uint8")

    # cv2.resize takes (width, height); original_size is (height, width).
    ab_pred_resized = cv2.resize(ab_pred, (original_size[1], original_size[0]), interpolation=cv2.INTER_CUBIC)

    colorized_img = cv2.merge([L_original, ab_pred_resized[:, :, 0], ab_pred_resized[:, :, 1]])
    colorized_img = cv2.cvtColor(colorized_img, cv2.COLOR_LAB2BGR)
    colorized_img = cv2.cvtColor(colorized_img, cv2.COLOR_BGR2RGB)

    return colorized_img

In [None]:
# Load the three trained colorization models. Each call supplies, through
# custom_objects, the loss function registered under the name given as the key.
# NOTE(review): `vgg_model` and `resnet_model` rebind the module-level names
# that were initialised to None earlier in the notebook.
mse_model = tf.keras.models.load_model(
    "./models/colorization_model_mse_final",
    custom_objects={"mse_loss": mse_loss},
)
vgg_model = tf.keras.models.load_model(
    "./models/colorization_model_perceptual_final",
    custom_objects={"perceptual_loss": perceptual_loss_vgg},
)
resnet_model = tf.keras.models.load_model(
    "./models/colorization_model_perceptual_resnet_final",
    custom_objects={"perceptual_loss": perceptual_loss_resnet},
)

In [None]:
def evaluate_mae(original_path, predicted_image):
    """Mean absolute error between a reference image and a prediction.

    The reference is read from ``original_path`` and converted from BGR to RGB;
    ``predicted_image`` is resized to the reference's width and height. Both
    are cast to float32 and the absolute difference is averaged over all
    pixels and channels.
    """
    reference_rgb = cv2.cvtColor(cv2.imread(original_path), cv2.COLOR_BGR2RGB)
    height, width = reference_rgb.shape[:2]
    candidate = cv2.resize(predicted_image, (width, height))

    abs_error = np.abs(reference_rgb.astype("float32") - candidate.astype("float32"))
    return np.mean(abs_error)

In [None]:
def evaluate_psnr(original_path, predicted_image):
    """Peak signal-to-noise ratio between a reference image and a prediction.

    The reference is read from ``original_path`` and converted from BGR to RGB;
    ``predicted_image`` is resized to the reference's width and height. PSNR
    is computed with a data range of 255.
    """
    reference_rgb = cv2.cvtColor(cv2.imread(original_path), cv2.COLOR_BGR2RGB)
    height, width = reference_rgb.shape[:2]
    candidate = cv2.resize(predicted_image, (width, height))

    return peak_signal_noise_ratio(reference_rgb, candidate, data_range=255)

In [None]:
def evaluate_ssim(original_path, predicted_image):
    """Structural similarity between a reference image and a prediction.

    The reference is read from ``original_path`` and converted from BGR to RGB;
    ``predicted_image`` is resized to the reference's width and height. SSIM
    is computed with axis 2 treated as the channel axis.
    """
    reference_rgb = cv2.cvtColor(cv2.imread(original_path), cv2.COLOR_BGR2RGB)
    height, width = reference_rgb.shape[:2]
    candidate = cv2.resize(predicted_image, (width, height))

    return structural_similarity(reference_rgb, candidate, channel_axis=2)

In [None]:
# Collect every ".jpg" file in the test folder, as sorted full paths.
image_folder = "./DATASET_TEST"
image_list = sorted(
    os.path.join(image_folder, entry)
    for entry in os.listdir(image_folder)
    if entry.endswith(".jpg")
)

In [None]:
# Per-image MAE for each of the three colorization models.
results = []

# Tag -> model; the tag becomes the suffix of the CSV column name.
models_by_tag = {"mse": mse_model, "vgg": vgg_model, "resnet": resnet_model}

for image_path in tqdm(image_list):
    row = {"filename": os.path.basename(image_path)}
    for tag, model in models_by_tag.items():
        row[f"mae_{tag}"] = evaluate_mae(image_path, colorize_image(model, image_path))
    results.append(row)

df = pd.DataFrame(results)

# Write into ./evaluation/, which is where the summary cell at the end of the
# notebook reads mae_metrics.csv from; create the folder on a fresh run.
os.makedirs("./evaluation", exist_ok=True)
df.to_csv("./evaluation/mae_metrics.csv", index=False)

In [None]:
# Per-image SSIM for each of the three colorization models.
results = []

# Tag -> model; the tag becomes the suffix of the CSV column name.
models_by_tag = {"mse": mse_model, "vgg": vgg_model, "resnet": resnet_model}

for image_path in tqdm(image_list):
    row = {"filename": os.path.basename(image_path)}
    for tag, model in models_by_tag.items():
        row[f"ssim_{tag}"] = evaluate_ssim(image_path, colorize_image(model, image_path))
    results.append(row)

df = pd.DataFrame(results)

# Write into ./evaluation/, which is where the summary cell at the end of the
# notebook reads ssim_metrics.csv from; create the folder on a fresh run.
os.makedirs("./evaluation", exist_ok=True)
df.to_csv("./evaluation/ssim_metrics.csv", index=False)

In [None]:
# Per-image PSNR for each of the three colorization models.
results = []

# Tag -> model; the tag becomes the suffix of the CSV column name.
models_by_tag = {"mse": mse_model, "vgg": vgg_model, "resnet": resnet_model}

for image_path in tqdm(image_list):
    row = {"filename": os.path.basename(image_path)}
    for tag, model in models_by_tag.items():
        row[f"psnr_{tag}"] = evaluate_psnr(image_path, colorize_image(model, image_path))
    results.append(row)

df = pd.DataFrame(results)

# Write into ./evaluation/, which is where the summary cell at the end of the
# notebook reads psnr_metrics.csv from; create the folder on a fresh run.
os.makedirs("./evaluation", exist_ok=True)
df.to_csv("./evaluation/psnr_metrics.csv", index=False)

In [1]:
import pandas as pd
from IPython.display import display

# Read the per-image metric files
mae_df = pd.read_csv("./evaluation/mae_metrics.csv")
psnr_df = pd.read_csv("./evaluation/psnr_metrics.csv")
ssim_df = pd.read_csv("./evaluation/ssim_metrics.csv")

# Average every metric over all images; each Series keeps the
# MSE / VGG / ResNet column order used in the selection.
mae_mean = mae_df[["mae_mse", "mae_vgg", "mae_resnet"]].mean()
psnr_mean = psnr_df[["psnr_mse", "psnr_vgg", "psnr_resnet"]].mean()
ssim_mean = ssim_df[["ssim_mse", "ssim_vgg", "ssim_resnet"]].mean()

# Build the results table: one row per model, one column per metric
result_df = pd.DataFrame(
    {
        "MAE": mae_mean.tolist(),
        "PSNR": psnr_mean.tolist(),
        "SSIM": ssim_mean.tolist(),
    },
    index=["MSE", "Perceptual VGG", "Perceptual ResNet"],
)

# Show the table
display(result_df.round(4))

Unnamed: 0,MAE,PSNR,SSIM
MSE,5.5189,30.3006,0.9555
Perceptual VGG,6.0318,29.5798,0.9533
Perceptual ResNet,5.7462,29.9902,0.9513
