In [None]:
# Install Kaggle Hub library if not installed
!pip install kagglehub

import kagglehub

# Download latest version of DocUNet Dataset
path_docunet = kagglehub.dataset_download("minhbithun/docunet-dataset")
print("Path to DocUNet dataset files:", path_docunet)

# Download latest version of SROIE Dataset v2
path_sroie = kagglehub.dataset_download("urbikn/sroie-datasetv2")
print("Path to SROIE dataset files:", path_sroie)


In [None]:
import numpy as np
import cv2
import os
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.applications import VGG16
from sklearn.model_selection import train_test_split
from skimage.metrics import structural_similarity as ssim
from skimage.metrics import peak_signal_noise_ratio as psnr

# Load and preprocess a single image
def load_image(image_path):
    """Read an image as grayscale, resize it to 256x256 and scale it to [0, 1].

    Args:
        image_path: Path of the image file to read.

    Returns:
        A float32 array of shape (256, 256, 1): the resized grayscale image
        divided by 255, with a trailing channel axis.

    Raises:
        FileNotFoundError: If OpenCV cannot decode an image from
            `image_path` (e.g. the path is missing, is a directory, or is
            not an image file).
    """
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        # cv2.imread reports failure by returning None rather than raising;
        # fail here with the offending path instead of inside cv2.resize.
        raise FileNotFoundError(f"Could not read image: {image_path}")
    img = cv2.resize(img, (256, 256))  # standard size
    img = np.expand_dims(img, axis=-1)  # add the channel dimension
    img = img.astype("float32") / 255.0  # normalize
    return img

# Load the DocUNet and SROIE datasets.
# A dataset root may contain nested folders and non-image files (annotations,
# text, ...), so walk each tree and keep only image files instead of passing
# every directory entry to load_image. Sorting keeps the order reproducible.
IMAGE_EXTENSIONS = (".png", ".jpg", ".jpeg", ".bmp", ".tif", ".tiff")
docunet_images, sroie_images = (
    [
        load_image(os.path.join(folder, name))
        for folder, _, names in sorted(os.walk(root))
        for name in sorted(names)
        if name.lower().endswith(IMAGE_EXTENSIONS)
    ]
    for root in (path_docunet, path_sroie)
)

# Combine both datasets
images = np.array(docunet_images + sroie_images)
labels = np.array([0] * len(docunet_images) + [1] * len(sroie_images))  # e.g. 0 for DocUNet, 1 for SROIE

# Split the data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(images, labels, test_size=0.2, random_state=42)


In [None]:
import numpy as np
import cv2
import os
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt

# Load and preprocess a single image
# NOTE: this redefines the `load_image` from the earlier cell (512x512 output,
# no channel axis); whichever cell ran last determines which version is active.
def load_image(image_path):
    """Read an image as grayscale, resize it to 512x512 and scale it to [0, 1].

    Args:
        image_path: Path of the image file to read.

    Returns:
        A float32 array of shape (512, 512): the resized grayscale image
        divided by 255 (no channel axis is added).

    Raises:
        FileNotFoundError: If OpenCV cannot decode an image from
            `image_path` (e.g. the path is missing, is a directory, or is
            not an image file).
    """
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)  # read as grayscale
    if img is None:
        # cv2.imread reports failure by returning None rather than raising;
        # fail here with the offending path instead of inside cv2.resize.
        raise FileNotFoundError(f"Could not read image: {image_path}")
    img = cv2.resize(img, (512, 512))  # resize to 512x512
    img = img.astype("float32") / 255.0  # normalize to the [0, 1] range
    return img

# Image enhancement
def enhance_image(img):
    """Enhance a grayscale image with CLAHE, denoising and binarization.

    Args:
        img: Single-channel image expected to be scaled to [0, 1]; it is
            multiplied by 255 and converted to 8-bit before processing.

    Returns:
        A uint8 array in which every pixel is either 0 or 255 (the result of
        a fixed threshold at 127 on the denoised image).
    """
    # Convert to 8-bit once and stay in uint8 for every OpenCV step. Scaling
    # an intermediate result back to [0, 1] and then casting it to uint8 would
    # truncate almost every pixel to 0.
    img_u8 = np.clip(img * 255.0, 0, 255).astype(np.uint8)

    # Contrast Limited Adaptive Histogram Equalization (CLAHE)
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    img_clahe = clahe.apply(img_u8)

    # Denoising (h=10, template window 7, search window 21)
    img_denoised = cv2.fastNlMeansDenoising(img_clahe, None, 10, 7, 21)

    # Binarization (fixed threshold) to separate text from background
    _, img_binarized = cv2.threshold(img_denoised, 127, 255, cv2.THRESH_BINARY)

    return img_binarized

# Data augmentation
def augment_data(image):
    """Apply one random geometric augmentation to a single image.

    Args:
        image: A 2-D (H, W) grayscale image or a 3-D (H, W, C) image.

    Returns:
        The randomly transformed image, with the same rank as `image`.
    """
    # Configure the random transformations
    datagen = ImageDataGenerator(
        rotation_range=30,       # rotate by up to 30 degrees
        width_shift_range=0.1,   # horizontal translation
        height_shift_range=0.1,  # vertical translation
        shear_range=0.2,         # shear distortion
        zoom_range=0.2,          # scaling
        horizontal_flip=True,    # horizontal flipping
        fill_mode='nearest'      # fill uncovered areas with the nearest pixel
    )

    image = np.asarray(image)
    # random_transform works on a single rank-3 (H, W, C) image, so a 2-D
    # grayscale image gets a temporary channel axis that is dropped afterwards.
    needs_channel_axis = image.ndim == 2
    if needs_channel_axis:
        image = image[..., np.newaxis]

    augmented_image = datagen.random_transform(image)

    if needs_channel_axis:
        augmented_image = augmented_image[..., 0]
    return augmented_image

# Split a dataset into training, validation and test subsets
def split_dataset(images, test_size=0.3):
    """Partition `images` into train / validation / test subsets.

    `test_size` is the fraction held out from training; that held-out part is
    then halved into validation and test. With the default of 0.3 this gives
    a 70% / 15% / 15% split. Both splits use a fixed random_state of 42.

    Args:
        images: Collection of samples to split.
        test_size: Fraction of `images` kept out of the training subset.

    Returns:
        A tuple (train, validation, test).
    """
    seed = 42
    train_part, held_out = train_test_split(images, test_size=test_size, random_state=seed)
    val_part, test_part = train_test_split(held_out, test_size=0.5, random_state=seed)
    return train_part, val_part, test_part

# Dataset locations: `path_docunet` and `path_sroie` come from the kagglehub
# download cell at the top of the notebook. They are intentionally NOT
# reassigned here, because overwriting them with placeholder strings makes the
# directory scan below fail on a fresh "Restart & Run All". To use a local
# copy instead, assign the two variables in that first cell.

# Load every image from both datasets.
# A dataset root may contain nested folders and non-image files, so walk each
# tree and keep only image files; sorting keeps the order reproducible.
IMAGE_EXTENSIONS = (".png", ".jpg", ".jpeg", ".bmp", ".tif", ".tiff")
docunet_images, sroie_images = (
    [
        load_image(os.path.join(folder, name))
        for folder, _, names in sorted(os.walk(root))
        for name in sorted(names)
        if name.lower().endswith(IMAGE_EXTENSIONS)
    ]
    for root in (path_docunet, path_sroie)
)

# Combine the images from both datasets
images = np.array(docunet_images + sroie_images)

# Enhance the images (CLAHE, denoising, binarization)
enhanced_images = np.array([enhance_image(img) for img in images])

# Augment every image. The Keras random transform works on (H, W, C) arrays,
# so add a channel axis for the call and drop it again; the stored arrays stay
# 2-D, which is what the preview plots below expect.
augmented_images = np.array(
    [augment_data(img[..., np.newaxis])[..., 0] for img in enhanced_images]
)

# The network is trained on float images in [0, 1] with an explicit channel
# axis, while the enhanced/augmented images hold values on a 0..255 scale.
model_inputs = augmented_images[..., np.newaxis].astype("float32") / 255.0

# Split the dataset into training, validation and test subsets
X_train, X_val, X_test = split_dataset(model_inputs, test_size=0.3)

# Show a few examples of the preprocessing result.
# Bound the count by the number of images so a tiny dataset does not raise
# an IndexError.
n_examples = min(3, len(images))
for i in range(n_examples):
    fig, axes = plt.subplots(1, 3, figsize=(12, 6))

    # One panel per stage: original, after enhancement, after augmentation
    panels = (
        ("Original Image", images[i]),
        ("Enhanced Image", enhanced_images[i]),
        ("Augmented Image", augmented_images[i]),
    )
    for ax, (title, picture) in zip(axes, panels):
        ax.imshow(picture, cmap='gray')
        ax.set_title(title)

    plt.show()

# Print size of datasets
print(f"Training set size: {len(X_train)}")
print(f"Validation set size: {len(X_val)}")
print(f"Test set size: {len(X_test)}")


In [None]:
# Attention mechanism layer
class AttentionLayer(layers.Layer):
    """Gate a feature map with sigmoid weights produced by a 1x1 convolution.

    The layer computes `inputs * sigmoid(conv1x1(inputs))`.

    Args:
        channels: Number of filters of the 1x1 convolution. It should match
            the channel count of the inputs (or be broadcastable against it)
            so that the multiplication in `call` is valid.
        **kwargs: Standard Keras layer arguments (e.g. `name`, `dtype`),
            forwarded to the base class; required to rebuild the layer from
            its config.
    """

    def __init__(self, channels, **kwargs):
        super().__init__(**kwargs)
        # Kept so get_config can report it for model saving/loading.
        self.channels = channels
        self.attention = layers.Conv2D(channels, (1, 1), activation='sigmoid')

    def call(self, inputs):
        """Return `inputs` scaled element-wise by the attention weights."""
        attention_weights = self.attention(inputs)
        return inputs * attention_weights

    def get_config(self):
        """Return the constructor arguments so the layer can be serialized."""
        config = super().get_config()
        config.update({"channels": self.channels})
        return config

# Residual block
def residual_block(x, filters):
    """Two 3x3 convolutions with a skip connection around them.

    Args:
        x: Input feature map (assumes channels-last layout, i.e. the channel
            count is `x.shape[-1]`).
        filters: Number of filters of both convolutions, and therefore the
            channel count of the output.

    Returns:
        The element-wise sum of the shortcut and the convolved branch.
    """
    shortcut = x
    if x.shape[-1] != filters:
        # The addition needs matching channel counts; project the shortcut
        # with a 1x1 convolution when the block changes the width.
        shortcut = layers.Conv2D(filters, (1, 1), padding='same')(x)

    res = layers.Conv2D(filters, (3, 3), padding='same')(x)
    res = layers.ReLU()(res)
    res = layers.Conv2D(filters, (3, 3), padding='same')(res)
    return layers.add([shortcut, res])

# DocUNet-style model
def create_docunet_model(input_shape=(256, 256, 1)):
    """Build and compile a small convolutional encoder-decoder.

    The encoder halves the spatial resolution twice (two 2x2 max-poolings);
    the decoder doubles it twice (two stride-2 transposed convolutions), so
    the single-channel sigmoid output has the same height and width as the
    input. Height and width should be divisible by 4 for that to hold exactly.

    Args:
        input_shape: (height, width, channels) of the input images.

    Returns:
        A Keras model compiled with the Adam optimizer, binary cross-entropy
        loss and an accuracy metric.
    """
    inputs = layers.Input(shape=input_shape)

    # Encoder: convolution + downsampling
    x = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(inputs)
    x = layers.MaxPooling2D((2, 2))(x)

    x = residual_block(x, 64)

    # Attention mechanism
    x = AttentionLayer(64)(x)

    # Deeper convolutional stage
    x = layers.Conv2D(128, (3, 3), activation='relu', padding='same')(x)
    x = layers.MaxPooling2D((2, 2))(x)

    # Decoder: one stride-2 upsampling per pooling step above, so the output
    # returns to the input resolution and can be compared with same-size
    # targets.
    x = layers.Conv2DTranspose(64, (3, 3), strides=2, activation='relu', padding='same')(x)
    x = layers.Conv2DTranspose(32, (3, 3), strides=2, activation='relu', padding='same')(x)
    x = layers.Conv2D(1, (3, 3), activation='sigmoid', padding='same')(x)

    model = models.Model(inputs, x)
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    return model

# Build the model for the image size the preprocessing actually produced:
# take height and width from the training array and use a single channel,
# instead of relying on the 256x256 default.
model = create_docunet_model(input_shape=(*X_train.shape[1:3], 1))
model.summary()


In [None]:
# Train the model as an autoencoder (the targets are the inputs themselves).
# Validate on the dedicated validation split so the test split stays unseen
# until the final evaluation.
history = model.fit(X_train, X_train, epochs=10, batch_size=16, validation_data=(X_val, X_val))


In [None]:
# Compute SSIM and PSNR for one image pair
def evaluate_metrics(y_true, y_pred, data_range=1.0):
    """Return (SSIM, PSNR) between a reference image and a prediction.

    Args:
        y_true: Reference image, (H, W) or (H, W, 1).
        y_pred: Predicted image with the same layout as `y_true`.
        data_range: Distance between the largest and smallest possible pixel
            value. Defaults to 1.0, i.e. images normalized to [0, 1].

    Returns:
        A tuple (ssim_score, psnr_score).
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    # Drop a trailing singleton channel axis. Otherwise SSIM would treat it as
    # a third spatial dimension of size 1, which is smaller than its window.
    if y_true.ndim == 3 and y_true.shape[-1] == 1:
        y_true = y_true[..., 0]
    if y_pred.ndim == 3 and y_pred.shape[-1] == 1:
        y_pred = y_pred[..., 0]

    # Use the same, fixed value range for both metrics. The spread of one
    # particular prediction is not the range of the data, and is zero for a
    # constant prediction.
    ssim_score = ssim(y_true, y_pred, data_range=data_range)
    psnr_score = psnr(y_true, y_pred, data_range=data_range)
    return ssim_score, psnr_score

# Run the model on the test data
y_pred = model.predict(X_test)

# Score every (reference, reconstruction) pair with SSIM and PSNR
pair_scores = [
    evaluate_metrics(reference, reconstruction)
    for reference, reconstruction in zip(X_test, y_pred)
]
ssim_scores = [pair[0] for pair in pair_scores]
psnr_scores = [pair[1] for pair in pair_scores]

# Average SSIM and PSNR over the test set
average_ssim = np.mean(ssim_scores)
average_psnr = np.mean(psnr_scores)

print("Average SSIM:", average_ssim)
print("Average PSNR:", average_psnr)


In [None]:
# Save the model
model.save('docunet_model.h5')

# Show the reconstruction for a few test images
import matplotlib.pyplot as plt

# Bound the count by the test-set size so a small split does not raise an
# IndexError.
n_examples = min(5, len(X_test))
for i in range(n_examples):
    fig, (ax_original, ax_reconstructed) = plt.subplots(1, 2, figsize=(10, 5))

    # squeeze() drops the singleton channel axis so imshow gets a 2-D image
    ax_original.imshow(X_test[i].squeeze(), cmap='gray')
    ax_original.set_title("Original Image")

    ax_reconstructed.imshow(y_pred[i].squeeze(), cmap='gray')
    ax_reconstructed.set_title("Reconstructed Image")

    plt.show()
