Metric  

In [None]:
import numpy as np

def compute_rmse(img1, img2):
    """Return the root-mean-square error between two images.

    Both inputs are converted to float64 before subtracting, so unsigned
    integer images (e.g. raw uint8 pixels) cannot wrap around and float32
    inputs are accumulated in double precision.

    Args:
        img1: Array-like of pixel values.
        img2: Array-like broadcastable against ``img1``.

    Returns:
        The RMSE as a float64 scalar.
    """
    diff = np.asarray(img1, dtype=np.float64) - np.asarray(img2, dtype=np.float64)
    return np.sqrt(np.mean(np.square(diff)))

Unarchive the dataset

In [None]:
from zipfile import ZipFile
import os

# Upload zip manually or via Google Drive, then extract it with the standard
# library. Unlike the `unzip` shell command, ZipFile.extractall never stops to
# ask whether existing files should be overwritten, so this cell can be re-run.
with ZipFile('/content/archive.zip') as archive:
    archive.extractall('/content/fingerprints/')

# Preview a few image paths
import glob
# glob returns paths in arbitrary filesystem order; sorting keeps the image
# order (and therefore any seeded split made from it) stable across runs.
image_paths = sorted(glob.glob('/content/fingerprints/**/*.tif', recursive=True))  # Adjust extension if needed
print("Total images:", len(image_paths))

Preprocess images

In [None]:
import cv2
import numpy as np

def load_images(paths, size=(224, 224)):
    """Load grayscale images, resize them and scale pixels to [0, 1].

    Args:
        paths: Iterable of image file paths.
        size: Target ``(width, height)`` handed to ``cv2.resize``.

    Returns:
        float32 array of shape ``(N, height, width, 1)`` where ``N`` is the
        number of files that could be decoded. When nothing could be loaded
        the result still has four dimensions, with ``N == 0``.
    """
    import warnings

    images = []
    skipped = []
    for path in paths:
        img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        if img is None:
            # cv2.imread signals an unreadable file by returning None.
            skipped.append(path)
            continue
        img = cv2.resize(img, size)
        images.append(img.astype('float32') / 255.0)

    if skipped:
        # Keep the best-effort behaviour, but make the data loss visible.
        warnings.warn(
            f"load_images skipped {len(skipped)} unreadable file(s), e.g. {skipped[0]!r}"
        )

    if not images:
        # np.array([]) would be 1-D; return an empty batch with the usual rank.
        width, height = size
        return np.zeros((0, height, width, 1), dtype='float32')

    # Stack to (N, H, W), then append the trailing channel axis.
    return np.expand_dims(np.array(images), axis=-1)

# Decode every discovered fingerprint and report the resulting array shape.
clean_images = load_images(paths=image_paths)
print("Clean image shape:", np.shape(clean_images))

Add noise

In [None]:
def add_noise(images, noise_factor=0.3, seed=None):
    """Corrupt images with additive Gaussian noise, clipped to [0, 1].

    Args:
        images: Array of pixel intensities (the result is clipped to [0, 1]).
        noise_factor: Multiplier applied to the standard-normal noise.
        seed: Optional seed for reproducible noise. ``None`` (default) draws
            from NumPy's global random state.

    Returns:
        Array of the same shape as ``images``. Floating-point inputs keep
        their dtype (float32 in, float32 out); other inputs yield float64.
    """
    images = np.asarray(images)
    # Both the legacy module and a Generator expose normal(loc, scale, size).
    rng = np.random if seed is None else np.random.default_rng(seed)
    noise = rng.normal(loc=0.0, scale=1.0, size=images.shape)
    noisy = np.clip(images + noise_factor * noise, 0., 1.)
    # The noise is float64; cast back so float32 images are not silently doubled in size.
    out_dtype = images.dtype if np.issubdtype(images.dtype, np.floating) else np.float64
    return noisy.astype(out_dtype, copy=False)

# Noisy counterpart of every clean image, using the default noise strength.
noisy_images = add_noise(clean_images, noise_factor=0.3)

Train/validation split

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 10% of the (noisy, clean) pairs for validation. Passing both arrays
# to a single call keeps each noisy image aligned with its clean target.
train_X, valid_X, train_Y, valid_Y = train_test_split(
    noisy_images,
    clean_images,
    test_size=0.1,
    random_state=42,
)

Convolutional autoencoder

In [None]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, UpSampling2D

# Single-channel 224x224 input.
input_img = Input(shape=(224, 224, 1))

# Encoder: two conv + 2x2 max-pool stages; each pool halves height and width.
x = Conv2D(filters=32, kernel_size=3, padding='same', activation='relu')(input_img)
x = MaxPooling2D(pool_size=2, padding='same')(x)
x = Conv2D(filters=64, kernel_size=3, padding='same', activation='relu')(x)
encoded = MaxPooling2D(pool_size=2, padding='same')(x)

# Decoder: mirrors the encoder, with 2x upsampling undoing each pooling step.
x = Conv2D(filters=64, kernel_size=3, padding='same', activation='relu')(encoded)
x = UpSampling2D(size=2)(x)
x = Conv2D(filters=32, kernel_size=3, padding='same', activation='relu')(x)
x = UpSampling2D(size=2)(x)
# Sigmoid output keeps every reconstructed pixel inside (0, 1).
decoded = Conv2D(filters=1, kernel_size=3, padding='same', activation='sigmoid')(x)

# Noisy image in, denoised image out.
autoencoder = Model(inputs=input_img, outputs=decoded)
autoencoder.compile(loss='binary_crossentropy', optimizer='adam')
autoencoder.summary()

Main training

In [None]:
import os
import matplotlib.pyplot as plt

# Setup
os.makedirs('checkpoints', exist_ok=True)

# Loss trackers (one entry per epoch, accumulated across all phases)
full_train_loss = []
full_val_loss = []

# Total training setup
total_epochs = 1000
save_interval = 200     # write a checkpoint each time this many epochs have passed
epochs_per_phase = 100  # Can be tuned

# Training in chunks
for start_epoch in range(0, total_epochs, epochs_per_phase):
    end_epoch = min(start_epoch + epochs_per_phase, total_epochs)

    print(f"\n🧠 Training from epoch {start_epoch + 1} to {end_epoch}...\n")

    # In Keras, `epochs` is the index of the final epoch, not a count, so
    # together with `initial_epoch` this runs epochs start_epoch+1..end_epoch.
    history = autoencoder.fit(
        train_X, train_Y,
        initial_epoch=start_epoch,
        epochs=end_epoch,
        batch_size=32,
        shuffle=True,
        validation_data=(valid_X, valid_Y)
    )

    # Append losses
    full_train_loss.extend(history.history['loss'])
    full_val_loss.extend(history.history['val_loss'])

    # Save a checkpoint whenever this phase crossed a multiple of save_interval.
    # A plain `end_epoch % save_interval == 0` test silently skips checkpoints
    # when epochs_per_phase does not divide save_interval (e.g. 150 vs 200).
    crossed_save_boundary = end_epoch // save_interval > start_epoch // save_interval
    if crossed_save_boundary or (end_epoch == total_epochs):
        autoencoder.save(f'checkpoints/autoencoder_epoch_{end_epoch}.h5')
        print(f"✅ Model checkpoint saved at epoch {end_epoch}")



🧠 Training from epoch 1 to 100...

Epoch 1/100
9/9 ━━━━━━━━━━━━━━━━━━━━ 54s 6s/step - loss: 0.5586 - val_loss: 0.5456
Epoch 2/100
9/9 ━━━━━━━━━━━━━━━━━━━━ 80s 6s/step - loss: 0.5665 - val_loss: 0.5403
Epoch 3/100
9/9 ━━━━━━━━━━━━━━━━━━━━ 62s 7s/step - loss: 0.5666 - val_loss: 0.5381
Epoch 4/100
9/9 ━━━━━━━━━━━━━━━━━━━━ 75s 6s/step - loss: 0.5593 - val_loss: 0.5362
Epoch 5/100
9/9 ━━━━━━━━━━━━━━━━━━━━ 81s 6s/step - loss: 0.5536 - val_loss: 0.5354
Epoch 6/100
9/9 ━━━━━━━━━━━━━━━━━━━━ 81s 6s/step - loss: 0.5642 - val_loss: 0.5351
Epoch 7/100
9/9 ━━━━━━━━━━━━━━━━━━━━ 82s 6s/step - loss: 0.5723 - val_loss: 0.5351
Epoch 8/100
9/9 ━━━━━━━━━━━━━━━━━━━━ 83s 6s/step - loss: 0.5565 - val_loss: 0.5347
Epoch 9/100
9/9 … (output truncated)

Plotting curves

In [None]:
# Plot the per-epoch losses collected by the training loop
epochs = range(1, len(full_train_loss) + 1)

fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(epochs, full_train_loss, label='Training Loss', color='blue')
ax.plot(epochs, full_val_loss, label='Validation Loss', color='red')
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
# Take the epoch range from the recorded losses so the title stays correct
# when training is shortened, extended or interrupted.
ax.set_title(f'Training and Validation Loss (1 to {len(full_train_loss)} epochs)')
ax.legend()
ax.grid(True)
fig.tight_layout()
fig.savefig("loss_graph.png")
plt.show()

Evaluation mode

In [None]:
import random
from tensorflow.keras.models import load_model

# Plot
import matplotlib.pyplot as plt

# Number of example images to visualise (capped by the validation set size)
n_samples = min(5, len(valid_Y))

# Select random clean images from the held-out validation split. Sampling from
# the full dataset would mostly pick images the model was trained on, which
# makes the "after" RMSE look better than it is on unseen data.
sample_indices = random.sample(range(len(valid_Y)), n_samples)
original = valid_Y[sample_indices]
noisy = add_noise(original)

model = load_model('checkpoints/autoencoder_epoch_1000.h5')

# Denoise the samples with the loaded checkpoint
predictions = model.predict(noisy)

plt.figure(figsize=(20, 8))

for i in range(n_samples):
    rmse_before = compute_rmse(original[i], noisy[i])
    rmse_after = compute_rmse(original[i], predictions[i])

    # Row 1: Clean (titled with its index inside the validation split)
    plt.subplot(3, n_samples, i + 1)
    plt.imshow(original[i].squeeze(), cmap='gray')
    plt.title(f"Validation image {sample_indices[i]}")
    plt.axis('off')

    # Row 2: Noisy
    plt.subplot(3, n_samples, i + 1 + n_samples)
    plt.imshow(noisy[i].squeeze(), cmap='gray')
    plt.title(f"RMSE (Original vs Noisy)     : {rmse_before:.4f}")
    plt.axis('off')

    # Row 3: Denoised by the loaded checkpoint
    plt.subplot(3, n_samples, i + 1 + 2 * n_samples)
    plt.imshow(predictions[i].squeeze(), cmap='gray')
    plt.title(f"RMSE (Original vs Denoised)  : {rmse_after:.4f}")
    plt.axis('off')

plt.tight_layout()
plt.show()

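Sreeja's test script (fully commented out, not executed: an alternative denoiser using a U-Net generator and a discriminator on 128×128 images)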

In [None]:
# # STEP 1: Upload & Unzip Dataset
# # -----------------------------------------------
# import zipfile
# import glob

# with zipfile.ZipFile("/content/archive.zip", 'r') as zip_ref:
#     zip_ref.extractall("/content/fingerprints")

# print("✅ Dataset extracted.")
# all_images = glob.glob("/content/fingerprints/**/*.tif", recursive=True)
# print(f"Total images found: {len(all_images)}")

# # -----------------------------------------------
# # STEP 2: Preprocessing
# # -----------------------------------------------
# import cv2
# import numpy as np

# def crop_image_from_gray(img):
#     if len(img.shape) == 3:
#         gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
#     else:
#         gray = img
#     mask = gray > 10
#     if mask.any():
#         img = img[np.ix_(mask.any(1), mask.any(0))]
#     return img

# def process_image(path):
#     img = cv2.imread(path)
#     img = crop_image_from_gray(img)
#     img = cv2.resize(img, (128, 128))
#     img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
#     img = img / 255.0
#     img = np.expand_dims(img, axis=-1)
#     return img

# processed_images = [process_image(p) for p in all_images]
# processed_images = np.array(processed_images)
# print(f"✅ Processed images shape: {processed_images.shape}")

# # -----------------------------------------------
# # STEP 3: Noise Injection
# # -----------------------------------------------
# def add_noise(image, stage):
#     noise_factors = {1: 0.1, 2: 0.2, 3: 0.3, 4: 0.3}
#     noise = np.random.normal(0, 1, image.shape)
#     noisy_image = image + noise_factors[stage] * noise

#     if stage == 4:
#         noisy_image = cv2.GaussianBlur(noisy_image.squeeze(), (5, 5), 0)
#         noisy_image = np.expand_dims(noisy_image, axis=-1)

#     noisy_image = np.clip(noisy_image, 0, 1)
#     return noisy_image

# # -----------------------------------------------
# # STEP 4: Train-Test Split
# # -----------------------------------------------
# from sklearn.model_selection import train_test_split

# train_clean, test_clean = train_test_split(processed_images, test_size=0.2, random_state=42)
# train_noisy = np.array([add_noise(img, stage=3) for img in train_clean])
# test_noisy = np.array([add_noise(img, stage=3) for img in test_clean])

# print("Train set:", train_clean.shape)
# print("Test set:", test_clean.shape)

# # -----------------------------------------------
# # STEP 5: Dataset Preparation for TensorFlow
# # -----------------------------------------------
# import tensorflow as tf

# BATCH_SIZE = 8
# BUFFER_SIZE = len(train_noisy)

# train_dataset = tf.data.Dataset.from_tensor_slices((train_noisy, train_clean))
# train_dataset = train_dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE)

# # -----------------------------------------------
# # STEP 6: Build U-Net Generator
# # -----------------------------------------------
# from tensorflow.keras import layers, Model

# def unet_generator():
#     inputs = layers.Input(shape=(128, 128, 1))

#     e1 = layers.Conv2D(64, 4, strides=2, padding='same')(inputs)
#     e1 = layers.LeakyReLU()(e1)

#     e2 = layers.Conv2D(128, 4, strides=2, padding='same')(e1)
#     e2 = layers.BatchNormalization()(e2)
#     e2 = layers.LeakyReLU()(e2)

#     e3 = layers.Conv2D(256, 4, strides=2, padding='same')(e2)
#     e3 = layers.BatchNormalization()(e3)
#     e3 = layers.LeakyReLU()(e3)

#     b = layers.Conv2D(512, 4, strides=2, padding='same')(e3)
#     b = layers.BatchNormalization()(b)
#     b = layers.ReLU()(b)

#     d1 = layers.Conv2DTranspose(256, 4, strides=2, padding='same')(b)
#     d1 = layers.BatchNormalization()(d1)
#     d1 = layers.Concatenate()([d1, e3])
#     d1 = layers.ReLU()(d1)

#     d2 = layers.Conv2DTranspose(128, 4, strides=2, padding='same')(d1)
#     d2 = layers.BatchNormalization()(d2)
#     d2 = layers.Concatenate()([d2, e2])
#     d2 = layers.ReLU()(d2)

#     d3 = layers.Conv2DTranspose(64, 4, strides=2, padding='same')(d2)
#     d3 = layers.BatchNormalization()(d3)
#     d3 = layers.Concatenate()([d3, e1])
#     d3 = layers.ReLU()(d3)

#     outputs = layers.Conv2DTranspose(1, 4, strides=2, padding='same', activation='sigmoid')(d3)
#     return Model(inputs, outputs)

# generator = unet_generator()

# # -----------------------------------------------
# # STEP 7: Build Discriminator
# # -----------------------------------------------
# def build_discriminator():
#     input_noisy = layers.Input(shape=(128, 128, 1))
#     input_clean = layers.Input(shape=(128, 128, 1))

#     combined = layers.Concatenate()([input_noisy, input_clean])

#     d = layers.Conv2D(64, 4, strides=2, padding='same')(combined)
#     d = layers.LeakyReLU()(d)

#     d = layers.Conv2D(128, 4, strides=2, padding='same')(d)
#     d = layers.BatchNormalization()(d)
#     d = layers.LeakyReLU()(d)

#     d = layers.Conv2D(256, 4, strides=2, padding='same')(d)
#     d = layers.BatchNormalization()(d)
#     d = layers.LeakyReLU()(d)

#     d = layers.Conv2D(1, 4, strides=1, padding='same')(d)
#     output = layers.Activation('sigmoid')(d)

#     return Model([input_noisy, input_clean], output)

# discriminator = build_discriminator()

# # -----------------------------------------------
# # STEP 8: Loss Functions and Optimizers
# # -----------------------------------------------
# bce = tf.keras.losses.BinaryCrossentropy(from_logits=False)
# l1_loss = tf.keras.losses.MeanAbsoluteError()

# def generator_loss(disc_output, gen_output, target):
#     adv_loss = bce(tf.ones_like(disc_output), disc_output)
#     l1 = l1_loss(target, gen_output)
#     return adv_loss + 100 * l1

# def discriminator_loss(disc_real, disc_generated):
#     real_loss = bce(tf.ones_like(disc_real), disc_real)
#     gen_loss = bce(tf.zeros_like(disc_generated), disc_generated)
#     return real_loss + gen_loss

# generator_optimizer = tf.keras.optimizers.Adam(2e-4, beta_1=0.5)
# discriminator_optimizer = tf.keras.optimizers.Adam(2e-4, beta_1=0.5)

# # -----------------------------------------------
# # STEP 9: Training Step
# # -----------------------------------------------
# @tf.function
# def train_step(input_noisy, target_clean):
#     with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
#         gen_output = generator(input_noisy, training=True)
#         disc_real = discriminator([input_noisy, target_clean], training=True)
#         disc_generated = discriminator([input_noisy, gen_output], training=True)

#         gen_loss = generator_loss(disc_generated, gen_output, target_clean)
#         disc_loss = discriminator_loss(disc_real, disc_generated)

#     gradients_gen = gen_tape.gradient(gen_loss, generator.trainable_variables)
#     gradients_disc = disc_tape.gradient(disc_loss, discriminator.trainable_variables)

#     generator_optimizer.apply_gradients(zip(gradients_gen, generator.trainable_variables))
#     discriminator_optimizer.apply_gradients(zip(gradients_disc, discriminator.trainable_variables))

#     return gen_loss, disc_loss

# # -----------------------------------------------
# # STEP 10: Full Training Loop
# # -----------------------------------------------
# import time

# EPOCHS = 50

# for epoch in range(EPOCHS):
#     start = time.time()
#     gen_losses = []
#     disc_losses = []

#     for input_noisy_batch, target_clean_batch in train_dataset:
#         gen_loss, disc_loss = train_step(input_noisy_batch, target_clean_batch)
#         gen_losses.append(gen_loss.numpy())
#         disc_losses.append(disc_loss.numpy())

#     print(f"Epoch {epoch+1}/{EPOCHS} - Gen Loss: {np.mean(gen_losses):.4f} - Disc Loss: {np.mean(disc_losses):.4f} - Time: {time.time()-start:.2f}s")

# # -----------------------------------------------
# # STEP 11: Evaluation on Test Set
# # -----------------------------------------------
# restored_test = generator.predict(test_noisy)

# import matplotlib.pyplot as plt
# import random

# test_idx = random.sample(range(len(test_clean)), 5)

# plt.figure(figsize=(15, 5))

# for i, idx in enumerate(test_idx):
#     plt.subplot(3, 5, i+1)
#     plt.imshow(test_clean[idx].squeeze(), cmap='gray')
#     plt.title("Clean")
#     plt.axis('off')

#     plt.subplot(3, 5, i+6)
#     plt.imshow(test_noisy[idx].squeeze(), cmap='gray')
#     plt.title("Noisy")
#     plt.axis('off')

#     plt.subplot(3, 5, i+11)
#     plt.imshow(restored_test[idx].squeeze(), cmap='gray')
#     plt.title("Restored")
#     plt.axis('off')

# plt.tight_layout()
# plt.show()