# Siamese network for detecting misprinted photos
## Loading the data
### The data has two folders. One folder has normal photos. The other has misprinted photos

In [None]:
!pip install gdown

Collecting gdown
  Downloading gdown-5.1.0-py3-none-any.whl.metadata (5.7 kB)
Downloading gdown-5.1.0-py3-none-any.whl (17 kB)
Installing collected packages: gdown
Successfully installed gdown-5.1.0


In [None]:
!gdown --id 1VMottI0IyzCloyLXL7uIRWPTJj7atAFf

Downloading...
From (original): https://drive.google.com/uc?id=1VMottI0IyzCloyLXL7uIRWPTJj7atAFf
From (redirected): https://drive.google.com/uc?id=1VMottI0IyzCloyLXL7uIRWPTJj7atAFf&confirm=t&uuid=d9fc736d-11d6-47bb-ac59-270e1bbe1cee
To: /kaggle/working/text3.zip
100%|███████████████████████████████████████| 1.03G/1.03G [00:09<00:00, 112MB/s]


In [None]:
!unzip text3.zip

Archive:  text3.zip
   creating: Text 3 images/
   creating: Text 3 images/Text 3 - Training images/
   creating: Text 3 images/Text 3 - Training images/Pass/
  inflating: Text 3 images/Text 3 - Training images/Pass/01 - Copy.bmp  
  inflating: Text 3 images/Text 3 - Training images/Pass/01.bmp  
  inflating: Text 3 images/Text 3 - Training images/Pass/02 - Copy.bmp  
  inflating: Text 3 images/Text 3 - Training images/Pass/02.bmp  
  inflating: Text 3 images/Text 3 - Training images/Pass/03.bmp  
  inflating: Text 3 images/Text 3 - Training images/Pass/03 - Copy.bmp  
  inflating: Text 3 images/Text 3 - Training images/Pass/04 - Copy.bmp  
  inflating: Text 3 images/Text 3 - Training images/Pass/04.bmp  
  inflating: Text 3 images/Text 3 - Training images/Pass/Image_20240120131444480.bmp  
  inflating: Text 3 images/Text 3 - Training images/Pass/Image_20240120131444480 - Copy.bmp  
  inflating: Text 3 images/Text 3 - Training images/Pass/Image_20240120131439667.bmp  
  inflating: Text

# Importing necessary libraries

In [None]:
from tensorflow.keras.layers import Input, Conv2D, Flatten, Dense, Lambda
from tensorflow.keras.models import Model
from tensorflow.keras import backend as K
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import binary_crossentropy
import numpy as np
import os
from PIL import Image
import cv2
import keras
from tensorflow.keras.models import Model
from tensorflow.keras import optimizers
import tensorflow as tf
# Function to load and preprocess images
def load_image(image_path):
    """Read an image from disk and prepare it as network input.

    Args:
    - image_path (str): Path of the image file to read.

    Returns:
    - img (np.ndarray): The image resized to 224x224 with pixel values divided
      by 255. No colour conversion is applied, so the channels stay in
      whatever order `cv2.imread` returns them.

    Raises:
    - OSError: If the file is missing or cannot be decoded as an image.
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread reports failure by returning None rather than raising;
        # fail here with the offending path instead of deep inside cv2.resize.
        raise OSError(f"Could not read image: {image_path}")
    img = cv2.resize(img, (224, 224))  # Input size the embedding network is built for
    img = np.array(img) / 255.0  # Scale pixel values to [0, 1]
    return img



2024-02-07 11:26:17.791259: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-02-07 11:26:17.791364: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-02-07 11:26:17.931720: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


## Preprocessing

In [None]:
# Paths of the two class folders described at the top of the notebook.
# Both are absolute paths under /kaggle/working, where the archive was downloaded and unzipped above.
normal_folder = "/kaggle/working/Text 3 images/Text 3 - Training images/Pass"
misprint_folder = "/kaggle/working/Text 3 images/Text 3 - Training images/Missing Text"

In [None]:
#Gets paths of all files in a directory
def get_image_paths(directory):
    """Return the paths of all regular files directly inside `directory`.

    Args:
    - directory (str): Folder to list.

    Returns:
    - image_paths (list): `directory` joined with each file name, sorted by
      file name. Sub-directories are skipped.
    """
    image_paths = []
    # os.listdir returns entries in arbitrary order; sorting makes the result
    # (and any seeded shuffle applied to it later) reproducible across runs.
    for filename in sorted(os.listdir(directory)):
        full_path = os.path.join(directory, filename)
        if os.path.isfile(full_path):
            image_paths.append(full_path)
    return image_paths

In [None]:
# Collect the file paths of each class into its own list:
# one list for the normal ("Pass") images and one for the misprinted ("Missing Text") images.
normal_images_paths = get_image_paths(normal_folder)
misprinted_images_paths = get_image_paths(misprint_folder)


## Splitting data paths into paths for train, test and val sets

In [None]:

import random

def split_paths(paths, train_ratio=0.7, val_ratio=0.15, seed=42):
    """
    Split a list of paths into train, validation, and test sets.

    The input list is left untouched; a shuffled copy is split instead.

    Args:
    - paths (list): List of paths to be split.
    - train_ratio (float): Ratio of training set size (default: 0.7).
    - val_ratio (float): Ratio of validation set size (default: 0.15).
    - seed (int or None): Seed for reproducibility (default: 42). With None,
      the module-level `random` generator is used in its current state.

    Returns:
    - train_paths (list): List of paths for the training set.
    - val_paths (list): List of paths for the validation set.
    - test_paths (list): List of paths for the test set (everything not
      assigned to train or validation).
    """
    # Shuffle a copy so the caller's list keeps its original order.
    shuffled = list(paths)

    # A dedicated generator yields the same permutation as seeding the global
    # one, without resetting the global random state for the rest of the program.
    rng = random.Random(seed) if seed is not None else random
    rng.shuffle(shuffled)

    # Sizes are truncated, so any rounding remainder ends up in the test set.
    total_size = len(shuffled)
    train_size = int(train_ratio * total_size)
    val_size = int(val_ratio * total_size)

    train_paths = shuffled[:train_size]
    val_paths = shuffled[train_size:train_size + val_size]
    test_paths = shuffled[train_size + val_size:]

    return train_paths, val_paths, test_paths



In [None]:
# Split each class separately so train, validation and test all contain both classes.
# Both calls rely on split_paths' default ratios (0.7 / 0.15 / remainder) and default seed.
train_normal_paths, val_normal_paths, test_normal_paths = split_paths(normal_images_paths)
train_misprinted_paths, val_misprinted_paths, test_misprinted_paths = split_paths(misprinted_images_paths)


## Triplets generator function
### This function generates triplets for triplet loss training. It takes lists of file paths for normal and misprinted images, and the number of triplets to generate as input arguments. It randomly selects an anchor image from one of the classes ('Pass' or 'Missing Text'), then selects a positive image from the same class and a negative image from the other class. Finally, it loads these images and returns them as triplets.

In [None]:
# Function to generate triplets
def generate_triplets(normal_image_paths,misprinted_images_paths, num_triplets):
    """Build random (anchor, positive, negative) image triplets.

    For every triplet a class ('Pass' or 'Missing Text') is drawn at random.
    The anchor and the positive are two different paths from that class and
    the negative is a path from the other class. All three are loaded with
    `load_image`.

    Args:
    - normal_image_paths (list): Paths of the normal ('Pass') images.
    - misprinted_images_paths (list): Paths of the misprinted ('Missing Text') images.
    - num_triplets (int): Number of triplets to generate.

    Returns:
    - np.ndarray: The triplets stacked into one array; index 0 selects the
      triplet and index 1 selects anchor / positive / negative.

    Raises:
    - ValueError: If triplets are requested but a class has fewer than two
      distinct paths, so no anchor/positive pair can be formed.
    """
    if num_triplets > 0:
        # Without two distinct paths the anchor != positive rejection loop
        # below could never terminate, so fail fast instead.
        for label, paths in (('Pass', normal_image_paths),
                             ('Missing Text', misprinted_images_paths)):
            if len(set(paths)) < 2:
                raise ValueError(
                    f"Need at least two distinct '{label}' image paths to build triplets"
                )

    triplet_pairs = []
    classes = ['Pass','Missing Text']

    for _ in range(num_triplets):
        # Select a random class for the anchor
        anchor_class = np.random.choice(classes)
        if anchor_class == 'Pass':
            same_class_paths, other_class_paths = normal_image_paths, misprinted_images_paths
        else:
            same_class_paths, other_class_paths = misprinted_images_paths, normal_image_paths

        # Anchor and positive come from the same class
        anchor_path = np.random.choice(same_class_paths)
        positive_image_path = np.random.choice(same_class_paths)
        # Make sure anchor and positive image are not the same
        while anchor_path == positive_image_path:
            positive_image_path = np.random.choice(same_class_paths)
        # The negative comes from the other class
        negative_image_path = np.random.choice(other_class_paths)

        triplet_pairs.append((
            load_image(anchor_path),
            load_image(positive_image_path),
            load_image(negative_image_path),
        ))

    return np.array(triplet_pairs)


# Loading a pre-trained mobilenet without the last layer

In [None]:
from keras import layers
from keras.applications import MobileNetV2

target_shape = (224,224)

# ImageNet-pretrained MobileNetV2 used as a feature extractor: include_top=False drops the
# classification head. Every other constructor argument is left at its default value.
model = MobileNetV2(
    input_shape=target_shape + (3,),
    include_top=False,
    weights="imagenet",
)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5


In [None]:
# Embedding head: a 1x1 convolution with 24 filters on top of the backbone's last layer,
# flattened into a single embedding vector per image.
# NOTE(review): the original intent is an embedding of more than 1000 values; for a 224x224 input
# this is presumably 7 * 7 * 24 = 1176 — confirm with fin.summary().
last_layer = model.layers[-1].output
out0 =layers.Conv2D(filters=24, kernel_size=(1, 1), padding='same')(last_layer)
out =layers.Flatten()(out0)

In [None]:
# `fin` is the embedding network: MobileNetV2 input -> backbone -> 1x1 conv -> flatten.
# The same `fin` instance embeds anchor, positive and negative images, so the weights are shared.
fin =  Model(inputs=model.input, outputs=out)

In [None]:
# Only keeping the last 5 layers trainable; every earlier layer is frozen.
# NOTE(review): the last five layers of `fin` include the added Conv2D and Flatten,
# so only three layers of the pretrained backbone are fine-tuned.
for layer in fin.layers[:-5]:
    layer.trainable = False

In [None]:
#fin.summary()

### Triplet loss function

In [None]:
def triplet_loss(anchor, positive, negative, alpha=0.5):
    """Batch-averaged triplet loss on squared Euclidean distances.

    Args:
    - anchor: Embeddings of the anchor images.
    - positive: Embeddings of the images from the anchor's class.
    - negative: Embeddings of the images from the other class.
    - alpha (float): Margin added to the distance difference (default: 0.5).

    Returns:
    - The mean over the batch of max(d(anchor, positive) - d(anchor, negative) + alpha, 0),
      where d is the sum of squared differences along the last axis.
    """
    def _squared_distance(other):
        # Sum of squared element-wise differences along the embedding axis
        return K.sum(K.square(anchor - other), axis=-1)

    # Hinge: only triplets that violate the margin contribute to the loss
    hinge = K.maximum(_squared_distance(positive) - _squared_distance(negative) + alpha, 0.0)
    return K.mean(hinge)

## Training Loop Description

The training loop iterates over the specified number of epochs. Within each epoch:

1. **Data Generation:**
   - Generate triplets for training using `generate_triplets()` function with normal and misprinted image paths.
   - Generate validation triplets using a smaller number of samples for validation.
   
2. **Forward Pass:**
   - Perform a forward pass through the Siamese network (denoted as `fin`) with the anchor, positive, and negative images to obtain embeddings.
   
3. **Triplet Loss Calculation:**
   - Calculate the triplet loss using the obtained anchor, positive, and negative embeddings.
   
4. **Backward Pass:**
   - Compute gradients of the loss with respect to trainable variables using `tf.GradientTape`.
   - Update model weights using the optimizer (`optimizer`) based on the computed gradients.
   
5. **Validation:**
   - Obtain embeddings for validation triplets.
   - Calculate validation triplet loss.
   - Save the weights if the current validation loss is the best seen so far (`best_val_loss`).
   
6. **Monitoring and Saving:**
   - Append training and validation losses to their respective lists.
   - Print the training and validation losses for monitoring purposes.
   - Save the weights periodically (`current_siamese.h5`) and if validation loss improves (`best_siamese.h5`).
   
7. **Resource Management:**
   - Delete the tape to free up resources after each iteration.
   
8. **Completion:**
   - After all epochs are completed, print "Training complete."



In [None]:
# Training parameters
optimizer = optimizers.Adam(learning_rate=0.001)
epochs = 100
# Threshold the validation loss must not exceed for `best_siamese.h5` to be written.
# +inf instead of an arbitrary 2000, so a large initial loss can never block checkpointing.
best_val_loss = float("inf")
num_triplets = 50  # training triplets sampled per epoch
train_losses= []
val_losses = []

In [None]:

for epoch in range(epochs):
    print(f"Epoch {epoch + 1}/{epochs}")
    # Fresh random triplets are sampled every epoch: one training batch and 20 validation triplets
    triplets = generate_triplets(train_normal_paths,train_misprinted_paths, num_triplets)

    val_triplets = generate_triplets(val_normal_paths,val_misprinted_paths, 20)
    anchors = triplets[:, 0]
    positives = triplets[:, 1]
    negatives = triplets[:, 2]

    # Forward pass to get embeddings (the same network embeds all three inputs)
    with tf.GradientTape(persistent=True) as tape:
        anchor_embeddings = fin(anchors)
        positive_embeddings = fin(positives)
        negative_embeddings = fin(negatives)

        # Calculate triplet loss
        loss_value = triplet_loss(anchor_embeddings, positive_embeddings, negative_embeddings)

    # Backward pass: one optimizer step per epoch on the sampled batch
    gradients = tape.gradient(loss_value, fin.trainable_variables)
    optimizer.apply_gradients(zip(gradients, fin.trainable_variables))

    # Validation loss on the validation triplets
    val_anchors = val_triplets[:, 0]
    val_positives = val_triplets[:, 1]
    val_negatives = val_triplets[:, 2]
    val_anchor_embeddings = fin.predict(val_anchors)
    val_positive_embeddings = fin.predict(val_positives)
    val_negative_embeddings = fin.predict(val_negatives)
    val_loss_value = triplet_loss(val_anchor_embeddings, val_positive_embeddings, val_negative_embeddings)

    train_loss = loss_value.numpy()
    val_loss = val_loss_value.numpy()

    if val_loss <= best_val_loss:
        # Remember the new best; without this update every epoch below the
        # initial threshold would overwrite the "best" checkpoint.
        best_val_loss = val_loss
        fin.save_weights("best_siamese.h5")
    train_losses.append(train_loss)
    val_losses.append(val_loss)
    if (epoch%10 ==0):
        print(f"Epoch {epoch + 1}, Train Loss: {train_loss}")
        print(f"Epoch {epoch + 1}, Val Loss: {val_loss}")
        fin.save_weights("current_siamese.h5")

    # Delete the tape to free up resources
    del tape

print("Training complete.")


# Plotting loss curves.
(I lost the plot accidentally. You can retrain the model and plot if necessary)

In [None]:
import matplotlib.pyplot as plt

# One point per epoch for each curve, drawn on a single set of axes
fig, ax = plt.subplots()
ax.plot(train_losses, label='Training Loss')
ax.plot(val_losses, label='Validation Loss')
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
ax.set_title('Training and Validation Loss Over Epochs')
ax.legend()
plt.show()

# Loading the best saved model

In [None]:
# NOTE(review): this reads a checkpoint from a Kaggle input dataset, not the `best_siamese.h5`
# written to the working directory by the training loop — confirm which file is intended.
weights_path = '/kaggle/input/best-model/best_siamese.h5'

# Load the weights into the model (by_name=True matches saved weights to layers by layer name)
fin.load_weights(weights_path,by_name=True)

### The next function uses the best loaded model to compare a test image with one randomly chosen misprinted image and one randomly chosen normal image. It returns 1 if the test image is closer to the misprinted image than to the normal image; otherwise, it returns 0

In [None]:

def sub_pred(test_image):
    """Classify one image against a random misprinted and a random normal training image.

    Args:
    - test_image: A preprocessed image (as returned by `load_image`).

    Returns:
    - 1 if the embedding of `test_image` is strictly closer (squared Euclidean
      distance) to the misprinted reference than to the normal reference, else 0.
    """
    # A fresh pair of reference images is drawn on every call
    misprint_reference = load_image(np.random.choice(train_misprinted_paths))
    normal_reference = load_image(np.random.choice(train_normal_paths))

    def _embed(image):
        # The network expects a batch, so add a leading batch axis of size 1
        return fin(tf.expand_dims(image,axis=0))

    test_embedding = _embed(test_image)
    misprint_embedding = _embed(misprint_reference)
    normal_embedding = _embed(normal_reference)

    dist_to_misprint = K.sum(K.square(test_embedding - misprint_embedding), axis=-1).numpy()[0]
    dist_to_normal = K.sum(K.square(test_embedding - normal_embedding), axis=-1).numpy()[0]
    return 1 if dist_to_misprint < dist_to_normal else 0

### Below is a function that performs prediction on the whole test set.
#### For each entry, it takes a majority vote over 7 predictions given by the function above

In [None]:
def metric_calculator(num_votes=7):
    """Predict a label for every test image by majority vote over `sub_pred`.

    Each test image is loaded once and passed to `sub_pred` `num_votes` times.
    The image is labelled 1 (misprinted) when more than half of the votes are 1,
    otherwise 0. With an even `num_votes`, a tie yields 0.

    Args:
    - num_votes (int): Number of `sub_pred` calls per image (default: 7).

    Returns:
    - mis_print_preds (list): Predictions for the images in `test_misprinted_paths`.
    - normal_preds (list): Predictions for the images in `test_normal_paths`.

    Raises:
    - ValueError: If `num_votes` is smaller than 1.
    """
    if num_votes < 1:
        raise ValueError("num_votes must be at least 1")

    def _majority_vote(path):
        # Load once, vote num_votes times; each sub_pred call draws new reference images
        test_image = load_image(path)
        votes = sum(sub_pred(test_image) for _ in range(num_votes))
        return 1 if 2 * votes > num_votes else 0

    mis_print_preds = [_majority_vote(path) for path in test_misprinted_paths]
    normal_preds = [_majority_vote(path) for path in test_normal_paths]
    return mis_print_preds,normal_preds



### Evaluation on the test set

In [None]:
# Majority-vote predictions (0 = normal, 1 = misprinted) for the misprinted and the normal test images
mis_print_preds,normal_preds = metric_calculator()

### Combining predictions and making ground truths

In [None]:
# Predictions as arrays, with matching ground truth: 1 for misprinted images, 0 for normal ones
mis_print_preds = np.asarray(mis_print_preds)
normal_preds = np.asarray(normal_preds)
ground_truth_mis_print = np.ones_like(mis_print_preds)
ground_truth_normal = np.zeros_like(normal_preds)

# Misprinted entries first, normal entries after, in both arrays (stored as floats)
preds = np.concatenate([mis_print_preds, normal_preds]).astype(float)
gt = np.concatenate([ground_truth_mis_print, ground_truth_normal]).astype(float)

# Accuracy on the test set

In [None]:
from sklearn.metrics import accuracy_score
# accuracy_score takes (y_true, y_pred): ground truth first, predictions second
total_accuracy = accuracy_score(gt, preds)

print("Total accuracy:", total_accuracy)

Total accuracy: 0.8846153846153846


In [None]:
# Saving the whole model (architecture and weights) to a single HDF5 file in the working directory
# NOTE(review): the captured output shows a message from saving_api.save_model — presumably the
# legacy-HDF5 format warning; confirm whether the native Keras format should be used instead.
fin.save("model.h5")

  saving_api.save_model(
