In [None]:
# Colab-only setup: expose the user's Google Drive inside the runtime so the
# dataset and saved-model paths used by later cells resolve.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [None]:
import tensorflow as tf
import os
import pathlib
from sklearn.model_selection import train_test_split


import pandas as pd
import sqlite3
from datetime import date
import numpy as np

In [None]:
class MyModel(tf.keras.Model):
    """Small CNN classifier with hand-written training and evaluation steps.

    The wrapped functional model (see `_BuildModel`) maps an image batch to a
    softmax distribution over `sizeY[0]` classes.  `Train` performs one
    optimisation step and `Validate` returns the top-1 accuracy of a batch.

    Args:
        sizeX: Shape of a single input sample (without the batch dimension),
            passed to `tf.keras.Input`.
        sizeY: Shape of a single one-hot target; only `sizeY[0]` (the number
            of classes) is used.
    """

    def __init__(self, sizeX:tuple, sizeY:tuple):
        super().__init__()
        self._sizeX = sizeX  # input
        self._sizeY = sizeY  # output
        self._model = self._BuildModel()
        self._learner = self._BuildLearner()

    def call(self, x:tf.Tensor, training:bool=False) -> tf.Tensor:
        """Run the wrapped network on `x` and return the class probabilities.

        `training` is forwarded to the inner model; it controls whether the
        Dropout layer is active.
        """
        return self._model(x, training=training)

    @tf.function
    def Train(self, x:tf.Tensor, y:tf.Tensor): #update the model's weights
        """Run one gradient step on the batch (`x`, one-hot `y`)."""
        with tf.GradientTape() as tape:
            # training=True is required here: `call` defaults to
            # training=False, which would silently disable Dropout while
            # fitting and remove the regularisation the layer is there for.
            prediction = self(x, training=True)
            loss = self._learner["get_loss"](prediction, y)
        gradient = tape.gradient(loss, self._model.trainable_variables)
        self._learner["optimize"].apply_gradients(zip(gradient, self._model.trainable_variables))

    @tf.function
    def Validate(self, x:tf.Tensor, y:tf.Tensor) -> tf.Tensor: #evaluating the model's performance
        """Return the top-1 accuracy (scalar float32 in [0, 1]) on one batch.

        `y` is expected to be one-hot; its arg-max along axis 1 is used as the
        target class index.
        """
        prediction = self(x, training=False)
        review = tf.math.in_top_k(tf.math.argmax(y,axis=1), prediction, 1)
        accuracy = tf.math.reduce_mean(tf.cast(review, dtype="float32"))
        return accuracy

    def _BuildModel(self) -> tf.keras.Model:  # 8 layer doesn't include input layer(else 9)
        """Build the functional CNN: rescale -> 4x Conv2D -> GAP -> Dropout -> softmax."""
        tensorInput = tf.keras.Input(shape=self._sizeX) #input layer
        # Maps x to x/127.5 - 1 (i.e. [0, 255] -> [-1, 1]) inside the model,
        # so callers feed un-normalised pixel values.
        # NOTE(review): Lambda layers wrapping a Python lambda can be fragile
        # when a saved model is reloaded in another environment - confirm the
        # save/load round trip if the deployment setup changes.
        featureMaps = tf.keras.layers.Lambda(lambda x: x/127.5-1.0)(tensorInput)
        featureMaps = tf.keras.layers.Conv2D(filters=16, kernel_size=[3,3], activation="relu")(featureMaps)
        featureMaps = tf.keras.layers.Conv2D(filters=32, kernel_size=[1,3], activation="relu")(featureMaps)
        featureMaps = tf.keras.layers.Conv2D(filters=64, kernel_size=[1,3], activation="relu")(featureMaps)
        featureMaps = tf.keras.layers.Conv2D(filters=128, kernel_size=[1,3], activation="relu")(featureMaps) #capture more complex patterns in the later stages of the network
        # Global average pooling collapses the spatial dimensions, which keeps
        # the classifier head small.
        featureMaps = tf.keras.layers.GlobalAveragePooling2D()(featureMaps)
        featureMaps = tf.keras.layers.Dropout(0.5)(featureMaps) #prevent overfitting (active only when training=True)

        # Softmax output: one probability per class.
        result = tf.keras.layers.Dense(units=self._sizeY[0], activation="softmax")(featureMaps)
        model = tf.keras.Model(tensorInput, result) #takes tensorInput as input and gives result as output
        return model

    def _BuildLearner(self) -> dict:
        """Create the loss function and optimizer used by `Train`.

        Returns:
            A dict with two entries:
              "get_loss": callable (prediction, one-hot target) -> scalar
                  categorical cross-entropy averaged over the batch.
              "optimize": Adam optimizer with a learning rate of 1e-3.
        """
        def loser(p, y):
            # 1e-13 keeps log() finite when a predicted probability is 0.
            return tf.reduce_mean(-tf.reduce_sum(y*tf.math.log(p+1e-13),axis=1))

        optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
        learner = {"get_loss": loser, "optimize": optimizer}
        return learner



In [None]:
def PrepareDataset(dataDir:str, batchSize:int=10) -> dict:
    """Build shuffled, batched `tf.data` pipelines for the train and valid splits.

    Images are read from `<dataDir>/Train/<class>/...` and
    `<dataDir>/Valid/<class>/...`; the name of the directory that directly
    contains a file is its class label.  Each image is decoded to 3 channels
    and resized to 224x224, and each label is one-hot encoded.

    Args:
        dataDir: Root directory holding the `Train` and `Valid` folders.
        batchSize: Number of samples per batch.  Incomplete final batches are
            dropped, so a split with fewer than `batchSize` images yields no
            batches at all.

    Returns:
        A dict `{"train": Dataset, "valid": Dataset}` whose elements are
        `(images, one_hot_labels)` batches.

    Raises:
        ValueError: If a split contains no file inside a known class directory.
    """
    import warnings

    dataDirDict = {"train":pathlib.Path(dataDir)/"Train", "valid":pathlib.Path(dataDir)/"Valid"}

    # Sorted so that the class -> index mapping is stable across runs.
    classes = sorted(['passport','national_id_card', 'house_register', 'driver_license'])
    classIdxDict = dict(zip(classes,range(len(classes))))

    paths = {eachSet: [] for eachSet in dataDirDict}
    labels = {eachSet: [] for eachSet in dataDirDict}
    for eachSet, eachDir in dataDirDict.items():
        skipped = 0
        for eachPath in eachDir.rglob("*"):
            if not eachPath.is_file():
                continue
            classIdx = classIdxDict.get(eachPath.parent.name)
            # Hidden files (.DS_Store, ...) and files outside a known class
            # directory cannot be labelled; skip them instead of crashing with
            # a KeyError or feeding non-image bytes to the decoder.
            if classIdx is None or eachPath.name.startswith("."):
                skipped += 1
                continue
            paths[eachSet].append(str(eachPath))
            labels[eachSet].append(classIdx)
        if skipped:
            warnings.warn(f"Skipped {skipped} unlabelled or hidden file(s) under {eachDir}")
        if not paths[eachSet]:
            # Fail early with a readable message; otherwise shuffle() would
            # reject a zero-sized buffer with an obscure error.
            raise ValueError(f"No labelled images found for the '{eachSet}' split under {eachDir}")

    def decoder(path, label):
        # Returns (float32 image resized to 224x224x3, one-hot label).
        image = tf.image.decode_jpeg(tf.io.read_file(path), channels=3)
        return tf.image.resize(image, [224, 224]), tf.one_hot(label, len(classIdxDict))

    dataset = dict()
    for eachSet in dataDirDict.keys():
        eachTFData = tf.data.Dataset.from_tensor_slices((paths[eachSet],labels[eachSet]))
        # Shuffle the (cheap) path/label pairs before decoding, over the whole split.
        eachTFData = eachTFData.shuffle(len(paths[eachSet]), reshuffle_each_iteration=True)
        eachTFData = eachTFData.map(decoder, num_parallel_calls=tf.data.AUTOTUNE)
        dataset[eachSet] = eachTFData.batch(batchSize, drop_remainder=True).prefetch(tf.data.AUTOTUNE)

    return dataset


# Fine tuning


In [None]:
if __name__== "__main__":
    # Fine-tuning run: reload the previously saved model, keep training it and
    # overwrite the checkpoint only when BOTH train and validation accuracy
    # beat the best values seen so far (starting from a 96 % baseline).
    print("Preparing dataset...")
    dataset = PrepareDataset(r"/content/drive/MyDrive/doc", batchSize=10)
    print("Build the CNN model...")
    newModel = tf.keras.models.load_model('/content/drive/MyDrive/code/model_up')

    print("Start training...")
    best_valid_perf = 96
    best_train_perf = 96
    for epoch in range(15):
        # One optimisation pass over the training split.
        for inData, outData in dataset["train"]:
            newModel.Train(inData, outData)

        # Per-batch accuracy on each split (train first, then valid).
        perfDict = {"train":[], "valid":[]}
        for eachSet, batchScores in perfDict.items():
            for inData, outData in dataset[eachSet]:
                batchScores.append(newModel.Validate(inData, outData))

        # Mean batch accuracy expressed as a percentage.
        trainPerf = tf.math.reduce_mean(perfDict["train"]) * 100
        validPerf = tf.math.reduce_mean(perfDict["valid"]) * 100

        improved = (validPerf > best_valid_perf) and (trainPerf > best_train_perf)
        if improved:
            best_valid_perf, best_train_perf = validPerf, trainPerf
            newModel.save('/content/drive/MyDrive/code/model_up',save_format='tf')
            print('savee model')
        print(f"Epoch: {epoch},    Train perf: {trainPerf:.2f},    Valid perf: {validPerf:.2f}")
    print("Completed!", flush=True)

Preparing dataset...
Build the CNN model...




Start training...
Epoch: 0,    Train perf: 95.79,    Valid perf: 93.33
Epoch: 1,    Train perf: 95.26,    Valid perf: 96.67
Epoch: 2,    Train perf: 94.74,    Valid perf: 96.67
Epoch: 3,    Train perf: 96.84,    Valid perf: 93.33
Epoch: 4,    Train perf: 94.21,    Valid perf: 93.33
Epoch: 5,    Train perf: 95.26,    Valid perf: 93.33
Epoch: 6,    Train perf: 94.21,    Valid perf: 90.00




savee model
Epoch: 7,    Train perf: 97.37,    Valid perf: 96.67
Epoch: 8,    Train perf: 97.89,    Valid perf: 93.33
Epoch: 9,    Train perf: 92.63,    Valid perf: 90.00
Epoch: 10,    Train perf: 95.79,    Valid perf: 93.33
Epoch: 11,    Train perf: 95.26,    Valid perf: 93.33
Epoch: 12,    Train perf: 96.32,    Valid perf: 96.67
Epoch: 13,    Train perf: 93.68,    Valid perf: 93.33
Epoch: 14,    Train perf: 97.37,    Valid perf: 93.33
Completed!


In [None]:
# Evaluate the overall accuracy of the current model on both splits.
# Each entry collects the per-batch accuracies returned by Validate().
accuracy_by_split = {"train": [], "valid": []}
for split_name, batch_scores in accuracy_by_split.items():
    for inData, outData in dataset[split_name]:
        batch_scores.append(newModel.Validate(inData, outData))

train_accuracy = accuracy_by_split["train"]
valid_accuracy = accuracy_by_split["valid"]

# Mean of the per-batch accuracies (fraction in [0, 1]).
overall_train_accuracy = tf.reduce_mean(train_accuracy)
overall_valid_accuracy = tf.reduce_mean(valid_accuracy)

print(f"Overall train accuracy: {overall_train_accuracy*100:.2f}")
print(f"Overall valid accuracy: {overall_valid_accuracy*100:.2f}")


Overall train accuracy: 96.84
Overall valid accuracy: 96.67


In [None]:
# No install step is needed: `sqlite3` is part of the Python standard library
# and is already imported at the top of this notebook.  The PyPI package
# `db-sqlite3` is an unrelated third-party project and is not used here.

Collecting db-sqlite3
  Downloading db-sqlite3-0.0.1.tar.gz (1.4 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting db (from db-sqlite3)
  Downloading db-0.1.1.tar.gz (3.4 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting antiorm (from db->db-sqlite3)
  Downloading antiorm-1.2.1.tar.gz (171 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m172.0/172.0 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: db-sqlite3, db, antiorm
  Building wheel for db-sqlite3 (setup.py) ... [?25l[?25hdone
  Created wheel for db-sqlite3: filename=db_sqlite3-0.0.1-py3-none-any.whl size=1769 sha256=c5e0215ebfddbda5f56f005c482c9fa15d8201b34c501952468e6e91899b5277
  Stored in directory: /root/.cache/pip/wheels/a6/b7/83/e941e0a0e04f417982e718ae7295d1e82b5f2863a1c51edd71
  Building wheel for db (setup.py) ... [?25l[?25hdone
  Created wheel for db: filename=db-0.1.1

In [None]:
def classify_image(image_path, model, class_names, confidence_threshold=0.6):
    """Classify a single image file, falling back to 'other' on low confidence.

    Args:
        image_path: Path (str or path-like) of the image to classify.
        model: Callable that maps a 1x224x224x3 image batch to class scores.
        class_names: Sequence of labels indexed by the model's output position.
        confidence_threshold: Minimum top score required to accept the
            predicted class.

    Returns:
        The entry of `class_names` with the highest score, or the string
        'other' when that score is below `confidence_threshold`.
    """
    # Read, decode to 3 channels, and resize exactly as the training pipeline does.
    raw_bytes = tf.io.read_file(str(image_path))
    pixels = tf.image.decode_jpeg(raw_bytes, channels=3)
    pixels = tf.image.resize(pixels, [224, 224])
    batch = pixels[None, ...]  # leading batch dimension of size 1

    # Score the image and pick the best class.
    predictions = model(batch)
    predicted_class_index = np.argmax(predictions, axis=-1)
    predicted_probability = np.max(predictions)
    print(predicted_probability)

    # Reject uncertain predictions.
    if predicted_probability < confidence_threshold:
        return 'other'
    return class_names[predicted_class_index[0]]

if __name__== "__main__":
    # Classify every file under `test_dir` and record the result in SQLite.
    db_file = 'result.db'

    # To start from an empty table, delete `db_file` before running this cell:
    # if os.path.exists(db_file):
    #   os.remove(db_file)

    # Connect to the SQLite database or create a new one
    conn = sqlite3.connect(db_file)
    try:
        # Create a cursor object to execute SQL queries
        c = conn.cursor()

        # One row per file name (PRIMARY KEY); re-classifying a file replaces its row.
        # NOTE(review): only the base name is stored, so files with the same
        # name in different directories overwrite each other.
        c.execute('''CREATE TABLE IF NOT EXISTS results (file_name TEXT PRIMARY KEY, result TEXT, date DATE)''')

        newModel = tf.keras.models.load_model('/content/drive/MyDrive/code/model_up')  # Load the model
        test_dir = "/content/drive/MyDrive/document/test/driver_license"
        # rglob("*") also yields sub-directories; keep regular files only so
        # tf.io.read_file is never handed a directory.
        test_images = [p for p in pathlib.Path(test_dir).rglob("*") if p.is_file()]
        # Must stay in the same (sorted) order as the classes used for training.
        class_names = [ 'driver_license','house_register','national_id_card','passport']

        for image_path in test_images:
            predicted_class = classify_image(image_path, newModel, class_names)
            image_name = os.path.basename(image_path)
            print(f"The image {image_name} is classified as {predicted_class}")
            # Store the date as an ISO 'YYYY-MM-DD' string explicitly; sqlite3's
            # implicit date adapter (which produced the same text) is
            # deprecated since Python 3.12.
            c.execute("INSERT OR REPLACE INTO results VALUES (?, ?, ?)", (str(image_name), predicted_class, date.today().isoformat()))
            # Commit per image so earlier results survive a later failure.
            conn.commit()

        # Define the SQL query to retrieve all records from the "results" table
        qry = "SELECT * FROM results"

        # Read the query result into a DataFrame
        df = pd.read_sql_query(qry, conn)

        c.close()
    finally:
        # Always release the database handle, even if classification fails.
        conn.close()



0.99982494
The image dl1.jpg is classified as driver_license
0.9981687
The image dl2.JPG is classified as driver_license
0.9989472
The image dl3.JPG is classified as driver_license
0.66132843
The image dl4.JPG is classified as driver_license
0.999946
The image dl5.JPG is classified as driver_license
0.9998022
The image dl6.JPG is classified as driver_license
0.99721944
The image dl7.JPG is classified as driver_license
0.99992716
The image dl9.JPG is classified as driver_license
0.9971193
The image dl10.JPG is classified as driver_license


In [None]:
# import matplotlib.pyplot as plt

# for image_path in test_images:
#     predicted_class = classify_image(image_path, newModel, class_names)
#     image_name = os.path.basename(image_path)

#     # Load and display the image
#     img_display = plt.imread(image_path)
#     plt.imshow(img_display)
#     plt.title(f"{image_name} -> {predicted_class}")
#     plt.axis('off')  # Don't show axis values
#     plt.show()


In [None]:
# Load every stored classification result into a DataFrame.
conn = sqlite3.connect('result.db')
try:
    # Define the SQL query
    qry = "SELECT * FROM results"

    # Read the query result into a DataFrame
    df = pd.read_sql_query(qry, conn)
finally:
    # Close the connection even if the query fails (e.g. the table is missing).
    conn.close()

# Display the first records of the DataFrame (at most 100 rows)
print(df.head(100))

      file_name          result        date
0  IMG_0959.JPG           other  2023-07-24
1  IMG_0198.JPG  driver_license  2023-07-24
2  IMG_0846.JPG           other  2023-07-24
3  IMG_1010.JPG        passport  2023-07-24
4  IMG_1002.JPG        passport  2023-07-24
5  IMG_1007.JPG        passport  2023-07-24
6  IMG_0974.JPG        passport  2023-07-24
