<a href="https://colab.research.google.com/github/caominhtrivo/AI-based-Captcha-Solver/blob/main/main.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import shutil
import random
import numpy as np
import matplotlib.pyplot as plt
import cv2
from PIL import Image

import tensorflow as tf
from tensorflow.keras.preprocessing import image_dataset_from_directory
from tensorflow.keras import layers, models
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input, decode_predictions

In [None]:
# Load every image under `datapath`, where each sub-folder is one class
# (e.g. bikes, buses, ...). Produces:
#   X           - list of 224x224 RGB images
#   Y           - list of integer class labels, parallel to X
#   class_names - sorted list of class folder names
#   label_map   - class folder name -> integer label
datapath = '/content/dataset'
X = []
Y = []

# Only visible directories are classes; this skips stray files and hidden
# folders such as `.ipynb_checkpoints` that would otherwise become bogus classes.
class_names = sorted(
    entry for entry in os.listdir(datapath)
    if not entry.startswith('.') and os.path.isdir(os.path.join(datapath, entry))
)
label_map = {name: idx for idx, name in enumerate(class_names)}

skipped_files = []
for class_name in class_names:  # iterate through each class folder
    class_dir = os.path.join(datapath, class_name)
    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order (and therefore any seeded shuffle applied later) reproducible.
    for filename in sorted(os.listdir(class_dir)):
        img_path = os.path.join(class_dir, filename)
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread returns None instead of raising when a file is
            # missing, corrupt, or not an image; record it and move on.
            skipped_files.append(img_path)
            continue
        # OpenCV loads images as BGR; convert to RGB. No grayscale conversion
        # is needed because the network consumes 3-channel RGB input.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, (224, 224))
        X.append(img)
        Y.append(label_map[class_name])

if skipped_files:
    print(f"skipped {len(skipped_files)} unreadable file(s), e.g. {skipped_files[0]}")

In [None]:
# Turn the Python lists built by the loader into numpy arrays:
# images become one float32 array, labels keep numpy's inferred dtype.
X = np.array(X, dtype=np.float32)
Y = np.array(Y)

# Quick sanity check: one label plus the shapes of both arrays.
for caption, value in (
    ("example label:", Y[1]),
    ("X shape:", X.shape),
    ("Y shape:", Y.shape),
):
    print(caption, value)

example label: 0
X shape: (11730, 224, 224, 3)
Y shape: (11730,)


In [None]:
# Scale pixel values from the 0-255 range to 0-1.
# The division is done in place: X is a large float32 array, and
# `X = X / 255.0` would briefly hold a second full-size copy in memory.
# The guard keeps the cell safe to re-run (heuristic: image data that has not
# been scaled yet contains values above 1).
if X.size and X.max() > 1.0:
    X /= 255.0
print(X.shape)
# Show summary statistics of one sample rather than dumping the whole image.
print("sample min/max/mean:", X[1].min(), X[1].max(), X[1].mean())

# One-hot encode the integer labels. Skipped when Y is already 2-D, so that
# re-running the cell does not encode the one-hot matrix a second time.
if Y.ndim == 1:
    Y = tf.keras.utils.to_categorical(Y, num_classes=len(class_names))
print(Y.shape)
print(Y[1])

(11730, 224, 224, 3)
[[[0.7058824  0.6901961  0.68235296]
  [0.7137255  0.7019608  0.69411767]
  [0.73333335 0.7254902  0.7176471 ]
  ...
  [0.5568628  0.49803922 0.4745098 ]
  [0.49803922 0.4392157  0.41568628]
  [0.46666667 0.40784314 0.38039216]]

 [[0.62352943 0.60784316 0.6039216 ]
  [0.627451   0.6156863  0.6117647 ]
  [0.6392157  0.6313726  0.627451  ]
  ...
  [0.5921569  0.5294118  0.5019608 ]
  [0.5568628  0.49019608 0.4627451 ]
  [0.5372549  0.47058824 0.44313726]]

 [[0.4745098  0.4627451  0.46666667]
  [0.4745098  0.4627451  0.46666667]
  [0.4745098  0.46666667 0.47058824]
  ...
  [0.654902   0.5764706  0.5529412 ]
  [0.65882355 0.58431375 0.5529412 ]
  [0.65882355 0.58431375 0.5529412 ]]

 ...

 [[0.69803923 0.7058824  0.7019608 ]
  [0.69411767 0.7019608  0.69803923]
  [0.6862745  0.69411767 0.6901961 ]
  ...
  [0.2627451  0.30588236 0.34901962]
  [0.25490198 0.29803923 0.34117648]
  [0.2509804  0.29411766 0.33333334]]

 [[0.7176471  0.72156864 0.72156864]
  [0.70980394 0.

In [None]:
# Split the data: 70% train, 15% validation, remainder (~15%) test.
dataset = tf.data.Dataset.from_tensor_slices((X, Y))

# Shuffle once with a fixed seed so the classes are mixed before splitting.
# reshuffle_each_iteration=False is essential: by default the dataset is
# reshuffled on every pass, so take()/skip() would carve the splits out of a
# different permutation each epoch and training samples would leak into the
# validation and test sets.
dataset = dataset.shuffle(
    buffer_size=len(X),
    seed=30,
    reshuffle_each_iteration=False,
)

train_size = int(0.7 * len(X))
val_size = int(0.15 * len(X))
# Give the test split whatever is left so truncation never drops a sample.
test_size = len(X) - train_size - val_size

train_data = dataset.take(train_size)
val_data = dataset.skip(train_size).take(val_size)
test_data = dataset.skip(train_size + val_size)

# The split above is now fixed, so reshuffle only the training samples on each
# epoch. The buffer is kept modest because every element is a full image.
train_data = train_data.shuffle(buffer_size=512, seed=30)

print(
    train_data.cardinality().numpy(),
    val_data.cardinality().numpy(),
    test_data.cardinality().numpy(),
)

tf.Tensor(8211, shape=(), dtype=int64) tf.Tensor(1759, shape=(), dtype=int64) tf.Tensor(1759, shape=(), dtype=int64)


In [None]:
# Split each dataset into batches and prefetch upcoming batches.
BATCH_SIZE = 32
train_dataset, val_dataset, test_dataset = (
    split.batch(BATCH_SIZE).prefetch(buffer_size=tf.data.AUTOTUNE)
    for split in (train_data, val_data, test_data)
)

In [None]:
# Augmentation pipeline used as the first stage of the model.
# `layers` comes from the import cell at the top of the notebook, so it is not
# re-imported here.
data_augmentation = tf.keras.Sequential([
  layers.RandomFlip("horizontal_and_vertical"),  # random left-right and up-down flips
  layers.RandomRotation(0.1),                    # random rotation, factor 0.1
  layers.RandomZoom(0.1),                        # random zoom, factor 0.1
])

In [None]:
# Load ResNet50 with ImageNet weights as a frozen backbone.
# NOTE(review): the Sequential model built in the next cell does not reference
# base_model, so this backbone is currently unused (ResNet is skipped).
base_model = ResNet50(
    include_top=False,  # drop the final classification layer
    weights='imagenet',
    input_shape=(224, 224, 3),
)
base_model.trainable = False  # keep the pretrained weights fixed

In [None]:
# Build the main model: augmentation followed by a small CNN classifier.
input_shape = (224, 224, 3)
num_classes = len(class_names)  # one output unit per class folder
model = tf.keras.Sequential([
    # Declare the input explicitly as the first layer. Previously `input_shape`
    # was passed to the first Conv2D, which is not the first layer of this
    # Sequential model, so the model had no declared input shape up front.
    tf.keras.Input(shape=input_shape),
    data_augmentation,
    # Three Conv -> BatchNorm -> MaxPool stages with 32, 64 and 128 filters.
    layers.Conv2D(32, (3, 3), activation='relu'),
    layers.BatchNormalization(),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.BatchNormalization(),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.BatchNormalization(),
    layers.MaxPooling2D((2, 2)),
    # Classification head: flatten, one hidden layer with dropout, softmax output.
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.3),
    layers.Dense(num_classes, activation='softmax')
])

In [None]:
# Configure training: Adam with a small learning rate, categorical
# cross-entropy as the loss (labels are one-hot), accuracy as the metric.
adam = tf.keras.optimizers.Adam(learning_rate=1e-4)
model.compile(optimizer=adam, loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Train for 5 epochs, evaluating on the validation batches after each epoch.
history = model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=5,
)

Epoch 1/5
[1m257/257[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 99ms/step - accuracy: 0.5748 - loss: 1.2515 - val_accuracy: 0.6538 - val_loss: 1.0133
Epoch 2/5
[1m257/257[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 99ms/step - accuracy: 0.5721 - loss: 1.2212 - val_accuracy: 0.6521 - val_loss: 1.0095
Epoch 3/5
[1m257/257[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 98ms/step - accuracy: 0.5833 - loss: 1.1825 - val_accuracy: 0.6515 - val_loss: 1.0296
Epoch 4/5
[1m257/257[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 98ms/step - accuracy: 0.5735 - loss: 1.2557 - val_accuracy: 0.6350 - val_loss: 1.0923
Epoch 5/5
[1m257/257[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 98ms/step - accuracy: 0.5731 - loss: 1.2324 - val_accuracy: 0.6180 - val_loss: 1.1240
