In [None]:
import os
import shutil
import random
import numpy as np
import matplotlib.pyplot as plt
import cv2
from PIL import Image

import tensorflow as tf
from tensorflow.keras.preprocessing import image_dataset_from_directory
from tensorflow.keras import layers, models
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input, decode_predictions

In [None]:
# Load every image under dataset/<class_name>/ into memory as 224x224 RGB arrays.
# X collects the pixel arrays, Y the integer class index of each image.
datapath = os.path.join('dataset')
X = []
Y = []
# Only sub-directories are treated as classes (stray files such as .DS_Store are
# ignored); sorting keeps the name -> index mapping stable between runs.
class_names = sorted(
    entry for entry in os.listdir(datapath)
    if os.path.isdir(os.path.join(datapath, entry))
)  # bikes, busses, ...
label_map = {name: idx for idx, name in enumerate(class_names)}
for class_name in class_names:  # iterate through each class folder
    class_dir = os.path.join(datapath, class_name)
    # Sorted so the sample order is reproducible regardless of the filesystem.
    for filename in sorted(os.listdir(class_dir)):  # iterate through each file in the folder
        img_path = os.path.join(class_dir, filename)
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals an unreadable / non-image file by returning None
            # instead of raising, which would otherwise crash cvtColor below.
            print("skipping unreadable file:", img_path)
            continue
        # OpenCV loads BGR; convert to RGB. No need to convert to grayscale,
        # ResNet reads RGB input.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, (224, 224))
        X.append(img)
        Y.append(label_map[class_name])

In [None]:
# Stack the per-image lists into dense NumPy arrays:
# pixels become float32, labels keep NumPy's inferred dtype.
X, Y = np.array(X, dtype=np.float32), np.array(Y)

# Quick sanity check of one label and of both array shapes.
print("example label:", Y[1])
print("X shape:", X.shape)
print("Y shape:", Y.shape)

In [None]:
# Preprocess pixels and one-hot encode the labels.
# The frozen ImageNet ResNet50 was trained on inputs produced by
# resnet50.preprocess_input (RGB -> BGR plus per-channel mean subtraction, no
# rescaling), so that is used here instead of a plain 0-1 division.
# Y is still a 1-D vector of integer labels only before this cell has run; the
# guard makes re-running the cell a no-op instead of preprocessing X twice and
# one-hot encoding an already one-hot Y.
if Y.ndim == 1:
    # NOTE: for float NumPy input, preprocess_input writes over X in place.
    X = preprocess_input(X)
    Y = tf.keras.utils.to_categorical(Y, num_classes=len(class_names))  # one-hot encoding
print(X.shape)
# Print a summary rather than a full 224x224x3 image array.
print("X value range:", X.min(), X.max())
print(Y.shape)
print(Y[1])

In [None]:
# Split data -> train 70%, val 15%, test 15% (test also receives any rounding remainder).
dataset = tf.data.Dataset.from_tensor_slices((X, Y))
# Shuffle once to avoid class-ordered bias. reshuffle_each_iteration must be
# False: with the default (True) the order changes on every pass over the data,
# so take()/skip() would carve out different samples each epoch and
# train/val/test examples would leak into each other.
dataset = dataset.shuffle(buffer_size=len(X), seed=30, reshuffle_each_iteration=False)
train_size = int(0.7 * len(X))
val_size = int(0.15 * len(X))
test_size = len(X) - train_size - val_size  # keep samples lost to int() truncation
train_data = dataset.take(train_size)
val_data = dataset.skip(train_size).take(val_size)
test_data = dataset.skip(train_size).skip(val_size).take(test_size)
# The split is now fixed, so reshuffle only the training subset each epoch.
train_data = train_data.shuffle(buffer_size=train_size, seed=30)
print(train_data.cardinality(), val_data.cardinality(), test_data.cardinality())

In [None]:
# Split each subset into batches and prefetch so input preparation overlaps
# with model execution.
BATCH_SIZE = 32
train_dataset, val_dataset, test_dataset = (
    subset.batch(BATCH_SIZE).prefetch(buffer_size=tf.data.AUTOTUNE)
    for subset in (train_data, val_data, test_data)
)

In [None]:
# Load the ResNet50 backbone with ImageNet weights and without its
# classification head (include_top=False), for 224x224 RGB inputs.
base_model = ResNet50(include_top=False, weights='imagenet', input_shape=(224, 224, 3))

# Freeze the backbone so only the layers stacked on top of it are trained.
base_model.trainable = False

In [None]:
# Build the main model: frozen ResNet50 backbone followed by a small
# classification head.
model = tf.keras.Sequential([
    base_model,
    layers.GlobalAveragePooling2D(),  # collapse the spatial feature map to one vector per image
    layers.Dropout(0.2),  # avoid overfitting
    layers.Dense(128, activation='relu'),
    # One output unit per class. Derived from class_names (the same value used
    # for the one-hot encoding) rather than a hard-coded 12, so the head always
    # matches the label width.
    layers.Dense(len(class_names), activation='softmax'),
])

In [None]:
# Configure training: Adam optimizer, categorical cross-entropy loss
# (labels are one-hot encoded), and accuracy reported as the metric.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [None]:
# Train for 5 epochs, evaluating on the validation split after each epoch;
# the returned History object is kept in `history`.
history = model.fit(
    x=train_dataset,
    validation_data=val_dataset,
    epochs=5,
)