In [None]:
import os
import glob
import pandas as pd
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.layers import Input 
from sklearn.model_selection import train_test_split

In [None]:
# Root of the dataset. The indexing loop below expects the layout
# BASE_DIR/<class>/<split>/..., where <split> is 'train' or 'test'.
BASE_DIR = '/kaggle/input/dataset'
# Target size handed to PIL's resize() for every image.
IMG_SIZE = (64,64)

# 1) Collect the class names: every sub-directory of BASE_DIR, sorted.
#    A class's position in this list is used as its integer label.
CLASS_NAMES = [
    d for d in sorted(os.listdir(BASE_DIR))
    if os.path.isdir(os.path.join(BASE_DIR, d))
]
print("Classes:", CLASS_NAMES)

# 2) Build a DataFrame with one row per image file
records = []
for label, cls in enumerate(CLASS_NAMES):
    for split in ['train', 'test']:
        split_dir = os.path.join(BASE_DIR, cls, split)
        if not os.path.isdir(split_dir):
            # This class has no such split on disk; nothing to index.
            continue
        entries = os.listdir(split_dir)
        has_subdirs = any(os.path.isdir(os.path.join(split_dir, e)) for e in entries)
        if has_subdirs:
            # Nested layout: search recursively. With recursive=True, '**'
            # also matches zero directories, so files placed directly in
            # split_dir are still picked up.
            file_list = glob.glob(os.path.join(split_dir, '**', '*.*'), recursive=True)
        else:
            # Flat layout: every entry is a candidate file.
            file_list = [os.path.join(split_dir, f) for f in entries]

        # Sort for a deterministic row order; keep only image extensions
        # (compared case-insensitively).
        for fp in sorted(file_list):
            if fp.lower().endswith(('.jpg', '.jpeg', '.png')):
                records.append({
                    'filepath': fp,
                    'label': label,
                    'split': split
                })

# Name the columns explicitly: with an empty `records` list a bare
# pd.DataFrame(records) has no columns at all, and df['split'] on the next
# line would fail with an unhelpful KeyError.
df = pd.DataFrame(records, columns=['filepath', 'label', 'split'])
print("\nSố lượng ảnh theo split (train/test):\n", df['split'].value_counts())

# 3) Image preprocessing function
def preprocess(filepath):
    """Load one image file as a normalized RGB array.

    Parameters
    ----------
    filepath
        Path of the image file; passed straight to ``PIL.Image.open``.

    Returns
    -------
    numpy.ndarray
        float32 array built from the image after conversion to RGB and
        resizing to ``IMG_SIZE`` with the LANCZOS filter, with every pixel
        value divided by 255.
    """
    # The context manager releases the underlying file handle even when
    # decoding fails part-way (e.g. a truncated file). convert() returns a
    # new, fully loaded image, so it stays usable after the file is closed.
    with Image.open(filepath) as img:
        rgb = img.convert('RGB')
    resized = rgb.resize(IMG_SIZE, Image.LANCZOS)
    arr = np.array(resized, dtype='float32') / 255.0
    return arr

# 4) Load the train and test splits
def load_data(split_name):
    """Return ``(X, y)`` arrays for all rows of ``df`` in the given split.

    Parameters
    ----------
    split_name
        Value matched against the ``'split'`` column of ``df``.

    Returns
    -------
    tuple of numpy.ndarray
        ``X`` stacks the result of ``preprocess`` for each selected file
        path; ``y`` holds the matching entries of the ``'label'`` column.
    """
    rows = df.loc[df['split'] == split_name]
    images = list(map(preprocess, rows['filepath']))
    labels = rows['label'].tolist()
    return np.array(images), np.array(labels)

# Materialize both splits as arrays.
X_train_all, y_train_all = load_data('train')
X_test, y_test = load_data('test')

print(f"Loaded X_train_all: {X_train_all.shape}, X_test: {X_test.shape}")

# 5) Carve a validation set out of the training data: 20% hold-out,
#    stratified on the labels, with a fixed seed so the split is repeatable.
X_train, X_val, y_train, y_val = train_test_split(
    X_train_all,
    y_train_all,
    random_state=42,
    stratify=y_train_all,
    test_size=0.2,
)

# Report the shape of every array that feeds training and evaluation.
print("Final shapes:")
for x_tag, x_arr, y_tag, y_arr in (
    ("  X_train:", X_train, "y_train:", y_train),
    ("  X_val:  ", X_val, "y_val:  ", y_val),
    ("  X_test: ", X_test, "y_test: ", y_test),
):
    print(x_tag, x_arr.shape, y_tag, y_arr.shape)

In [None]:
# Sanity check: compare the number of discovered classes with the labels
# that actually ended up in the training split.
class_count = len(CLASS_NAMES)
train_labels_present = np.unique(y_train)
print("Number of classes:", class_count)
print("Unique labels in y_train:", train_labels_present)

In [None]:
# Seed the Python, NumPy and TensorFlow RNGs so that weight initialization,
# dropout, augmentation and batch shuffling are repeatable across
# "Restart & Run All" (as far as the backend allows).
tf.keras.utils.set_random_seed(42)

# Augmentation layers: random horizontal flip, rotation and zoom.
# Keras applies these random layers only in training mode.
data_augmentation = tf.keras.Sequential([
    tf.keras.layers.RandomFlip("horizontal"),
    tf.keras.layers.RandomRotation(0.1),
    tf.keras.layers.RandomZoom(0.1),
])

model = tf.keras.Sequential([
    # Derive the input shape from IMG_SIZE instead of repeating the literal,
    # so changing IMG_SIZE cannot silently desynchronize the model from the
    # data. PIL sizes are (width, height); arrays are (height, width, channels).
    Input(shape=(IMG_SIZE[1], IMG_SIZE[0], 3)),
    data_augmentation,  # augmentation is part of the model itself

    # Convolutional block 1
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Dropout(0.25),

    # Convolutional block 2
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Dropout(0.5),

    # Classifier head: one softmax output per entry of CLASS_NAMES
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(len(CLASS_NAMES), activation='softmax')
])
model.summary()

In [None]:
# Labels are integer class indices, hence the sparse cross-entropy loss.
adam = tf.keras.optimizers.Adam(learning_rate=0.001)
model.compile(optimizer=adam, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Stop once the monitored metric has not improved for 5 epochs and roll the
# in-memory model back to its best weights. 'val_loss' is the Keras default
# for EarlyStopping, spelled out here.
# NOTE(review): early stopping tracks val_loss while the checkpoint below
# tracks val_accuracy, so the two may pick different epochs — confirm intent.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)

# Write the model to disk only when validation accuracy reaches a new maximum.
best_checkpoint = tf.keras.callbacks.ModelCheckpoint(
    'best_model.keras',
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
)

callbacks = [early_stopping, best_checkpoint]


In [None]:
# Train for at most 30 epochs on shuffled mini-batches of 4, validating on
# the held-out split after each epoch; `callbacks` may end training early.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=4,
    epochs=30,
    shuffle=True,
    validation_data=(X_val, y_val),
    callbacks=callbacks,
)

In [None]:
# Plot training vs. validation accuracy per epoch from the fit history.
fig, ax = plt.subplots()
for metric_key, curve_label in (
    ('accuracy', 'Train Accuracy'),
    ('val_accuracy', 'Val Accuracy'),
):
    ax.plot(history.history[metric_key], label=curve_label)
ax.set_title('Training Progress')
ax.set_ylabel('Accuracy')
ax.set_xlabel('Epoch')
ax.legend()
plt.show()

In [None]:
# Reload the checkpoint file from disk and score it on the test split.
checkpoint_path = "best_model.keras"
best_model = tf.keras.models.load_model(checkpoint_path)
test_metrics = best_model.evaluate(x=X_test, y=y_test, verbose=0)
# evaluate() returns the loss followed by the compiled metrics.
test_loss, test_acc = test_metrics
print(f"Test Accuracy: {test_acc:.4f}")