In [3]:
!unzip -o "/content/Data Project.zip"

Archive:  /content/Data Project.zip
   creating: Data Project/
   creating: Data Project/1. Cơm trắng - 10.000/
  inflating: Data Project/1. Cơm trắng - 10.000/img (1).jpg  
  inflating: Data Project/1. Cơm trắng - 10.000/img (10).jpg  
  inflating: Data Project/1. Cơm trắng - 10.000/img (100).jpg  
  inflating: Data Project/1. Cơm trắng - 10.000/img (101).jpg  
  inflating: Data Project/1. Cơm trắng - 10.000/img (102).jpg  
  inflating: Data Project/1. Cơm trắng - 10.000/img (103).jpg  
  inflating: Data Project/1. Cơm trắng - 10.000/img (104).jpg  
  inflating: Data Project/1. Cơm trắng - 10.000/img (105).jpg  
  inflating: Data Project/1. Cơm trắng - 10.000/img (106).jpg  
  inflating: Data Project/1. Cơm trắng - 10.000/img (107).jpg  
  inflating: Data Project/1. Cơm trắng - 10.000/img (108).jpg  
  inflating: Data Project/1. Cơm trắng - 10.000/img (109).jpg  
  inflating: Data Project/1. Cơm trắng - 10.000/img (11).jpg  
  inflating: Data Project/1. Cơm trắng - 10.000/img (110).jp

In [4]:
# ================== TRAIN A CNN FROM SCRATCH FOR 17 CLASSES ==================
import os, json
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

# 0) BASIC CONFIGURATION
data_dir   = '/content/Data Project'   # Root folder containing 17 sub-folders (one sub-folder = one dish)
IMG_SIZE   = (128, 128)                # Passed as target_size to the loaders and as the model's spatial input size
BATCH_SIZE = 32
VAL_SPLIT  = 0.2                       # Fraction of the images reserved for validation
SEED       = 42

# Seed Python's `random`, NumPy and TensorFlow in one call. Without this, SEED
# only reaches the directory iterators, while weight initialisation and dropout
# stay unseeded and every run gives different results.
# NOTE: some GPU kernels are still nondeterministic, so runs may not be
# bit-for-bit identical.
tf.keras.utils.set_random_seed(SEED)

# 1) DATA PIPELINE WITH IMAGEDATAGENERATOR (NO PRE-TRAINED MODEL)
#    Train and validation subsets come from the same directory via
#    validation_split; the same seed is handed to both iterators.

# Random augmentations, applied by the training generator only.
augmentation_options = dict(
    rotation_range=20,
    width_shift_range=0.10,
    height_shift_range=0.10,
    zoom_range=0.20,
    horizontal_flip=True,
)
train_datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=VAL_SPLIT,
    **augmentation_options
)

# The validation generator only rescales pixel values; no augmentation.
val_datagen = ImageDataGenerator(rescale=1./255, validation_split=VAL_SPLIT)

# Keyword arguments common to both directory iterators.
shared_flow_options = dict(
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    seed=SEED,
)

# Training batches are shuffled.
train_generator = train_datagen.flow_from_directory(
    data_dir, subset='training', shuffle=True, **shared_flow_options
)

# Validation batches keep a fixed order (shuffle=False).
val_generator = val_datagen.flow_from_directory(
    data_dir, subset='validation', shuffle=False, **shared_flow_options
)

# Print the classes that were discovered so the folder -> index mapping can be checked.
print("\nCác lớp mô hình sẽ học (tên_lớp: index):")
print(train_generator.class_indices)
num_classes = len(train_generator.class_indices)

# Save the class-name -> index mapping for later use at inference time.
# The class names contain Vietnamese characters and are written unescaped
# (ensure_ascii=False), so the file encoding is pinned to UTF-8 instead of
# relying on the platform default, which can fail or mis-encode on
# non-UTF-8 locales.
with open('classes.json', 'w', encoding='utf-8') as f:
    json.dump(train_generator.class_indices, f, ensure_ascii=False, indent=2)
print(f"✅ Đã lưu nhãn vào classes.json (số lớp: {num_classes})")

# 2) BUILD THE MODEL (CNN from scratch, NO pre-training)
#    Keeps the structure taught in class: Conv2D -> MaxPooling2D blocks,
#    then Flatten -> Dense -> Dropout -> Dense.
model = Sequential([
    # Declare the input with an explicit Input object rather than passing
    # `input_shape` to the first layer; recent Keras versions warn about the
    # latter when used inside a Sequential model. The architecture is unchanged.
    tf.keras.Input(shape=(IMG_SIZE[0], IMG_SIZE[1], 3)),

    Conv2D(32, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),

    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),

    # One extra light conv block so the network has enough capacity to
    # separate 17 classes (still a from-scratch CNN).
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),

    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(num_classes, activation='softmax')  # Output width follows the number of class folders found
])

# 3) COMPILE THE MODEL
#    categorical_crossentropy matches the one-hot labels produced by
#    class_mode='categorical' in the generators.
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

model.summary()

# 4) TRAIN THE MODEL
#    With few images per class, 25-40 epochs is reasonable; 50 epochs combined
#    with early stopping is the best option.
#    The teacher's template has no callbacks, so they are optional here: they
#    stop early and keep the best model without touching the main structure.
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint

EPOCHS = 30

# Stop once val_accuracy has not improved for 8 epochs and restore the best weights.
stop_when_stalled = EarlyStopping(monitor='val_accuracy', patience=8, restore_best_weights=True)
# Halve the learning rate after 4 epochs without val_loss improvement (floor: 1e-6).
halve_lr_on_plateau = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=4, min_lr=1e-6, verbose=1)
# Write best_model.h5 whenever val_accuracy reaches a new best.
keep_best_checkpoint = ModelCheckpoint('best_model.h5', monitor='val_accuracy', save_best_only=True, verbose=1)

callbacks = [stop_when_stalled, halve_lr_on_plateau, keep_best_checkpoint]

print("\nBắt đầu huấn luyện...")
history = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=EPOCHS,
    callbacks=callbacks,  # drop this argument to match the teacher's template exactly
)

# 5) SAVE THE RESULT
# Persist the model as it stands after fit() under an explicit name
# (from-scratch CNN for 17 classes), then report which artefacts exist.
model.save('food17_scratch.h5')
print(
    "\n✅ Huấn luyện hoàn tất! Mô hình đã được lưu vào file 'food17_scratch.h5'",
    "💾 Ngoài ra có 'best_model.h5' (nếu callbacks đã bật) và 'classes.json' (mapping nhãn).",
    sep="\n",
)


Found 1759 images belonging to 17 classes.
Found 432 images belonging to 17 classes.

Các lớp mô hình sẽ học (tên_lớp: index):
{'1. Cơm trắng - 10.000': 0, '10. Rau xào - 10000': 1, '11. Trứng chiên - 25000': 2, '12. Canh bí đao - 12000': 3, '13. Canh bí đỏ - 12000': 4, '14. Dưa leo - 5000': 5, '15. Lạp sưởng - 15000': 6, '16. Nước chấm - 3000': 7, '17. Khay trống - 0': 8, '2. Đậu hũ sốt cà - 25000': 9, '3. Cá hú kho - 30000': 10, '4. Thịt kho trứng - 30000': 11, '5. Thịt kho - 25000': 12, '6. Canh chua cá - 25000': 13, '7. Canh chua - 10000': 14, '8. Sường nướng - 30000': 15, '9. Canh rau - 7000': 16}
✅ Đã lưu nhãn vào classes.json (số lớp: 17)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)



Bắt đầu huấn luyện...


  self._warn_if_super_not_called()


Epoch 1/30
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 485ms/step - accuracy: 0.1329 - loss: 2.6874
Epoch 1: val_accuracy improved from -inf to 0.46065, saving model to best_model.h5




[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 625ms/step - accuracy: 0.1344 - loss: 2.6826 - val_accuracy: 0.4606 - val_loss: 1.5420 - learning_rate: 0.0010
Epoch 2/30
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 443ms/step - accuracy: 0.4018 - loss: 1.6570
Epoch 2: val_accuracy improved from 0.46065 to 0.60185, saving model to best_model.h5




[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 525ms/step - accuracy: 0.4025 - loss: 1.6548 - val_accuracy: 0.6019 - val_loss: 1.0935 - learning_rate: 0.0010
Epoch 3/30
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 442ms/step - accuracy: 0.5302 - loss: 1.3297
Epoch 3: val_accuracy improved from 0.60185 to 0.65278, saving model to best_model.h5




[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 536ms/step - accuracy: 0.5307 - loss: 1.3281 - val_accuracy: 0.6528 - val_loss: 0.9597 - learning_rate: 0.0010
Epoch 4/30
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 472ms/step - accuracy: 0.6269 - loss: 1.0530
Epoch 4: val_accuracy improved from 0.65278 to 0.71296, saving model to best_model.h5




[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 558ms/step - accuracy: 0.6270 - loss: 1.0525 - val_accuracy: 0.7130 - val_loss: 0.8109 - learning_rate: 0.0010
Epoch 5/30
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 464ms/step - accuracy: 0.6743 - loss: 0.8739
Epoch 5: val_accuracy improved from 0.71296 to 0.76852, saving model to best_model.h5




[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 559ms/step - accuracy: 0.6743 - loss: 0.8741 - val_accuracy: 0.7685 - val_loss: 0.7368 - learning_rate: 0.0010
Epoch 6/30
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 474ms/step - accuracy: 0.7331 - loss: 0.7705
Epoch 6: val_accuracy improved from 0.76852 to 0.81250, saving model to best_model.h5




[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 560ms/step - accuracy: 0.7332 - loss: 0.7701 - val_accuracy: 0.8125 - val_loss: 0.5593 - learning_rate: 0.0010
Epoch 7/30
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 471ms/step - accuracy: 0.7506 - loss: 0.6934
Epoch 7: val_accuracy improved from 0.81250 to 0.87037, saving model to best_model.h5




[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 568ms/step - accuracy: 0.7508 - loss: 0.6930 - val_accuracy: 0.8704 - val_loss: 0.4080 - learning_rate: 0.0010
Epoch 8/30
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 483ms/step - accuracy: 0.7897 - loss: 0.6068
Epoch 8: val_accuracy did not improve from 0.87037
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 578ms/step - accuracy: 0.7896 - loss: 0.6069 - val_accuracy: 0.7917 - val_loss: 0.5604 - learning_rate: 0.0010
Epoch 9/30
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 469ms/step - accuracy: 0.7951 - loss: 0.5907
Epoch 9: val_accuracy did not improve from 0.87037
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 564ms/step - accuracy: 0.7949 - loss: 0.5913 - val_accuracy: 0.8681 - val_loss: 0.4483 - learning_rate: 0.0010
Epoch 10/30
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m



[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 565ms/step - accuracy: 0.8401 - loss: 0.4753 - val_accuracy: 0.9190 - val_loss: 0.3477 - learning_rate: 0.0010
Epoch 14/30
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 468ms/step - accuracy: 0.8675 - loss: 0.3862
Epoch 14: val_accuracy did not improve from 0.91898
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 553ms/step - accuracy: 0.8675 - loss: 0.3858 - val_accuracy: 0.8958 - val_loss: 0.3883 - learning_rate: 0.0010
Epoch 15/30
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 469ms/step - accuracy: 0.8498 - loss: 0.3986
Epoch 15: val_accuracy did not improve from 0.91898
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 554ms/step - accuracy: 0.8499 - loss: 0.3984 - val_accuracy: 0.9144 - val_loss: 0.2592 - learning_rate: 0.0010
Epoch 16/30
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━



[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 517ms/step - accuracy: 0.9161 - loss: 0.2270 - val_accuracy: 0.9444 - val_loss: 0.2806 - learning_rate: 5.0000e-04
Epoch 21/30
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 434ms/step - accuracy: 0.9336 - loss: 0.1933
Epoch 21: val_accuracy did not improve from 0.94444
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 523ms/step - accuracy: 0.9335 - loss: 0.1934 - val_accuracy: 0.9306 - val_loss: 0.1863 - learning_rate: 5.0000e-04
Epoch 22/30
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 439ms/step - accuracy: 0.9340 - loss: 0.1998
Epoch 22: val_accuracy did not improve from 0.94444
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 519ms/step - accuracy: 0.9340 - loss: 0.1996 - val_accuracy: 0.9375 - val_loss: 0.1930 - learning_rate: 5.0000e-04
Epoch 23/30
[1m55/55[0m [32m━━━━━━━━



[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 549ms/step - accuracy: 0.9668 - loss: 0.0999 - val_accuracy: 0.9560 - val_loss: 0.1618 - learning_rate: 2.5000e-04
Epoch 28/30
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 443ms/step - accuracy: 0.9668 - loss: 0.1079
Epoch 28: val_accuracy did not improve from 0.95602
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 523ms/step - accuracy: 0.9668 - loss: 0.1079 - val_accuracy: 0.9491 - val_loss: 0.2200 - learning_rate: 2.5000e-04
Epoch 29/30
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 444ms/step - accuracy: 0.9645 - loss: 0.0990
Epoch 29: val_accuracy did not improve from 0.95602
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 540ms/step - accuracy: 0.9644 - loss: 0.0992 - val_accuracy: 0.9444 - val_loss: 0.1650 - learning_rate: 2.5000e-04
Epoch 30/30
[1m55/55[0m [32m━━━━━━━━



[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 536ms/step - accuracy: 0.9723 - loss: 0.0782 - val_accuracy: 0.9583 - val_loss: 0.1323 - learning_rate: 2.5000e-04





✅ Huấn luyện hoàn tất! Mô hình đã được lưu vào file 'food17_scratch.h5'
💾 Ngoài ra có 'best_model.h5' (nếu callbacks đã bật) và 'classes.json' (mapping nhãn).
