In [None]:
import os, gc, warnings
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers, regularizers, Sequential
from tensorflow.keras.models import Model
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint, Callback
from sklearn.utils.class_weight import compute_class_weight

# Silence every Python warning so the cell output stays readable.
warnings.filterwarnings(action='ignore')

# --- PATHS & HYPERPARAMETERS ---
# Dataset folders are looked up relative to the current working directory.
HOME = f"{os.getcwd()}/"
IMG_SIZE = (224, 224)            # size every image is resized to on load
BATCH_SIZE = 16                  # images per batch
NUM_CLASSES = 10                 # width of the softmax output layer
AUTOTUNE = tf.data.AUTOTUNE      # let tf.data choose parallelism / prefetch depth

# --- LOAD DATASETS ---
# Arguments shared by every loader call, and by the two labelled subsets.
_load_images = tf.keras.utils.image_dataset_from_directory
_common_kwargs = dict(image_size=IMG_SIZE, batch_size=BATCH_SIZE)
_split_kwargs = dict(validation_split=0.2, seed=123)

# Training and validation subsets are carved out of the same folder using the
# same split fraction and seed.
train_ds = _load_images(
    HOME + "train_images", subset="training", **_split_kwargs, **_common_kwargs
)
val_ds = _load_images(
    HOME + "train_images", subset="validation", **_split_kwargs, **_common_kwargs
)
# The test folder is unlabelled and is read without shuffling.
test_ds = _load_images(
    HOME + "test_images", label_mode=None, shuffle=False, **_common_kwargs
)

# --- PREPROCESS & AUGMENTATION ---
# Pixels are deliberately left in the raw [0, 255] float range produced by
# image_dataset_from_directory; no dataset-level Rescaling(1/255) is applied:
#   * Keras EfficientNet models embed their own Rescaling/Normalization layers
#     and expect [0, 255] inputs, so dividing by 255 here would scale the
#     images twice.
#   * RandomBrightness defaults to value_range=(0, 255); fed [0, 1] images,
#     its brightness offset and clipping would wipe out the image content of
#     the training samples only, creating a train/validation mismatch.

# Random augmentations; applied to training images only (the caller passes
# training=True when mapping this model over the training pipeline).
data_augmentation = Sequential([
    layers.RandomFlip("horizontal_and_vertical"),
    layers.RandomRotation(0.2),            # stronger rotation
    layers.RandomZoom(0.2),                # stronger zoom
    layers.RandomTranslation(0.2, 0.2),    # stronger translation
    layers.RandomContrast(0.2),            # contrast jitter
    # value_range is spelled out so it visibly matches the [0, 255] inputs.
    layers.RandomBrightness(0.2, value_range=(0, 255)),
], name="data_augmentation")

# --- CLASS WEIGHT & OVERSAMPLE ---
# One full pass over the (batched) training set to collect every integer label.
labels = np.concatenate([y.numpy() for _, y in train_ds])
# counts[k] = number of training samples whose label is k.
counts = np.bincount(labels)

# compute_class_weight returns one weight per entry of `classes`, in that
# order. Pair each weight with its actual class id instead of its position:
# enumerate() would mislabel the weights whenever a class id is missing from
# the training split.
present_classes = np.unique(labels)
balanced_weights = compute_class_weight("balanced", classes=present_classes, y=labels)
class_weight = {
    int(class_id): float(weight)
    for class_id, weight in zip(present_classes, balanced_weights)
}

# Per-sample view of the training data, consumed by oversample().
train_unbatched = train_ds.unbatch()
def oversample(ds, counts, seed=123):
    """Balance an unbatched ``(image, label)`` dataset by repeating rare classes.

    Every class that has at least one sample is filtered out of ``ds``,
    repeated until it can supply ``m`` samples (``m`` being the size of the
    largest class) and truncated to exactly ``m``. The per-class streams are
    then randomly interleaved.

    Args:
        ds: unbatched ``tf.data.Dataset`` yielding ``(x, y)`` where ``y`` is
            the integer class id.
        counts: sequence where ``counts[k]`` is the number of samples of
            class ``k`` in ``ds``.
        seed: seed for the random interleaving (defaults to the previously
            hard-coded value).

    Returns:
        A dataset of ``m * (number of non-empty classes)`` samples.

    Raises:
        ValueError: if no class has any sample.
    """
    # Classes with zero samples are skipped: they would otherwise cause a
    # division by zero (and an OverflowError) when computing the repeat count.
    present = [(cid, int(c)) for cid, c in enumerate(np.asarray(counts)) if c > 0]
    if not present:
        raise ValueError("oversample() needs at least one class with samples")
    m = max(c for _, c in present)

    def _label_is(cid):
        # Factory so that each predicate is bound to its own class id rather
        # than to the loop variable.
        return lambda x, y: tf.reduce_all(tf.equal(y, cid))

    parts = []
    for cid, c in present:
        repeats = -(-m // c)  # ceil(m / c) in pure integer arithmetic
        # Exactly m samples per class.
        parts.append(ds.filter(_label_is(cid)).repeat(repeats).take(m))

    # m samples for each class that was actually used.
    return tf.data.Dataset.sample_from_datasets(parts, seed=seed).take(m * len(parts))
overs = oversample(train_unbatched, counts)
# Shuffle individual samples BEFORE batching. Shuffling after .batch() only
# permutes whole batches (their composition never changes within an epoch)
# and makes the shuffle buffer hold up to 1000 full batches of images instead
# of 1000 single images.
balanced_train = (
    overs
    .shuffle(1000)
    # training=True forces the random augmentation layers to be active.
    .map(lambda x, y: (data_augmentation(x, training=True), y),
         num_parallel_calls=AUTOTUNE)
    .batch(BATCH_SIZE)
    .prefetch(AUTOTUNE)
)
# Overlap input loading with model execution for evaluation and inference.
val_ds = val_ds.prefetch(AUTOTUNE)
test_ds = test_ds.prefetch(AUTOTUNE)

# --- MODEL ---
# EfficientNetB0 backbone without its classification top and with randomly
# initialised weights (weights=None).
base = EfficientNetB0(include_top=False, weights=None, input_shape=(*IMG_SIZE, 3))

# Classification head, listed in the order it is stacked on the backbone.
head_layers = [
    layers.GlobalAveragePooling2D(),
    layers.BatchNormalization(),
    layers.Dropout(0.3),
    layers.Dense(224, activation='relu', kernel_regularizer=regularizers.l2(0.001)),
    layers.Dropout(0.4),
    layers.Dense(NUM_CLASSES, activation='softmax'),
]

# Thread the backbone features through every layer except the classifier ...
x = base.output
for head_layer in head_layers[:-1]:
    x = head_layer(x)
# ... then apply the softmax classifier to obtain the model output.
out = head_layers[-1](x)

model = Model(inputs=base.input, outputs=out)
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)
model.compile(
    optimizer=optimizer,
    loss="sparse_categorical_crossentropy",  # labels are integer class ids
    metrics=["accuracy"],
)

# --- CALLBACKS ---
class PrintValMetrics(Callback):
    """Print the validation loss and accuracy at the end of every epoch."""

    def on_epoch_end(self, epoch, logs=None):
        """Report ``val_loss`` / ``val_accuracy`` for the epoch that just ended.

        Args:
            epoch: zero-based epoch index (printed one-based).
            logs: metrics dict supplied by Keras; may be ``None`` or lack the
                validation keys (e.g. when fitting without validation data).
        """
        logs = logs or {}
        val_loss = logs.get('val_loss')
        val_accuracy = logs.get('val_accuracy')
        if val_loss is None or val_accuracy is None:
            # Do not abort training just because there is nothing to report.
            print(f"Epoch {epoch+1}: validation metrics unavailable")
            return
        print(f"Epoch {epoch+1}: val_loss = {val_loss:.4f}, val_accuracy = {val_accuracy:.4f}")

# Stop after 5 epochs without val_loss improvement (val_loss is the default
# monitor, spelled out here) and roll back to the best weights seen.
early = EarlyStopping(
    monitor="val_loss",
    patience=5,
    restore_best_weights=True,
    verbose=1,
)

# Multiply the learning rate by 0.3 after 2 stagnant epochs, never below 1e-5.
reduce = ReduceLROnPlateau(
    monitor="val_loss",
    factor=0.3,
    patience=2,
    min_lr=1e-5,
    verbose=1,
)

# Keep only the model with the lowest validation loss on disk.
checkpoint = ModelCheckpoint(
    filepath='best_model.keras',
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)

# Per-epoch validation summary line.
print_val = PrintValMetrics()

In [None]:
# --- FIT ---
# balanced_train has already been oversampled to equal class frequencies, so
# no class_weight is passed here: weighting the loss on top of oversampling
# would compensate for the class imbalance twice and over-emphasise the rare
# classes.
history = model.fit(
    balanced_train,
    validation_data=val_ds,
    epochs=100,  # upper bound; EarlyStopping normally ends training sooner
    callbacks=[early, reduce, checkpoint, print_val],
    verbose=1,  # show the progress bar
)

In [None]:
# --- LOAD BEST WEIGHTS BEFORE PREDICT ---
# Reload the checkpoint with the lowest validation loss written during training.
model = tf.keras.models.load_model("best_model.keras")

# --- PREDICT & SUBMIT ---
# By this point test_ds is the result of tf.data transformations (map /
# prefetch), and those derived datasets do not carry the `file_paths`
# attribute that image_dataset_from_directory attaches to its return value.
# Re-index the folder with the same arguments (shuffle=False) to recover the
# paths in the order the images are fed to predict().
test_file_paths = tf.keras.utils.image_dataset_from_directory(
    HOME + "test_images", label_mode=None,
    image_size=IMG_SIZE, batch_size=BATCH_SIZE, shuffle=False
).file_paths

# Index of the most probable class for every test image.
preds = model.predict(test_ds).argmax(axis=1)
if len(preds) != len(test_file_paths):
    raise RuntimeError(
        f"Got {len(preds)} predictions for {len(test_file_paths)} test files"
    )

# Class names listed in alphabetical order.
# NOTE(review): presumably this matches the training folder names, which is
# how labels were inferred — confirm against the train_images directory.
names = ['bacterial_leaf_blight','bacterial_leaf_streak','bacterial_panicle_blight',
         'blast','brown_spot','dead_heart','downy_mildew','hispa','normal','tungro']
submission = pd.DataFrame({
    "image_id": [os.path.basename(p) for p in test_file_paths],
    "label": [names[i] for i in preds]
})
submission.to_csv("sample_submission.csv", index=False)