<a href="https://colab.research.google.com/github/cuducquang/ML_Project/blob/main/task2_ML.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install keras-tuner

Collecting keras-tuner
  Downloading keras_tuner-1.4.7-py3-none-any.whl.metadata (5.4 kB)
Collecting kt-legacy (from keras-tuner)
  Downloading kt_legacy-1.0.5-py3-none-any.whl.metadata (221 bytes)
Downloading keras_tuner-1.4.7-py3-none-any.whl (129 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.1/129.1 kB[0m [31m10.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading kt_legacy-1.0.5-py3-none-any.whl (9.6 kB)
Installing collected packages: kt-legacy, keras-tuner
Successfully installed keras-tuner-1.4.7 kt-legacy-1.0.5


# Import Libraries

In [2]:
from google.colab import drive
import gdown
import os
import shutil
import pandas as pd
import matplotlib.pyplot as plt
import cv2
import random
import hashlib
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.utils import resample
import numpy as np
import math
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import keras_tuner as kt
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten, Dropout, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from keras.callbacks import ReduceLROnPlateau
from keras_tuner import HyperParameters
from tensorflow import keras
import tensorflow as tf
from google.colab import files

#Download TrainDataset

In [3]:
# NOTE(review): no visible cell reads from /content/drive; confirm the mount is still needed.
drive.mount('/content/drive')

# Google Drive file ID (from the shared link)
file_id = "1l-KlchrHmf3v87KneUe1Ejn-u9Ubfi1S"
file_name = "train_images.zip"
file_path = "/content/" + file_name

# Download the file. Depending on the gdown version, a failed download either
# raises or returns None; check the return value so the success message below
# is never printed for a file that was not written.
downloaded_path = gdown.download(f"https://drive.google.com/uc?id={file_id}", file_path, quiet=False)
if downloaded_path is None:
    raise RuntimeError(f"Download failed, no file written to {file_path}")
print(f"Downloaded: {file_path}")

Mounted at /content/drive


Downloading...
From (original): https://drive.google.com/uc?id=1l-KlchrHmf3v87KneUe1Ejn-u9Ubfi1S
From (redirected): https://drive.google.com/uc?id=1l-KlchrHmf3v87KneUe1Ejn-u9Ubfi1S&confirm=t&uuid=f518dc1e-2de1-48d4-b2ef-3468717b830b
To: /content/train_images.zip
100%|██████████| 517M/517M [00:06<00:00, 74.6MB/s]

Downloaded: /content/train_images.zip





#Unzip TrainDataset

In [4]:
!unzip -q $file_path -d /content/extracted_folder

# Data Augmentation for Train & Val Datasets

In [5]:
train_dir = '/content/extracted_folder/train'
val_dir = '/content/extracted_folder/val'

# Training pipeline: scale pixel values to [0, 1] and apply light random
# augmentation (small rotations, shifts, zoom, horizontal flips, brightness).
train_datagen = ImageDataGenerator(
    rescale=1.0/255.0,
    rotation_range=10,
    width_shift_range=0.05,
    height_shift_range=0.05,
    zoom_range=0.1,
    horizontal_flip=True,
    brightness_range=[0.8, 1.2],
    fill_mode='nearest'
)

train_generator = train_datagen.flow_from_directory(
    directory=train_dir,
    target_size=(224, 224),  # every image is resized to 224x224
    batch_size=32,
    class_mode='categorical',
    seed=42  # fixed seed for shuffling and random transformations
)

# Validation/test pipeline: rescaling only, no augmentation.
val_test_datagen = ImageDataGenerator(rescale=1.0/255.0)

val_generator = val_test_datagen.flow_from_directory(
    directory=val_dir,
    target_size=(224, 224),
    batch_size=64,
    class_mode='categorical',
    shuffle=False  # keep a stable order so predictions line up with val_generator.classes
)

Found 21289 images belonging to 10 classes.
Found 2071 images belonging to 10 classes.


In [6]:
# Pull one batch from the training generator and inspect the label encoding.
x_batch, y_batch = next(train_generator)
for caption, value in (
    ("Shape y_batch:", y_batch.shape),
    ("Sample y_batch[0]:", y_batch[0]),
):
    print(caption, value)

Shape y_batch: (32, 10)
Sample y_batch[0]: [0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]


#Build VGG16 architecture model

In [7]:
from tensorflow.keras.layers import (BatchNormalization, Activation)

def build_model(hp):
    """Build and compile the VGG16-based classifier for one tuner trial.

    The VGG16 convolutional stack is created with ``weights=None`` (no
    pretrained weights are loaded) and without its top layers. It is followed
    by global average pooling, two Dense -> BatchNormalization -> ReLU ->
    Dropout blocks whose widths and dropout rates are tunable, and a 10-unit
    softmax output layer.

    Args:
        hp: Hyperparameter container; its ``Int``, ``Float`` and ``Choice``
            methods supply ``dense1_units``, ``dropout_rate1``,
            ``dense2_units``, ``dropout_rate2`` and ``learning_rate``.

    Returns:
        The model, compiled with Adam, categorical cross-entropy loss and an
        accuracy metric.
    """
    backbone = VGG16(weights=None, include_top=False, input_shape=(224, 224, 3))
    features = GlobalAveragePooling2D()(backbone.output)

    # One row per dense block: (units name, (min, max, step), dropout name, (min, max, step)).
    # Hyperparameters are requested in the same order as the layers are created.
    head_blocks = (
        ('dense1_units', (256, 1024, 128), 'dropout_rate1', (0.2, 0.5, 0.1)),
        ('dense2_units', (64, 512, 64), 'dropout_rate2', (0.1, 0.4, 0.1)),
    )
    for units_name, units_range, rate_name, rate_range in head_blocks:
        low, high, step = units_range
        width = hp.Int(units_name, min_value=low, max_value=high, step=step)
        features = Dense(units=width)(features)
        features = BatchNormalization()(features)
        features = Activation('relu')(features)

        low, high, step = rate_range
        drop_rate = hp.Float(rate_name, min_value=low, max_value=high, step=step)
        features = Dropout(drop_rate)(features)

    # Output layer
    predictions = Dense(10, activation='softmax')(features)
    classifier = Model(inputs=backbone.input, outputs=predictions)

    step_size = hp.Choice('learning_rate', [1e-2, 1e-3, 5e-4, 1e-4])
    classifier.compile(
        optimizer=Adam(learning_rate=step_size),
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )
    return classifier

# Stop a run after 5 epochs without a val_accuracy improvement and restore the
# weights of the best epoch.
early_stopping = EarlyStopping(
    monitor='val_accuracy',
    restore_best_weights=True,
    patience=5,
    verbose=1,
)

# Halve the learning rate when val_loss has not improved for 2 epochs, with a
# floor of 1e-6.
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    patience=2,
    factor=0.5,
    min_lr=1e-6,
    verbose=1,
)

# Bayesian search over build_model's hyperparameters, maximising val_accuracy
# across at most 20 trials; state is kept under kt_bayesian/rice_variety_tuning.
tuner = kt.BayesianOptimization(
    hypermodel=build_model,
    objective='val_accuracy',
    max_trials=20,
    project_name='rice_variety_tuning',
    directory='kt_bayesian',
)

tuner.search_space_summary()

Search space summary
Default search space size: 5
dense1_units (Int)
{'default': None, 'conditions': [], 'min_value': 256, 'max_value': 1024, 'step': 128, 'sampling': 'linear'}
dropout_rate1 (Float)
{'default': 0.2, 'conditions': [], 'min_value': 0.2, 'max_value': 0.5, 'step': 0.1, 'sampling': 'linear'}
dense2_units (Int)
{'default': None, 'conditions': [], 'min_value': 64, 'max_value': 512, 'step': 64, 'sampling': 'linear'}
dropout_rate2 (Float)
{'default': 0.1, 'conditions': [], 'min_value': 0.1, 'max_value': 0.4, 'step': 0.1, 'sampling': 'linear'}
learning_rate (Choice)
{'default': 0.01, 'conditions': [], 'values': [0.01, 0.001, 0.0005, 0.0001], 'ordered': True}


# Hyperparameter Search

In [None]:
# Run the search: each trial trains for up to 20 epochs with early stopping and
# learning-rate reduction, and is scored on the validation generator.
tuner.search(
    train_generator,
    epochs=20,
    validation_data=val_generator,
    callbacks=[early_stopping, reduce_lr],
)

# Keep the top-ranked hyperparameter set
best_hps = tuner.get_best_hyperparameters(1)[0]
print(f"Best Hyperparameters: {best_hps.values}")

Trial 6 Complete [00h 26m 31s]
val_accuracy: 0.6735876202583313

Best val_accuracy So Far: 0.9159826040267944
Total elapsed time: 07h 16m 48s

Search: Running Trial #7

Value             |Best Value So Far |Hyperparameter
768               |1024              |dense1_units
0.4               |0.4               |dropout_rate1
64                |384               |dense2_units
0.2               |0.1               |dropout_rate2
0.001             |0.0005            |learning_rate

Epoch 1/20
[1m666/666[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m277s[0m 401ms/step - accuracy: 0.3702 - loss: 1.8940 - val_accuracy: 0.6688 - val_loss: 1.2252 - learning_rate: 0.0010
Epoch 2/20
[1m666/666[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m258s[0m 387ms/step - accuracy: 0.4391 - loss: 1.6446 - val_accuracy: 0.3211 - val_loss: 2.0007 - learning_rate: 0.0010
Epoch 3/20
[1m666/666[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 385ms/step - accuracy: 0.4861 - loss: 1.4938
Epoch 3: ReduceLR

#Train Model

In [8]:
# Pin the hyperparameters by hand instead of reading them from the tuner; the
# values match the "Best Value So Far" column of the search log.
best_hps = HyperParameters()
for hp_name, hp_value in (
    ('dense1_units', 1024),
    ('dropout_rate1', 0.4),
    ('dense2_units', 384),
    ('dropout_rate2', 0.1),
    ('learning_rate', 0.0005),
):
    best_hps.Fixed(hp_name, hp_value)

# Build the model with the pinned values and train it for up to 25 epochs
best_model = tuner.hypermodel.build(best_hps)
best_model.fit(
    train_generator,
    epochs=25,
    validation_data=val_generator,
    callbacks=[early_stopping, reduce_lr],
)

# Saved with a relative path, i.e. into the current working directory
best_model.save("best_rice_variety_model.h5")

  self._warn_if_super_not_called()


Epoch 1/25
[1m666/666[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m338s[0m 455ms/step - accuracy: 0.3656 - loss: 1.9056 - val_accuracy: 0.4128 - val_loss: 1.7877 - learning_rate: 5.0000e-04
Epoch 2/25
[1m666/666[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m255s[0m 383ms/step - accuracy: 0.4716 - loss: 1.5496 - val_accuracy: 0.5031 - val_loss: 1.5178 - learning_rate: 5.0000e-04
Epoch 3/25
[1m666/666[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m260s[0m 389ms/step - accuracy: 0.5075 - loss: 1.4357 - val_accuracy: 0.6803 - val_loss: 1.1118 - learning_rate: 5.0000e-04
Epoch 4/25
[1m666/666[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m257s[0m 386ms/step - accuracy: 0.5362 - loss: 1.3436 - val_accuracy: 0.5983 - val_loss: 1.1837 - learning_rate: 5.0000e-04
Epoch 5/25
[1m666/666[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m258s[0m 388ms/step - accuracy: 0.5545 - loss: 1.2710 - val_accuracy: 0.7117 - val_loss: 0.8887 - learning_rate: 5.0000e-04
Epoch 6/25
[1m666/666[0m [3



# Download Test Dataset

In [9]:
# Google Drive file ID (from the shared link)
file_test_id = "1othgf5BTO_sZYXBOWykn2OkitSCna7J6"
file_test_name = "test_images.zip"
file_test_path = "/content/" + file_test_name

# Download the file. Depending on the gdown version, a failed download either
# raises or returns None; check the return value so the success message below
# is never printed for a file that was not written.
downloaded_test_path = gdown.download(f"https://drive.google.com/uc?id={file_test_id}", file_test_path, quiet=False)
if downloaded_test_path is None:
    raise RuntimeError(f"Download failed, no file written to {file_test_path}")
print(f"Downloaded: {file_test_path}")

Downloading...
From (original): https://drive.google.com/uc?id=1othgf5BTO_sZYXBOWykn2OkitSCna7J6
From (redirected): https://drive.google.com/uc?id=1othgf5BTO_sZYXBOWykn2OkitSCna7J6&confirm=t&uuid=8b5ef193-2a05-4c3e-8838-5e1a8ece2f34
To: /content/test_images.zip
100%|██████████| 274M/274M [00:06<00:00, 42.0MB/s]

Downloaded: /content/test_images.zip





#Unzip TestDataset

In [10]:
!unzip -q $file_test_path -d /content/extracted_test_folder

In [11]:
# Show the class-name -> index mapping of the training generator; the
# prediction step translates argmax indices back to names through it.
print(train_generator.class_indices)

{'ADT45': 0, 'AndraPonni': 1, 'AtchayaPonni': 2, 'IR20': 3, 'KarnatakaPonni': 4, 'Onthanel': 5, 'Ponni': 6, 'RR': 7, 'Surya': 8, 'Zonal': 9}


#Predict on TestDataset

In [12]:
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing import image

# Reload the trained model from disk. The training cell saves it with a
# relative path, so this absolute path assumes the working directory is /content.
model = load_model("/content/best_rice_variety_model.h5")

def predict_folder_to_csv(model, test_folder, output_csv_path, class_names,
                          batch_size=64, extensions=('.jpg',)):
    """Classify every image in a folder and write the predictions to a CSV.

    Images are resized to 224x224, scaled to [0, 1] and sent to the model in
    batches (one ``model.predict`` call per batch rather than per image).
    Rows are ordered by the integer value of the filename stem; filenames
    whose stem is not an integer are placed after the numeric ones, in
    alphabetical order, instead of raising.

    Args:
        model: Object exposing ``predict(batch, verbose=0)`` that returns one
            row of class scores per input image.
        test_folder: Directory containing the images to classify.
        output_csv_path: Destination CSV path; columns are ``filename`` and
            ``predicted_label``.
        class_names: Sequence mapping a predicted class index to its label.
        batch_size: Number of images per ``model.predict`` call (>= 1).
        extensions: File suffix, or iterable of suffixes, to include;
            matching is case-insensitive.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    # A bare string would be iterated character by character by tuple().
    if isinstance(extensions, str):
        extensions = (extensions,)
    suffixes = tuple(ext.lower() for ext in extensions)

    def _sort_key(name):
        # Numeric stems first (by integer value), everything else afterwards.
        try:
            return (0, int(name.split('.')[0]))
        except ValueError:
            return (1, name)

    img_names = sorted(
        (name for name in os.listdir(test_folder) if name.lower().endswith(suffixes)),
        key=_sort_key,
    )

    results = []
    for start in range(0, len(img_names), batch_size):
        chunk = img_names[start:start + batch_size]
        img_batch = np.stack([
            image.img_to_array(
                image.load_img(os.path.join(test_folder, name), target_size=(224, 224))
            ) / 255.0
            for name in chunk
        ])

        preds = model.predict(img_batch, verbose=0)
        for name, predicted_index in zip(chunk, np.argmax(preds, axis=1)):
            results.append({
                'filename': name,
                'predicted_label': class_names[predicted_index]
            })

    # Explicit columns keep the header row even when no image matched.
    df = pd.DataFrame(results, columns=['filename', 'predicted_label'])
    df.to_csv(output_csv_path, index=False)
    print(f" Prediction results saved to {output_csv_path}")

# Iterating the mapping yields the class names in insertion order, which the
# printed class_indices shows to be index order (ADT45 -> 0 ... Zonal -> 9).
class_names = list(train_generator.class_indices)

predict_folder_to_csv(
    model,
    "/content/extracted_test_folder/test_images",
    "/content/predicted_results.csv",
    class_names,
)



 Prediction results saved to /content/predicted_results.csv
