In [5]:
# ----- Imports -----
import os, numpy as np, pandas as pd, tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.optimizers import AdamW
from sklearn.utils.class_weight import compute_class_weight
from sklearn.model_selection import train_test_split
from glob import glob

# ----- Config -----
# Training hyper-parameters.
SEED = 42          # seed for the randomized steps below (e.g. the train/validation split)
BATCH_SIZE = 32
IMG_SIZE = 224     # images are resized to IMG_SIZE x IMG_SIZE
EPOCHS = 60
LR = 1e-4          # learning rate handed to AdamW
AUTOTUNE = tf.data.AUTOTUNE

# Dataset locations.
TRAIN_DIR = '/kaggle/input/fine-grained-fruit-quality-assessment/train/train'
TEST_DIR = '/kaggle/input/fine-grained-fruit-quality-assessment/test/testNew'

# The position of a name in this list is the integer label of that class.
CLASS_NAMES = ['banana_overripe', 'banana_ripe', 'banana_rotten', 'banana_unripe',
               'tomato_fully_ripened', 'tomato_green', 'tomato_half_ripened']
# Derived rather than hard-coded so the classifier's output width can never
# drift away from the label list.
NUM_CLASSES = len(CLASS_NAMES)
class_to_idx = {cls: i for i, cls in enumerate(CLASS_NAMES)}

# ----- Model -----
def build_model(input_shape=(IMG_SIZE, IMG_SIZE, 3), num_classes=NUM_CLASSES):
    """Assemble the CNN + self-attention image classifier.

    The network stacks four Conv2D -> BatchNormalization -> MaxPooling2D
    stages (32, 64, 128 and 256 filters), reshapes the resulting feature map
    into a sequence, applies multi-head self-attention with a residual
    connection and layer normalization, averages over the sequence and
    finishes with a Dense(128) + Dropout(0.3) head and a softmax layer.

    Args:
        input_shape: shape of a single input image, without the batch axis.
        num_classes: number of units in the softmax output layer.

    Returns:
        An uncompiled Keras ``Model``.
    """
    image_in = layers.Input(shape=input_shape)

    # Convolutional feature extractor: every stage has the same layout and
    # only the filter count changes.
    features = image_in
    for n_filters in (32, 64, 128, 256):
        features = layers.Conv2D(n_filters, 3, padding='same', activation='relu')(features)
        features = layers.BatchNormalization()(features)
        features = layers.MaxPooling2D()(features)

    # Collapse the spatial axes into one sequence axis, keeping the channels.
    channels = features.shape[-1]
    tokens = layers.Reshape((-1, channels))(features)

    # Self-attention (query and value are the same sequence) + residual + norm.
    attended = layers.MultiHeadAttention(num_heads=4, key_dim=32)(tokens, tokens)
    tokens = layers.Add()([tokens, attended])
    tokens = layers.LayerNormalization()(tokens)

    # Classification head.
    pooled = layers.GlobalAveragePooling1D()(tokens)
    hidden = layers.Dense(128, activation='relu')(pooled)
    hidden = layers.Dropout(0.3)(hidden)
    probabilities = layers.Dense(num_classes, activation='softmax')(hidden)
    return models.Model(inputs=image_in, outputs=probabilities)

# ----- Data Prep -----
def decode_image(filename, label=None, img_size=IMG_SIZE):
    """Read a JPEG file and turn it into a resized, rescaled image tensor.

    Args:
        filename: path of the JPEG file to read.
        label: optional label that is passed through untouched.
        img_size: side length the image is resized to (square output).

    Returns:
        ``(image, label)`` when a label is supplied, otherwise just ``image``.
        The image has 3 channels, size ``img_size`` x ``img_size`` and has
        been divided by 255.
    """
    raw_bytes = tf.io.read_file(filename)
    pixels = tf.image.decode_jpeg(raw_bytes, channels=3)
    pixels = tf.image.resize(pixels, [img_size, img_size]) / 255.0
    if label is None:
        # Unlabelled use (e.g. the test set): return the image on its own.
        return pixels
    return pixels, label

def augment(img, label):
    """Apply random training-time augmentation to one image.

    Performs a random horizontal flip, a random brightness shift of up to
    +/-0.2 and a random contrast change with a factor in [0.7, 1.3].

    Args:
        img: image tensor; assumed to hold values in [0, 1], which is what
            ``decode_image`` produces after dividing by 255.
        label: label passed through unchanged.

    Returns:
        ``(augmented_image, label)``.
    """
    img = tf.image.random_flip_left_right(img)
    img = tf.image.random_brightness(img, 0.2)
    img = tf.image.random_contrast(img, 0.7, 1.3)
    # Brightness/contrast jitter can push pixels below 0 or above 1, a range
    # the validation and test images never contain. Clip back to [0, 1] so
    # training inputs stay in the same range as evaluation inputs.
    img = tf.clip_by_value(img, 0.0, 1.0)
    return img, label

# ----- Load & Split Data -----
# Collect every training image path together with the integer label of the
# class directory it was found in.
image_paths, labels = [], []
for cls in CLASS_NAMES:
    # glob() returns matches in arbitrary order; sorting makes the input to
    # the seeded split identical on every run, so the same files end up in
    # the train and validation sets each time.
    paths = sorted(glob(f"{TRAIN_DIR}/{cls}/*.jpg"))
    image_paths.extend(paths)
    labels.extend([class_to_idx[cls]] * len(paths))
if not image_paths:
    # Fail with a clear message instead of an obscure error from the split.
    raise FileNotFoundError(f"No .jpg images found under {TRAIN_DIR}")
image_paths, labels = np.array(image_paths), np.array(labels)

# Stratified 80/20 split so both parts keep the class proportions.
train_paths, val_paths, train_labels, val_labels = train_test_split(
    image_paths, labels, test_size=0.2, stratify=labels, random_state=SEED)

# Training pipeline. The (path, label) pairs are shuffled *before* decoding:
# the shuffle buffer then holds short strings instead of decoded float images,
# so it can span the whole training set (a uniform shuffle) at negligible
# memory cost. Shuffling after decoding with a 1024-element buffer kept ~1024
# full images in memory and mixed only a window of the data at a time.
train_ds = tf.data.Dataset.from_tensor_slices((train_paths, train_labels))
train_ds = train_ds.shuffle(len(train_paths), seed=SEED)
train_ds = train_ds.map(decode_image, num_parallel_calls=AUTOTUNE).map(augment, num_parallel_calls=AUTOTUNE)
train_ds = train_ds.batch(BATCH_SIZE).prefetch(AUTOTUNE)

# Validation pipeline: decoded only, no shuffling and no augmentation.
val_ds = tf.data.Dataset.from_tensor_slices((val_paths, val_labels))
val_ds = val_ds.map(decode_image, num_parallel_calls=AUTOTUNE).batch(BATCH_SIZE).prefetch(AUTOTUNE)

# ----- Class Weights -----
# 'balanced' weights are inversely proportional to class frequency in the
# training split.
present_classes = np.unique(train_labels)
class_weights = compute_class_weight('balanced', classes=present_classes, y=train_labels)
# Key every weight by the label id it was computed for. Enumerating the weight
# array is only correct when the labels present are exactly 0..k-1; if a class
# has no training images, that would attach weights to the wrong classes.
class_weights_dict = {int(c): float(w) for c, w in zip(present_classes, class_weights)}

# ----- Train -----
# Seed Python, NumPy and TensorFlow before the model is created so that
# weight initialisation and dropout are repeatable between runs.
tf.keras.utils.set_random_seed(SEED)
model = build_model()
model.compile(optimizer=AdamW(LR), loss='sparse_categorical_crossentropy', metrics=['accuracy'])
# class_weight re-weights the loss per class using the weights computed above.
history = model.fit(train_ds, validation_data=val_ds, epochs=EPOCHS, class_weight=class_weights_dict)

# ----- Save Weights -----
# Weights only (no optimizer state or architecture); the file is written
# relative to the current working directory.
model.save_weights("model_weights.weights.h5")

# ----- Show Final Val Accuracy -----
# Metrics of the model as it stands after the last epoch.
val_loss, val_acc = model.evaluate(val_ds)
print(f"✅ Final Validation Accuracy: {val_acc:.4f} | Loss: {val_loss:.4f}")

# ----- Test Accuracy Note -----
print("❗Test accuracy can't be shown — Kaggle hides labels for test set.")
print("📤 Please upload submission.csv to Kaggle to view test leaderboard score.")

# ----- Predict & Save Submission -----
# Test files are sorted so the rows of the CSV come out in a stable order.
test_files = sorted(glob(f"{TEST_DIR}/*.jpg"))
test_ids = list(map(os.path.basename, test_files))

# Unlabelled pipeline: decode_image is called with the path only and therefore
# yields bare image tensors.
test_ds = (
    tf.data.Dataset.from_tensor_slices(test_files)
    .map(decode_image, num_parallel_calls=AUTOTUNE)
    .batch(BATCH_SIZE)
    .prefetch(AUTOTUNE)
)

# Highest-probability class per image, as an index and as a readable name.
preds = model.predict(test_ds)
pred_labels = preds.argmax(axis=1)
pred_class_names = [CLASS_NAMES[idx] for idx in pred_labels]

submission = pd.DataFrame(
    {"ImageID": test_ids, "Class": pred_labels, "ClassName": pred_class_names}
)
submission.to_csv("submission.csv", index=False)
print(f"✅ submission.csv saved with {len(submission)} entries (with both Class and ClassName)")


Epoch 1/60
185/185 ━━━━━━━━━━━━━━━━━━━━ 51s 190ms/step - accuracy: 0.5189 - loss: 1.3939 - val_accuracy: 0.2224 - val_loss: 4.4879
Epoch 2/60
185/185 ━━━━━━━━━━━━━━━━━━━━ 35s 169ms/step - accuracy: 0.7609 - loss: 0.8394 - val_accuracy: 0.3340 - val_loss: 2.8275
Epoch 3/60
185/185 ━━━━━━━━━━━━━━━━━━━━ 27s 126ms/step - accuracy: 0.8000 - loss: 0.8096 - val_accuracy: 0.7350 - val_loss: 0.6394
Epoch 4/60
185/185 ━━━━━━━━━━━━━━━━━━━━ 35s 169ms/step - accuracy: 0.8214 - loss: 0.6658 - val_accuracy: 0.8391 - val_loss: 0.3928
Epoch 5/60
185/185 ━━━━━━━━━━━━━━━━━━━━ 36s 170ms/step - accuracy: 0.8474 - loss: 0.6139 - val_accuracy: 0.8837 - val_loss: 0.3013
Epoch 6/60
185/185 ━━━━━━━━━━━━━━━━━━━━ 36s 169ms/step - accuracy: 0.8649 - loss: 0.5625 - val_accuracy: 0.8526 - val_loss: 0.4027
Epoch 7/60

In [12]:
# ----- Test Script to Load & Evaluate Saved Model -----
def test_model(weights_path="/kaggle/working/model_weights.weights.h5"):
    """Rebuild the network, restore saved weights and print validation metrics.

    Evaluation runs on the notebook-level ``val_ds`` dataset, which must
    already exist when this function is called.

    Args:
        weights_path: weights file to load into a freshly built model.
    """
    print("🔄 Building model and loading weights...")
    restored = build_model()
    restored.compile(
        optimizer=AdamW(LR),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    restored.load_weights(weights_path)

    print("📊 Evaluating loaded model on validation set...")
    # evaluate() yields the loss followed by the compiled metric (accuracy).
    metrics = restored.evaluate(val_ds, verbose=0)
    loss, acc = metrics
    print("✅ Loaded model from saved weights")
    print(f"📈 Validation Accuracy (from saved model): {acc:.4f}")
    print(f"📉 Validation Loss: {loss:.4f}")

# ✅ Run the test
test_model()


In [None]:
import pandas as pd
import numpy as np
import os
import tensorflow as tf
from glob import glob
from tensorflow.keras.preprocessing import image

def load_cnn_transformer_model(weights_path):
    """Build the classifier, compile it and restore weights from disk.

    Args:
        weights_path: weights file to load into the freshly built model.

    Returns:
        The compiled model with the loaded weights.
    """
    restored = build_model()
    restored.compile(
        optimizer=AdamW(LR),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    restored.load_weights(weights_path)
    print("✅ Model loaded from weights.")
    return restored

def preprocess_image(img_path):
    """Read one JPEG file and return it as a resized, rescaled image tensor.

    Applies the same steps as ``decode_image`` does for unlabelled input:
    decode to 3 channels, resize to ``IMG_SIZE`` x ``IMG_SIZE`` and divide
    by 255.

    Args:
        img_path: path of the JPEG file to read.

    Returns:
        The preprocessed image tensor.
    """
    img = tf.io.read_file(img_path)
    img = tf.image.decode_jpeg(img, channels=3)
    # IMG_SIZE is a single integer (the side of a square image), not a
    # (height, width) pair, so it must not be subscripted.
    img = tf.image.resize(img, [IMG_SIZE, IMG_SIZE])
    img = img / 255.0
    return img

def prepare_test_dataset(test_dir):
    """Create a batched dataset of preprocessed images from a directory.

    Args:
        test_dir: directory that is searched (non-recursively) for ``*.jpg``.

    Returns:
        ``(dataset, file_paths)`` where ``file_paths`` is the sorted list of
        matched files and ``dataset`` yields image batches in that same order.
    """
    pattern = f"{test_dir}/*.jpg"
    file_paths = sorted(glob(pattern))
    dataset = (
        tf.data.Dataset.from_tensor_slices(file_paths)
        .map(preprocess_image, num_parallel_calls=AUTOTUNE)
        .batch(BATCH_SIZE)
        .prefetch(AUTOTUNE)
    )
    return dataset, file_paths

def predict_and_save_submission(weights_path, test_dir, output_file='submission.csv'):
    """Predict a class for every test image and write the results to a CSV.

    Args:
        weights_path: weights file to load into the model.
        test_dir: directory containing the ``*.jpg`` test images.
        output_file: destination path of the CSV file.

    Returns:
        The submission DataFrame with columns ``ImageID`` (file name),
        ``Class`` (predicted class index) and ``ClassName``.
    """
    classifier = load_cnn_transformer_model(weights_path)
    test_ds, file_paths = prepare_test_dataset(test_dir)

    print("🔍 Predicting...")
    probabilities = classifier.predict(test_ds)
    # Index of the highest-scoring class for each image.
    pred_classes = probabilities.argmax(axis=1)

    submission = pd.DataFrame({
        "ImageID": [os.path.basename(path) for path in file_paths],
        "Class": pred_classes,
        "ClassName": [CLASS_NAMES[idx] for idx in pred_classes],
    })

    submission.to_csv(output_file, index=False)
    print(f"✅ submission saved to: {output_file}")
    print(submission.head())

    return submission

# Example usage
# predict_and_save_submission("/kaggle/working/model_weights.weights.h5", "/kaggle/input/fine-grained-fruit-quality-assessment/test/testNew")


In [8]:
# # ----- Imports -----
# import os, numpy as np, pandas as pd, tensorflow as tf
# from tensorflow.keras import layers, models
# from tensorflow.keras.optimizers import AdamW
# from sklearn.utils.class_weight import compute_class_weight
# from sklearn.model_selection import train_test_split
# from glob import glob

# # ----- Config -----
# SEED = 42
# BATCH_SIZE = 32
# IMG_SIZE = 224
# EPOCHS = 60
# LR = 1e-4
# NUM_CLASSES = 7
# AUTOTUNE = tf.data.AUTOTUNE
# TRAIN_DIR = '/kaggle/input/fine-grained-fruit-quality-assessment/train/train'
# TEST_DIR = '/kaggle/input/fine-grained-fruit-quality-assessment/test/testNew'
# CLASS_NAMES = ['banana_overripe', 'banana_ripe', 'banana_rotten', 'banana_unripe',
#                'tomato_fully_ripened', 'tomato_green', 'tomato_half_ripened']
# class_to_idx = {cls: i for i, cls in enumerate(CLASS_NAMES)}

# # ----- Model -----
# def build_model(input_shape=(IMG_SIZE, IMG_SIZE, 3), num_classes=NUM_CLASSES):
#     inputs = layers.Input(shape=input_shape)
#     x = layers.Conv2D(32, 3, padding='same', activation='relu')(inputs)
#     x = layers.BatchNormalization()(x)
#     x = layers.MaxPooling2D()(x)
#     for filters in [64, 128, 256]:
#         x = layers.Conv2D(filters, 3, padding='same', activation='relu')(x)
#         x = layers.BatchNormalization()(x)
#         x = layers.MaxPooling2D()(x)
#     x = layers.Reshape((-1, x.shape[-1]))(x)
#     attention = layers.MultiHeadAttention(num_heads=4, key_dim=32)(x, x)
#     x = layers.Add()([x, attention])
#     x = layers.LayerNormalization()(x)
#     x = layers.GlobalAveragePooling1D()(x)
#     x = layers.Dense(128, activation='relu')(x)
#     x = layers.Dropout(0.3)(x)
#     outputs = layers.Dense(num_classes, activation='softmax')(x)
#     return models.Model(inputs, outputs)

# # ----- Data Prep -----
# def decode_image(filename, label=None, img_size=IMG_SIZE):
#     img = tf.io.read_file(filename)
#     img = tf.image.decode_jpeg(img, channels=3)
#     img = tf.image.resize(img, [img_size, img_size])
#     img = img / 255.0
#     return (img, label) if label is not None else img

# def augment(img, label):
#     img = tf.image.random_flip_left_right(img)
#     img = tf.image.random_brightness(img, 0.2)
#     img = tf.image.random_contrast(img, 0.7, 1.3)
#     return img, label

# # ----- Load & Split Data -----
# image_paths, labels = [], []
# for cls in CLASS_NAMES:
#     paths = glob(f"{TRAIN_DIR}/{cls}/*.jpg")
#     image_paths.extend(paths)
#     labels.extend([class_to_idx[cls]] * len(paths))
# image_paths, labels = np.array(image_paths), np.array(labels)

# train_paths, val_paths, train_labels, val_labels = train_test_split(
#     image_paths, labels, test_size=0.2, stratify=labels, random_state=SEED)

# train_ds = tf.data.Dataset.from_tensor_slices((train_paths, train_labels))
# train_ds = train_ds.map(decode_image, num_parallel_calls=AUTOTUNE).map(augment, num_parallel_calls=AUTOTUNE)
# train_ds = train_ds.shuffle(1024).batch(BATCH_SIZE).prefetch(AUTOTUNE)

# val_ds = tf.data.Dataset.from_tensor_slices((val_paths, val_labels))
# val_ds = val_ds.map(decode_image, num_parallel_calls=AUTOTUNE).batch(BATCH_SIZE).prefetch(AUTOTUNE)

# # ----- Class Weights -----
# class_weights = compute_class_weight('balanced', classes=np.unique(train_labels), y=train_labels)
# class_weights_dict = dict(enumerate(class_weights))

# # ----- Train -----
# model = build_model()
# model.compile(optimizer=AdamW(LR), loss='sparse_categorical_crossentropy', metrics=['accuracy'])
# history = model.fit(train_ds, validation_data=val_ds, epochs=EPOCHS, class_weight=class_weights_dict)

# # ----- Save Weights -----
# model.save_weights("/kaggle/working/model_weights.weights.h5")

# # ----- Show Final Val Accuracy -----
# val_loss, val_acc = model.evaluate(val_ds)
# print(f"✅ Final Validation Accuracy: {val_acc:.4f} | Loss: {val_loss:.4f}")

# # ----- Test Accuracy Note -----
# print("❗Test accuracy can't be shown — Kaggle hides labels for test set.")
# print("📤 Please upload submission.csv to Kaggle to view test leaderboard score.")

# # ----- Predict & Save Submission (COMMENTED OUT) -----
# # test_files = sorted(glob(f"{TEST_DIR}/*.jpg"))
# # test_ids = [os.path.basename(p) for p in test_files]
# # test_ds = tf.data.Dataset.from_tensor_slices(test_files)
# # test_ds = test_ds.map(lambda x: decode_image(x), num_parallel_calls=AUTOTUNE)
# # test_ds = test_ds.batch(BATCH_SIZE).prefetch(AUTOTUNE)

# # preds = model.predict(test_ds)
# # pred_labels = np.argmax(preds, axis=1)
# # pred_class_names = [CLASS_NAMES[i] for i in pred_labels]

# # submission = pd.DataFrame({
# #     "ImageID": test_ids,
# #     "Class": pred_labels,
# #     "ClassName": pred_class_names
# # })
# # submission.to_csv("submission.csv", index=False)
# # print(f"✅ submission.csv saved with {len(submission)} entries (with both Class and ClassName)")

# # ----- Test Script to Load & Evaluate Saved Model -----
# def test_model(weights_path="/kaggle/working/model_weights.weights.h5"):
#     print("🔄 Building model and loading weights...")
#     model = build_model()
#     model.compile(optimizer=AdamW(LR), loss='sparse_categorical_crossentropy', metrics=['accuracy'])
#     model.load_weights(weights_path)

#     print("📊 Evaluating loaded model on validation set...")
#     loss, acc = model.evaluate(val_ds, verbose=0)
#     print("✅ Loaded model from saved weights")
#     print(f"📈 Validation Accuracy (from saved model): {acc:.4f}")
#     print(f"📉 Validation Loss: {loss:.4f}")
