In [None]:
import os
from collections import Counter

# Root folder that holds one sub-folder per class.
data_dir = '/kaggle/input/fine-grained-fruit-quality-assessment/train/train'

# Map every class sub-folder name to the number of entries found inside it.
class_counts = {}
for entry in os.listdir(data_dir):
    entry_path = os.path.join(data_dir, entry)
    if not os.path.isdir(entry_path):
        continue  # skip anything that is not a directory
    class_counts[entry] = len(os.listdir(entry_path))

# Total number of entries across all class folders.
total_images = sum(class_counts.values())

# Fraction of the whole set contributed by each class.
class_proportions = {}
for label, n_items in class_counts.items():
    class_proportions[label] = n_items / total_images

# Report the per-class share, rounded to two decimals.
print("Class Proportions:")
for label, share in class_proportions.items():
    print(f"{label}: {share:.2f}")


In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Every image is resized to this (height, width) when it is loaded.
IMG_SIZE = (224, 224)

# One generator for all three iterators: multiplies pixel values by 1/255 and
# reserves 20% of the images for the 'validation' subset.
# NOTE(review): the original comment describes this as a stratified split for
# unbalanced data (Keras applies the fraction per class folder) — confirm.
data = ImageDataGenerator(rescale=1./255, validation_split=0.2)

_TRAIN_DIR = '/kaggle/input/fine-grained-fruit-quality-assessment/train/train'
_TEST_DIR = '/kaggle/input/fine-grained-fruit-quality-assessment/test'

# Options shared by the training, validation and test iterators.
# class_mode='sparse' yields integer (label-encoded) class ids.
_flow_options = dict(
    target_size=IMG_SIZE,
    batch_size=32,
    class_mode='sparse',
)

# Training subset: shuffled.
trainData = data.flow_from_directory(
    _TRAIN_DIR,
    shuffle=True,
    subset='training',
    **_flow_options
)

# Validation subset: not shuffled, so batch order follows the iterator's file order.
valData = data.flow_from_directory(
    _TRAIN_DIR,
    shuffle=False,
    subset='validation',
    **_flow_options
)

# Test images: not shuffled and no subset requested.
testData = data.flow_from_directory(
    _TEST_DIR,
    shuffle=False,
    **_flow_options
)


**HELPER FUNCTIONS** (USED FOR ALL MODELS)

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns
import pandas as pd

In [None]:
# Title: Model Compilation
# Description: Compiles the model with the Adam optimizer, sparse categorical crossentropy loss, and accuracy metric.

def compile_model(model, optimizer='adam', loss='sparse_categorical_crossentropy', metrics=None):
    """Compile ``model`` in place and return it.

    Called with only ``model`` this behaves exactly as before: Adam optimizer,
    sparse categorical cross-entropy loss and the accuracy metric. The extra
    keyword arguments let other models reuse the helper with different settings.

    Args:
        model: Object exposing a Keras-style ``compile(optimizer=, loss=, metrics=)``.
        optimizer: Optimizer name or instance forwarded to ``model.compile``.
        loss: Loss name or callable forwarded to ``model.compile``.
        metrics: Metrics forwarded to ``model.compile``; ``None`` means ``['accuracy']``.

    Returns:
        The same ``model`` object, now compiled.
    """
    # Build the default inside the call so no mutable default is shared between calls.
    if metrics is None:
        metrics = ['accuracy']
    model.compile(optimizer=optimizer, loss=loss, metrics=metrics)
    return model


In [None]:
# Title: Callback Generator
# Description: Returns a single ModelCheckpoint callback that saves the best model weights (highest val_accuracy) during training. EarlyStopping is not currently enabled.

from tensorflow.keras import callbacks # Import the callbacks module

def get_callbacks(model_name='baseline'):
    """Create the checkpoint callback used while training.

    The checkpoint writes weights only (not the full model) to
    ``'<model_name>.weights.h5'`` and is configured with ``save_best_only=True``,
    monitoring ``val_accuracy`` in ``'max'`` mode.

    Args:
        model_name: Prefix of the weights file name.

    Returns:
        The single ``ModelCheckpoint`` instance (not a list). No EarlyStopping
        callback is created here.
    """
    weights_file = f'{model_name}.weights.h5'
    checkpoint_options = dict(
        save_best_only=True,
        monitor='val_accuracy',
        mode='max',
        save_weights_only=True,
    )
    return callbacks.ModelCheckpoint(weights_file, **checkpoint_options)
    

In [None]:
# Title: Model Training Wrapper
# Description: Trains any given model using training and validation data, applying specified callbacks such as early stopping and checkpointing.

def train_model(model, trainData, valData, model_name='baseline', epochs=30, fit_callbacks=None):
    """Fit ``model`` on ``trainData`` while validating on ``valData``.

    Args:
        model: Compiled model exposing a Keras-style ``fit``.
        trainData: Training data passed as the first argument of ``model.fit``.
        valData: Validation data passed as ``validation_data``.
        model_name: Name forwarded to ``get_callbacks`` when no explicit
            callbacks are supplied.
        epochs: Number of epochs passed to ``model.fit``.
        fit_callbacks: Optional callback, or list/tuple of callbacks, to use
            instead of the default from ``get_callbacks(model_name)``.

    Returns:
        Whatever ``model.fit`` returns (the training history).
    """
    if fit_callbacks is None:
        fit_callbacks = get_callbacks(model_name)

    # `Model.fit` documents `callbacks` as a list, while the default helper
    # returns one bare callback; always hand over a real list.
    if isinstance(fit_callbacks, (list, tuple)):
        callback_list = list(fit_callbacks)
    else:
        callback_list = [fit_callbacks]

    history = model.fit(
        trainData,
        validation_data=valData,
        epochs=epochs,
        callbacks=callback_list
    )
    return history
    

In [None]:
# Title: Validation Evaluation & Reporting
# Description: Evaluates the model on the validation set, printing a classification report and displaying a confusion matrix.

def evaluate_model(model, valData, class_indices, model_name='Model'):
    """Print a classification report and plot a confusion matrix for ``valData``.

    Args:
        model: Trained model whose ``predict`` returns per-class scores, one
            row per sample.
        valData: Validation iterator; its ``classes`` attribute supplies the
            true labels. Assumes the iterator is not shuffled, so predictions
            line up with ``classes`` — TODO confirm for other callers.
        class_indices: Mapping of class name -> integer class id.
        model_name: Label used in the printed report header.
    """
    val_preds = np.argmax(model.predict(valData), axis=1)
    y_true = valData.classes

    # Order names by their integer id rather than relying on dict order, and
    # pass the ids explicitly: without `labels`, classification_report raises
    # ValueError when a class is missing from both y_true and the predictions,
    # because len(target_names) no longer matches the labels it infers.
    ordered = sorted(class_indices.items(), key=lambda item: item[1])
    class_names = [name for name, _ in ordered]
    class_ids = [class_id for _, class_id in ordered]

    print(f"Evaluation Report for {model_name}")
    print(classification_report(y_true, val_preds, labels=class_ids, target_names=class_names))
    plot_confusion_matrix(y_true, val_preds, class_indices)


In [None]:
# Title: Confusion Matrix Plotter
# Description: Plots a labeled heatmap confusion matrix comparing predicted vs true labels for easy visual analysis.

def plot_confusion_matrix(y_true, y_pred, class_indices):
    """Draw a labelled confusion-matrix heatmap.

    Args:
        y_true: True integer class ids.
        y_pred: Predicted integer class ids.
        class_indices: Mapping of class name -> integer class id; supplies the
            tick labels and the row/column order.
    """
    # Order names by class id and pass the ids to confusion_matrix so the
    # matrix always has one row/column per known class. Without `labels`, a
    # class absent from both inputs is dropped and the tick labels misalign.
    ordered = sorted(class_indices.items(), key=lambda item: item[1])
    labels = [name for name, _ in ordered]
    class_ids = [class_id for _, class_id in ordered]

    cm = confusion_matrix(y_true, y_pred, labels=class_ids)
    plt.figure(figsize=(12, 8))
    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=labels, yticklabels=labels)
    plt.title("Confusion Matrix")
    plt.xlabel("Predicted")
    plt.ylabel("True")
    plt.show()
    

In [None]:
# Title: Training History Plotter
# Description: Plots training and validation accuracy/loss curves over epochs to visualize model performance trends.

def plot_training_history(history):
    """Plot accuracy and loss curves side by side.

    Args:
        history: Object whose ``history`` attribute is a mapping containing the
            per-epoch series 'accuracy', 'val_accuracy', 'loss' and 'val_loss'.
    """
    # (train key, validation key, train legend, validation legend, panel title)
    panels = (
        ('accuracy', 'val_accuracy', 'Train Acc', 'Val Acc', 'Accuracy over Epochs'),
        ('loss', 'val_loss', 'Train Loss', 'Val Loss', 'Loss over Epochs'),
    )

    plt.figure(figsize=(12,5))
    for position, (train_key, val_key, train_label, val_label, heading) in enumerate(panels, start=1):
        # One row, two columns: accuracy on the left, loss on the right.
        plt.subplot(1, 2, position)
        plt.plot(history.history[train_key], label=train_label)
        plt.plot(history.history[val_key], label=val_label)
        plt.title(heading)
        plt.legend()
    plt.show()
    

In [None]:
# Title: Test Prediction Exporter
# Description: Uses the trained model to predict labels for the test dataset and saves the results in a CSV file formatted for Kaggle submission.

def export_predictions(model, testData, output_filename="Team_CHP_5_predictions.csv"):
    """Predict a class id for every test image and write the result to CSV.

    Args:
        model: Trained model whose ``predict`` returns per-class scores, one
            row per sample.
        testData: Test iterator; its ``filenames`` attribute supplies the image
            paths, in the same order as the predictions.
        output_filename: Path of the CSV file to write.

    The CSV has two columns: ``ImageID`` (file base name, directories stripped)
    and ``Class`` (index of the highest-scoring class).
    """
    class_scores = model.predict(testData)
    predicted_ids = np.argmax(class_scores, axis=1)

    # Keep only the file name; the iterator reports paths relative to its root.
    image_ids = list(map(os.path.basename, testData.filenames))

    submission = pd.DataFrame({"ImageID": image_ids, "Class": predicted_ids})
    submission.to_csv(output_filename, index=False)
    print(f"Predictions exported to {output_filename}")
    

**BASELINE CNN MODEL**

In [None]:
# BASELINE CNN MODEL

from tensorflow.keras import layers, models

def build_baseline_model(input_shape, num_classes):
    """Assemble the baseline CNN as a Sequential model (not compiled).

    Layout: three stages of Conv2D(3x3, ReLU) -> MaxPooling2D(2x2) -> Dropout(0.25)
    with 32, 64 and 128 filters, followed by Flatten, Dense(128, ReLU),
    Dropout(0.5) and a softmax Dense layer with ``num_classes`` units.

    Args:
        input_shape: Shape passed as ``input_shape`` to the first Conv2D layer.
        num_classes: Number of units in the final softmax layer.

    Returns:
        The Sequential model named "Team_CHP_5_CNN_Baseline".
    """
    model = models.Sequential(name="Team_CHP_5_CNN_Baseline")

    # Convolutional feature extractor; only the first stage declares the input shape.
    for stage_index, filters in enumerate((32, 64, 128)):
        first_layer_args = {'input_shape': input_shape} if stage_index == 0 else {}
        model.add(layers.Conv2D(filters, (3,3), activation='relu', **first_layer_args))
        model.add(layers.MaxPooling2D((2,2)))
        model.add(layers.Dropout(0.25))

    # Classifier head.
    model.add(layers.Flatten())
    model.add(layers.Dense(128, activation='relu'))
    model.add(layers.Dropout(0.5))
    model.add(layers.Dense(num_classes, activation='softmax'))

    return model
    

In [None]:
# RUN BASELINE CNN MODEL
# Single source for the run name: it prefixes the checkpoint file written during
# training ('<name>.weights.h5') and labels the evaluation report.
MODEL_NAME = "Team_CHP_5_CNN"

# Build on the same image size the data iterators produce, plus 3 colour channels.
baseline_model = build_baseline_model(input_shape=IMG_SIZE + (3,), num_classes=trainData.num_classes)
baseline_model = compile_model(baseline_model)
baseline_model.summary()

history = train_model(baseline_model, trainData, valData, model_name=MODEL_NAME, epochs=50)

# After fit() the model holds the weights of the LAST epoch, while the checkpoint
# file holds the best-val_accuracy epoch. Restore the checkpoint so the report
# and the submission below match the weights file that is saved.
baseline_model.load_weights(f"{MODEL_NAME}.weights.h5")

evaluate_model(baseline_model, valData, class_indices=trainData.class_indices, model_name=MODEL_NAME)
plot_training_history(history)
export_predictions(baseline_model, testData, output_filename="Team_CHP_5_predictions.csv")

In [None]:
# Collect the run's artifacts under the Kaggle output directory.
import shutil

OUTPUT_DIR = "/kaggle/working"

# Weights file already written by ModelCheckpoint during training.
weights_path = "Team_CHP_5_CNN.weights.h5"

# Save the full model (architecture + weights + optimizer state)
baseline_model.save(f"{OUTPUT_DIR}/Team_CHP_5_full_model.h5")

# Move the prediction CSV (written by export_predictions) and the weights file.
# The guards make the cell safe to re-run: an unconditional move raises
# FileNotFoundError the second time because the sources are already gone.
for artifact in ("Team_CHP_5_predictions.csv", weights_path):
    destination = f"{OUTPUT_DIR}/{artifact}"
    if os.path.abspath(artifact) == os.path.abspath(destination):
        # Working directory is already OUTPUT_DIR (presumably the case on
        # Kaggle — confirm); the file is in place.
        continue
    if not os.path.exists(artifact) and os.path.exists(destination):
        # Moved by an earlier run of this cell.
        continue
    # A source missing with no copy at the destination still raises here,
    # so a genuinely absent artifact is not silently ignored.
    shutil.move(artifact, destination)
