In [3]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

## import numpy as np # linear algebra
## import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

## import os
## for dirname, _, filenames in os.walk('/kaggle/input'):
##    for filename in filenames:
##        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [4]:
import os
from collections import Counter

# Root of the training set; each immediate sub-directory is treated as one class.
data_dir = '/kaggle/input/fine-grained-fruit-quality-assessment/train/train'

# Map each class directory name to the number of entries it contains.
# os.listdir() returns names in arbitrary order, so sort them to make the
# report below reproducible from run to run.
class_counts = {}
for class_name in sorted(os.listdir(data_dir)):
    class_folder = os.path.join(data_dir, class_name)
    if os.path.isdir(class_folder):
        # NOTE(review): every directory entry is counted, not only image files —
        # confirm the class folders contain nothing but images.
        class_counts[class_name] = len(os.listdir(class_folder))

total_images = sum(class_counts.values())
if total_images == 0:
    # Fail with an explicit message instead of a ZeroDivisionError below.
    raise ValueError(f"No images found in any class folder under {data_dir!r}")

# Share of the whole training set that falls in each class.
class_proportions = {
    class_name: count / total_images
    for class_name, count in class_counts.items()
}

print("Class Proportions:")
for class_name, proportion in class_proportions.items():
    print(f"{class_name}: {proportion:.2f}")


Class Proportions:
tomato_fully_ripened: 0.01
tomato_half_ripened: 0.01
banana_overripe: 0.21
banana_rotten: 0.30
banana_unripe: 0.21
banana_ripe: 0.22
tomato_green: 0.05


In [5]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Settings shared by every generator below, kept in one place so the
# training, validation and test pipelines cannot drift apart.
IMG_SIZE = (224, 224)   # target_size every image is resized to
BATCH_SIZE = 32
SEED = 42               # fixes the shuffle order of the training generator only
TRAIN_DIR = '/kaggle/input/fine-grained-fruit-quality-assessment/train/train'
TEST_DIR = '/kaggle/input/fine-grained-fruit-quality-assessment/test'

# Multiply pixel values by 1/255 and hold out 20% of TRAIN_DIR for validation.
# NOTE(review): the original comment describes this as a stratified split for
# the unbalanced classes — confirm against the Keras documentation.
data = ImageDataGenerator(rescale=1./255, validation_split=0.2)

trainData = data.flow_from_directory(
    TRAIN_DIR,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='sparse',  # integer labels (label encoding)
    shuffle=True,
    seed=SEED,            # reproducible batch order across runs
    subset='training'
)

# Validation batches come from the same directory; shuffle is off so the
# order of samples is stable between evaluations.
valData = data.flow_from_directory(
    TRAIN_DIR,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='sparse',
    shuffle=False,
    subset='validation'
)

# Test images; shuffle is off so predictions can be matched to
# testData.filenames by position.
# NOTE(review): Keras reports a single class folder for this directory, so the
# labels yielded here look like placeholders rather than ground truth — confirm.
testData = data.flow_from_directory(
    TEST_DIR,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='sparse',
    shuffle=False
)


2025-05-12 21:47:52.183786: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1747086472.206626    1931 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1747086472.213590    1931 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Found 5917 images belonging to 7 classes.
Found 1478 images belonging to 7 classes.
Found 2484 images belonging to 1 classes.


In [6]:
import tensorflow as tf
from tensorflow.keras import layers, Model



def convolutionBlock(x, filters, kernel_size=3, strides=1):
    """Run `x` through Conv2D -> BatchNormalization -> ReLU.

    Args:
        x: Input tensor the convolution is applied to.
        filters: Number of output filters of the convolution.
        kernel_size: Convolution kernel size (default 3).
        strides: Convolution stride (default 1).

    Returns:
        The output tensor of the final ReLU layer.
    """
    # The convolution has no bias term and uses 'same' padding.
    conv = layers.Conv2D(
        filters,
        kernel_size,
        strides=strides,
        padding='same',
        use_bias=False,
    )
    normalized = layers.BatchNormalization()(conv(x))
    activation = layers.ReLU()
    return activation(normalized)

def residuals(x, filters, downsample=False):
    """Build one two-convolution residual block on top of `x`.

    Args:
        x: Input tensor of the block.
        filters: Number of filters used by both 3x3 convolutions.
        downsample: When True, the first convolution (and the shortcut
            projection) use stride 2; otherwise stride 1.

    Returns:
        The tensor produced by ReLU(main_path + shortcut).
    """
    stride = 2 if downsample else 1
    identity = x

    # Main path: conv-BN-ReLU followed by a second conv-BN without activation.
    out = convolutionBlock(x, filters, strides=stride)
    out = layers.Conv2D(filters, 3, strides=1, padding='same', use_bias=False)(out)
    out = layers.BatchNormalization()(out)

    # The shortcut can be added as-is only when the block neither downsamples
    # nor changes the size of the last axis; otherwise project it with a
    # strided 1x1 convolution plus batch normalization.
    shortcut_fits = (not downsample) and out.shape[-1] == identity.shape[-1]
    if not shortcut_fits:
        identity = layers.Conv2D(filters, 1, strides=stride, use_bias=False)(identity)
        identity = layers.BatchNormalization()(identity)

    merged = layers.Add()([out, identity])
    return layers.ReLU()(merged)



def ResNet34(inputShape=(224, 224, 3), num_classes=7):
    """Assemble the residual classifier as a Keras `Model`.

    The network is a 7x7 stride-2 convolution block and a 3x3 stride-2 max
    pool, followed by four stages of residual blocks, global average pooling
    and a softmax Dense layer.

    Args:
        inputShape: Shape passed to the Input layer (default (224, 224, 3)).
        num_classes: Number of units in the final softmax layer (default 7).

    Returns:
        A `Model` mapping the input tensor to the softmax output.
    """
    # One row per stage:
    # (filters, number of residual blocks, first block downsamples?)
    stages = (
        (64, 3, False),
        (128, 4, True),
        (256, 6, True),
        (512, 3, True),
    )

    inputs = layers.Input(shape=inputShape)

    # Stem.
    features = convolutionBlock(inputs, 64, kernel_size=7, strides=2)
    features = layers.MaxPooling2D(pool_size=3, strides=2, padding='same')(features)

    # Only the first block of a stage may downsample; the rest keep stride 1.
    for filters, num_blocks, stage_downsamples in stages:
        for block_index in range(num_blocks):
            features = residuals(
                features,
                filters,
                downsample=(block_index == 0 and stage_downsamples),
            )

    pooled = layers.GlobalAveragePooling2D()(features)
    probabilities = layers.Dense(num_classes, activation='softmax')(pooled)
    return Model(inputs, probabilities)


In [7]:
from tensorflow.keras import layers, regularizers, Model

# Hyperparameters of the classification head and the training run.
NUM_CLASSES = 7
EPOCHS = 20
WEIGHTS_PATH = '/kaggle/working/ResNet34.weights.h5'

base_model = ResNet34(inputShape=(224, 224, 3), num_classes=NUM_CLASSES)

# Branch off the second-to-last layer of base_model, i.e. the layer feeding its
# own softmax Dense, so that built-in classifier is bypassed by the new head.
x = base_model.layers[-2].output

# Head, part 1: Dense(512) with L2 penalty -> BatchNorm -> LeakyReLU -> Dropout.
x = layers.Dense(512, kernel_regularizer=regularizers.l2(0.01))(x)
x = layers.BatchNormalization()(x)
x = layers.LeakyReLU(alpha=0.1)(x)
x = layers.Dropout(0.5)(x)

# Head, part 2: same pattern with 256 units and a lighter dropout.
x = layers.Dense(256, kernel_regularizer=regularizers.l2(0.01))(x)
x = layers.BatchNormalization()(x)
x = layers.LeakyReLU(alpha=0.1)(x)
x = layers.Dropout(0.3)(x)

outputs = layers.Dense(NUM_CLASSES, activation='softmax')(x)

finalModel = Model(inputs=base_model.input, outputs=outputs)

# The generators yield integer labels (class_mode='sparse'), hence the sparse loss.
finalModel.compile(optimizer='adam',
                   loss='sparse_categorical_crossentropy',
                   metrics=['accuracy'])

# Validation metrics fluctuate strongly from epoch to epoch, so persist the
# weights of the epoch with the lowest validation loss rather than whatever
# the last epoch happens to produce.
best_weights_checkpoint = tf.keras.callbacks.ModelCheckpoint(
    WEIGHTS_PATH,
    monitor='val_loss',
    save_best_only=True,
    save_weights_only=True,
)

# Keep the History object so the learning curves can be inspected afterwards.
history = finalModel.fit(
    trainData,
    validation_data=valData,
    epochs=EPOCHS,
    callbacks=[best_weights_checkpoint],
)


I0000 00:00:1747086479.229576    1931 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 13942 MB memory:  -> device: 0, name: Tesla T4, pci bus id: 0000:00:04.0, compute capability: 7.5
I0000 00:00:1747086479.230237    1931 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 13942 MB memory:  -> device: 1, name: Tesla T4, pci bus id: 0000:00:05.0, compute capability: 7.5


Epoch 1/20


  self._warn_if_super_not_called()
I0000 00:00:1747086509.008949    2004 service.cc:148] XLA service 0x7a3044062e00 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1747086509.008996    2004 service.cc:156]   StreamExecutor device (0): Tesla T4, Compute Capability 7.5
I0000 00:00:1747086509.009000    2004 service.cc:156]   StreamExecutor device (1): Tesla T4, Compute Capability 7.5
I0000 00:00:1747086511.426700    2004 cuda_dnn.cc:529] Loaded cuDNN version 90300
I0000 00:00:1747086523.771558    2004 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m185/185[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m117s[0m 405ms/step - accuracy: 0.6078 - loss: 7.4153 - val_accuracy: 0.4777 - val_loss: 5.2806
Epoch 2/20
[1m185/185[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 320ms/step - accuracy: 0.7731 - loss: 2.1419 - val_accuracy: 0.6353 - val_loss: 1.6622
Epoch 3/20
[1m185/185[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m62s[0m 322ms/step - accuracy: 0.8119 - loss: 1.0719 - val_accuracy: 0.3945 - val_loss: 2.0707
Epoch 4/20
[1m185/185[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m62s[0m 321ms/step - accuracy: 0.8413 - loss: 0.7313 - val_accuracy: 0.6976 - val_loss: 1.0095
Epoch 5/20
[1m185/185[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 318ms/step - accuracy: 0.8420 - loss: 0.6380 - val_accuracy: 0.3694 - val_loss: 4.7951
Epoch 6/20
[1m185/185[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m58s[0m 306ms/step - accuracy: 0.8721 - loss: 0.5660 - val_accuracy: 0.5765 - val_loss: 1.3702
Epoch 7/20
[1m185/1

In [8]:
# Restore the weights that the training cell wrote to disk.
finalModel.load_weights('/kaggle/working/ResNet34.weights.h5')

# evaluate() returns the loss followed by the compiled metric (accuracy).
validation_metrics = finalModel.evaluate(valData)
val_loss, val_accuracy = validation_metrics

print(f"Validation Accuracy: {val_accuracy:.4f}")


[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 269ms/step - accuracy: 0.9239 - loss: 0.3424
Validation Accuracy: 0.9154


In [11]:
import pandas as pd

# Predicted class index per test image: argmax over the model's softmax
# outputs, converted to a NumPy array so pandas stores plain integers
# instead of relying on implicit conversion of a tf.Tensor.
predicted_classes = tf.argmax(finalModel.predict(testData), axis=1).numpy()

filenames = testData.filenames

# testData was built with shuffle=False, so predictions are matched to
# filenames by position; make sure there is exactly one prediction per file.
assert len(predicted_classes) == len(filenames), (
    f"{len(predicted_classes)} predictions for {len(filenames)} test files"
)

# NOTE(review): 'Class' holds integer class indices (trainData.class_indices
# maps class names to these indices) and 'ImageID' keeps the file extension —
# confirm both against the expected submission format.
df_predictions = pd.DataFrame({
    'ImageID': [os.path.basename(f) for f in filenames],
    'Class': predicted_classes
})

df_predictions.to_csv('/kaggle/working/test_predictions.csv', index=False)

[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 194ms/step
