In [1]:
pip install tensorflow opencv-python-headless matplotlib


Note: you may need to restart the kernel to use updated packages.


In [2]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input
from PIL import Image
import numpy as np
import os
from xml.etree import ElementTree as et
from sklearn.model_selection import train_test_split

# Define your root directories
train_root = '/kaggle/input/fruit-images-for-object-detection/train_zip/train'
val_root = '/kaggle/input/fruit-images-for-object-detection/test_zip/test'

# Define your labels
labels = ['background', 'orange', 'apple', 'banana']
label2targets = {l: t for t, l in enumerate(labels)}
num_classes = len(labels)


In [3]:
def preprocess_img(img_path):
    img = Image.open(img_path).convert('RGB')
    img = img.resize((224, 224))  # Resize the image to a fixed size
    img = np.array(img)
    return img


In [4]:
def load_dataset(root):
    images = []
    targets = []
    xml_paths = sorted([os.path.join(root, file) for file in os.listdir(root) if file.endswith('.xml')])
    for xml_path in xml_paths:
        img_path = xml_path.replace('.xml', '.jpg')
        img = preprocess_img(img_path)
        tree = et.parse(xml_path)
        root = tree.getroot()
        for obj in root.findall('object'):
            label = obj.find('name').text
            label_id = label2targets.get(label, 0)  # Assign 0 (background) if label not found
            box = obj.find('bndbox')
            xmin = int(box.find('xmin').text)
            ymin = int(box.find('ymin').text)
            xmax = int(box.find('xmax').text)
            ymax = int(box.find('ymax').text)
            targets.append([xmin, ymin, xmax, ymax, label_id])  # Store targets as a list
            images.append(img)
    return np.array(images), np.array(targets)


In [5]:
# Load the dataset
X_train, y_train = load_dataset(train_root)
X_val, y_val = load_dataset(val_root)




In [6]:
def get_model(num_classes):
    base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
    for layer in base_model.layers:
        layer.trainable = False
    x = layers.GlobalAveragePooling2D()(base_model.output)
    x = layers.Dense(256, activation='relu')(x)
    outputs = layers.Dense(num_classes)(x)
    model = keras.Model(inputs=base_model.input, outputs=outputs)
    return model

# Compile your model
model = get_model(num_classes)
model.compile(optimizer='adam', loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), metrics=['accuracy'])


In [7]:
# Train your model
model.fit(X_train, y_train[:, -1], epochs=30, batch_size=32, validation_data=(X_val, y_val[:, -1]))


Epoch 1/30


I0000 00:00:1725295966.491350     927 service.cc:145] XLA service 0x7d3e4c004660 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1725295966.491742     927 service.cc:153]   StreamExecutor device (0): Tesla P100-PCIE-16GB, Compute Capability 6.0


[1m 3/15[0m [32m━━━━[0m[37m━━━━━━━━━━━━━━━━[0m [1m0s[0m 45ms/step - accuracy: 0.2934 - loss: 1.7266 

I0000 00:00:1725295970.489083     927 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 694ms/step - accuracy: 0.5665 - loss: 1.0190 - val_accuracy: 0.8974 - val_loss: 0.3495
Epoch 2/30
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 54ms/step - accuracy: 0.8912 - loss: 0.3110 - val_accuracy: 0.8547 - val_loss: 0.4909
Epoch 3/30
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 55ms/step - accuracy: 0.9137 - loss: 0.3110 - val_accuracy: 0.8718 - val_loss: 0.5624
Epoch 4/30
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 54ms/step - accuracy: 0.8932 - loss: 0.2755 - val_accuracy: 0.8034 - val_loss: 0.8895
Epoch 5/30
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 54ms/step - accuracy: 0.8734 - loss: 0.3076 - val_accuracy: 0.9145 - val_loss: 0.3744
Epoch 6/30
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 54ms/step - accuracy: 0.9363 - loss: 0.1585 - val_accuracy: 0.8974 - val_loss: 0.3857
Epoch 7/30
[1m15/15[0m [32m━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x7d3ec81f8040>

In [8]:
# Evaluate your model
model.evaluate(X_val, y_val[:, -1])


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step - accuracy: 0.9167 - loss: 0.2730


[0.4020724296569824, 0.8803418874740601]