In [1]:


# Install the Kaggle library
!pip install kaggle

# Import the files module from Colab
from google.colab import files

# Prompt you to upload your kaggle.json file
print("Please upload your kaggle.json file:")
files.upload()

# Create a Kaggle directory, move the file, and set permissions
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

print("Kaggle API configured successfully!")


Please upload your kaggle.json file:


Saving kaggle.json to kaggle.json
Kaggle API configured successfully!


In [2]:
## Step 2: Download and Unzip the Dataset

# Download the dataset using its Kaggle API name
!kaggle datasets download -d kritikseth/fruit-and-vegetable-image-recognition

# Unzip the downloaded file (quietly)
!unzip -q fruit-and-vegetable-image-recognition.zip

print("Dataset downloaded and unzipped.")

# --- Define Paths ---
# In Colab, the unzipped folders are right here
train_path = 'train'
validation_path = 'validation'
test_path = 'test'

# --- Verify the paths ---
print(f"\nChecking path: {train_path}")
print("Contents of train path (should show 'apple', 'banana', etc.):")
!ls {train_path} | head -n 10

Dataset URL: https://www.kaggle.com/datasets/kritikseth/fruit-and-vegetable-image-recognition
License(s): CC0-1.0
Downloading fruit-and-vegetable-image-recognition.zip to /content
 99% 1.96G/1.98G [00:22<00:00, 121MB/s] 
100% 1.98G/1.98G [00:22<00:00, 92.8MB/s]
Dataset downloaded and unzipped.

Checking path: train
Contents of train path (should show 'apple', 'banana', etc.):
apple
banana
beetroot
bell pepper
cabbage
capsicum
carrot
cauliflower
chilli pepper
corn


In [11]:
## Step 3: Create DataFrames for Image Loading (FIXED)
import pandas as pd
import os

# Define the lists of fruits and vegetables from the case study
fruits_list = [
    "Banana", "Apple", "Pear", "Grapes", "Orange", "Kiwi",
    "Watermelon", "Pomegranate", "Pineapple", "Mango"
]

vegetables_list = [
    "Cucumber", "Carrot", "Capsicum", "Onion", "Potato", "Lemon",
    "Tomato", "Radish", "Beetroot", "Cabbage", "Lettuce", "Spinach",
    "Soybean", "Cauliflower", "Bell Pepper", "Chilly", "Pepper",
    "Turnip", "Corn", "Sweetcorn", "Sweet Potato", "Paprika",
    "Jalapeño", "Ginger", "Garlic", "Peas", "Eggplant"
]

# Function to create a DataFrame map
def create_dataframe(original_dir):
    image_data = []

    for category in os.listdir(original_dir): # 'category' will be "apple", "banana" (lowercase)
        category_path = os.path.join(original_dir, category)

        if not os.path.isdir(category_path):
            continue

        # --- THIS IS THE FIX ---
        # Convert the lowercase folder name (e.g., "apple")
        # to an uppercase name (e.g., "Apple") before checking.
        category_name = category.title()

        label = ""
        # Now check the capitalized name against the lists
        if category_name in fruits_list:
            label = "Fruit"
        elif category_name in vegetables_list:
            label = "Vegetable"
        else:
            continue

        for img_file in os.listdir(category_path):
            file_path = os.path.join(category_path, img_file)
            image_data.append((file_path, label))

    return pd.DataFrame(image_data, columns=['filename', 'class'])

# Create the DataFrames
train_df = create_dataframe(train_path)
validation_df = create_dataframe(validation_path)
test_df = create_dataframe(test_path)

# --- CRITICAL CHECK ---
# This output should now be correct.
print("--- Training DataFrame Head ---")
print(train_df.head())
print(f"\nFound {len(train_df)} training images.")
print(f"Found {len(validation_df)} validation images.")
print(f"Found {len(test_df)} test images.")

--- Training DataFrame Head ---
                   filename  class
0  train/apple/Image_42.jpg  Fruit
1  train/apple/Image_78.jpg  Fruit
2  train/apple/Image_82.jpg  Fruit
3  train/apple/Image_65.png  Fruit
4  train/apple/Image_50.jpg  Fruit

Found 2693 training images.
Found 304 validation images.
Found 309 test images.


In [13]:
## Step 4: Create Image Data Generators
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Define image size and batch size
IMG_SIZE = (150, 150)
BATCH_SIZE = 32

# Create ImageDataGenerators to rescale images (pixels 0-255 -> 0-1)
train_datagen = ImageDataGenerator(rescale=1./255)
validation_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

# --- Create generators from the DataFrames ---
train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_df,
    x_col='filename',         # Column with image paths
    y_col='class',            # Column with labels
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary'       # Binary problem: Fruit or Vegetable
)

validation_generator = validation_datagen.flow_from_dataframe(
    dataframe=validation_df,
    x_col='filename',
    y_col='class',
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary'
)

test_generator = test_datagen.flow_from_dataframe(
    dataframe=test_df,
    x_col='filename',
    y_col='class',
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary',
    shuffle=False # Keep test data in order
)

print(f"Class indices: {train_generator.class_indices}")

Found 2693 validated image filenames belonging to 2 classes.
Found 304 validated image filenames belonging to 2 classes.
Found 309 validated image filenames belonging to 2 classes.
Class indices: {'Fruit': 0, 'Vegetable': 1}


In [14]:
## Step 5: Build and Compile the Model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout
from tensorflow.keras.applications import MobileNetV2


base_model = MobileNetV2(
    input_shape=(150, 150, 3),
    include_top=False,
    weights='imagenet'
)


base_model.trainable = False


model = Sequential([
    base_model,
    GlobalAveragePooling2D(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])


model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)


model.summary()


  base_model = MobileNetV2(


In [15]:
## Step 6: Train the Model

EPOCHS = 10

history = model.fit(
    train_generator,
    epochs=EPOCHS,
    validation_data=validation_generator
)

print("Model training complete.")

  self._warn_if_super_not_called()


Epoch 1/10
[1m 3/85[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1:35[0m 1s/step - accuracy: 0.6615 - loss: 0.8047   



[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m140s[0m 2s/step - accuracy: 0.7898 - loss: 0.4702 - val_accuracy: 0.9474 - val_loss: 0.1419
Epoch 2/10
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m129s[0m 2s/step - accuracy: 0.9064 - loss: 0.2374 - val_accuracy: 0.9539 - val_loss: 0.1133
Epoch 3/10
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m125s[0m 1s/step - accuracy: 0.9333 - loss: 0.1699 - val_accuracy: 0.9737 - val_loss: 0.0947
Epoch 4/10
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m122s[0m 1s/step - accuracy: 0.9546 - loss: 0.1142 - val_accuracy: 0.9868 - val_loss: 0.0683
Epoch 5/10
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m128s[0m 2s/step - accuracy: 0.9577 - loss: 0.1149 - val_accuracy: 0.9836 - val_loss: 0.0598
Epoch 6/10
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m123s[0m 1s/step - accuracy: 0.9635 - loss: 0.0938 - val_accuracy: 0.9901 - val_loss: 0.0494
Epoch 7/10
[1m85/85[0m [32m━━━━━━━━━━━━━━━

In [16]:
## Step 7: Evaluate the Model

print("Evaluating model on test data...")
test_loss, test_accuracy = model.evaluate(test_generator)

print(f"Test Loss: {test_loss}")
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")


Evaluating model on test data...
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 2s/step - accuracy: 0.9896 - loss: 0.0284
Test Loss: 0.042614102363586426
Test Accuracy: 99.03%
