In [1]:
import os
import tensorflow as tf
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split




In [2]:
# Define dataset path
DATASET_PATH = './data/leaf_coffe_dataset'
IMG_SIZE = 224
BATCH_SIZE = 32

# Get class names
class_names = os.listdir(DATASET_PATH)
NUM_CLASSES = len(class_names)

# List all image paths and labels
image_paths = []
labels = []

for i, class_name in enumerate(class_names):
    class_path = os.path.join(DATASET_PATH, class_name)
    for img in os.listdir(class_path):
        image_paths.append(os.path.join(class_path, img))
        labels.append(i)  # Assign a numeric label

In [3]:
train_paths, temp_paths, train_labels, temp_labels = train_test_split(
    image_paths, labels, test_size=0.3, stratify=labels, random_state=42
)

# Split temp (30%) into 20% test, 10% validation
test_paths, val_paths, test_labels, val_labels = train_test_split(
    temp_paths, temp_labels, test_size=1/3, stratify=temp_labels, random_state=42
)

# Print dataset sizes
print(f"Training set: {len(train_paths)} images")
print(f"Testing set: {len(test_paths)} images")
print(f"Validation set: {len(val_paths)} images")

Training set: 40984 images
Testing set: 11710 images
Validation set: 5855 images


In [4]:
train_datagen = ImageDataGenerator(rescale=1.0/255.0)
test_val_datagen = ImageDataGenerator(rescale=1.0/255.0)

# Training generator (70%)
train_generator = train_datagen.flow_from_directory(
    DATASET_PATH,
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    class_mode="categorical",
    subset=None  # No predefined subset, using custom split
)

# Test generator (20%)
test_generator = test_val_datagen.flow_from_directory(
    DATASET_PATH,
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    class_mode="categorical",
    subset=None,
    shuffle=False
)

# Validation generator (10%)
val_generator = test_val_datagen.flow_from_directory(
    DATASET_PATH,
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    class_mode="categorical",
    subset=None,
    shuffle=False
)

Found 58549 images belonging to 5 classes.
Found 58549 images belonging to 5 classes.
Found 58549 images belonging to 5 classes.


In [5]:
base_model = MobileNetV2(input_shape=(IMG_SIZE, IMG_SIZE, 3), include_top=False, weights="imagenet")
base_model.trainable = False  # Freeze base layers

# Add Custom Layers
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(128, activation="relu")(x)
x = Dropout(0.3)(x)
output_layer = Dense(NUM_CLASSES, activation="softmax")(x)

# Create Model
model = Model(inputs=base_model.input, outputs=output_layer)

# Compile Model
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss="categorical_crossentropy",
    metrics=["accuracy"]
)

# Train Model
history = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=3
)



Epoch 1/3


Epoch 2/3
Epoch 3/3
