<a href="https://colab.research.google.com/github/ezinneanne/farmeyeml/blob/new_branch/farmeyeimg.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
from google.colab import files

# Open Colab's file picker and send the dataset ZIP from the local machine
uploaded = files.upload()

# Display the names of the uploaded files so they can be checked against the
# archive name expected by the unzip step
list(uploaded)

KeyboardInterrupt: 

In [1]:
# UNZIP DATASET
import os
import zipfile

# Location of the zipped dataset and the directory its contents are extracted into
zip_path = '/content/segmented.zip'
extract_path = '/content/segmented'

# Validate the archive BEFORE creating the extraction directory. Otherwise a
# missing or corrupt upload leaves an empty dataset folder behind, and the
# failure only surfaces later when the images are loaded.
if not os.path.isfile(zip_path):
    raise FileNotFoundError(
        f"Dataset archive not found at '{zip_path}'. "
        "Run the upload cell first, or point zip_path at the uploaded file."
    )
if not zipfile.is_zipfile(zip_path):
    raise zipfile.BadZipFile(
        f"'{zip_path}' is not a valid ZIP archive; re-upload the dataset."
    )

# Create the extraction directory if it doesn't exist
os.makedirs(extract_path, exist_ok=True)

# Unzip the dataset to the extraction path
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_path)

print("Dataset unzipped.")

FileNotFoundError: [Errno 2] No such file or directory: 'segmented.zip'

In [None]:
import tensorflow as tf #importing tensorflow for deep learning functionality
from tensorflow.keras import layers, models # Import the 'layers' and 'models' submodules from tensorflow.keras
# 'layers' is used to build different types of neural network layers (e.g., Conv2D, Dense, etc.)
# 'models' provides APIs to create and manage models (Sequential and Functional APIs)

from tensorflow.keras.applications import MobileNetV2 # Import the pre-trained MobileNetV2 model from keras.applications
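# MobileNetV2 is a lightweight deep convolutional neural network architecture for mobile and edge devices.
# It can be used as a feature extractor or as a full model for transfer learning.
# NOTE: the model built later in this notebook uses EfficientNetB0, so this import is currently unused.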

from sklearn.metrics import classification_report # to make classification report for evaluation
import numpy as np # importing numpy for numerical operations

from tensorflow.keras.models import Sequential  # Importing Sequential model for linear stacking of layers

from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
import pathlib

In [None]:
# ==== paths ====
# Root directory of the extracted dataset; it is the directory handed to
# image_dataset_from_directory when the training/validation sets are loaded.
data_dir = pathlib.Path("/content/segmented")

# Size every image is resized to on load; with a trailing 3 (RGB channels) it is
# also the input shape of the model.
img_size = (224, 224)
# Number of images per batch produced by the datasets.
batch_size = 32
# Seed for the train/validation split; both subsets are loaded with this same value.
seed = 1337

In [None]:
# LOAD DATASET AND SPLIT
# Options shared by both subsets. The split fraction and the seed have to be
# identical in the two calls so that the subsets are complementary.
_split_kwargs = dict(
    validation_split=0.15,   # hold out 15% of the images for validation
    seed=seed,               # seed for a consistent split
    image_size=img_size,     # resize all images
    batch_size=batch_size,   # number of images per batch
)

# Training subset: the 85% of the images that are not held out
train_ds = tf.keras.utils.image_dataset_from_directory(
    data_dir, subset="training", **_split_kwargs
)

# Validation subset: the remaining 15%
val_ds = tf.keras.utils.image_dataset_from_directory(
    data_dir, subset="validation", **_split_kwargs
)

# Class names as reported by the training dataset, and how many there are
class_names = train_ds.class_names
num_classes = len(class_names)

In [None]:
# PREFETCHING FOR PERFORMANCE
AUTOTUNE = tf.data.AUTOTUNE

# Training pipeline, one stage per line:
#   cache    -> keep loaded elements so later epochs skip re-reading them
#   shuffle  -> buffer of 1000 elements (the dataset is already batched, so the
#               elements being shuffled are whole batches)
#   prefetch -> prepare upcoming elements while the current one is consumed
train_ds = train_ds.cache()
train_ds = train_ds.shuffle(1000)
train_ds = train_ds.prefetch(buffer_size=AUTOTUNE)

# Validation pipeline: same caching and prefetching, but no shuffling
val_ds = val_ds.cache()
val_ds = val_ds.prefetch(buffer_size=AUTOTUNE)

In [None]:
# ==== data augmentation ====
# Random image transformations, assembled layer by layer and applied in this order.
data_augmentation = tf.keras.Sequential()
data_augmentation.add(layers.RandomFlip("horizontal"))      # horizontal flips
data_augmentation.add(layers.RandomRotation(0.1))           # rotation, factor 0.1
data_augmentation.add(layers.RandomZoom(0.1))               # zoom, factor 0.1
data_augmentation.add(layers.RandomBrightness(factor=0.1))  # brightness, factor 0.1

In [None]:
# ==== base model (transfer learning) ====
# weights='imagenet' means the model is loaded with weights learned from training on the ImageNet dataset
# input_shape specifies the shape of input images (height, width, 3 channels for RGB)
# include_top=False excludes the fully connected layers at the top of the model (used for classification in ImageNet)
base = tf.keras.applications.EfficientNetB0(
    include_top=False, input_shape=img_size + (3,), weights="imagenet"
)
base.trainable = False  # Freeze base model layers so its weights will not be updated during training

# Build model on top of EfficientNetB0:
# augmentation -> preprocessing -> frozen backbone -> pooling -> dropout -> softmax head
inputs = layers.Input(shape=img_size + (3,))
x = data_augmentation(inputs)
# NOTE(review): Keras documents EfficientNet's preprocess_input as a pass-through
# (rescaling lives inside the model); kept so the pipeline stays explicit.
x = tf.keras.applications.efficientnet.preprocess_input(x)
# training=False keeps the backbone in inference mode, also after it is unfrozen below
x = base(x, training=False)
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dropout(0.2)(x)
outputs = layers.Dense(num_classes, activation="softmax")(x)
model = models.Model(inputs, outputs)

# Phase 1: train only the new classification head
model.compile(
    optimizer=tf.keras.optimizers.Adam(1e-3),
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"]
)

# The same callback list is used for both training phases
callbacks = [
    EarlyStopping(patience=5, restore_best_weights=True, monitor="val_accuracy"),
    ReduceLROnPlateau(patience=2, factor=0.3, monitor="val_loss"),
    ModelCheckpoint("best_seg_model.h5", save_best_only=True, monitor="val_accuracy")
]

history = model.fit(train_ds, validation_data=val_ds, epochs=15, callbacks=callbacks)

# ==== unfreeze top layers for a short fine-tune ====
n_finetune_layers = 40  # number of backbone layers (counted from the end) to unfreeze

base.trainable = True
for layer in base.layers[:-n_finetune_layers]:
    layer.trainable = False
# Keep BatchNormalization layers frozen inside the unfrozen slice as well: the
# Keras EfficientNet fine-tuning guide warns that making them trainable hurts
# accuracy right after unfreezing.
for layer in base.layers[-n_finetune_layers:]:
    if isinstance(layer, layers.BatchNormalization):
        layer.trainable = False

# Phase 2: recompile so the changed trainable flags take effect, with a 10x lower learning rate
model.compile(optimizer=tf.keras.optimizers.Adam(1e-4),
              loss="sparse_categorical_crossentropy",
              metrics=["accuracy"])
history_ft = model.fit(train_ds, validation_data=val_ds, epochs=5, callbacks=callbacks)

In [None]:
# Save the current in-memory `model` to a .keras file (relative path, so it is
# written to the working directory).
model.save("plant_disease_classifier.keras")

In [None]:
# Evaluate on validation set
val_loss, val_acc = model.evaluate(val_ds)
print(f"Validation accuracy: {val_acc:.3f}")

# Confusion matrix
import numpy as np, itertools
from sklearn.metrics import confusion_matrix, classification_report
import matplotlib.pyplot as plt

# Collect true and predicted class indices batch by batch.
# predict_on_batch is used because each call handles exactly one batch that is
# already in memory; calling model.predict() inside a loop adds per-call overhead.
y_true, y_pred = [], []
for imgs, labels in val_ds:
    preds = model.predict_on_batch(imgs)
    y_true.extend(labels.numpy())
    y_pred.extend(np.argmax(preds, axis=1))

# Pass the full label set explicitly. Without it, a class that happens to be
# absent from the validation split makes classification_report raise (label
# count != len(target_names)) and shrinks the confusion matrix.
label_ids = list(range(len(class_names)))
cm = confusion_matrix(y_true, y_pred, labels=label_ids)

print(classification_report(
    y_true,
    y_pred,
    labels=label_ids,
    target_names=class_names,
    zero_division=0,  # classes with no samples/predictions score 0 without a warning
))

# Plot the confusion matrix: rows are true classes, columns are predicted classes
fig, ax = plt.subplots(figsize=(10, 10))
im = ax.imshow(cm, interpolation='nearest')
ax.set_title("Confusion Matrix")
ax.set_xlabel("Predicted label")
ax.set_ylabel("True label")
ax.set_xticks(label_ids)
ax.set_xticklabels(class_names, rotation=90)
ax.set_yticks(label_ids)
ax.set_yticklabels(class_names)
fig.colorbar(im, ax=ax)
fig.tight_layout()
plt.show()