In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!ls /content/



drive  sample_data


In [3]:
import zipfile

dataset_path = "/content/drive/MyDrive/CSE465/Mango"  # Make sure this matches the exact file name

print("✅ Dataset extracted successfully!")


✅ Dataset extracted successfully!


In [8]:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.applications import ResNet50
from sklearn.model_selection import StratifiedKFold
import numpy as np
import os
import pandas as pd
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Define paths
augmented_data_dir = '/content/augmented_dataset'  # Folder where augmented images are stored

# Get the list of all image files and corresponding labels
image_paths = []
image_labels = []

# Traverse the directory to get paths and labels
for label, class_name in enumerate(os.listdir(augmented_data_dir)):
    class_path = os.path.join(augmented_data_dir, class_name)
    if os.path.isdir(class_path):
        for image_name in os.listdir(class_path):
            image_paths.append(os.path.join(class_path, image_name))
            image_labels.append(class_name)  # Use class_name (string) as the label

# Convert to numpy arrays
image_paths = np.array(image_paths)
image_labels = np.array(image_labels)

# Create a DataFrame
df = pd.DataFrame({
    'image_path': image_paths,
    'label': image_labels
})

# Initialize ImageDataGenerator for rescaling
datagen = ImageDataGenerator(rescale=1./255)

# Initialize ResNet50 model with pre-trained weights
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
base_model.trainable = False  # Freeze the base layers

# Create the model
model = models.Sequential([
    base_model,
    layers.GlobalAveragePooling2D(),
    layers.Dense(256, activation='relu'),
    layers.Dense(len(os.listdir(augmented_data_dir)), activation='softmax')  # Adjust the number of classes dynamically
])

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Set up 5-fold cross-validation
kfold = StratifiedKFold(n_splits=5, shuffle=True)

accuracies = []
for train_index, val_index in kfold.split(df['image_path'], df['label']):
    # Split the data into training and validation sets based on indices
    train_df = df.iloc[train_index]
    val_df = df.iloc[val_index]

    # Create new ImageDataGenerators for the current fold
    train_generator = datagen.flow_from_dataframe(
        dataframe=train_df,
        x_col='image_path',
        y_col='label',
        target_size=(224, 224),
        batch_size=32,
        class_mode='categorical',
        shuffle=True
    )

    val_generator = datagen.flow_from_dataframe(
        dataframe=val_df,
        x_col='image_path',
        y_col='label',
        target_size=(224, 224),
        batch_size=32,
        class_mode='categorical',
        shuffle=False
    )

    # Train the model on


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m94765736/94765736[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Found 3116 validated image filenames belonging to 8 classes.
Found 779 validated image filenames belonging to 8 classes.
Found 3116 validated image filenames belonging to 8 classes.
Found 779 validated image filenames belonging to 8 classes.
Found 3116 validated image filenames belonging to 8 classes.
Found 779 validated image filenames belonging to 8 classes.
Found 3116 validated image filenames belonging to 8 classes.
Found 779 validated image filenames belonging to 8 classes.
Found 3116 validated image filenames belonging to 8 classes.
Found 779 validated image filenames belonging to 8 classes.


In [9]:
# Save the trained model
model.save("/content/drive/MyDrive/CSE465/model.h5")  # This will save the model to the specified path




In [10]:
# Step 6: Test Script

import tensorflow as tf
import numpy as np
from tensorflow.keras.preprocessing import image

# Load the trained model
loaded_model = tf.keras.models.load_model('/content/drive/MyDrive/CSE465/model.h5')  # Adjust the path if necessary

# Test the model on new images
test_image_path = "/content/drive/MyDrive/CSE465/Mango/Anthracnose/20211008_124249 (Custom).jpg"  # Path to the test image (replace with your test image path)
test_image = image.load_img(test_image_path, target_size=(224, 224))  # Resize to match model input size
test_image_array = image.img_to_array(test_image)  # Convert image to array
test_image_array = np.expand_dims(test_image_array, axis=0)  # Add batch dimension
test_image_array /= 255.0  # Rescale the image (same preprocessing used during training)

# Predict using the model
prediction = loaded_model.predict(test_image_array)  # Get predictions
predicted_class = np.argmax(prediction, axis=1)  # Get the class with the highest probability

# Display the result
print(f"Predicted class: {predicted_class[0]}")  # Print the predicted class




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
Predicted class: 5


In [7]:
!ls /content/augmented_dataset


 Anthracnose	    'Cutting Weevil'  'Gall Midge'  'Powdery Mildew'
'Bacterial Canker'  'Die Back'	       Healthy	    'Sooty Mould'


In [19]:
# Create training and validation data generators
train_datagen = ImageDataGenerator(rescale=1./255)
val_datagen = ImageDataGenerator(rescale=1./255)

# Initialize 5-fold cross-validation
kfold = StratifiedKFold(n_splits=5, shuffle=True)

accuracies = []

for fold, (train_index, val_index) in enumerate(kfold.split(df['image_path'], df['label']), 1):
    print(f"\nTraining Fold {fold}/5...")

    # Split the data into training and validation sets
    train_df = df.iloc[train_index]
    val_df = df.iloc[val_index]

    # Create training data generator
    train_generator = train_datagen.flow_from_dataframe(
        dataframe=train_df,
        x_col='image_path',
        y_col='label',
        target_size=(224, 224),
        batch_size=32,
        class_mode='categorical',  # Categorical because it's multi-class classification
        shuffle=True
    )

    # Create validation data generator
    val_generator = val_datagen.flow_from_dataframe(
        dataframe=val_df,
        x_col='image_path',
        y_col='label',
        target_size=(224, 224),
        batch_size=32,
        class_mode='categorical',
        shuffle=False
    )



Training Fold 1/5...


TypeError: If class_mode="categorical", y_col="label" column values must be type string, list or tuple.

In [20]:
# Clean label column by stripping extra spaces and converting to strings
df['label'] = df['label'].str.strip().astype(str)

# Check for any remaining null or incorrect values
print(df['label'].isnull().sum())  # Should be 0
print(df['label'].head())  # Check the first few labels


AttributeError: Can only use .str accessor with string values!

In [23]:
# Initialize ImageDataGenerator for rescaling
train_datagen = ImageDataGenerator(
    rescale=1./255,  # Normalize the image data to [0, 1] range
    rotation_range=20,      # Rotate images by 20 degrees
    width_shift_range=0.2,  # Shift width by 20%
    height_shift_range=0.2, # Shift height by 20%
    shear_range=0.2,        # Shear transformation
    zoom_range=0.2,         # Random zoom
    horizontal_flip=True,   # Flip images horizontally
    fill_mode='nearest'     # Fill missing pixels
)

val_datagen = ImageDataGenerator(rescale=1./255)  # Just rescale for validation

# Initialize 5-fold cross-validation
kfold = StratifiedKFold(n_splits=5, shuffle=True)

accuracies = []

for fold, (train_index, val_index) in enumerate(kfold.split(df['image_path'], df['label']), 1):
    print(f"\nTraining Fold {fold}/5...")

    # Split the data into training and validation sets
    train_df = df.iloc[train_index]
    val_df = df.iloc[val_index]

    # Create training data generator
    train_generator = train_datagen.flow_from_dataframe(
        dataframe=train_df,
        x_col='image_path',
        y_col='label',
        target_size=(224, 224),  # Resize images to match the model input size
        batch_size=32,
        class_mode='categorical',
        shuffle=True  # Shuffle data for each epoch
    )

    # Create validation data generator
    val_generator = val_datagen.flow_from_dataframe(
        dataframe=val_df,
        x_col='image_path',
        y_col='label',
        target_size=(224, 224),
        batch_size=32,
        class_mode='categorical',
        shuffle=False  # No shuffle for validation
    )

    # Now train the model using train_generator and val_generator
    # (Assuming you have already defined and compiled your model earlier)
    model.fit(
        train_generator,
        steps_per_epoch=len(train_generator),
        epochs=5,  # Change this based on your needs
        validation_data=val_generator,
        validation_steps=len(val_generator)
    )

    # Save the model after each fold (optional)
    model.save(f"/content/drive/MyDrive/CSE465/model_fold_{fold}.h5")



Training Fold 1/5...
Found 3116 validated image filenames belonging to 8 classes.
Found 779 validated image filenames belonging to 8 classes.


  self._warn_if_super_not_called()


Epoch 1/5
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m792s[0m 8s/step - accuracy: 0.1573 - loss: 2.1268 - val_accuracy: 0.2670 - val_loss: 1.9197
Epoch 2/5
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m782s[0m 8s/step - accuracy: 0.2241 - loss: 1.9486 - val_accuracy: 0.2927 - val_loss: 1.8312
Epoch 3/5
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m804s[0m 8s/step - accuracy: 0.2369 - loss: 1.9056 - val_accuracy: 0.3196 - val_loss: 1.7973
Epoch 4/5
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m760s[0m 8s/step - accuracy: 0.2592 - loss: 1.8460 - val_accuracy: 0.3530 - val_loss: 1.7441
Epoch 5/5
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m762s[0m 8s/step - accuracy: 0.2689 - loss: 1.8275 - val_accuracy: 0.3517 - val_loss: 1.7081





Training Fold 2/5...
Found 3116 validated image filenames belonging to 8 classes.
Found 779 validated image filenames belonging to 8 classes.


  self._warn_if_super_not_called()


Epoch 1/5
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m770s[0m 8s/step - accuracy: 0.2868 - loss: 1.8149 - val_accuracy: 0.3543 - val_loss: 1.6942
Epoch 2/5
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m795s[0m 8s/step - accuracy: 0.3144 - loss: 1.7704 - val_accuracy: 0.2965 - val_loss: 1.6599
Epoch 3/5
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m821s[0m 8s/step - accuracy: 0.2957 - loss: 1.7818 - val_accuracy: 0.3607 - val_loss: 1.6481
Epoch 4/5
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m762s[0m 8s/step - accuracy: 0.2946 - loss: 1.7520 - val_accuracy: 0.3453 - val_loss: 1.6990
Epoch 5/5
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m804s[0m 8s/step - accuracy: 0.3099 - loss: 1.7326 - val_accuracy: 0.3838 - val_loss: 1.6044





Training Fold 3/5...
Found 3116 validated image filenames belonging to 8 classes.
Found 779 validated image filenames belonging to 8 classes.


  self._warn_if_super_not_called()


Epoch 1/5
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m818s[0m 8s/step - accuracy: 0.3250 - loss: 1.7075 - val_accuracy: 0.3941 - val_loss: 1.5976
Epoch 2/5
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m762s[0m 8s/step - accuracy: 0.3317 - loss: 1.6927 - val_accuracy: 0.4146 - val_loss: 1.5913
Epoch 3/5
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m799s[0m 8s/step - accuracy: 0.3340 - loss: 1.6845 - val_accuracy: 0.3864 - val_loss: 1.6210
Epoch 4/5
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m816s[0m 8s/step - accuracy: 0.3657 - loss: 1.6637 - val_accuracy: 0.4018 - val_loss: 1.5799
Epoch 5/5
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m805s[0m 8s/step - accuracy: 0.3605 - loss: 1.6598 - val_accuracy: 0.3787 - val_loss: 1.5658





Training Fold 4/5...
Found 3116 validated image filenames belonging to 8 classes.
Found 779 validated image filenames belonging to 8 classes.


  self._warn_if_super_not_called()


Epoch 1/5
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m755s[0m 8s/step - accuracy: 0.3592 - loss: 1.6547 - val_accuracy: 0.3915 - val_loss: 1.5740
Epoch 2/5
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m800s[0m 8s/step - accuracy: 0.3640 - loss: 1.6139 - val_accuracy: 0.4519 - val_loss: 1.5289
Epoch 3/5
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m801s[0m 8s/step - accuracy: 0.3640 - loss: 1.6639 - val_accuracy: 0.4275 - val_loss: 1.4929
Epoch 4/5
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m802s[0m 8s/step - accuracy: 0.3909 - loss: 1.6181 - val_accuracy: 0.4801 - val_loss: 1.4884
Epoch 5/5
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m802s[0m 8s/step - accuracy: 0.3858 - loss: 1.6202 - val_accuracy: 0.4480 - val_loss: 1.4954





Training Fold 5/5...
Found 3116 validated image filenames belonging to 8 classes.
Found 779 validated image filenames belonging to 8 classes.


  self._warn_if_super_not_called()


Epoch 1/5
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m811s[0m 8s/step - accuracy: 0.4113 - loss: 1.5960 - val_accuracy: 0.4454 - val_loss: 1.5226
Epoch 2/5
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m804s[0m 8s/step - accuracy: 0.3905 - loss: 1.6208 - val_accuracy: 0.4724 - val_loss: 1.4808
Epoch 3/5
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m799s[0m 8s/step - accuracy: 0.3833 - loss: 1.6020 - val_accuracy: 0.4262 - val_loss: 1.4985
Epoch 4/5
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m802s[0m 8s/step - accuracy: 0.4153 - loss: 1.5453 - val_accuracy: 0.5109 - val_loss: 1.4389
Epoch 5/5
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m802s[0m 8s/step - accuracy: 0.4196 - loss: 1.5571 - val_accuracy: 0.4223 - val_loss: 1.5427




In [22]:
# Convert 'label' column to string type first
df['label'] = df['label'].astype(str)

# Now clean the label column
df['label'] = df['label'].str.strip()

# Check for any null or incorrect values
print(df['label'].isnull().sum())  # Should be 0
print(df['label'].head())  # Check the first few labels


0
0    0
1    0
2    0
3    0
4    0
Name: label, dtype: object


In [21]:
# Check the type of the 'label' column
print(df['label'].dtype)


int64


In [24]:
from google.colab import auth
auth.authenticate_user()


In [26]:
!git clone https://github.com/imranxw/CSE465_Spring2025_Group-12.git


Cloning into 'CSE465_Spring2025_Group-12'...
remote: Enumerating objects: 3, done.[K
remote: Counting objects: 100% (3/3), done.[K
remote: Compressing objects: 100% (3/3), done.[K
remote: Total 3 (delta 0), reused 0 (delta 0), pack-reused 0 (from 0)[K
Receiving objects: 100% (3/3), 323.29 KiB | 1.78 MiB/s, done.


In [27]:
!mv /content/model_final.keras /content/CSE465_Spring2025_Group-12/
!mv /content/your_script.py /content/CSE465_Spring2025_Group-12/


mv: cannot stat '/content/model_final.keras': No such file or directory
mv: cannot stat '/content/your_script.py': No such file or directory


In [28]:
%cd /content/CSE465_Spring2025_Group-12


/content/CSE465_Spring2025_Group-12


In [29]:
!git add .


In [35]:
!git config --global user.name "imranxw"


In [36]:
!git config user.name "imranxw"
!git config user.email "imran.khan05@northsouth.edu"


In [37]:
!git commit -m "Added trained model and evaluation scripts"


On branch main
Your branch is up to date with 'origin/main'.

nothing to commit, working tree clean


In [38]:
!git push origin main


fatal: could not read Username for 'https://github.com': No such device or address


In [39]:
!git status


On branch main
Your branch is up to date with 'origin/main'.

nothing to commit, working tree clean


In [40]:
!ls


Welcome_To_Colab.ipynb


In [41]:
!git add .


In [42]:
!git commit -m "Added trained model and evaluation scripts"


On branch main
Your branch is up to date with 'origin/main'.

nothing to commit, working tree clean


In [45]:
!ls -a


.  ..  .git  Welcome_To_Colab.ipynb


In [44]:
!git add model.h5


fatal: pathspec 'model.h5' did not match any files


In [46]:
model.save('model.h5')




In [47]:
!git add model.h5
!git commit -m "Added trained model"
!git push origin main


[main 7afb6a3] Added trained model
 1 file changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 model.h5
fatal: could not read Username for 'https://github.com': No such device or address


In [48]:
model.save('model.h5')




In [50]:
model.save('model.keras')


In [51]:
import os
print(os.getcwd())


/content/CSE465_Spring2025_Group-12


In [52]:
!ls


model.h5  model.keras  Welcome_To_Colab.ipynb


In [6]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import os

# Set directories
original_data_dir = "/content/drive/MyDrive/CSE465/Mango"  # Folder with original images
augmented_data_dir = "/content/augmented_dataset"  # Folder for augmented images
os.makedirs(augmented_data_dir, exist_ok=True)

# Create an ImageDataGenerator for augmentation
datagen = ImageDataGenerator(
    rotation_range=20,      # Rotate images by 20 degrees
    width_shift_range=0.2,  # Shift width by 20%
    height_shift_range=0.2, # Shift height by 20%
    shear_range=0.2,        # Shear transformation
    zoom_range=0.2,         # Random zoom
    horizontal_flip=True,   # Flip images horizontally
    fill_mode='nearest'     # Fill missing pixels
)

# Load original images and apply augmentation
batch_size = 32  # Number of images to process at a time
num_augmented_images = 0

for class_name in os.listdir(original_data_dir):  # Loop through each category
    class_path = os.path.join(original_data_dir, class_name)
    save_path = os.path.join(augmented_data_dir, class_name)
    os.makedirs(save_path, exist_ok=True)

    for image in os.listdir(class_path):  # Loop through images
        img_path = os.path.join(class_path, image)
        img = tf.keras.preprocessing.image.load_img(img_path)
        img_array = tf.keras.preprocessing.image.img_to_array(img)
        img_array = img_array.reshape((1,) + img_array.shape)  # Reshape for generator

        # Generate new images and save
        i = 0
        for batch in datagen.flow(img_array, batch_size=1, save_to_dir=save_path, save_prefix="aug", save_format="jpg"):
            i += 1
            num_augmented_images += 1
            if i >= 1:  # Generate 1 new image per original image (adjust for 30% increase)
                break

print(f"✅ Data augmentation completed! Added {num_augmented_images} images.")


✅ Data augmentation completed! Added 4000 images.
