In [None]:
import os
import shutil
import subprocess
import sys

import cv2
import matplotlib.pyplot as plt
import numpy as np

# Standard Utility Libraries
import seaborn as sns
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# TensorFlow/Keras Deep Learning Libraries
from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.models import Sequential, save_model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import to_categorical, plot_model

In [None]:
# --- Configuration ---
# Kaggle dataset to fetch, the directory it is downloaded into, and the
# sub-folder of that directory that holds one folder of images per class.
KAGGLE_DATASET_ID = 'alxmamaev/flowers-recognition'
DESTINATION_DIR = './Image_CLF_Datasets/'
FLOWERS_DIR = os.path.join(DESTINATION_DIR, 'flowers/')

# Side length in pixels: every image is resized to SIZE x SIZE before training.
SIZE = 128
# --- End Configuration ---

In [None]:
# One-time Kaggle credential setup. These are shell commands: run them in a
# terminal (not in this cell), adjusting the kaggle.json source path as needed.

# # create config dir
# mkdir -p ~/.config/kaggle

# # move kaggle.json (example: you uploaded it to the workspace root)
# mv /workspaces/flower-detection/kaggle.json ~/.config/kaggle/

# # set secure permissions and make the current user the owner
# chmod 600 ~/.config/kaggle/kaggle.json
# sudo chown $(id -un):$(id -gn) ~/.config/kaggle -R

# # verify
# ls -l ~/.config/kaggle

In [None]:
# Ensure Kaggle credentials are present before importing kaggle, so a missing
# token produces one clear message instead of a failure inside the import.
#
# kaggle.json is searched for in, in order: $KAGGLE_CONFIG_DIR (if set),
# ~/.config/kaggle (the location used by this notebook's setup notes) and
# ~/.kaggle (the location named in the download error message further down).
kaggle_json = os.path.expanduser("~/.config/kaggle/kaggle.json")
for _config_dir in (os.environ.get("KAGGLE_CONFIG_DIR"), "~/.config/kaggle", "~/.kaggle"):
    if not _config_dir:
        continue
    _candidate = os.path.join(os.path.expanduser(_config_dir), "kaggle.json")
    if os.path.isfile(_candidate):
        kaggle_json = _candidate
        break

if os.path.isfile(kaggle_json):
    # Restrict the token to the current user (owner read/write only).
    os.chmod(kaggle_json, 0o600)
    import kaggle
    print(f"Kaggle API configured via {kaggle_json}")
elif os.environ.get("KAGGLE_USERNAME") and os.environ.get("KAGGLE_KEY"):
    import kaggle
    print("Kaggle API configured via environment variables")
else:
    raise SystemExit(
        "Kaggle credentials not found. Place kaggle.json in ~/.config/kaggle/ (or ~/.kaggle/) "
        "or set KAGGLE_USERNAME/KAGGLE_KEY."
    )

In [None]:
## 1. Data Fetching and Preparation 🚀

# Ensure the Kaggle package is importable, installing it on demand.
try:
    import kaggle
    print("Kaggle API successfully imported.")
except ImportError:
    print("Kaggle library not found. Installing now...")
    # Invoke pip through the interpreter running this kernel so the package is
    # installed into the environment the notebook actually uses; a bare `pip`
    # on PATH may belong to a different Python installation.
    subprocess.run([sys.executable, '-m', 'pip', 'install', 'kaggle'], check=True)
    import kaggle # Re-import after installation

In [None]:
def download_and_extract_kaggle_dataset(dataset_id, dest_dir):
    """Download a Kaggle dataset with the Kaggle CLI and unpack it into ``dest_dir``.

    Args:
        dataset_id: Kaggle dataset identifier in ``owner/slug`` form.
        dest_dir: Directory that receives the archive and its extracted
            contents. Created if it does not exist.

    Returns:
        True when the archive was downloaded and extracted and a ``flowers``
        folder exists in ``dest_dir`` afterwards. False on any failure (missing
        CLI, failed download, no or unreadable archive, unexpected archive
        layout); a message describing the problem is printed in that case.
    """
    os.makedirs(dest_dir, exist_ok=True)

    # Use the Kaggle CLI to download the dataset zip into dest_dir
    print(f"Downloading dataset: {dataset_id}...")
    try:
        subprocess.run(
            ['kaggle', 'datasets', 'download', '-d', dataset_id, '-p', dest_dir],
            check=True,
        )
        print("Download complete.")
    except FileNotFoundError:
        # Raised when the `kaggle` executable itself cannot be found.
        print("\nERROR: The 'kaggle' command was not found on PATH. Install it with 'pip install kaggle'.")
        return False
    except subprocess.CalledProcessError:
        print("\nERROR: Kaggle download failed. Please check credentials and connection.")
        print("Ensure the 'kaggle.json' file is correctly placed in ~/.kaggle/ or ~/.config/kaggle/ for API authentication.")
        return False

    # Locate the downloaded archive. Sorting makes the fallback choice deterministic.
    zip_files = sorted(f for f in os.listdir(dest_dir) if f.endswith('.zip'))
    if not zip_files:
        print("Error: Downloaded zip file not found.")
        return False

    # Prefer an archive named after the dataset slug (the name the CLI is
    # expected to use), so an unrelated zip already sitting in dest_dir is not
    # extracted and deleted by mistake. Fall back to the first zip otherwise.
    expected_zip = dataset_id.split('/')[-1] + '.zip'
    zip_name = expected_zip if expected_zip in zip_files else zip_files[0]
    zip_path = os.path.join(dest_dir, zip_name)

    print(f"Extracting {zip_name}...")
    try:
        shutil.unpack_archive(zip_path, dest_dir)
    except shutil.ReadError as exc:
        print(f"Error: could not extract {zip_name}: {exc}")
        return False
    os.remove(zip_path) # Clean up the zip file

    # Only report success once the expected class-folder root is really there.
    extracted_path = os.path.join(dest_dir, 'flowers')
    if not os.path.isdir(extracted_path):
        print(f"Error: expected folder '{extracted_path}' was not found after extraction.")
        return False

    print(f"Data ready at: {extracted_path}")
    return True

In [None]:
# Download only when the flowers folder is missing or empty
dataset_available = os.path.isdir(FLOWERS_DIR) and len(os.listdir(FLOWERS_DIR)) > 0

if dataset_available:
    print(f"Dataset already found at {FLOWERS_DIR}. Skipping download.")
else:
    print("Dataset not found locally. Starting download and extraction...")
    success = download_and_extract_kaggle_dataset(KAGGLE_DATASET_ID, DESTINATION_DIR)
    if not success:
        raise SystemExit("Dataset setup failed. Cannot proceed with training.")

In [None]:
# --- Data Loading and Preprocessing ---
data = []
label = []

# Assuming the 'flowers' folder contains subfolders, where each subfolder name is a class label.
# Listings are sorted so the sample order, and therefore the seeded split
# below, is the same on every machine (os.listdir order is arbitrary).
for folder in sorted(os.listdir(FLOWERS_DIR)):
    folder_path = os.path.join(FLOWERS_DIR, folder)
    if not os.path.isdir(folder_path):
        continue
    for file in sorted(os.listdir(folder_path)):
        if not file.endswith("jpg"):
            continue
        img = cv2.imread(os.path.join(folder_path, file))
        if img is None:
            # cv2.imread returns None for unreadable files. Skip the image AND
            # its label so `data` and `label` stay index-aligned.
            continue
        # Convert (BGR to RGB) and resize the image
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        im = cv2.resize(img_rgb, (SIZE, SIZE))
        data.append(im)
        label.append(folder)

if not data:
    raise SystemExit(f"No readable .jpg images found under {FLOWERS_DIR}. Cannot proceed with training.")

# Convert data into numerical values. float32 halves the memory of the image
# tensor compared with NumPy's default float64 result of the division below.
X = np.array(data, dtype=np.float32)
label_arr = np.array(label)

# Use label encoder and one-hot encode the data
encoder = LabelEncoder()
y_encoded = encoder.fit_transform(label_arr)

# Get the list of class names in the order they were encoded
categories = encoder.classes_
NUM_CLASSES = len(categories)
print(f"\nDetected {NUM_CLASSES} classes: {categories}")

# One-hot encoding
y = to_categorical(y_encoded, NUM_CLASSES)

# Normalize the image data (scaling pixel values to [0, 1]); in place, so no
# second full-size copy of the array is created.
X /= 255.0

# Split the dataset into 70% training and 30% testing set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=42)
print(f"Train samples: {X_train.shape[0]}, Test samples: {X_test.shape[0]}")

In [None]:
## 2. Build Convolutional Neural Network (CNN) Architecture 🏗️

model = Sequential(name='Flower_Classifier_CNN')

# First Convolutional Block
model.add(Conv2D(filters=64, kernel_size=(3,3), padding='same', activation='relu', input_shape=(SIZE, SIZE, 3), name='Conv_1_64'))
model.add(MaxPooling2D(pool_size=(2,2), name='Pool_1'))

# Second Convolutional Block (Triple Conv Layers)
model.add(Conv2D(filters=128, kernel_size=(3,3), padding='same', activation='relu', name='Conv_2_128_a'))
model.add(Conv2D(filters=128, kernel_size=(3,3), padding='same', activation='relu', name='Conv_2_128_b'))
model.add(Conv2D(filters=128, kernel_size=(3,3), padding='same', activation='relu', name='Conv_2_128_c'))
model.add(MaxPooling2D(pool_size=(2,2), name='Pool_2'))

# Fully Connected Layers (Classifier)
model.add(Flatten(name='Flatten_Layer'))
model.add(Dense(128, activation='relu', name='Dense_128'))
model.add(Dense(64, activation='relu', name='Dense_64'))
model.add(Dropout(rate=0.25, name='Dropout_0_25'))
# Output Layer: one unit per detected class. The layer name is derived from
# NUM_CLASSES so it cannot contradict the actual output width.
model.add(Dense(NUM_CLASSES, activation='softmax', name=f'Output_{NUM_CLASSES}_Classes'))

print("\n--- Model Summary ---")
model.summary()

In [None]:
# Write a diagram of the network to disk (requires pydot and graphviz)
try:
    plot_model(
        model,
        to_file='model_architecture.png',
        show_shapes=True,
        show_layer_names=True,
    )
except ImportError:
    print("\nWarning: pydot or graphviz not installed. Skipping model plot generation.")
else:
    print("\nModel architecture saved to model_architecture.png")

In [None]:
## 3. Data Augmentation and Model Training ⚙️

# Image Data Augmentation settings (random transforms applied per batch)
datagen = ImageDataGenerator(
    rotation_range=20,
    zoom_range=0.20,
    width_shift_range=0.3,
    height_shift_range=0.3,
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode='nearest'
)

# NOTE: datagen.fit(X_train) is deliberately not called. It only computes
# dataset statistics for featurewise centering/normalization or ZCA whitening,
# none of which is enabled above, so it would just copy the training array.

# Compile the model
model.compile(
    optimizer=Adam(learning_rate=0.0001),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

batch_size = 32
epochs = 64

# Use Model.fit() which replaces the deprecated Model.fit_generator
print("\n--- Model Training Started ---")
history = model.fit(
    datagen.flow(X_train, y_train, batch_size=batch_size),
    epochs=epochs,
    validation_data=(X_test, y_test),
    verbose=2
)
print("--- Model Training Finished ---")

# Save the trained model artifact
save_model(model, 'flower_classifier_model.h5')
print("\nTrained model saved as 'flower_classifier_model.h5'")

In [None]:
## 4. Evaluation and Visualization 📊

# --- A. Plot Training History ---
def plot_history(history):
    """Draw train/validation accuracy and loss curves side by side.

    Args:
        history: Object whose ``history`` attribute maps the keys
            'accuracy', 'val_accuracy', 'loss' and 'val_loss' to per-epoch
            value sequences (as returned by ``model.fit``).
    """
    fig, axes = plt.subplots(1, 2, figsize=(15, 5))

    # (history key, display name, legend position) for the left and right panel
    panels = (
        ('accuracy', 'Accuracy', 'lower right'),
        ('loss', 'Loss', 'upper right'),
    )
    for panel_ax, (metric, display_name, legend_loc) in zip(axes, panels):
        panel_ax.plot(history.history[metric], label=f'Train {display_name}')
        panel_ax.plot(history.history[f'val_{metric}'], label=f'Validation {display_name}')
        panel_ax.set_title(f'Model {display_name}')
        panel_ax.set_ylabel(display_name)
        panel_ax.set_xlabel('Epoch')
        panel_ax.legend(loc=legend_loc)
        panel_ax.grid(True)

    plt.show()

plot_history(history)

In [None]:
# --- B. Visualize Predictions on Test Set ---

# Run prediction once for efficiency
y_pred_probs = model.predict(X_test)
y_pred_classes = np.argmax(y_pred_probs, axis=1)
y_true_classes = np.argmax(y_test, axis=1)

# Display up to 36 distinct random test images with predictions
fig, ax = plt.subplots(6, 6, figsize=(25, 25))
fig.suptitle("CNN Flower Classification Results", fontsize=24)

grid_cells = ax.ravel()
n_shown = min(grid_cells.size, len(X_test))
# Seeded, without replacement: the same image never appears twice and the
# figure is reproducible across runs.
sample_indices = np.random.default_rng(42).choice(len(X_test), size=n_shown, replace=False)

for cell, k in zip(grid_cells, sample_indices):
    true_label = categories[y_true_classes[k]]
    pred_label = categories[y_pred_classes[k]]

    is_correct = (true_label == pred_label)
    color = 'green' if is_correct else 'red'

    cell.set_title(f"TRUE: {true_label}", color=color)
    cell.set_xlabel(f"PREDICTED: {pred_label}", color=color)
    cell.imshow(X_test[k])
    # Hide only the ticks. axis('off') would also hide the x-label, which is
    # where the predicted class is shown.
    cell.set_xticks([])
    cell.set_yticks([])

# Blank any grid cells left over when the test set has fewer than 36 images
for cell in grid_cells[n_shown:]:
    cell.axis('off')

plt.tight_layout(rect=[0, 0, 1, 0.96])
plt.show()

# --- C. Plot Confusion Matrix ---

# Pass the full label set so the matrix always has one row/column per class
# and lines up with the tick labels, even if a class is absent from the test
# split and never predicted.
cm = confusion_matrix(y_true_classes, y_pred_classes, labels=np.arange(len(categories)))
plt.figure(figsize=(10, 8))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=categories, yticklabels=categories)
plt.title('Confusion Matrix')
plt.ylabel('True Label')
plt.xlabel('Predicted Label')
plt.show()

In [None]:
import os
import cv2
import numpy as np

#Encoding and Split data into Train/Test Sets
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split

#Tensorflow Keras CNN Model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten, Activation, Conv2D, MaxPooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam, SGD, Adagrad,Adadelta,RMSprop

#Plot Images
import matplotlib.pyplot as plt

# Root folder holding one sub-folder of images per flower class. Defaults to
# the original Google Drive location; set the FLOWERS_DATA_DIR environment
# variable to run against a copy stored elsewhere.
folder_dir = os.environ.get('FLOWERS_DATA_DIR', '/content/drive/MyDrive/Image_CLF_Datasets/flowers')

In [None]:
data = []
label = []

# Resize (not crop) every image to SIZE x SIZE pixels
SIZE = 128

# Each sub-folder name of folder_dir is used as the class label of its images.
# Listings are sorted so the sample order is the same on every run/machine.
for folder in sorted(os.listdir(folder_dir)):
    for file in sorted(os.listdir(os.path.join(folder_dir, folder))):
        if not file.endswith("jpg"):
            continue
        img = cv2.imread(os.path.join(folder_dir, folder, file))
        if img is None:
            # cv2.imread returns None for unreadable files; passing that to
            # cvtColor would raise. Skip the file (and its label).
            continue
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        im = cv2.resize(img_rgb, (SIZE,SIZE))
        data.append(im)
        # Append the label only after the image loaded, keeping both lists aligned
        label.append(folder)

In [None]:
# Stack the loaded images and their labels into NumPy arrays
data_arr, label_arr = np.array(data), np.array(label)

In [None]:
# print(data_arr)

In [None]:
# print(label_arr)

In [None]:
# Use label encoder and normalize the data
encoder = LabelEncoder()
y = encoder.fit_transform(label_arr)
print(y)
# One-hot encode into 5 columns
y = to_categorical(y,5)
print(y)
# Scale pixels to [0, 1]. Casting to float32 first keeps the image tensor at
# half the size of the float64 array a plain `data_arr/255` would create.
X = data_arr.astype(np.float32) / 255

In [None]:
# Split the dataset into 70% training and 30% testing sets
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.30,
    random_state=10,
)

In [None]:
# Build the neural network for flower classification:
# one 64-filter conv block, one block of three 128-filter convs, then a dense
# classifier ending in a 5-way softmax.

model = Sequential([
    Conv2D(filters=64, kernel_size=(3, 3), padding='Same', activation='relu', input_shape=(SIZE, SIZE, 3)),
    MaxPooling2D(pool_size=(2, 2)),

    Conv2D(filters=128, kernel_size=(3, 3), padding="Same", activation='relu'),
    Conv2D(filters=128, kernel_size=(3, 3), padding="Same", activation='relu'),
    Conv2D(filters=128, kernel_size=(3, 3), padding="Same", activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),

    Flatten(),
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dropout(rate=0.25),
    Dense(5, activation='softmax'),
])

In [None]:
# summary() prints the table itself and returns None, so wrapping it in
# print() would emit a stray "None" line after the table.
model.summary()

In [None]:
# We need to create more training images to prevent overfitting before compiling

datagen = ImageDataGenerator(
    rotation_range = 20,
    zoom_range = 0.20,
    width_shift_range = 0.3,
    height_shift_range = 0.3,
    horizontal_flip = True,
    vertical_flip = True
)

# NOTE: datagen.fit(X_train) is deliberately not called. It only computes
# dataset statistics for featurewise centering/normalization or ZCA whitening,
# none of which is enabled above, so it would just copy the training array.

In [None]:
# Compile the model, then train it on augmented batches

model.compile(optimizer=Adam(learning_rate=0.0001),loss='categorical_crossentropy',metrics=['accuracy'])
batch_size = 32
epochs = 64
# Model.fit() accepts a generator directly and replaces the deprecated
# Model.fit_generator, which may be missing in newer Keras releases.
history = model.fit(datagen.flow(X_train,y_train, batch_size=batch_size),
                    epochs = epochs,
                    validation_data = (X_test,y_test),
                    verbose = 2)

In [None]:
# let model identify the flower

# Class names in the index order used by the label encoder, so index i of the
# one-hot / softmax vectors always maps to the right name.
categories = encoder.classes_

# Predict the whole test set once instead of once (or twice) per plotted image
pred_classes = np.argmax(model.predict(X_test), axis=1)
true_classes = np.argmax(y_test, axis=1)

fig, ax = plt.subplots(6,6, figsize=(25,40))

for i in range(6):
    for j in range(6):
        # Pick a random test image for this grid cell
        k = int(np.random.random_sample() * len(X_test))
        true_name = categories[true_classes[k]]
        pred_name = categories[pred_classes[k]]
        # Green when the prediction matches the ground truth, red otherwise
        color = 'green' if true_name == pred_name else 'red'

        ax[i,j].set_title("TRUE: " + true_name, color=color)
        ax[i,j].set_xlabel("PREDICTED: " + pred_name, color=color)
        # X_test[k] is already an RGB image, for which a colormap is ignored
        ax[i,j].imshow(np.array(X_test)[k].reshape(SIZE,SIZE,3))

plt.show()