# In [None]:
## CNN algorithm
# description: In a CNN we follow multiple steps:
# convolutional layers, where a kernel interacts with the image and an activation function acts on the result to make it non-linear
# max or average pooling, where the spatial dimensions are reduced
# flattening of the tensor into a vector
# fully connected layers, which connect all neurons from the flattening layer to the output
# apart from this there are other terms that will also be involved, like padding, stride, etc.

# important: there are pre trained models for CNN like MobileNet and VGG16. these can be used in future integrating with other methods     


"""
Steps in the Code:

1. Create and normalize the grayscale image.
2. Define the kernel and add padding to the image.
3. Perform convolution with ReLU activation.
4. Apply max pooling to reduce spatial dimensions.
5. Flatten the pooled feature map into a 1D vector.
6. Pass the flattened vector through the fully connected layer.
7. Apply softmax activation for classification.
8. Apply dropout for regularization during training.

"""

# importing the required libraries
import os
import numpy as np
from PIL import Image  # For loading and preprocessing the image
import matplotlib.pyplot as plt  # For displaying the image
import tensorflow as tf  # For building the CNN model
import tensorflow_datasets as tfds  # For loading the dataset


# we need a dataset to train our CNN model
# we use a TensorFlow dataset (TFDS)
# in case you have your own dataset, save the images in different folders,
# one folder per class, so the classes can be identified (for example: class 1 tulip, class 2 rose, class 3 sunflower, etc.)

""" 
data pipeline should be like this
dataset/
    tulip/          # Folder containing all tulip images
        tulip1.jpg
        tulip2.jpg
        ...
    rose/           # Folder containing all rose images
        rose1.jpg
        rose2.jpg
        ...
    sunflower/      # Folder containing all sunflower images
        sunflower1.jpg
        sunflower2.jpg
        ...

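in this dataset downloaded from TFDS, the label ids follow the order of
info.features['label'].names (printed below as Class Names); for tf_flowers
the expected mapping is (verify against the printed list):
dandelion: label = 0
daisy: label = 1
tulips: label = 2
sunflowers: label = 3
roses: label = 4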
        """



# Folder used as the TFDS data directory (replace with your desired folder path)
custom_path = r"flower_data"

# Load tf_flowers from / into the custom folder.
# as_supervised=True makes every example an (image, label) pair;
# with_info=True additionally returns the dataset metadata.
dataset, info = tfds.load(
    'tf_flowers',
    data_dir=custom_path,
    as_supervised=True,
    with_info=True,
)


# Human-readable class names, indexed by label id
class_names = info.features['label'].names
print("Class Names:", class_names)

# Preview the first 10 training examples, one figure per image
preview_examples = dataset['train'].take(10)
for image, label in preview_examples:
    pixels = image.numpy()    # Tensor -> NumPy array so matplotlib can draw it
    label_id = label.numpy()
    plt.imshow(pixels)
    plt.title(f"Label: {label_id}")
    plt.axis('off')
    plt.show()

# It is important to keep the class names and reuse them later, so the model output can be interpreted



# Split the dataset into training and validation sets.
# Shuffle before splitting: if the data were ordered by class, an unshuffled split
# would be biased (e.g. all tulips in the training set, all roses in the validation set).
#
# reshuffle_each_iteration=False is essential here. By default shuffle() draws a new
# order every time the dataset is iterated (i.e. on every epoch), so take()/skip()
# would select different examples each epoch and validation images would leak into
# the training set. Freezing the order with a fixed seed keeps the two subsets
# disjoint and the split reproducible.

shuffled_dataset = dataset['train'].shuffle(1000, seed=42, reshuffle_each_iteration=False)
train_dataset = shuffled_dataset.take(3500)  # First 3500 examples for training
val_dataset = shuffled_dataset.skip(3500)    # Remaining examples for validation


# Preprocess the data: Resize and normalize images

def preprocess(image, label):
    """Resize an image to 32x32 and divide its pixel values by 255.

    Args:
        image: Image tensor to transform (assumed to hold 0-255 pixel
            values, so the result lies in [0, 1]).
        label: Class label of the image; passed through unchanged.

    Returns:
        A ``(image, label)`` tuple with the resized, rescaled image.
    """
    resized = tf.image.resize(image, (32, 32))
    return resized / 255.0, label



# Apply the preprocessing function to the training and validation datasets,
# then group the examples into batches of 32 so the model processes them faster.
# Note: the training set is shuffled (buffer size 1000) BEFORE batching, so that
# individual examples are mixed into different batches on every epoch. Shuffling
# after batching would only reorder whole batches.

train_dataset = train_dataset.map(preprocess).shuffle(1000).batch(32)
val_dataset = val_dataset.map(preprocess).batch(32)



# Print an example batch
for images, labels in train_dataset.take(1):
    print("Image batch shape:", images.shape)
    print("Label batch shape:", labels.shape)



# Define a CNN model.
# This includes: input, conv layers, pooling, flattening, fully connected layers, and the output layer
def create_cnn_model(input_shape, num_classes):
    """Build an (uncompiled) Sequential CNN classifier.

    Architecture: input -> [Conv2D + MaxPooling2D] x 2 -> Flatten ->
    Dense(128) -> Dropout(0.5) -> Dense(num_classes, softmax).

    Args:
        input_shape: Shape of one input image, e.g. ``(32, 32, 3)``.
        num_classes: Number of output classes (units of the softmax layer).

    Returns:
        A ``tf.keras.Sequential`` model; the caller is responsible for
        compiling and training it.
    """
    model = tf.keras.Sequential([
        # Explicit input layer: passing input_shape= to the first layer is
        # deprecated in current Keras versions and triggers a warning.
        tf.keras.layers.Input(shape=input_shape),

        # Convolutional Layer 1
        tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),  # 1st hidden layer (convolutional)
        tf.keras.layers.MaxPooling2D((2, 2)),  # Pooling Layer 1

        # Convolutional Layer 2
        tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),  # 2nd hidden layer (convolutional)
        tf.keras.layers.MaxPooling2D((2, 2)),  # Pooling Layer 2

        # Flatten the feature maps into a vector
        tf.keras.layers.Flatten(),

        # Fully Connected Layer
        tf.keras.layers.Dense(128, activation='relu'),  # 3rd hidden layer (fully connected)
        # 50 % dropout for regularization: during training, half of the
        # activations of the previous layer are randomly set to zero.
        tf.keras.layers.Dropout(0.5),

        # Output Layer
        tf.keras.layers.Dense(num_classes, activation='softmax')  # Softmax for multi-class classification
    ])
    return model


# Pooling, dropout, and flattening layers are not considered hidden layers.
# Possible upgrade: increase the number of hidden (convolutional) layers.
# Define the input shape and number of classes
input_shape = (32, 32, 3)  # Images are resized to 32x32 with 3 channels (RGB)
# Take the class count from the dataset metadata instead of hard-coding it,
# so the output layer always matches the labels (5 flower classes for tf_flowers).
num_classes = info.features['label'].num_classes


# Create the CNN model
model = create_cnn_model(input_shape, num_classes)


# Print the model summary
model.summary()


# A learning-rate scheduler adjusts the learning rate during training.
# ExponentialDecay starts at 0.01 and multiplies the rate by 0.9 per 1000 optimizer steps.
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    0.01,
    decay_steps=1000,
    decay_rate=0.9,
)


# Adam optimizer driven by the schedule above
optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)


# Compile the model; the sparse loss is used because the labels are integer class ids
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=optimizer,
    metrics=['accuracy'],
)


# Other optimizers that can be used: SGD (Stochastic Gradient Descent), RMSprop, Adagrad
# Other metrics: Precision, Recall, and F1-Score (useful for imbalanced datasets) and Mean Squared Error (for regression tasks).
# Train the model (increase the number of epochs for longer training)
history = model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=30,
)


# Save the trained model to disk
model.save('cnn_model.h5')


# Plot the training and validation accuracy and loss over the epochs.
# history.history maps each metric name to a list with one value per epoch.
train_acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
train_loss = history.history['loss']
val_loss = history.history['val_loss']

# One figure per metric: (figure title, ((curve label, values), ...))
figures = (
    ('Accuracy over Epochs', (('Training Accuracy', train_acc),
                              ('Validation Accuracy', val_acc))),
    ('Loss over Epochs', (('Training Loss', train_loss),
                          ('Validation Loss', val_loss))),
)

for figure_title, curves in figures:
    for curve_label, values in curves:
        plt.plot(values, label=curve_label)
    plt.legend()
    plt.title(figure_title)
    plt.show()


# Test the model on new images

# Path to your test folder (built with os.path.join so it works on any OS)
test_folder = os.path.join("flower_data", "test")

# The class names must be in the same index order as the labels used in training,
# so take them from the dataset metadata instead of hard-coding a list.
class_names = info.features['label'].names

# File extensions treated as images (compared case-insensitively)
image_extensions = ('.jpg', '.png', '.jpeg', '.jif')

# Loop through each image in the folder (sorted for a deterministic order)
for file_name in sorted(os.listdir(test_folder)):
    # Skip anything that is not an image file
    if not file_name.lower().endswith(image_extensions):
        continue

    file_path = os.path.join(test_folder, file_name)

    # Best-effort loop: a file that cannot be processed is reported and skipped
    try:
        # Step 1: Load the image and force 3 RGB channels, so grayscale,
        # palette and RGBA files match the model's (32, 32, 3) input.
        # The context manager closes the file handle once the pixels are copied.
        with Image.open(file_path) as raw_image:
            rgb_image = raw_image.convert('RGB')

        # Reuse the training-time preprocessing (resize to 32x32, scale by 1/255)
        # so inference sees images prepared exactly like the training data.
        image_tensor, _ = preprocess(np.array(rgb_image), None)
        image_array = image_tensor.numpy()
        image_batch = np.expand_dims(image_array, axis=0)  # Add batch dimension

        # Step 2: Predict the class probabilities
        predictions = model.predict(image_batch)

        # Step 3: Get the class index with the highest probability
        predicted_class = int(np.argmax(predictions[0]))
        predicted_class_name = class_names[predicted_class]

        # Step 4: Display the (resized) image with its prediction
        plt.imshow(image_array)
        plt.axis('off')
        plt.title(f"Predicted: {predicted_class_name}")
        plt.show()

    except Exception as e:
        print(f"Error processing file {file_name}: {e}")
