# **Date Fruit Classification**
In this project, we aim to classify different types of date fruits using machine learning techniques. The dataset consists of images of 9 different classes of dates.

In [None]:
import os
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import img_to_array, load_img

## **Data Exploration**

In [None]:
# Root folders of the dataset; each one is expected to contain one sub-folder per class.
train_dir = 'train'
test_dir = 'test'

# Only sub-directories count as classes. A stray file (e.g. .DS_Store) would otherwise
# be reported as a class and make the per-class os.listdir call fail.
train_classes = sorted(
    entry for entry in os.listdir(train_dir)
    if os.path.isdir(os.path.join(train_dir, entry))
)
print(f'There are {len(train_classes)} classes in the training set')

num_of_images = 0
for class_name in train_classes:
    # List each class folder once and reuse the count for the message and the total.
    class_count = len(os.listdir(os.path.join(train_dir, class_name)))
    print(f'There are {class_count} images in {class_name} class')
    num_of_images += class_count

print(f'There are {num_of_images} images in total in training set')
    

In [None]:
# Only sub-directories count as classes. A stray file (e.g. .DS_Store) would otherwise
# be reported as a class and make the per-class os.listdir call fail.
test_classes = sorted(
    entry for entry in os.listdir(test_dir)
    if os.path.isdir(os.path.join(test_dir, entry))
)
print(f'There are {len(test_classes)} classes in the test set')

num_of_images = 0
for class_name in test_classes:
    # List each class folder once and reuse the count for the message and the total.
    class_count = len(os.listdir(os.path.join(test_dir, class_name)))
    print(f'There are {class_count} images in {class_name} class')
    num_of_images += class_count
print(f'There are {num_of_images} images in total in test set')

### **Samples of The Data**

In [None]:
import matplotlib.pyplot as plt
def show_first_image(class_name):
    """Display the first image (in sorted file-name order) of one training class.

    Args:
        class_name: Name of the class sub-folder inside ``train_dir``.

    Returns:
        The sorted list of file names found in that class folder.
    """
    print(f"Sample of {class_name} Dates")
    class_dir = os.path.join(train_dir, class_name)
    # os.listdir returns entries in arbitrary order; sorting makes the sample reproducible.
    image_names = sorted(os.listdir(class_dir))
    plt.imshow(load_img(os.path.join(class_dir, image_names[0])))
    plt.show()
    return image_names


print("Sample of images in training set")
# The listings stay in module-level variables; `ajwa_images` is reused by the
# image resolution check further down.
ajwa_images = show_first_image('Ajwa')
galaxy_images = show_first_image('Galaxy')
sokari_images = show_first_image('Sokari')

### **Image Resolution Check**

In the upcoming steps, we'll be **reducing the resolution** of our images. This strategy helps us **decrease the number of trainable parameters**, thereby making our model more efficient. Importantly, we aim to achieve this **without compromising the accuracy** of our classification.

In [None]:
# Load the first listed Ajwa training image, convert it to an array and report
# the array's shape so the original resolution is visible before resizing.
first_ajwa_path = os.path.join(train_dir, 'Ajwa', ajwa_images[0])
sample_img = load_img(first_ajwa_path)
img_array = img_to_array(sample_img)
print(f'Shape of image array: {img_array.shape}')

## **Training and Validation Generators**
We will use Keras' **ImageDataGenerator** to load the images from disk and augment the data on the fly.

In [None]:
def train_val_generators(TRAINING_DIR, batch_size=32, target_size=(200, 200), validation_split=0.1):
    """Build the training and validation image generators from one directory.

    Both generators rescale pixel values by 1/255 and read from ``TRAINING_DIR``,
    which is expected to contain one sub-folder per class. Only the training
    generator applies augmentation (random horizontal flips); the validation
    generator yields un-augmented images so validation metrics are measured on
    the real data.

    Args:
        TRAINING_DIR: Path of the directory holding the class sub-folders.
        batch_size: Number of images per batch for both generators.
        target_size: ``(height, width)`` every image is resized to.
        validation_split: Fraction of the images reserved for validation.

    Returns:
        A ``(train_generator, validation_generator)`` tuple.
    """
    train_datagen = ImageDataGenerator(rescale=1./255,
                                       validation_split=validation_split,
                                       horizontal_flip=True)
    # Separate generator without augmentation. It must use the same validation_split
    # so that its 'validation' subset is the complement of the 'training' subset above.
    validation_datagen = ImageDataGenerator(rescale=1./255,
                                            validation_split=validation_split)

    # Arguments common to both flows, kept in one place so they cannot drift apart.
    shared_flow_args = dict(directory=TRAINING_DIR,
                            batch_size=batch_size,
                            class_mode='categorical',
                            target_size=target_size)

    train_generator = train_datagen.flow_from_directory(subset='training',
                                                        **shared_flow_args)
    validation_generator = validation_datagen.flow_from_directory(subset='validation',
                                                                  **shared_flow_args)
    return train_generator, validation_generator

In [None]:
# Create the training/validation generators from the training directory.
train_generator, validation_generator = train_val_generators(TRAINING_DIR=train_dir)

## **Training the Model**

With our dataset prepared and ready, we can now shift our focus towards constructing and training our machine learning model. This step involves defining the architecture of the model, compiling it, and then training it on our data. We will use **transfer learning** to improve the model's accuracy by reusing the weights of **MobileNet**, a compact and efficient pre-trained model.

In [None]:
from tensorflow.keras.applications import MobileNet
from tensorflow.keras.optimizers import SGD
# Load MobileNet with ImageNet weights and without its classification head.
base_model = MobileNet(weights='imagenet', include_top=False, input_shape=(200, 200, 3))

# Freeze the whole backbone so that only the new classification head is trained.
base_model.trainable = False

model = tf.keras.models.Sequential([
    tf.keras.Input(shape=(200, 200, 3)),
    # The data generators feed pixels scaled to [0, 1] (rescale=1./255), whereas
    # MobileNet's ImageNet weights expect inputs in [-1, 1]. Mapping the range inside
    # the model keeps every caller (training, evaluation, the saved model) on [0, 1].
    tf.keras.layers.Rescaling(scale=2.0, offset=-1.0),
    base_model,
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    # One output unit per date class.
    tf.keras.layers.Dense(9, activation='softmax')
])

model.compile(optimizer=SGD(learning_rate=0.0005), loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

In [None]:
class myCallback(tf.keras.callbacks.Callback):
    """Stop training once validation accuracy exceeds a threshold.

    Args:
        threshold: Validation accuracy (as a fraction) above which training is
            stopped. Defaults to 0.95.
    """

    def __init__(self, threshold=0.95):
        super().__init__()
        self.threshold = threshold

    def on_epoch_end(self, epoch, logs=None):
        """Check the epoch's validation accuracy and request a stop when it is high enough."""
        # `logs` may be None and 'val_accuracy' may be missing (e.g. no validation
        # data); comparing None with a float would raise TypeError, so guard first.
        val_accuracy = (logs or {}).get('val_accuracy')
        if val_accuracy is not None and val_accuracy > self.threshold:
            print(f"\nReached {self.threshold:.0%} validation accuracy so cancelling training!")
            self.model.stop_training = True

### Train the model with the optimal learning rate
The **learning rate** set in the compile step above (`0.0005`) was chosen after much trial and error.

In [None]:

# Keras documents `callbacks` as a list of Callback instances, so wrap the
# early-stopping callback in a list instead of passing the bare object.
callbacks = [myCallback()]
history = model.fit(
    train_generator,
    epochs=50,
    validation_data=validation_generator,
    callbacks=callbacks,
    verbose=2,
)

## **Visualizing the Results**

In this section, we'll take a closer look at the outcomes of our model training. By visualizing these results, we can gain deeper insights into the performance of our model and identify potential areas for improvement.

In [None]:
# Per-epoch metrics recorded by model.fit.
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

epochs = range(len(acc))

# Training and validation accuracy
fig_acc, ax_acc = plt.subplots()
ax_acc.plot(epochs, acc, 'r', label='Training accuracy')
ax_acc.plot(epochs, val_acc, 'b', label='Validation accuracy')
ax_acc.set_title('Training and validation accuracy')
ax_acc.set_xlabel('Epoch')
ax_acc.set_ylabel('Accuracy')
ax_acc.legend(loc=0)

# Training and validation loss
fig_loss, ax_loss = plt.subplots()
ax_loss.plot(epochs, loss, 'r', label='Training Loss')
ax_loss.plot(epochs, val_loss, 'b', label='Validation Loss')
ax_loss.set_title('Training and validation loss')
ax_loss.set_xlabel('Epoch')
ax_loss.set_ylabel('Loss')
# Without a legend the two curves cannot be told apart.
ax_loss.legend(loc=0)

plt.show()

In [None]:
import numpy as np
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns
# Test images are only rescaled by 1/255; no augmentation is configured here.
test_datagen = ImageDataGenerator(rescale=1./255)
# shuffle=False keeps the prediction order aligned with test_generator.classes.
test_generator = test_datagen.flow_from_directory(test_dir,
                                                  target_size=(200, 200),
                                                  batch_size=1,
                                                  class_mode='categorical',
                                                  shuffle=False)
y_pred = model.predict(test_generator, steps=len(test_generator), verbose=1)
# Convert prediction probabilities to class labels
y_pred_classes = np.argmax(y_pred, axis=1)
y_true = test_generator.classes
# Get class labels
class_labels = list(test_generator.class_indices.keys())
# Pass the full label set explicitly so the report and the matrix always cover every
# class, even one that has no test image or is never predicted.
label_ids = list(range(len(class_labels)))
print("Classification Report:")
print(classification_report(y_true, y_pred_classes, labels=label_ids, target_names=class_labels))

print("Confusion Matrix:")
matrix = confusion_matrix(y_true, y_pred_classes, labels=label_ids)
plt.figure(figsize=(8, 6))
# fmt='d' prints the integer counts as-is; the default format switches to
# scientific notation for counts of 100 or more.
sns.heatmap(matrix, annot=True, fmt='d', cbar=False, cmap='Blues',
            xticklabels=class_labels, yticklabels=class_labels)
plt.xlabel('Predicted Labels')
plt.ylabel('True Labels')
plt.title('Confusion Matrix')
plt.show()

## **Saving The Model**

In [None]:
# Save the model attached to the training history under the relative path 'model.keras'.
history.model.save(filepath='model.keras')