In [None]:
# dataset : https://www.kaggle.com/datasets/imbikramsaha/caltech-101
# model link : https://www.kaggle.com/datasets/keras/vgg16/

In [1]:
import tensorflow as tf
from tensorflow import keras
import numpy as np

In [2]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [3]:
# Root folder of the Caltech-101 images that flow_from_directory reads from.
dataset_dir = "./caltech-101-img"

# Number of images per generator batch. It is large on purpose: the next cell
# pulls whole batches out of the generator and uses them as in-memory splits.
batch_size = 2000

# Scale raw pixel values from [0, 255] down to [0, 1].
# NOTE(review): Keras documents vgg16.preprocess_input as the input
# preprocessing for VGG16; plain 1/255 rescaling differs from it - confirm
# that this is intentional.
dataset_datagen = ImageDataGenerator(rescale=1.0 / 255)

dataset_generator = dataset_datagen.flow_from_directory(
    directory=dataset_dir,
    target_size=(64, 64),      # every image is resized to 64x64 pixels
    class_mode='categorical',  # labels come back one-hot encoded
    batch_size=batch_size,
)

Found 9144 images belonging to 102 classes.


In [4]:
# Use generator batch 0 as the training split and batch 1 as the test split.
# NOTE(review): only these two batches (2 x 2000 of the 9144 images found)
# are ever loaded; the remaining images are unused.
train_batch = dataset_generator[0]
test_batch = dataset_generator[1]

x_train, y_train = train_batch
x_test, y_test = test_batch

# Report how many images ended up in each split.
for split_images in (x_train, x_test):
    print(len(split_images))

2000
2000


In [5]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam

In [6]:
from tensorflow.keras.applications import VGG16

In [7]:
# Local VGG16 weights file (the "notop" variant, i.e. without the classifier
# head), loaded from disk instead of being downloaded.
weights_path = "vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5"

# Convolutional base only (include_top=False), sized for the 64x64 RGB images
# produced by the data generator.
base_model = VGG16(
    include_top=False,
    weights=weights_path,
    input_shape=(64, 64, 3),
)

In [8]:
# Freeze the whole convolutional base so that training only updates the
# layers stacked on top of it.
for vgg_layer in base_model.layers:
    vgg_layer.trainable = False

In [9]:
# Classification head stacked on the frozen VGG16 base.
# Flatten unrolls the base's 3D feature map into a 1D vector so that it can
# feed the Dense layers (presumably (2, 2, 512) -> 2048 values for a 64x64
# input - confirm with base_model.output_shape).
flat_features = Flatten()(base_model.output)
hidden = Dense(64, activation='relu')(flat_features)
# One softmax unit per class; the data generator reported 102 classes.
predictions = Dense(102, activation='softmax')(hidden)

# Wire the base input to the new head and compile for one-hot labels.
model = Model(inputs=base_model.input, outputs=predictions)
model.compile(
    optimizer="adam",
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [10]:
# Train the head for 10 epochs in mini-batches of 64, evaluating on the test
# split after every epoch.
model.fit(
    x=x_train,
    y=y_train,
    batch_size=64,
    epochs=10,
    validation_data=(x_test, y_test),
)

Epoch 1/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 1s/step - accuracy: 0.1058 - loss: 4.3117 - val_accuracy: 0.2710 - val_loss: 3.5213
Epoch 2/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 1s/step - accuracy: 0.3123 - loss: 3.2023 - val_accuracy: 0.3680 - val_loss: 3.0913
Epoch 3/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 1s/step - accuracy: 0.4067 - loss: 2.8013 - val_accuracy: 0.4035 - val_loss: 2.8377
Epoch 4/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 1s/step - accuracy: 0.4864 - loss: 2.3627 - val_accuracy: 0.4585 - val_loss: 2.6010
Epoch 5/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 1s/step - accuracy: 0.5597 - loss: 2.0470 - val_accuracy: 0.4730 - val_loss: 2.4221
Epoch 6/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 1s/step - accuracy: 0.6056 - loss: 1.7890 - val_accuracy: 0.4905 - val_loss: 2.2840
Epoch 7/10
[1m32/32[0m [32m━━━━━━━━━━

<keras.src.callbacks.history.History at 0x1c5cb0dffb0>

In [None]:
# Fine-tuning experiment: rebuild VGG16 from the same weights file, leave the
# tail of the convolutional base trainable, and attach a larger head with
# dropout. This replaces the `base_model` and `model` created in earlier cells.
base_model = VGG16(weights=weights_path, include_top=False, input_shape=(64, 64, 3))

# Number of trailing base-model layers that stay trainable.
# NOTE(review): the final VGG16 layer is presumably a pooling layer without
# weights, so 2 may effectively train a single conv layer - confirm with
# model.summary().
NUM_UNFROZEN_LAYERS = 2

# Freeze all layers first ...
for layer in base_model.layers:
    layer.trainable = False
# ... then unfreeze only the last NUM_UNFROZEN_LAYERS layers of the base.
# The `len(...) - N` start index is kept (rather than `[-N:]`) so that N = 0
# unfreezes nothing instead of everything.
for layer in base_model.layers[len(base_model.layers) - NUM_UNFROZEN_LAYERS:]:
    layer.trainable = True

# New classification head: a wider Dense layer plus dropout for
# regularisation, then one softmax unit per class.
x = Flatten()(base_model.output)
x = Dense(512, activation='relu')(x)
x = tf.keras.layers.Dropout(0.3)(x)
predictions = Dense(102, activation='softmax')(x)

# Create the model
model = Model(inputs=base_model.input, outputs=predictions)
# Compile the model with an explicit Adam learning rate
model.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])
# Train the partially unfrozen model, validating on the test split each epoch
model.fit(x_train, y_train, batch_size=64, epochs=20, validation_data=(x_test, y_test))


Epoch 1/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 1s/step - accuracy: 0.2632 - loss: 3.6391 - val_accuracy: 0.4895 - val_loss: 2.3887
Epoch 2/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 1s/step - accuracy: 0.5557 - loss: 1.9392 - val_accuracy: 0.5490 - val_loss: 1.9809
Epoch 3/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 1s/step - accuracy: 0.6955 - loss: 1.2457 - val_accuracy: 0.5510 - val_loss: 1.8548
Epoch 4/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 1s/step - accuracy: 0.8029 - loss: 0.7549 - val_accuracy: 0.5900 - val_loss: 1.7445
Epoch 5/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 1s/step - accuracy: 0.8694 - loss: 0.4785 - val_accuracy: 0.6070 - val_loss: 1.7749
Epoch 6/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 1s/step - accuracy: 0.9276 - loss: 0.2980 - val_accuracy: 0.6150 - val_loss: 1.7782
Epoch 7/20
[1m32/32[0m [32m━━━━━━━━━━

In [None]:
import matplotlib.pyplot as plt
# Run the most recently trained model over the whole test split; each row of
# the result is the softmax output (one score per class) for one image.
predicted_value = model.predict(x=x_test)

In [None]:
# Class names taken from the keys of the generator's class_indices mapping.
# Assumes the key order matches the integer class indices, so that
# labels[i] is the name of class i - TODO confirm.
labels = [class_name for class_name in dataset_generator.class_indices]

In [None]:
# Index of the test image to inspect; must be smaller than len(x_test).
n = 887

# argmax over the softmax output gives the predicted class index; argmax over
# the one-hot label gives the true class index.
predicted_label = labels[np.argmax(predicted_value[n])]
actual_label = labels[np.argmax(y_test[n])]

plt.imshow(x_test[n])
print("Predicted: ", predicted_label)
print("Actual: ", actual_label)

In [1]:
'''
Let's go through each code snippet in detail, breaking down its function, significance, and possible viva questions with answers.

---

### 1. Importing Libraries

```python
import tensorflow as tf
from tensorflow import keras
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator
```

- **Explanation**: Imports necessary libraries. `tensorflow` for deep learning, `numpy` for numerical operations, and `ImageDataGenerator` for image preprocessing and augmentation.

- **Significance**: Importing these libraries enables image preprocessing and model building with TensorFlow.

- **Viva Questions**:
    1. **Q**: Why do we use `ImageDataGenerator`?  
       **A**: `ImageDataGenerator` allows for data augmentation and preprocessing, such as scaling images, which can improve model generalization.
    
    2. **Q**: What is the purpose of `numpy` in this code?  
       **A**: `numpy` provides support for handling numerical operations, such as creating arrays and manipulating numerical data structures.

---

### 2. Image Data Preparation

```python
dataset_dir = "./caltech-101-img"
dataset_datagen = ImageDataGenerator(rescale=1.0 / 255)
batch_size = 2000
dataset_generator = dataset_datagen.flow_from_directory(
    dataset_dir,
    target_size=(64, 64),
    batch_size=batch_size,
    class_mode='categorical'
)
```

- **Explanation**:
    - `dataset_dir`: Specifies the directory path where the dataset images are stored.
    - `ImageDataGenerator`: Rescales the images to have pixel values between 0 and 1 by dividing by 255.
    - `flow_from_directory`: Loads images, resizes them to 64x64 pixels, and sets batch size to 2000 with categorical labels (one-hot encoded).

- **Significance**: Prepares image data by normalizing and resizing for compatibility with the model.

- **Viva Questions**:
    1. **Q**: Why do we rescale the images by dividing by 255?  
       **A**: Rescaling normalizes pixel values to the range [0, 1], which speeds up training and improves performance.

    2. **Q**: What is the purpose of `target_size`?  
       **A**: `target_size` resizes all images to 64x64 pixels, ensuring a consistent input shape for the model.

---

### 3. Splitting the Dataset

```python
x_train, y_train = dataset_generator[0]
x_test, y_test = dataset_generator[1]
```

- **Explanation**: Retrieves the first and second batches from `dataset_generator` as `x_train`/`y_train` and `x_test`/`y_test`.

- **Significance**: Divides the data into training and testing sets for model evaluation.

- **Viva Questions**:
    1. **Q**: Why do we use `dataset_generator[0]` and `dataset_generator[1]`?  
       **A**: This loads the first two batches from the generator, serving as training and testing data.

---

### 4. Creating the Base Model

```python
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.applications import VGG16

weights_path = "vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5"
base_model = VGG16(weights=weights_path, include_top=False, input_shape=(64, 64, 3))
for layer in base_model.layers:
   layer.trainable = False
```

- **Explanation**:
    - Loads the pre-trained VGG16 model without the top classification layer.
    - Freezes the weights in all layers to prevent updating during training.

- **Significance**: Uses transfer learning from VGG16, which has learned general features that improve training efficiency.

- **Viva Questions**:
    1. **Q**: What is the advantage of setting `include_top=False` in VGG16?  
       **A**: It allows us to exclude VGG16’s dense layers so we can add custom layers suitable for our dataset.

    2. **Q**: Why do we freeze the layers in `base_model`?  
       **A**: Freezing preserves learned features, allowing the model to focus on learning task-specific features in new layers.

---

### 5. Adding Layers to the Model

```python
x = Flatten()(base_model.output)
x = Dense(64, activation='relu')(x)
predictions = Dense(102, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=predictions)
model.compile(optimizer="adam", loss='categorical_crossentropy', metrics=['accuracy'])
```

- **Explanation**:
    - **Flatten**: Converts 3D output of VGG16 into a 1D tensor.
    - **Dense(64)**: Adds a fully connected layer with 64 neurons.
    - **Dense(102)**: Adds output layer with 102 classes, using `softmax` for multiclass classification.

- **Significance**: Adds classification layers on top of the pre-trained base model, customizing it for the current dataset.

- **Viva Questions**:
    1. **Q**: Why is the `Flatten` layer added here?  
       **A**: `Flatten` changes the 3D output into a 1D array, making it compatible with dense layers.

    2. **Q**: Why do we use `softmax` in the final layer?  
       **A**: `softmax` is used for multiclass classification, outputting a probability distribution over 102 classes.

---

### 6. Training the Model

```python
model.fit(x_train, y_train, batch_size=64, epochs=10, validation_data=(x_test, y_test))
```

- **Explanation**: Trains the model on the dataset for 10 epochs with a batch size of 64, using `x_test`/`y_test` for validation.

- **Significance**: Helps optimize model weights for accurate classification.

- **Viva Questions**:
    1. **Q**: What is the purpose of using `validation_data` during training?  
       **A**: `validation_data` is evaluated at the end of every epoch, so we can monitor the model's performance on data it is not trained on and detect overfitting.

---

### 7. Fine-tuning the Model

```python
for layer in base_model.layers[len(base_model.layers) - 2:]:
   layer.trainable = True
x = Flatten()(base_model.output)
x = Dense(512, activation='relu')(x)
x = tf.keras.layers.Dropout(0.3)(x)
predictions = Dense(102, activation='softmax')(x)
model = Model(inputs=base_model.input, outputs=predictions)
model.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(x_train, y_train, batch_size=64, epochs=20, validation_data=(x_test, y_test))
```

- **Explanation**:
    - **Fine-tunes**: Unfreezes the last two layers of VGG16 to allow adaptation to the new dataset.
    - **Dropout(0.3)**: Adds dropout regularization to reduce overfitting.

- **Significance**: Fine-tuning improves feature extraction for the specific dataset while preventing overfitting.

- **Viva Questions**:
    1. **Q**: What is the purpose of adding dropout?  
       **A**: Dropout reduces overfitting by randomly disabling neurons during training.

    2. **Q**: Why are only the last few layers of VGG16 unfrozen?  
       **A**: Unfreezing specific layers allows the model to learn dataset-specific patterns without altering core features.

---

### 8. Making Predictions and Displaying Results

```python
import matplotlib.pyplot as plt
predicted_value = model.predict(x_test)
labels = list(dataset_generator.class_indices.keys())
n = 887

plt.imshow(x_test[n])
print("Predicted: ", labels[np.argmax(predicted_value[n])])
print("Actual: ", labels[np.argmax(y_test[n])])
```

- **Explanation**:
    - Uses `model.predict` to get the predicted class probabilities for the test images.
    - Displays the predicted and actual label for an example image.

- **Significance**: Allows visual comparison of model predictions with ground truth, which is essential for assessing model performance.

- **Viva Questions**:
    1. **Q**: How does `model.predict` work?  
       **A**: `model.predict` takes input data and outputs the predicted class probabilities.

    2. **Q**: Why do we use `np.argmax` in the print statement?  
       **A**: `np.argmax` returns the index of the largest value. Applied to the softmax output it gives the predicted class index; applied to the one-hot label it gives the actual class index.

---

This comprehensive guide should prepare you well for viva questions by explaining the functionality, significance, and typical questions for each code snippet in your project. Let me know if you need more details on specific parts!
'''

'\nLet\'s go through each code snippet in detail, breaking down its function, significance, and possible viva questions with answers.\n\n---\n\n### 1. Importing Libraries\n\n```python\nimport tensorflow as tf\nfrom tensorflow import keras\nimport numpy as np\nfrom tensorflow.keras.preprocessing.image import ImageDataGenerator\n```\n\n- **Explanation**: Imports necessary libraries. `tensorflow` for deep learning, `numpy` for numerical operations, and `ImageDataGenerator` for image preprocessing and augmentation.\n\n- **Significance**: Importing these libraries enables image preprocessing and model building with TensorFlow.\n\n- **Viva Questions**:\n    1. **Q**: Why do we use `ImageDataGenerator`?  \n       **A**: `ImageDataGenerator` allows for data augmentation and preprocessing, such as scaling images, which can improve model generalization.\n    \n    2. **Q**: What is the purpose of `numpy` in this code?  \n       **A**: `numpy` provides support for handling numerical operations, s