### Pre-requisite - Download Datasets
Run all the cells below after downloading the required kaggle.json file (follow [these steps](https://www.kaggle.com/discussions/general/156610))

In [None]:
# Mount Google Drive at /content/gdrive (Colab only); the trained models are saved under this mount later on.
from google.colab import drive
drive.mount('/content/gdrive')

In [None]:
# Open Colab's upload widget and pick kaggle.json.
# The result is assigned instead of left as the cell's last expression: files.upload()
# returns {filename: file_bytes}, and echoing it would write the Kaggle API key into
# the saved notebook output.
from google.colab import files
uploaded_files = files.upload()

In [None]:
# Sanity check: kaggle.json must be present in the current working directory.
!ls -lha kaggle.json

In [None]:
!pip install -q kaggle

In [None]:
# Copy the uploaded API token into ~/.kaggle/ (created if it does not exist yet).
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/

In [None]:
!chmod 600 /root/.kaggle/kaggle.json

In [None]:
# Show the working directory; the archives below are downloaded and unzipped here.
!pwd

In [None]:
# Download the first dataset archive from Kaggle.
!kaggle datasets download -d anupriyakkumari/instagram-5-classes-dataset-1

In [None]:
# Download the second dataset archive from Kaggle.
!kaggle datasets download -d anupriyakkumari/instagram-5-classes-dataset-2

In [None]:
!unzip instagram-5-classes-dataset-1.zip

In [None]:
!unzip instagram-5-classes-dataset-2.zip

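### Note - rename the extracted folders for consistency.
We renamed them to `Instagram_Dataset_1` and `Instagram_Dataset_2`; the cells below expect them at `/content/Instagram_Dataset_1` and `/content/Instagram_Dataset_2`.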

# 4.1 (Sequential Models)


### 4.1.1 Train model

In this approach, we again first import the required libraries and then load our dataset in a similar way as above. This time we have a single folder containing one subfolder for each of the 5 classes, and another folder for testing on unseen data that also contains one subfolder for each of the 5 classes.
Run each cell one by one.

In [None]:
#loading essential libraries
import numpy as np
import os
import PIL
import PIL.Image
import tensorflow as tf
from tensorflow.keras import layers

Here we load the dataset and again set the parameters as required. We split the training dataset into training and validation subsets, with the validation subset containing 10% of the images.

In [None]:
# Image/batch parameters and the training / validation datasets.
batch_size = 64
img_height = 180
img_width = 180

data_dir = "/content/Instagram_Dataset_1/classes"
# The bigger dataset is the one actually used for now.
data_dir_new = "/content/Instagram_Dataset_2/classes"

# Both subsets must share the same split fraction and seed so they do not overlap.
_split_kwargs = dict(
    validation_split=0.1,
    seed=123,
    image_size=(img_height, img_width),
    batch_size=batch_size,
)

train_ds = tf.keras.utils.image_dataset_from_directory(
    data_dir_new, subset="training", **_split_kwargs)

val_ds = tf.keras.utils.image_dataset_from_directory(
    data_dir_new, subset="validation", **_split_kwargs)

# Captured here because the attribute is not available on the prefetched dataset created later.
class_names = train_ds.class_names
print(class_names)


In [None]:
# Held-out test dataset, loaded from its own directory (no train/validation split).
# Note that batch_size is rebound to 32 from here on.
batch_size, img_height, img_width = 32, 180, 180
test_dir = "/content/Instagram_Dataset_2/test"
test_ds = tf.keras.utils.image_dataset_from_directory(
    directory=test_dir,
    batch_size=batch_size,
    image_size=(img_height, img_width),
)


Printing the shape of one batch from the `tf.data.Dataset` returned by `tf.keras.utils.image_dataset_from_directory`: each element is a pair of a float32 image tensor and an int32 label tensor.


In [None]:
# The sequential models in this notebook contain their own Rescaling(1/255) layer and are
# trained on the un-preprocessed train_ds / val_ds, so test_ds has to stay raw as well;
# applying ResNet50 preprocessing to it would skew every test metric.
# The ResNet50-preprocessed variant is kept under its own name for models that expect it.
test_ds_resnet = test_ds.map(
    lambda x, y: (tf.keras.applications.resnet50.preprocess_input(x), y))


In [None]:
# Pull a single batch and report the shapes of its image and label tensors.
image_batch, labels_batch = next(iter(train_ds))
print(image_batch.shape)
print(labels_batch.shape)

Plotting 9 images from the batch selected by `.take(1)`. The images differ each time the cell is run because the dataset is reshuffled whenever it is iterated.


In [None]:
import matplotlib.pyplot as plt

# 3x3 grid of the first nine images of one training batch, titled with their class names.
fig, axes = plt.subplots(3, 3, figsize=(10, 10))
images, labels = next(iter(train_ds.take(1)))
for i, ax in enumerate(axes.flat):
    ax.imshow(images[i].numpy().astype("uint8"))
    ax.set_title(class_names[labels[i]])
    ax.axis("off")

In [None]:
# Let tf.data choose the prefetch buffer size for all three datasets.
AUTOTUNE = tf.data.AUTOTUNE

train_ds, val_ds, test_ds = (
    split.prefetch(buffer_size=AUTOTUNE) for split in (train_ds, val_ds, test_ds)
)

In [None]:
# Augmentation pipeline shared by all models: random horizontal flip, rotation and contrast.
data_augmentation = tf.keras.Sequential([
    layers.RandomFlip('horizontal'),
    layers.RandomRotation(0.1),
    layers.RandomContrast(factor=0.3),
])

In [None]:
# Show nine independently augmented versions of the first image of one training batch.
image, _ = next(iter(train_ds.take(1)))
first_image = image[0]

fig, axes = plt.subplots(3, 3, figsize=(10, 10))
for ax in axes.flat:
    augmented_image = data_augmentation(tf.expand_dims(first_image, 0))
    # Pixels are in the 0-255 range; imshow expects floats in [0, 1].
    ax.imshow(augmented_image[0] / 255)
    ax.axis('off')

In [None]:
# Rescaling layer that maps pixel values from [0, 255] to [0, 1]; used as the first layer of every model.
normalization_layer = layers.Rescaling(scale=1./255)

In [None]:
# First sequential network: rescale -> augment -> 4 x (Conv2D(32) + MaxPooling) -> dense head.

num_classes = 5

# Four identical convolution + pooling stages.
_conv_stages = []
for _stage in range(4):
    _conv_stages.append(layers.Conv2D(32, 3, activation='relu'))
    _conv_stages.append(layers.MaxPooling2D())

model41 = tf.keras.Sequential([
    normalization_layer,
    data_augmentation,
    *_conv_stages,
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    # No activation: the model outputs logits (the loss is configured with from_logits=True).
    layers.Dense(num_classes),
])

In [None]:
# Compile with Adam; the loss consumes raw logits and integer class labels.
logits_loss41 = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
model41.compile(optimizer='adam', loss=logits_loss41, metrics=['accuracy'])

In [None]:
# Choosing the number of epochs is a trade-off: too many risks overfitting,
# i.e. training accuracy pulling ahead of val_accuracy.
model41.fit(train_ds, epochs=10, validation_data=val_ds)

### 4.1.2 Save model

In [None]:
# Persist the trained model to Drive so the training steps above need not be repeated.
model41.save(filepath='/content/gdrive/MyDrive/Image_Classification/model_seq_1.h5')

### 4.1.3 Test model

In [None]:
# Loss and accuracy of model41 on the test, validation and training sets (evaluated in that order).
(test_loss1, test_acc1), (val_loss1, val_acc1), (train_loss1, train_acc1) = (
    model41.evaluate(split) for split in (test_ds, val_ds, train_ds)
)

In [None]:
# One summary line per split.
for split_name, split_loss, split_acc in (
    ('Test', test_loss1, test_acc1),
    ('Val', val_loss1, val_acc1),
    ('Train', train_loss1, train_acc1),
):
    print(split_name + ' loss :', split_loss, split_name + ' accuracy:', split_acc)

# 4.2

### 4.2.1 Train model

In [None]:
# Second sequential model: a wider convolution stack (16 -> 128 filters) with dropout and
# an L2-regularised dense layer.
#
# The output layer has NO activation: the loss is SparseCategoricalCrossentropy(from_logits=True),
# which expects unbounded logits. A ReLU here clips every negative logit to 0 and blocks
# its gradient, which can leave accuracy stuck after a few epochs.
# The input_shape argument was also dropped from the first Conv2D: it is not the first
# layer of the model, so the argument did not define the model input.
model42 = tf.keras.Sequential([
    normalization_layer,
    data_augmentation,
    tf.keras.layers.Conv2D(16, (3, 3), activation=tf.nn.relu),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Conv2D(32, (3, 3), activation=tf.nn.relu),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Conv2D(64, (3, 3), activation=tf.nn.relu),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Conv2D(128, (3, 3), activation=tf.nn.relu),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(512, kernel_regularizer=tf.keras.regularizers.l2(0.001),
                          activation=tf.nn.relu),
    # One logit per class.
    tf.keras.layers.Dense(len(class_names)),
])


In [None]:
# Compile with Adam; the loss is configured for logits and integer class labels.
logits_loss42 = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
model42.compile(optimizer='adam', loss=logits_loss42, metrics=['accuracy'])

In [None]:
# Choosing the number of epochs is hard: we train until accuracy > val_accuracy, then stop.
model42.fit(train_ds, epochs=8, validation_data=val_ds)

### 4.2.2 Save model

In [None]:
# Persist the second model to Drive.
model42.save(filepath='/content/gdrive/MyDrive/Image_Classification/model_seq_2.h5')

### 4.2.3 Test model

In [None]:
# Loss and accuracy of model42 on the test, validation and training sets (evaluated in that order).
(test_loss2, test_acc2), (val_loss2, val_acc2), (train_loss2, train_acc2) = (
    model42.evaluate(split) for split in (test_ds, val_ds, train_ds)
)

In [None]:
# One summary line per split. The validation and training lines were previously both
# labelled "Test", which made the three results indistinguishable.
print('Test loss :', test_loss2, 'Test accuracy:', test_acc2)
print('Val loss :', val_loss2, 'Val accuracy:', val_acc2)
print('Train loss :', train_loss2, 'Train accuracy:', train_acc2)

# 4.3

### 4.3.1 Train model

In [None]:
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, Activation
from keras.regularizers import l1_l2

# Third sequential model: four convolutional stages (32 -> 64 -> 128 -> 512 filters), each
# followed by max-pooling and dropout, then a 1024-unit dense layer and a softmax output.
#
# Changes from the previous version:
#  * the first Conv2D no longer declares input_shape=(128, 128, 3). It is not the first
#    layer of the model and the value contradicted the 180x180 images the model is built
#    with below, so it was misleading at best;
#  * the standalone Activation('relu') after an already-ReLU convolution was removed
#    (relu(relu(x)) == relu(x), so the outputs are unchanged).
model = Sequential()
model.add(normalization_layer)
model.add(data_augmentation)

#### Stage 1 ####
model.add(Conv2D(filters=32, kernel_size=(3,3), padding='same', activation='relu'))
model.add(Conv2D(32, (3,3), activation='relu'))
model.add(MaxPooling2D((2,2)))  # Pooling
model.add(Dropout(0.2)) # Dropout

#### Stage 2 ####
model.add(Conv2D(64, (3,3), padding='same', activation='relu'))
model.add(Conv2D(64, (3,3), activation='relu'))
model.add(MaxPooling2D((2,2)))
model.add(Dropout(0.2))

#### Stage 3 ####
model.add(Conv2D(128, (3,3), padding='same', activation='relu'))
model.add(Conv2D(128, (3,3), activation='relu'))
model.add(MaxPooling2D((2,2)))
model.add(Dropout(0.2))

#### Stage 4 ####
model.add(Conv2D(512, (5,5), padding='same', activation='relu'))
model.add(Conv2D(512, (5,5), activation='relu'))
model.add(MaxPooling2D((4,4)))
model.add(Dropout(0.2))

#### Fully-Connected Layer ####
model.add(Flatten())
model.add(Dense(1024, activation='relu'))
model.add(Dropout(0.2))
# Softmax probabilities: this model is compiled with a loss that expects probabilities, not logits.
model.add(Dense(5, activation='softmax'))

# The input size is fixed here: batches of 180x180 RGB images.
model.build((None,180,180,3))
model.summary() # a handy way to inspect the architecture

In [None]:
!pip install livelossplot

In [None]:
%%time

from keras.optimizers import RMSprop
from keras.callbacks import ModelCheckpoint, EarlyStopping, TensorBoard
from livelossplot import PlotLossesKeras

# train_ds and val_ds are finite, non-repeating datasets, so the step counts are left
# as None: Keras then runs every epoch over the whole dataset. The former hard-coded
# values (82 / 10) only work when they happen to equal the number of batches; otherwise
# an epoch is truncated or training runs out of data.
steps_per_epoch = None
val_steps = None

n_epochs = 5

optimizer = RMSprop(learning_rate=0.0001)

# The model ends in a softmax, so the loss takes probabilities (default from_logits=False).
model.compile(loss='sparse_categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

# Checks after each epoch and writes the model to disk only when the monitored metric improved.
checkpointer = ModelCheckpoint(filepath='img_model.weights.best.hdf5',
                               verbose=1,
                               save_best_only=True)

# Early stopping to prevent overtraining and to ensure decreasing validation loss.
# NOTE(review): patience (10) exceeds n_epochs (5), so this callback cannot trigger with
# the current settings; lower patience or raise n_epochs if early stopping is wanted.
early_stop = EarlyStopping(monitor='val_loss',
                           patience=10,
                           restore_best_weights=True,
                           mode='min')

# Actual fitting of the model; progress is shown by PlotLossesKeras instead of the default log.
history = model.fit(train_ds,
                    epochs=n_epochs,
                    steps_per_epoch=steps_per_epoch,
                    validation_data=val_ds,
                    validation_steps=val_steps,
                    callbacks=[early_stop, checkpointer, PlotLossesKeras()],
                    verbose=False)

### 4.3.2 Save model

In [None]:
# Persist the third model to Drive.
model.save(filepath="/content/gdrive/MyDrive/Image_Classification/model_seq_3.h5")

### 4.3.3 Test model

In [None]:
#third model
model.load_weights("/content/gdrive/MyDrive/Image_Classification/model_seq_3.h5")

# Sequential.predict_classes has been removed from Keras, and a tf.data.Dataset has no
# .classes / .class_indices attributes (those belong to the ImageDataGenerator API).
# Labels and predictions are gathered batch by batch in ONE pass, so they stay aligned
# even if the dataset is reshuffled on every iteration.
true_batches, pred_batches = [], []
for images, labels in test_ds:
    batch_probs = model.predict_on_batch(images)
    pred_batches.append(np.argmax(batch_probs, axis=-1))
    true_batches.append(labels.numpy())

predicted_classes = np.concatenate(pred_batches)
true_classes = np.concatenate(true_batches)

# class index -> class name.
# NOTE(review): assumes the test directory has the same class sub-folders as the training
# directory, so that both share one index order — confirm.
class_indices = dict(enumerate(class_names))

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import precision_recall_fscore_support, accuracy_score

def display_results(y_true, y_preds, class_labels):
    """Print the overall accuracy and return a per-class metrics table.

    Args:
        y_true: true class index of every sample.
        y_preds: predicted class index of every sample.
        class_labels: one display name per class, used as the row labels of the result.

    Returns:
        DataFrame with one row per class and the columns Precision, Recall,
        F-Score and Support, sorted by F-Score in descending order.
    """
    metric_names = {0: 'Precision', 1: 'Recall', 2: 'F-Score', 3: 'Support'}

    # The sklearn helper returns four per-class arrays; as a frame that is one row per
    # metric, so it is transposed to get one row per class.
    per_class_scores = precision_recall_fscore_support(y_true, y_preds)
    results = (
        pd.DataFrame(per_class_scores, columns=class_labels)
        .T
        .rename(columns=metric_names)
        .sort_values(by='F-Score', ascending=False)
    )

    global_acc = accuracy_score(y_true, y_preds)
    print("Overall Categorical Accuracy: {:.2f}%".format(global_acc*100))
    return results

def plot_predictions(y_true, y_preds, test_generator, class_indices):
    """Plot up to 20 randomly chosen test images titled "predicted (true)".

    Titles are green when the prediction matches the true class and red otherwise.

    Args:
        y_true: true class indices, indexable by sample position.
        y_preds: predicted class indices, aligned with ``y_true``.
        test_generator: indexable image source. Its ``samples`` attribute is used as the
            number of samples when present; otherwise ``len(test_generator)`` is used,
            so a plain array or list of images works as well.
        class_indices: mapping from class index to class name.
    """
    n_samples = getattr(test_generator, "samples", None)
    if n_samples is None:
        n_samples = len(test_generator)
    # Sampling without replacement raises if more items are requested than exist.
    n_shown = min(20, n_samples)

    fig = plt.figure(figsize=(20, 10))
    for i, idx in enumerate(np.random.choice(n_samples, size=n_shown, replace=False)):
        ax = fig.add_subplot(4, 5, i + 1, xticks=[], yticks=[])
        ax.imshow(np.squeeze(test_generator[idx]))
        pred_idx = y_preds[idx]
        true_idx = y_true[idx]

        ax.set_title("{}\n({})".format(class_indices[pred_idx], class_indices[true_idx]),
                     color=("green" if pred_idx == true_idx else "red"))

    # Laid out once, after all axes and titles exist, instead of on every iteration.
    plt.tight_layout()

In [None]:
model.load_weights('img_model.weights.best.hdf5')

# `testgen` / `traingen` were never defined in this notebook and Sequential.predict_classes
# has been removed from Keras, so everything is derived from test_ds instead.
# Images and labels are collected in ONE pass so they stay aligned even if the dataset is
# reshuffled on every iteration.
# NOTE(review): this holds the whole test set in memory — fine for a small test folder.
image_batches, label_batches = [], []
for images, labels in test_ds:
    image_batches.append(images.numpy())
    label_batches.append(labels.numpy())
test_images = np.concatenate(image_batches)
true_classes = np.concatenate(label_batches)

predicted_classes = np.argmax(model.predict(test_images, batch_size=32), axis=-1)

# class index -> class name.
# NOTE(review): assumes the test directory has the same class sub-folders as the training
# directory, so that both share one index order — confirm.
class_indices = dict(enumerate(class_names))


class _IndexableImages:
    """Minimal image source offering what plot_predictions reads: ``.samples`` and ``[idx]``."""

    def __init__(self, images):
        self._images = images
        self.samples = len(images)

    def __getitem__(self, idx):
        return self._images[idx]


# uint8 so that imshow renders the pixel values as-is.
# NOTE(review): assumes test_ds yields raw 0-255 RGB pixels — confirm.
testgen = _IndexableImages(test_images.astype("uint8"))

In [None]:
# Show a random sample of test images, each titled with its predicted and (in parentheses) true class.
plot_predictions(true_classes, predicted_classes, testgen, class_indices)


In [None]:
# Per-class precision / recall / F-score / support table, plus the printed overall accuracy.
display_results(true_classes, predicted_classes, class_indices.values())


Plotting images and labels for unseen data on any model

In [None]:
# using the first model to display images of unseen data with their predicted labels
from keras.applications.imagenet_utils import preprocess_input
from tensorflow.keras.utils import load_img
from tensorflow.keras.preprocessing import image
from keras.models import load_model
import numpy as np
import os
import matplotlib.pyplot as plt

model = load_model('/content/gdrive/MyDrive/Image_Classification/model_seq_1.h5')

# Set the path to the folder containing the images
folder_path = '/content/Instagram_Dataset_2/unseen'

# Class index -> class name; defined once, it does not change between images.
# NOTE(review): hard-coded; must match the class_names order printed when the training
# data was loaded. An alternative mapping ({0:'animals',1:'beauty',2:'food',3:'memes',
# 4:'travel'}) was present but commented out — presumably for the other dataset; confirm.
label_name={0:'beauty',1:'food',2:'memes',3:'pets',4:'travel'}

# Loop through the images in the folder (sorted for a reproducible display order)
for filename in sorted(os.listdir(folder_path)):
    image_path = os.path.join(folder_path, filename)

    # Skip sub-directories and other non-file entries
    if not os.path.isfile(image_path):
        continue

    # Load the image at the resolution the model was trained on
    img = image.load_img(image_path, target_size=(180, 180))

    # Convert the image to a numpy array
    img_array = image.img_to_array(img)

    # Add a leading batch dimension: (180, 180, 3) -> (1, 180, 180, 3)
    img_array = np.expand_dims(img_array, axis=0)

    # No preprocess_input() here: the saved model starts with its own Rescaling(1/255)
    # layer and was trained on raw 0-255 RGB pixels. ImageNet-style preprocessing feeds
    # it inputs it has never seen, which made the predictions look far worse than the
    # model really is.

    # Make a prediction on the image (one score per class)
    preds = model.predict(img_array)
    print(preds)

    # Decode the prediction into a human-readable label
    label = int(np.argmax(preds[0]))

    # Display the image with predicted label
    plt.imshow(img)
    plt.title(label_name[label])
    plt.show()

