In [None]:
%tensorflow_version 2.x
import tensorflow as tf
# Show which TensorFlow release this runtime loaded.
print("Tensorflow version {}".format(tf.__version__))

Tensorflow version 2.4.1


In [None]:
# Stop immediately unless TensorFlow reports the first GPU device.
device_name = tf.test.gpu_device_name()
if device_name == '/device:GPU:0':
  print('Found GPU at: {}'.format(device_name))
else:
  raise SystemError('GPU device not found')

Found GPU at: /device:GPU:0


In [None]:
# Display every compute device TensorFlow can see in this runtime
# (the cell output is the returned device list).
from tensorflow.python.client import device_lib
device_lib.list_local_devices()

[name: "/device:CPU:0"
 device_type: "CPU"
 memory_limit: 268435456
 locality {
 }
 incarnation: 9516498326208313734, name: "/device:GPU:0"
 device_type: "GPU"
 memory_limit: 14674281152
 locality {
   bus_id: 1
   links {
   }
 }
 incarnation: 13714744299332723860
 physical_device_desc: "device: 0, name: Tesla T4, pci bus id: 0000:00:04.0, compute capability: 7.5"]

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os 
from collections import Counter
import itertools
import shutil
import random
import glob
import warnings
warnings.simplefilter(action = 'ignore', category = FutureWarning)
%matplotlib inline

In [None]:
import cv2

In [None]:
def check_image(image):
    '''
      Read the image file at path `image` and return it as a 3-channel array.

      A single-plane image (2-D array, or a 3-D array whose last axis has
      1 or 2 entries) is expanded to three channels from its first plane.
      Any other array is returned exactly as OpenCV loaded it.

      NOTE(review): no BGR -> RGB conversion is performed here. OpenCV
      documents that colour images are decoded in BGR order, so a caller
      that needs RGB must convert the result itself - confirm against usage.

      Raises:
          ValueError: if OpenCV cannot read or decode the file.
    '''

    img = cv2.imread(image)

    # cv2.imread reports failure by returning None instead of raising, which
    # previously surfaced later as an opaque AttributeError on `img.shape`.
    if img is None:
        raise ValueError('Could not read image: {!r}'.format(image))

    if img.ndim == 2:
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
    elif img.ndim == 3 and img.shape[2] in (1, 2):
        # only the first plane carries the grey values used for the expansion
        img = cv2.cvtColor(img[:, :, 0], cv2.COLOR_GRAY2RGB)

    return img

In [None]:
'''def denoise_image(img_arr):
    

       # Input a read image,
       # It denoises the image and returns the filtered one.
       
    
    ret = cv2.fastNlMeansDenoisingColored(img_arr, None, 10, 10, 7, 21)
    
    return ret '''

# Denoising with the fastNlMeans method may remove important features of the image, so it is intentionally not used.




In [None]:
''' Count of images for eight different age groups are:
                                                            00: 920
                                                            01: 1063
                                                            02: 2621
                                                            03: 2664
                                                            04: 1556
                                                            05: 919
                                                            06: 550
                                                            07: 375 
So, randomly select 915 images from each of the classes 00, 01, 02, 03, 04, 05, and use data augmentation for classes 06
and 07 so that each of them has 915 images too. In total we will have 915*8 images in the training set.'''




In [None]:
def data_sampling(num, sample_size = 915, base_dir = None):
    '''
      Randomly move `sample_size` images of one class into a new sub-folder.

      Images whose names start with '000' are taken from the class folder
      '0<num>' (e.g. '03') under `base_dir` and moved into a sub-folder named
      '<num>' (e.g. '03/3') inside it.

      If the sub-folder already exists the call does nothing, so re-running
      the notebook does not sample a second time.

      Unlike the earlier version, the process working directory is left
      untouched and the sub-folder is only created once the sample is known
      to be possible (a failed run no longer leaves an empty folder behind
      that would turn every later run into a silent no-op).

      Parameters:
          num: class index; selects folder '0<num>' and names the sub-folder.
          sample_size: number of images to move (default 915).
          base_dir: training-set root; defaults to the author's local folder.

      Raises:
          FileNotFoundError: if the class folder does not exist.
          ValueError: if fewer than `sample_size` matching images are present.
    '''
    if base_dir is None:
        base_dir = r'C:\Users\harsh raj\OneDrive\Desktop\third_proj\train'

    source_dir = os.path.join(base_dir, '0' + str(num))
    target_dir = os.path.join(source_dir, str(num))

    if not os.path.isdir(source_dir):
        raise FileNotFoundError('Class folder not found: {}'.format(source_dir))

    # already sampled on an earlier run
    if os.path.isdir(target_dir):
        return

    # escape the folder part so only the '000*' suffix acts as a wildcard;
    # sorting makes the selection reproducible when `random` is seeded
    candidates = sorted(glob.glob(os.path.join(glob.escape(source_dir), '000*')))
    if len(candidates) < sample_size:
        raise ValueError('Need {} images in {} but found only {}'.format(
            sample_size, source_dir, len(candidates)))

    os.makedirs(target_dir)
    for c in random.sample(candidates, sample_size):
        shutil.move(c, target_dir)

In [None]:
# Sample the six classes with index 0 through 5.
for class_index in range(6):
    data_sampling(class_index)

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array, array_to_img

def data_augmentation(numb):
    '''
      Add augmented copies of randomly chosen images to the class folder '0<numb>'.

      A number of images whose names start with '000' are picked at random
      (73 when `numb` is 6, otherwise 108). Each picked image is run through
      a random rotation/shift/shear/zoom/flip generator five times, and the
      generator writes every result as a jpg into the same class folder.

      Per the author's counts this tops both classes up to 915 images:
      550 + 73*5 for folder '06' and 375 + 108*5 for folder '07'.

      Side effect: the process working directory is changed to the class folder.
    '''
    # how many source images to augment for this class
    valu = 73 if numb == 6 else 108

    pathh = r'C:\Users\harsh raj\OneDrive\Desktop\third_proj\train' + '/' + ('0' + str(numb))

    datagen = ImageDataGenerator(
        rotation_range = 40,
        width_shift_range = 0.2,
        height_shift_range = 0.2,
        shear_range = 0.2,
        zoom_range = 0.2,
        horizontal_flip = True,
        fill_mode = 'nearest',
    )

    os.chdir(pathh)     # the relative glob below is resolved inside the class folder
    chosen = random.sample(glob.glob('000*'), valu)

    for image_name in chosen:
        # the generator expects a batch axis in front: (1, height, width, channels)
        sample = img_to_array(load_img(image_name))
        sample = sample.reshape((1,) + sample.shape)

        flow = datagen.flow(sample, batch_size = 1,
                            save_to_dir = pathh, save_format = 'jpg')

        # the flow is endless; stop once the fifth augmented image has been produced
        for produced, _ in enumerate(flow, start = 1):
            if produced > 4:
                break

In [None]:
# Augment the two smallest classes: folder '06' first, then folder '07'.
for minority_class in (6, 7):
    data_augmentation(minority_class)

In [None]:
# Before further preprocessing, we copied the folders '0', '1', '2', '3', '4', '5', '06', and '07' from the 'train' folder and 
# pasted them as '0', '1', '2', '3', '4', '5', '6', and '7' in a new folder named 'train_final'.

In [None]:
# Mount Google Drive inside the Colab runtime at /content/Pdrive.
from google.colab import drive
drive.mount('/content/Pdrive')

Drive already mounted at /content/Pdrive; to attempt to forcibly remount, call drive.mount("/content/Pdrive", force_remount=True).


In [None]:
# Root folder of the balanced training set on the mounted Drive.
train_path = '/content/Pdrive/MyDrive/Colab Notebooks/train_final'

In [None]:
# Collect and preprocess all 7305 training images.
# Every image gets the VGG16 preprocessing and is resized to 224x224. The batch size
# equals the number of images, so a single batch holds the complete training set.
train_datagen = ImageDataGenerator(
    preprocessing_function = tf.keras.applications.vgg16.preprocess_input
)
train_batches = train_datagen.flow_from_directory(
    directory = train_path,
    target_size = (224, 224),
    classes = ['0', '1', '2', '3', '4', '5', '6', '7'],
    batch_size = 7305,
)

Found 7305 images belonging to 8 classes.


In [None]:
# Sanity check: the directory scan must have found exactly 7305 images in 8 classes.
assert (train_batches.n, train_batches.num_classes) == (7305, 8)

In [None]:
# Pull one batch from the generator. Its batch size was set to the dataset size, so this
# batch is the complete training set: imgs is X_train and labels is y_train.
imgs, labels = next(train_batches)

In [None]:
# Show the array shapes of the inputs and then of the targets.
for batch_array in (imgs, labels):
    print(batch_array.shape)

(7305, 224, 224, 3)
(7305, 8)


In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Activation, Conv2D, MaxPool2D, Dropout, BatchNormalization, AveragePooling2D, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import categorical_crossentropy
from tensorflow.keras.regularizers import l1, l2, l1_l2

In [None]:
def create_model():
  '''
    Build the classifier: VGG16 without its last layer, plus a new 8-way softmax layer.

    Every layer taken from VGG16 is frozen, so only the new Dense layer is trainable.
    Returns the (uncompiled) Sequential model.
  '''
  base_network = tf.keras.applications.vgg16.VGG16()

  model = Sequential()
  for vgg_layer in base_network.layers[:-1]:
      # freeze each reused layer as it is copied over
      vgg_layer.trainable = False
      model.add(vgg_layer)

  # new output layer, one unit per class
  model.add(Dense(units = 8, activation = 'softmax'))

  return model

In [None]:
# Create the model, then train it on the in-memory arrays imgs / labels.
model = create_model()

model.compile(
    loss = 'categorical_crossentropy',
    optimizer = Adam(learning_rate = 0.001),
    metrics = ['accuracy'],
)

# verbose = 2 prints one summary line per epoch
model.fit(x = imgs, y = labels, epochs = 100, batch_size = 128, verbose = 2)

Epoch 1/100
58/58 - 30s - loss: 1.8235 - accuracy: 0.3298
Epoch 2/100
58/58 - 30s - loss: 1.4757 - accuracy: 0.4208
Epoch 3/100
58/58 - 30s - loss: 1.3528 - accuracy: 0.4809
Epoch 4/100
58/58 - 30s - loss: 1.2604 - accuracy: 0.5123
Epoch 5/100
58/58 - 30s - loss: 1.1975 - accuracy: 0.5432
Epoch 6/100
58/58 - 30s - loss: 1.1071 - accuracy: 0.5789
Epoch 7/100
58/58 - 30s - loss: 1.0642 - accuracy: 0.5960
Epoch 8/100
58/58 - 30s - loss: 1.0128 - accuracy: 0.6301
Epoch 9/100
58/58 - 30s - loss: 0.9766 - accuracy: 0.6457
Epoch 10/100
58/58 - 30s - loss: 0.9259 - accuracy: 0.6741
Epoch 11/100
58/58 - 30s - loss: 0.8874 - accuracy: 0.6928
Epoch 12/100
58/58 - 30s - loss: 0.8593 - accuracy: 0.7034
Epoch 13/100
58/58 - 30s - loss: 0.8553 - accuracy: 0.6986
Epoch 14/100
58/58 - 30s - loss: 0.8064 - accuracy: 0.7246
Epoch 15/100
58/58 - 30s - loss: 0.7991 - accuracy: 0.7261
Epoch 16/100
58/58 - 30s - loss: 0.7583 - accuracy: 0.7480
Epoch 17/100
58/58 - 30s - loss: 0.7334 - accuracy: 0.7613
Epoch 

<tensorflow.python.keras.callbacks.History at 0x7efb986f5b50>

In [None]:
# Folder on the mounted Drive that holds all the test images.
test_path = '/content/Pdrive/MyDrive/Colab Notebooks/test'

In [None]:
import glob
from tensorflow.keras.applications.vgg16 import preprocess_input

# Predict a class for every test image and collect one (file name, label) row per image.
#
# The test images must go through the same pipeline as the training images. Training
# images were loaded by Keras (RGB order) before the VGG16 preprocess_input, whereas
# cv2.imread returns BGR, so the channels are swapped to RGB before preprocessing.

records = []

# '**' only recurses when it is a path component of its own, hence the explicit join;
# sorting gives the submission a stable row order.
test_pattern = os.path.join(glob.escape(test_path), '**', '*.jpg')

for filename in sorted(glob.iglob(test_pattern, recursive = True)):
  imag = cv2.imread(filename)
  if imag is None:
    # cv2.imread returns None for a missing or undecodable file
    raise ValueError('Could not read test image: {}'.format(filename))

  imag = cv2.cvtColor(imag, cv2.COLOR_BGR2RGB)
  imag = cv2.resize(imag, (224, 224))
  imag = imag.reshape((1,) + imag.shape).astype(np.float32)   # add the batch axis
  imag = preprocess_input(imag)

  pred_val = model.predict(imag)
  got_val = int(np.argmax(pred_val))

  # basename works at any folder depth, unlike a fixed split('/') index
  records.append((os.path.basename(filename), got_val))

# build the frame once instead of appending row by row
df = pd.DataFrame(records, columns = ['Image Name', 'Labels'])


In [None]:
# Store the predicted class indices as integers.
df = df.astype({'Labels': int})

In [None]:
# Write the predictions to Drive without the DataFrame index column.
submission_path = '/content/Pdrive/MyDrive/Colab Notebooks/Final-Submission.csv'
df.to_csv(submission_path, index = False)