In [1]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 
import tensorflow as tf 
import sklearn as sk 
import os 
import cv2
import albumentations as A
from PIL import Image 
from tensorflow import keras 
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split

In [2]:
# Kaggle input folders holding the labelled training images and the unlabelled test images.
training_path = "/kaggle/input/sp-society-camera-model-identification/train/train/"
testing_path = "/kaggle/input/sp-society-camera-model-identification/test/test/"

# The names of the sub-directories found directly under the training folder.
training_classes = next(os.walk(training_path))[1]


In [37]:
# Training hyperparameters.
# batch_size: number of images served per batch by the data loaders.
batch_size = 32
# epochs: number of passes over the training set requested from fit().
epochs = 5
# learning_rate: step size handed to the SGD optimiser inside base_model().
learning_rate = 1e-3

In [38]:
# File names located directly inside the test folder (sub-directories are not descended into).
testing_files = next(os.walk(testing_path))[2]

In [39]:
# Collect one (image path, class name) pair per training image.
# os.walk yields entries in filesystem order, which is not guaranteed to be
# stable; sorting both levels keeps the row order -- and therefore the seeded
# train/validation split -- identical from run to run.
training_paths = []
labels = []
for class_name in sorted(training_classes):
    class_dir = training_path + class_name
    _, _, filenames = next(os.walk(class_dir))
    for file_name in sorted(filenames):
        training_paths.append(class_dir + '/' + file_name)
        labels.append(class_name)

In [40]:
# Sanity check: every collected image path must have exactly one label.
assert len(training_paths)==len(labels)

In [41]:
# Random seed; passed as random_state to train_test_split further down.
seed = 33

In [42]:
# Single-column frame of image paths; the loader reads the path from the first column.
training_data = pd.DataFrame({'Training Image Path': training_paths})

# One-hot encode the class names: one indicator column per distinct label.
classes = pd.get_dummies(pd.DataFrame(labels))

In [43]:
# Display the frame of training image paths.
training_data

In [44]:
# Peek at the first rows of the one-hot encoded labels.
classes.head()

In [None]:
# Lookup from class index (as a string) to camera-model name.
# NOTE(review): presumably the indices follow the column order of the one-hot
# encoded labels -- confirm before using this to decode predictions.
classes_dict = {
    '0': 'HTC-1-M7',
    '1': 'LG-Nexus-5x',  # the comma after this entry was missing (SyntaxError)
    '2': 'Motorola-Droid-Maxx',
    '3': 'Motorola-Nexus-6',
    '4': 'Motorola-X',
    '5': 'Samsung-Galaxy-Note3',
    '6': 'Samsung-Galaxy-S4',
    '7': 'Sony-NEX-7',
    '8': 'iPhone-4s',
    '9': 'iPhone-6',
}

In [45]:
# Single-column frame listing the test image file names.
testing_data = pd.DataFrame({'Testing Image Path': testing_files})
testing_data

Shuffling the dataset before sending it to the TensorFlow pipeline

In [46]:
def read_img(path):
    """Load the image stored at ``path`` as a NumPy array of RGB pixels.

    Parameters
    ----------
    path : str
        Location of the image file on disk.

    Returns
    -------
    numpy.ndarray
        Pixel data with three colour channels in the last axis.
    """
    # The context manager releases the file handle as soon as the pixels are
    # copied, instead of leaving that to garbage collection.
    with Image.open(path) as img:
        # Force three channels so greyscale / RGBA / palette files still stack
        # into one batch array and match the network's (H, W, 3) input.
        return np.array(img.convert("RGB"))

In [71]:
# Side length, in pixels, of the square crops produced by both pipelines.
length = 512

# Training-time augmentation: random square crop, optional JPEG re-compression,
# grid distortion and random flips.
# (A RandomGamma(gamma_limit=(80,120),p=0.5) step is currently disabled.)
training_transforms = A.Compose(
    [
        A.RandomCrop(height=length, width=length),
        A.JpegCompression(quality_lower=70, quality_upper=90, p=0.5),
        A.GridDistortion(interpolation=cv2.INTER_CUBIC),
        A.HorizontalFlip(p=0.3),
        A.VerticalFlip(p=0.5),
    ]
)

# Evaluation-time pipeline: only a centre crop to the same square size.
testing_augmentation = A.Compose([A.CenterCrop(height=length, width=length)])

In [72]:
class Dataset(keras.utils.Sequence):
    """Keras ``Sequence`` that reads images from disk one batch at a time.

    Parameters
    ----------
    x : pandas.DataFrame
        Rows are sliced with ``.iloc``; the first column of each row is the
        image path handed to ``read_img``.
    y : pandas.DataFrame or None
        Label rows aligned with ``x``. Only read when ``test`` is falsy, so it
        may be ``None`` for inference.
    batch_size : int
        Number of rows served per batch.
    augmentations : callable
        Called as ``augmentations(image=arr)``; the ``'image'`` entry of the
        result is used as the transformed picture.
    test : bool
        When truthy, batches contain images only (no labels).
    """
    def __init__(self,x,y,batch_size,augmentations,test):
        # Let the Keras base class set up whatever state it needs.
        super().__init__()
        self.x = x
        self.y = y
        self.batch_size = batch_size
        self.augmentations = augmentations
        self.test = test

    def __len__(self):
        """Return the number of batches, counting a trailing partial batch."""
        # Round up: flooring here silently dropped the last len(x) % batch_size
        # samples, which for inference means rows with no prediction at all.
        return int(np.ceil(len(self.x)/self.batch_size))

    def __getitem__(self,index):
        """Return batch ``index``: images only in test mode, else ``(images, labels)``."""
        start = index*self.batch_size
        stop = start+self.batch_size
        batched_x = self.x.iloc[start:stop].to_numpy()
        images = np.array([self.augmentations(image=read_img(row[0]))['image'] for row in batched_x])
        if self.test:
            # Labels are never touched in test mode, so ``y`` may be None.
            return images
        batched_y = self.y.iloc[start:stop,:]
        return images,batched_y.values

In [73]:
# Throw-away loader over the full training frame, used only to eyeball one batch.
dummy_dataset = Dataset(
    training_data,
    classes,
    batch_size=batch_size,
    augmentations=training_transforms,
    test=False,
)

In [74]:
# Fetch batch number 5, report the image and label array shapes, and show the
# first augmented image of that batch.
a, b = dummy_dataset[5]
print(a.shape, b.shape, sep="\n")
plt.imshow(a[0])

# The throw-away loader is not needed past this point.
del dummy_dataset

**Creating a baseline model using a small convolutional neural network (CNN)**

In [77]:
def base_model(length,input_size,output_classes):
    """Build and compile the small convolutional baseline classifier.

    Architecture: two Conv2D + MaxPool2D stages, a Flatten, then a softmax
    Dense layer with ``output_classes`` units.

    Parameters
    ----------
    length : int
        Height and width of the square RGB input images.
    input_size : int
        Unused; kept so existing calls keep working.
    output_classes : int
        Number of units in the softmax output layer.

    Returns
    -------
    keras.Model
        Model compiled with SGD (step size read from the module-level
        ``learning_rate``), categorical cross-entropy loss and categorical
        accuracy reported under the name ``"accuracy"``.
    """
    ipt_layer = keras.Input(shape=(length,length,3))
    conv_nn1 = keras.layers.Conv2D(filters=10,kernel_size=2,strides=2,activation='relu')(ipt_layer)
    max_pool = keras.layers.MaxPool2D()(conv_nn1)
    conv_nn2 = keras.layers.Conv2D(filters=4,kernel_size=2,strides=2,activation='relu')(max_pool)
    max_pool2 = keras.layers.MaxPool2D()(conv_nn2)
    flatten = keras.layers.Flatten()(max_pool2)
    op_layer = keras.layers.Dense(output_classes,activation='softmax')(flatten)
    model = keras.Model(inputs = ipt_layer,outputs = op_layer)

    optimiser = keras.optimizers.SGD(learning_rate =learning_rate)
    losses = keras.losses.CategoricalCrossentropy()
    # keras.metrics.Accuracy() tests y_true == y_pred element-wise, which is
    # essentially never true for one-hot labels vs. softmax probabilities.
    # CategoricalAccuracy compares the arg-max instead; naming it "accuracy"
    # keeps the "val_accuracy" key that the callbacks monitor.
    metrics = [keras.metrics.CategoricalAccuracy(name="accuracy")]
    model.compile(optimizer=optimiser,loss=losses,metrics=metrics)
    return model

In [78]:
# Instantiate the baseline network for square crops of side `length` and 10 output classes.
training_model = base_model(
    length,
    input_size=length*length*3,
    output_classes=10,
)

In [79]:
# Print the layer-by-layer summary of the baseline model.
training_model.summary()

In [80]:
# Hold out 10% of the rows for validation; shuffled with a fixed seed.
xtrain, xval, ytrain, yval = train_test_split(
    training_data,
    classes,
    test_size=0.1,
    shuffle=True,
    random_state=seed,
)

In [81]:
# Row counts of the train / validation splits: image paths first, labels second.
print(len(xtrain),len(xval))
print(len(ytrain),len(yval))

In [82]:
# Training batches get the random augmentation pipeline.
training_dataset = Dataset(xtrain,ytrain,batch_size=batch_size,augmentations=training_transforms,test=False)
# Validation batches use the deterministic centre crop: random crops, flips and
# distortions here would make val_accuracy noisy from epoch to epoch, and that
# is the value the checkpoint / LR-schedule / early-stopping callbacks watch.
validation_dataset = Dataset(xval,yval,batch_size=batch_size,augmentations=testing_augmentation,test=False)

# Number of batches per epoch for each loader.
print(len(training_dataset))
print(len(validation_dataset))

In [83]:
# Where the best model seen so far is written. ".hd5" is not an extension Keras
# recognises (TF2 falls back to writing a SavedModel directory, newer Keras
# rejects the path), so use ".h5" to get a single HDF5 file.
model_file = "output/base_model_weights.h5"
# HDF5 saving does not create missing parent folders.
os.makedirs(os.path.dirname(model_file), exist_ok=True)

# Keep only the model with the highest validation accuracy.
checkpoint = tf.keras.callbacks.ModelCheckpoint(model_file, monitor="val_accuracy", save_best_only=True, mode='max')

# Multiply the learning rate by 0.9 after 2 epochs without improvement, down to 1e-6.
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor="val_accuracy", factor=0.9, patience=2, min_lr=1e-6, mode="max", verbose=True)

# Stop training after 5 epochs without improvement in validation accuracy.
early_stopping = tf.keras.callbacks.EarlyStopping(monitor="val_accuracy", patience=5, mode="max", verbose=True)

callbacks_list = [checkpoint, reduce_lr, early_stopping]


In [None]:
# The Sequence loaders already yield complete batches, so `batch_size` must not
# be passed to fit() -- Keras raises a ValueError when both are given.
# Keeping the History object lets the training curves be inspected afterwards.
history = training_model.fit(
    training_dataset,
    validation_data=validation_dataset,
    epochs=epochs,
    callbacks=callbacks_list,
    verbose=1,
)

In [None]:
# Load the sample submission; its 'fname' column is used below to build the test image paths.
# NOTE(review): relative path -- assumes the CSV sits in the current working directory; confirm.
sample_submission = pd.read_csv('sample_submission.csv')
sample_submission.head()

In [None]:
# Full paths of the test images, in the row order of the sample submission.
x_test = testing_path+sample_submission['fname']

# Largest batch size not above `batch_size` that divides the test set evenly, so
# every image gets a prediction even if the loader drops a trailing partial batch.
test_batch_size = next(b for b in range(batch_size, 0, -1) if len(x_test) % b == 0)

# The loader reads the path from the first column of each row, hence to_frame().
# Labels are not returned in test mode; `sample_submission` is only passed as a
# frame of matching length in case the loader still slices `y`.
testing_dataset = Dataset(x_test.to_frame(),sample_submission,batch_size=test_batch_size,augmentations=testing_augmentation,test=True)

# Restore the best checkpoint written during training. Model objects have no
# `load_model` method; the loader lives in keras.models. compile=False is
# enough because the model is only used for prediction.
best_model = keras.models.load_model(model_file, compile=False)
predictions = best_model.predict(testing_dataset)