In [None]:
# Importing the Python libraries (standard library first, then third-party)
import os
import random

import albumentations as A
import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn as sk
import tensorflow as tf
from PIL import Image
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from tensorflow import keras

In [None]:
# Root directories that hold the training and the testing images
training_path = "/kaggle/input/sp-society-camera-model-identification/train/train/"
testing_path = "/kaggle/input/sp-society-camera-model-identification/test/test/"
# Names of the immediate sub-directories of the training root
# (second element of the first tuple yielded by os.walk)
training_classes = next(os.walk(training_path))[1]
#print(training_classes)


In [None]:
# Setting the hyperparameters for the neural network
batch_size = 8  # number of samples per batch handed to the Dataset objects
learning_rate = 1e-3  # step size read by base_model when it builds its SGD optimiser

In [None]:
# File names found directly inside the testing directory
# (third element of the first tuple yielded by os.walk)
testing_files = next(os.walk(testing_path))[2]


In [None]:
# Collect the path of every training image together with its class label.
# The label of an image is the name of the class directory that contains it.
# Directory and file names are sorted because os.walk yields them in an
# arbitrary, filesystem-dependent order; without sorting, the row order of the
# training data (and therefore the seeded train/validation split) could differ
# from one machine or run to the next.
training_paths = []
labels = []
for i in sorted(training_classes):
    _,_,filenames = next(os.walk(training_path+i))
    for j in sorted(filenames):
        training_paths.append(training_path+i+'/'+j)
        labels.append(i)

In [None]:
# Sanity check: there must be exactly one class label per collected image path
assert len(training_paths)==len(labels)

In [None]:
# Fixing the seed to a fixed value for reproducible results
seed = 33
# The seed is passed explicitly to the train/validation split; also seed the
# global Python, NumPy and TensorFlow generators so that randomness drawn from
# them (e.g. layer weight initialisation) starts from the same state each run.
random.seed(seed)
np.random.seed(seed)
tf.random.set_seed(seed)

In [None]:
# Convert the training data to a DataFrame for easy indexing
# and one-hot encode the class labels.
training_data = pd.DataFrame(training_paths,columns=['Training Image Path'])
classes = pd.DataFrame(labels)
# Pin the dtype: recent pandas versions return boolean dummy columns by
# default, whereas the categorical cross-entropy loss needs numeric targets.
classes = pd.get_dummies(classes, dtype=np.float32)

In [None]:
# Display the DataFrame of training image paths
training_data

In [None]:
# Display the first few rows of the one-hot encoded labels
classes.head()

In [None]:
# Dictionary mapping a class index (as a string) to its camera-model label.
# It is used to turn the argmax of the model's predictions back into a label.
# NOTE(review): assumes the indices follow the column order of the one-hot
# encoded `classes` DataFrame - confirm against `classes.columns`.
classes_dict = {'0':'HTC-1-M7',
                '1':'LG-Nexus-5x',
                '2':'Motorola-Droid-Maxx',
                '3':'Motorola-Nexus-6',
                '4':'Motorola-X',
                '5':'Samsung-Galaxy-Note3',
                '6':'Samsung-Galaxy-S4',
                '7':'Sony-NEX-7',
                '8':'iPhone-4s',
                '9':'iPhone-6'}

In [None]:
# Create a testing DataFrame (one row per testing file name) for easy indexing, then display it
testing_data = pd.DataFrame(testing_files,columns=['Testing Image Path'])
testing_data

Shuffling the dataset before sending it to the TensorFlow pipeline

In [None]:
#Read Images from the disk to memory

def read_img(path):
    """Load the image stored at ``path`` and return it as a NumPy array.

    The image is converted to RGB so that every returned array has three
    channels, which is what the model's input layer is declared with.

    Args:
        path: location of the image file on disk.

    Returns:
        The pixel data of the RGB image as a NumPy array.
    """
    # The context manager closes the underlying file as soon as the pixels
    # have been copied into the array, instead of leaving that to the
    # garbage collector.
    with Image.open(path) as img:
        return np.array(img.convert("RGB"))

In [None]:
# Augmentation pipelines applied to the training and the testing images

length = 512  # side length of the square crop taken from every image

# Training pipeline: random crop, gamma change, JPEG compression and grid distortion
training_transforms = A.Compose(
    [
        A.RandomCrop(height=length, width=length),
        A.RandomGamma(gamma_limit=(80, 120), p=0.9),
        A.JpegCompression(quality_lower=70, quality_upper=90, p=0.9),
        A.GridDistortion(interpolation=cv2.INTER_CUBIC),
    ]
)

# Testing pipeline: centre crop only
testing_augmentation = A.Compose(
    [
        A.CenterCrop(height=length, width=length),
    ]
)

In [None]:
#Creating a custom keras dataset to feed the neural network 
class Dataset(keras.utils.Sequence):
    """Batch loader that reads images from disk and augments them on the fly.

    Args:
        x: pandas object holding the image paths. With ``test=False`` the
            first element of each row is used as the path (so a one-column
            DataFrame is expected); with ``test=True`` each element is used
            as the path directly (so a Series is expected).
        y: pandas DataFrame of labels, row-aligned with ``x``. Only read
            when ``test`` is False.
        batch_size: maximum number of samples per batch.
        augmentations: callable invoked as ``augmentations(image=array)``;
            the ``'image'`` entry of its result is used.
        test: when True a batch is an array of images only; otherwise it is
            an ``(images, labels)`` tuple.
    """
    def __init__(self,x,y,batch_size,augmentations,test):
        # Let the Sequence base class set up its own state first.
        super().__init__()
        self.x = x
        self.y = y
        self.batch_size = batch_size
        self.augmentations = augmentations 
        self.test = test 
    def __len__(self):
        """Return the number of batches, counting a final partial batch."""
        # Ceiling division: rounding down would silently drop the last
        # len(x) % batch_size samples from every pass over the data.
        return (len(self.x) + self.batch_size - 1) // self.batch_size
    def __getitem__(self,index):
        """Load, augment and return batch number ``index``."""
        start = index*self.batch_size
        stop = start+self.batch_size
        batched_x = self.x.iloc[start:stop].to_numpy()
        if self.test:
            # Test mode: every element of the batch is a path; no labels are returned.
            images = [self.augmentations(image=read_img(i))['image'] for i in batched_x]
            return np.array(images)
        # Training mode: every element is a row whose first entry is the path.
        batched_y = self.y.iloc[start:stop,:]
        images = [self.augmentations(image=read_img(i[0]))['image'] for i in batched_x]
        return np.array(images),batched_y.values

In [None]:
# Build a throwaway dataset over the full training data to check that the custom Dataset works
dummy_dataset = Dataset(
    training_data,
    classes,
    batch_size=batch_size,
    augmentations=training_transforms,
    test=False,
)

In [None]:
# Fetch one batch (index 5) from the dummy dataset, print the shapes of its
# images and labels, and show the first image
a, b = dummy_dataset[5]
#print(a[0])
print(a.shape)
#print(b)
print(b.shape)
plt.imshow(a[0])

# The dummy dataset is no longer needed after this check
del dummy_dataset

**Creating a baseline model using only an ANN**

In [None]:
#Creating the keras model for training 

def base_model(length,input_size,output_classes):
    """Build and compile the baseline fully connected classifier.

    The network flattens a ``(length, length, 3)`` image, passes it through
    one ReLU hidden layer and ends in a softmax layer with one unit per class.
    It is compiled with SGD (using the module-level ``learning_rate``),
    categorical cross-entropy and categorical accuracy.

    Args:
        length: height and width of the square input images.
        input_size: number of values in one input image; only used to size
            the hidden layer (``input_size // (512*128)`` units, at least 1).
        output_classes: number of units in the softmax output layer.

    Returns:
        The compiled ``keras.Model``.
    """
    ipt_layer = keras.Input(shape=(length,length,3))
    flatten = keras.layers.Flatten()(ipt_layer)
    # Guard against a zero-width hidden layer when input_size < 512*128.
    hidden_units = max(1, input_size//(512*128))
    dense_1 = keras.layers.Dense(hidden_units,activation='relu')(flatten)
    op_layer = keras.layers.Dense(output_classes,activation='softmax')(dense_1)
    optimiser = keras.optimizers.SGD(learning_rate =learning_rate)
    losses = keras.losses.CategoricalCrossentropy()
    # CategoricalAccuracy compares the argmax of the one-hot labels with the
    # argmax of the softmax output. The plain Accuracy metric tests element-wise
    # equality of labels and probabilities, which is not meaningful here.
    # The metric is named 'accuracy' so it is logged as 'accuracy' /
    # 'val_accuracy', the names the training callbacks monitor.
    metrics = [keras.metrics.CategoricalAccuracy(name='accuracy')]
    model = keras.Model(inputs = ipt_layer,outputs = op_layer)
    model.compile(optimizer=optimiser,loss=losses,metrics=metrics)
    return model

In [None]:
# Build the baseline model for length x length RGB crops and 10 output classes
training_model = base_model(
    length=length,
    input_size=length*length*3,
    output_classes=10,
)

In [None]:
# Print the summary of the training model
training_model.summary()

In [None]:
# Hold out 10% of the training data for validation; the split is shuffled with the fixed seed

xtrain, xval, ytrain, yval = train_test_split(
    training_data,
    classes,
    test_size=0.1,
    shuffle=True,
    random_state=seed,
)

In [None]:
# Print the number of rows in the training and validation splits (inputs first, then labels)
print(len(xtrain),len(xval))
print(len(ytrain),len(yval))

In [None]:
# Instantiating the training and validation datasets from the custom keras dataset.
# Training batches get the random training augmentations. Validation batches use
# the centre-crop-only testing pipeline so that val_accuracy (the quantity the
# checkpoint, learning-rate and early-stopping callbacks monitor) is measured on
# the same crops every epoch rather than on freshly randomised ones.
training_dataset = Dataset(xtrain,ytrain,batch_size=batch_size,augmentations=training_transforms,test=False)
validation_dataset = Dataset(xval,yval,batch_size=batch_size,augmentations=testing_augmentation,test=False)

# Number of batches in each dataset
print(len(training_dataset))
print(len(validation_dataset))

In [None]:
# Path of the file in which the best model is checkpointed. The name ends in
# ".h5", an extension Keras recognises (HDF5 format); ".hd5" is not one of the
# recognised extensions and recent Keras versions reject such checkpoint paths.
model_file = "output/base_model_weights.h5"
os.makedirs(os.path.dirname(model_file), exist_ok=True) # Make sure the target directory exists before training starts

checkpoint = tf.keras.callbacks.ModelCheckpoint(model_file, monitor="val_accuracy", save_best_only=True, mode='max') # Save the deep learning model which has the best val acc

reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor="val_accuracy", factor=0.9, patience=2, min_lr=1e-6, mode="max", verbose=True) # Reducing the LR by 10% if the val acc does not improve for 2 epochs

early_stopping = tf.keras.callbacks.EarlyStopping(monitor="val_accuracy", patience=5, mode="max", verbose=True) # Stop training the model if the val acc does not improve for 5 epochs

callbacks_list = [checkpoint, reduce_lr, early_stopping] # Collecting every callback into one list


In [None]:
# Training the model. No batch_size is passed to fit(): both datasets are keras
# Sequence objects that already yield complete batches, and the Keras fit()
# documentation says batch_size must not be specified for such inputs.
training_model.fit(training_dataset,validation_data=validation_dataset,epochs=40,callbacks=callbacks_list,verbose=1)

In [None]:
# Read the sample submission file from disk and display its first rows
sample_submission = pd.read_csv('/kaggle/input/sp-society-camera-model-identification/sample_submission.csv')
sample_submission.head()

In [None]:
# Build the full path of every file named in the sample submission and wrap the paths in a test-mode Dataset

test_paths = testing_path+sample_submission['fname']
print(test_paths[0])

#training_model.load_model(model_file)
# One image per batch; labels are not read in test mode, so the paths are passed a second time as a placeholder
x_test = Dataset(
    test_paths,
    test_paths,
    test=True,
    batch_size=1,
    augmentations=testing_augmentation,
)

In [None]:
#alternative function to read the images from disk
def read_and_array(filepath):
    """Open the image at ``filepath`` and return its pixel data as a NumPy array.

    Args:
        filepath: location of the image file on disk.

    Returns:
        The image contents as a NumPy array.
    """
    # The context manager closes the file once the pixels have been copied.
    with Image.open(filepath) as img:
        im_array = np.array(img)
    # Return the array that was just built; the name returned previously
    # (`new_array`) was never defined, so every call raised NameError.
    return im_array

In [None]:
# Make predictions for the test dataset with the model that is currently in memory.
# NOTE(review): loading the best checkpoint is commented out below, so the
# predictions come from whatever weights `training_model` holds at this point,
# not necessarily those of the best-scoring epoch.
#training_model = tf.keras.models.load_model(model_file)
predicted = training_model.predict(x_test)

In [None]:
# Print the raw predictions for the test dataset
print(predicted)

In [None]:
# Turn the predicted scores into camera-model labels: take the index of the
# highest score in each row and look it up (as a string key) in classes_dict


test_labels = [classes_dict[str(item)] for item in predicted.argmax(axis=1)]

In [None]:
# Store the predicted labels in the 'camera' column of the submission frame and display the first rows

sample_submission['camera'] = test_labels
sample_submission.head()

In [None]:
# Write the submission to a .csv file, without the DataFrame index
sample_submission.to_csv("submission.csv", index=False)