### Helper Functions

In [1]:
#libraries needed for the custom helper function
import os
import numpy as np
import cv2
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
import pandas as pd
import matplotlib.pyplot as plt
import random

## custom function for loading data from the directory
def load_data(base_folder_path,resize=(224,224),test_size=0.2, random_state = 4):
    """Load a folder-per-class image dataset and split it into train/test sets.

    Every sub-directory of ``base_folder_path`` is treated as one class and
    every readable file inside it as one sample of that class.

    Args:
        base_folder_path: Directory that contains one sub-directory per class.
        resize: ``(width, height)`` passed to ``cv2.resize`` for every image.
        test_size: Forwarded to ``train_test_split``.
        random_state: Seed used for both the shuffle and the split, so the
            returned partition is reproducible.

    Returns:
        A tuple ``(data_df, X_train, X_test, Y_train, Y_test)`` where
        ``data_df`` is a DataFrame with the columns ``items`` (class name),
        ``path`` (image path) and ``label encoding`` (the encoded label as a
        space-separated string), the ``X_*`` arrays hold the resized images
        as float32 divided by 255, and the ``Y_*`` arrays hold the
        ``LabelBinarizer`` output.

    Raises:
        FileNotFoundError: If ``base_folder_path`` does not exist.
        ValueError: If no readable image is found under ``base_folder_path``.
    """
    images = []        # decoded and resized images
    labels = []        # class name of every image
    path_images = []   # (class name, image path) rows for the data frame

    # Only directories are classes; stray files (.DS_Store, notes, ...) in
    # the base folder would otherwise make os.listdir fail.
    class_names = sorted(
        entry for entry in os.listdir(base_folder_path)
        if os.path.isdir(base_folder_path + '/' + entry)
    )

    for class_name in class_names:
        class_dir = base_folder_path + '/' + class_name
        for file_name in sorted(os.listdir(class_dir)):
            img_path = class_dir + '/' + file_name
            img = cv2.imread(img_path)
            if img is None:
                # imread signals an unreadable / non-image file with None;
                # skipping it keeps images, labels and paths aligned.
                print('Skipping unreadable image:', img_path)
                continue
            images.append(cv2.resize(img, resize))
            labels.append(class_name)
            path_images.append((class_name, img_path))

    if not images:
        raise ValueError(
            'No readable images found under {!r}'.format(base_folder_path)
        )

    # Convert to a float32 array and scale by 255 in place (no extra copy).
    data = np.array(images, dtype='float32')
    data /= 255

    # Encode the class names.
    Y = LabelBinarizer().fit_transform(np.array(labels))

    # String form of every encoded label, for display in the data frame.
    enc = [' '.join(str(e) for e in row) for row in Y]

    data_df = pd.DataFrame(data=path_images,columns=['items','path'])
    data_df.insert(2,"label encoding",enc,True)

    # Seed the shuffle too; otherwise the split differs on every call even
    # though a random_state was supplied.
    Xs,Ys = shuffle(data,Y,random_state=random_state)
    X_train,X_test,Y_train,Y_test = train_test_split(Xs,Ys,test_size=test_size,random_state=random_state)

    return data_df,X_train,X_test,Y_train,Y_test

## custom function for plotting accuracy with the given model history
def plot(history):
    """Plot training/validation accuracy and loss curves of a training run.

    Draws the accuracy curves on the current figure and the loss curves on a
    new figure, then shows them.

    Args:
        history: Object whose ``history`` attribute is a mapping with the
            per-epoch lists ``accuracy``, ``val_accuracy``, ``loss`` and
            ``val_loss``. The older key names ``acc`` / ``val_acc`` are
            accepted as a fallback.

    Raises:
        KeyError: If a required metric is missing from ``history.history``.
    """
    # `%matplotlib inline` is IPython-only syntax and a SyntaxError in plain
    # Python; run the same magic only when an IPython shell is present.
    try:
        get_ipython().run_line_magic('matplotlib', 'inline')  # noqa: F821
    except NameError:
        pass

    metrics = history.history
    # Fall back to the legacy key only when it is the one actually present,
    # so a missing metric is still reported under the modern name.
    use_legacy = 'accuracy' not in metrics and 'acc' in metrics
    acc_key = 'acc' if use_legacy else 'accuracy'

    acc = metrics[acc_key]
    val_acc = metrics['val_' + acc_key]
    loss = metrics['loss']
    val_loss = metrics['val_loss']

    epochs = range(len(acc))  # one point per epoch

    # Training and validation accuracy per epoch
    plt.plot(epochs, acc, 'b', label='Training Accuracy')
    plt.plot(epochs, val_acc, 'r', label='Validation Accuracy')
    plt.legend()
    plt.figure()

    # Training and validation loss per epoch, on the new figure
    plt.plot(epochs, loss, 'b', label='Training Loss')
    plt.plot(epochs, val_loss, 'r', label='Validation Loss')
    plt.legend()
    plt.show()

## custom function for performing model evaluation
def my_model_evaluation(model,X_test,Y_test):
    """Compare the model's predictions with the true labels and report accuracy.

    A sample counts as correct when the arg-max of its prediction equals the
    arg-max of its true label. A summary and the accuracy are printed.

    Args:
        model: Object with a ``predict(X_test)`` method returning one score
            vector per sample.
        X_test: Samples passed to ``model.predict``.
        Y_test: True label vector for each sample, indexed like the
            predictions.

    Returns:
        A tuple ``(accurateindex, wrongindex)`` with the indices of the
        correctly and the wrongly predicted samples.
    """
    predictions = model.predict(X_test)

    accurateindex = []
    wrongindex = []
    for idx, scores in enumerate(predictions):
        if np.argmax(scores) == np.argmax(Y_test[idx]):
            accurateindex.append(idx)
        else:
            wrongindex.append(idx)

    accurate = len(accurateindex)
    total = accurate + len(wrongindex)

    print('Total-test-data;', total, '\taccurately-predicted-data:', accurate, '\t wrongly-predicted-data: ', total - accurate)
    if total:
        print('Accuracy:', round(accurate/total*100, 3), '%')
    else:
        # No predictions: avoid dividing by zero.
        print('Accuracy: n/a (no test samples)')

    return accurateindex, wrongindex

## custom function for seeing model output
def my_model_output(dataset_path, model,X_test, Y_test,accurateindex, wrongindex,correct=True):
    """Show up to nine random test images with predicted and true labels.

    Args:
        dataset_path: Dataset directory; the sorted names of its
            sub-directories are used as the class names.
        model: Object with a ``predict(X_test)`` method returning one score
            vector per sample.
        X_test: Test images, indexable by sample index.
        Y_test: True label vector for each test image.
        accurateindex: Indices of correctly predicted samples.
        wrongindex: Indices of wrongly predicted samples.
        correct: If true, sample from ``accurateindex``; otherwise from
            ``wrongindex``.

    Returns:
        None. Prints a notice and draws nothing when the selected index
        list is empty.
    """
    pool = accurateindex if correct else wrongindex
    if len(pool) == 0:
        # random.sample would raise on an empty population.
        print('No', 'correctly' if correct else 'wrongly', 'predicted samples to display.')
        return

    nrows = 3
    ncols = 3
    # A good model often has fewer than nine wrong predictions, so cap the
    # sample size at what is available instead of always asking for nine.
    imidx = random.sample(pool, k=min(nrows * ncols, len(pool)))

    ypred = model.predict(X_test)
    # Only directories are classes; ignore stray files in the dataset root.
    label = sorted(
        entry for entry in os.listdir(dataset_path)
        if os.path.isdir(os.path.join(dataset_path, entry))
    )

    _, ax = plt.subplots(nrows,ncols,sharex=True,sharey=True,figsize=(15, 12))
    cells = ax.ravel()

    # NOTE(review): imshow interprets colour images as RGB; if X_test was
    # read with cv2.imread the channels are presumably BGR - confirm.
    for cell, idx in zip(cells, imidx):
        cell.imshow(X_test[idx])
        cell.set_title("Predicted label :{}\nTrue label :{}".format(label[np.argmax(ypred[idx])], label[np.argmax(Y_test[idx])]))

    # Hide the grid cells that received no image.
    for cell in cells[len(imidx):]:
        cell.axis('off')

    plt.show()
        

### Loading Data

In [4]:
# Read the "cars" dataset from disk and split it into train / test sets
img_rows, img_cols = 224, 224
base_folder = "cars"
df, X_train, X_test, Y_train, Y_test = load_data(
    base_folder_path=base_folder,
    resize=(img_rows, img_cols),
    test_size=0.05,
    random_state=10,
)
# Report the shape of every split, one per line
print(*(part.shape for part in (X_train, X_test, Y_train, Y_test)), sep='\n')
df.head()

FileNotFoundError: [Errno 2] No such file or directory: 'cars'