# Deep Learning Template for Unstructured Data in Keras

## Step 0: Import Libraries

In [100]:
import keras
import cv2 as cv2
import numpy as np
import pandas as pd
import sklearn
import os

from sklearn.preprocessing import LabelBinarizer
from zipfile import ZipFile
from urllib.request import urlretrieve

from keras.layers.convolutional import Convolution2D,Cropping2D
from keras.layers.core import Flatten,Dense,Dropout
from keras.layers.pooling import MaxPooling2D
from keras.layers import Input
from keras.models import Model
from keras.regularizers import *
from tqdm import tqdm
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.model_selection import train_test_split

## Step 1: Download DataSet

In [54]:
class TqdmUpTo(tqdm):
    """Provides `update_to(n)` which uses `tqdm.update(delta_n)`."""
    def update_to(self, b=1, bsize=1, tsize=None):
        """
        b  : int, optional
            Number of blocks transferred so far [default: 1].
        bsize  : int, optional
            Size of each block (in tqdm units) [default: 1].
        tsize  : int, optional
            Total size (in tqdm units). If [default: None] remains unchanged.
        """
        if tsize is not None:
            self.total = tsize
        self.update(b * bsize - self.n)  # will also set self.n = b * bsize

def downloadDataset(url, filename):
    if not os.path.isfile(filename):
        print("Downloading file... " + filename + " ...")
        with TqdmUpTo(unit='B', unit_scale=True, unit_divisor=1024, miniters=1,desc='data.zip') as t:  # all optional kwargs
            urlretrieve(url,filename, reporthook=t.update_to, data=None)
        print("File downloaded")
    else:
        print("File Already exists")
        
def uncompressData(filename, name_of_folder=None):
    print("Extracting Data")
    if(name_of_folder != None and  os.path.isdir(name_of_folder)):
        print('Data already Extracted')
    else:
        with ZipFile(filename) as zipf:
            zipf.extractall(name_of_folder)
        print("Data Extracted")

In [47]:
Download_Dataset('https://s3-us-west-2.amazonaws.com/deepcognition/datasets/Soda_Bottles.zip', 'Soda_Bottles.zip')

File Already exists


In [52]:
Uncompress_Data('Soda_Bottles.zip')

Extracting Data


TypeError: _isdir() takes exactly one argument (0 given)

## Step 2: Read CSV

In [55]:
def readCSV(filename):
    df = pd.read_csv(filename)
    return df

In [56]:
df = readCSV('./Soda Bottles/train.csv') 
df.head()

Unnamed: 0,Label,Filename
0,MD.Diet,./MD.Diet/5653.jpg
1,MD.Diet,./MD.Diet/5647.jpg
2,MD.Diet,./MD.Diet/5690.jpg
3,MD.Diet,./MD.Diet/5684.jpg
4,MD.Diet,./MD.Diet/5479.jpg


## Step 3: Seperate out Features and Labels

In [63]:
def returnFeaturesAndLabels(dataframe , featureStartIndex, featureEndIndex, labelStartIndex, labelEndIndex):
    return dataframe.iloc[:,featureStartIndex:featureEndIndex].values, dataframe.iloc[:,labelStartIndex:labelEndIndex].values

In [66]:
X , y = returnFeaturesAndLabels(df,1,2,0,1)
print(X[0:5])
print(y[0:5])

[['./MD.Diet/5653.jpg']
 ['./MD.Diet/5647.jpg']
 ['./MD.Diet/5690.jpg']
 ['./MD.Diet/5684.jpg']
 ['./MD.Diet/5479.jpg']]
[['MD.Diet']
 ['MD.Diet']
 ['MD.Diet']
 ['MD.Diet']
 ['MD.Diet']]


## Step 4: Visualize Data

In [96]:
def displayRandomImage(X):
    pass

def totalImagesAndLabels(X, y):
    print("Shape of Features is ", X.shape,"Shape of Labels is ",y.shape)

def displayClassesCount(df , colname):
    abc= df.pivot_table(index = colname, aggfunc = len).sort_values(colname, ascending = False)
    print(abc)
    
def displaySizeCount():
    # Do it later
    train_img_df['images'].map(lambda x: x.shape).value_counts()

In [91]:
displayClassesCount(df,'Label')

#print(CheckClasses)

          Filename
Label             
P.diet         850
P.Zero         824
P.Rsugar       825
P.Orig         800
P.Cherry       793
MD.Orig        844
MD.Diet        839
M.Beer         840


In [97]:
totalImagesAndLabels(X,y)

Shape of Features is  (6615, 1) Shape of Labels is  (6615, 1)


## Step 4: One Hot/ Label Encode the Labels

In [99]:
def labelEncode(labels):
    labelencoder_y = LabelEncoder()
    y = labelencoder_y.fit_transform(y)

## Step 5: Train_Test_Split

In [101]:
def split_dataset(X,y, testSize = 0.2):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = testSize , shuffle=True)

## Step 6: Load Images

In [None]:
## define generator

def generator_new(X,y, batch_size=32):
    total_samples = len(X)
    while 1:
    
    for offset in range(0, total_samples, batch_size): # start, stop and step
        batch_samples = X[offset:offset+batch_size]
        y_samples = y[offset:offset+batch_size]
        images= []
        labels =[]
        for i in range(len(batch_samples)):
        #print(item[0])
        path = "./Soda Bottles/"+batch_samples[i][2:]
        #print(path)
        #print(path)
        local_image= cv2.cvtColor(cv2.imread(path), cv2.COLOR_BGR2RGB)
        local_image = cv2.resize(local_image, (0,0), fx=0.5, fy=0.5) 
        images.append(local_image)
        labels.append(y_samples[i])
        X_train = np.array(images)
        y_train = np.array(labels)
        #y_train = one_hot_encode_labels(y_train)

        yield sklearn.utils.shuffle(X_train, y_train)

## Step 7: Define Model

In [None]:
## define model here

def get_model_new():
  model = Sequential()
  model.add(Lambda(lambda x: (x / 255.0) - 0.5, input_shape=(240,320,3)))
  model.add(Cropping2D(cropping=((50,0),(50,50))))
  model.add(Convolution2D(24,5,5,subsample=(2,2)))
  model.add(Activation('relu'))
  model.add(Flatten())
  model.add(Dense(100))
  model.add(Activation('relu'))
  model.add(Dense(8))
  model.add(Activation('sigmoid'))
  return model

## Step 8: Set Hyperparameters

In [None]:
## Step 8: Define Hyperparameters
def get_optimizer():
    return 'Adam'

def is_custom_loss_function():
    return False

def get_loss_function():
    return 'sparse_categorical_crossentropy'

def get_batch_size():
    return 32

def get_num_epoch():
    return 10

## Step 9: Compile Model

In [None]:
model = get_model()
model.compile(loss=get_loss_function(),optimizer=get_optimizer(), metrics=['accuracy','categorical_accuracy'])

## Step 10: Train Model

In [None]:
model.fit_generator(train_generator, steps_per_epoch= len(X_train)//get_batch_size(), validation_data=valid_generator,   nb_val_samples=len(X_test)//get_batch_size(), nb_epoch=5, verbose=1)


## Step 11: Visualize loss and Peformance

## Step 12: Inference

In [None]:
## check performance on the training set

## Step 13: Save Model