# Classification - Train

In [1]:
import yaml
import os
import shutil
import subprocess
import datetime
import numpy
import random
import sklearn.metrics
import tensorflow

# Report the TensorFlow build this kernel is running
print("INFO> TensorFlow version: %s" % tensorflow.__version__)

# Count the physical GPU devices TensorFlow can see
gpuDevices = tensorflow.config.experimental.list_physical_devices('GPU')
print("INFO> Num GPUs Available: ", len(gpuDevices))

INFO> TensorFlow version: 1.14.0
INFO> Num GPUs Available:  1


In [2]:
# Read parameters from local config.yaml file, and update corresponding Python variables
currentDir = os.getcwd()
print("INFO> Reading file config.yaml from directory: %s" % currentDir)

# Open the file in a context manager so the handle is closed even if parsing fails.
# NOTE(review): yaml.Loader can build arbitrary Python objects. That is acceptable only
# while config.yaml is a trusted local file - switch to yaml.SafeLoader otherwise.
with open('config.yaml','r') as yamlFile:
    yamlData = yaml.load(yamlFile,Loader=yaml.Loader)

# Echo every setting, sorted by key, so the run is self-documenting
for key in sorted(yamlData):
    print("INFO> %-15s: %s" % (key,yamlData[key]))

# Settings copied verbatim from the YAML mapping
batchSize = yamlData['batchSize']
checkDataset = yamlData['checkDataset']
checkpointDir = yamlData['checkpointDir']
createDataset = yamlData['createDataset']
datasetDir = yamlData['datasetDir']
goldenDataset = yamlData['goldenDataset']
imageHeight = yamlData['imageHeight']
imageWidth = yamlData['imageWidth']
# Explicit float(): the value is printed as "1e-6", which YAML may hand back as a string
learningRate = float(yamlData['learningRate'])
logDir = yamlData['logDir']
nEpochs = yamlData['nEpochs']
nTrnSamples = yamlData['nTrnSamples']
nTstSamples = yamlData['nTstSamples']
nValSamples = yamlData['nValSamples']

# The split folders are given relative to datasetDir in the YAML file
remDir = os.path.join(datasetDir, yamlData['remDir'])
tmpDir = os.path.join(datasetDir, yamlData['tmpDir'])
trnDir = os.path.join(datasetDir, yamlData['trnDir'])
tstDir = os.path.join(datasetDir, yamlData['tstDir'])
valDir = os.path.join(datasetDir, yamlData['valDir'])

INFO> Reading file config.yam from directory: /raid5/disk1/mlproj10/classification
INFO> batchSize      : 16
INFO> checkDataset   : True
INFO> checkpointDir  : /home/jmv/data/mlproj10/tmp/
INFO> createDataset  : True
INFO> datasetDir     : /home/jmv/data/mlproj10/dataset
INFO> goldenDataset  : /home/jmv/data/mlproj10/LeryPosesGolden/dataset
INFO> imageHeight    : 360
INFO> imageWidth     : 640
INFO> learningRate   : 1e-6
INFO> logDir         : /home/jmv/data/mlproj10/log/
INFO> nEpochs        : 1000
INFO> nTrnSamples    : 2000
INFO> nTstSamples    : 671
INFO> nValSamples    : 671
INFO> remDir         : rem
INFO> tmpDir         : tmp
INFO> trnDir         : trn
INFO> tstDir         : tst
INFO> valDir         : val


In [3]:
# Optionally create dataset from golden data set & check the newly created dataset
if createDataset:
    # Check the source BEFORE deleting anything: with a wrong goldenDataset path the
    # previous dataset would otherwise be wiped and the cell would then crash.
    if not os.path.isdir(goldenDataset):
        raise FileNotFoundError("Golden dataset directory not found: %s" % goldenDataset)

    # Remove datasetDir folder & sub-folders - and then create it again to start from scratch
    # (makedirs also creates any missing parent folders)
    shutil.rmtree(datasetDir, ignore_errors=True)
    os.makedirs(datasetDir)

    # For each class from goldenDataset:
    #  - create a corresponding subfolder in tmpDir
    #  - copy all images from the corresponding class in goldenDataset to the corresponding class in tmpDir
    #  - create the (still empty) class subfolder in each of the trn / val / tst / rem folders
    # Only the first os.walk() entry is needed (the immediate sub-folders), so it is taken
    # with next() instead of walking the whole tree. Sorted for a deterministic order.
    listOfSubDirs = sorted(next(os.walk(goldenDataset))[1])
    for myClass in listOfSubDirs:
        print(f"INFO> Processing folder (1st pass): {myClass}")
        goldenClassDir = os.path.join(goldenDataset,myClass)
        os.makedirs(os.path.join(tmpDir,myClass))

        # Files directly inside the class folder; nested folders are not copied
        for myImage in next(os.walk(goldenClassDir))[2]:
            shutil.copyfile(os.path.join(goldenClassDir,myImage),os.path.join(tmpDir,myClass,myImage))

        for splitDir in (trnDir, valDir, tstDir, remDir):
            os.makedirs(os.path.join(splitDir,myClass))

    # Oversampling section
    def overSample(myList,actualLength,desiredLength):
        """Grow myList to desiredLength items by repeating randomly chosen items.

        Args:
            myList: sequence (list or 1-D numpy array) of items to draw from.
            actualLength: number of items currently in myList.
            desiredLength: number of items wanted in the result.

        Returns:
            myList itself when actualLength equals desiredLength. Otherwise a numpy
            array of desiredLength items holding every original item plus
            (desiredLength - actualLength) extra items drawn with replacement, in
            random order.

        Raises:
            ValueError: if myList is empty while items are requested, or if
                actualLength is larger than desiredLength.

        Draws from the global numpy.random state, so call numpy.random.seed() first
        when reproducible results are needed.
        """
        if actualLength==desiredLength:
            return myList
        if actualLength > desiredLength:
            raise ValueError("overSample cannot shrink a list: actualLength=%d > desiredLength=%d"
                             % (actualLength, desiredLength))
        if len(myList) == 0:
            # numpy.random.choice would fail here with a message that does not name the cause
            raise ValueError("overSample cannot draw %d items from an empty list" % desiredLength)

        # Extra items are drawn with replacement, then everything is shuffled together
        extraItems = numpy.random.choice(myList,desiredLength-actualLength)
        newList = numpy.append(myList,extraItems)
        return numpy.random.choice(newList,desiredLength,replace=False)
    
    def copyImages(tmpDir, subDir, listOfImages, dstDir):
        """Copy the listed images from tmpDir/subDir into dstDir/subDir.

        A name may appear several times in listOfImages. Its first occurrence keeps
        the original file name; the n-th occurrence (n >= 2) is written as
        "<name>_copy_<n><extension>" so that repeated entries do not overwrite
        each other.
        """
        srcFolder = os.path.join(tmpDir, subDir)
        dstFolder = os.path.join(dstDir, subDir)
        timesSeen = {}
        for name in listOfImages:
            occurrence = timesSeen.get(name, 0) + 1
            timesSeen[name] = occurrence
            if occurrence > 1:
                stem, extension = os.path.splitext(name)
                target = stem + "_copy_" + str(occurrence) + extension
            else:
                target = name
            shutil.copyfile(os.path.join(srcFolder, name), os.path.join(dstFolder, target))
        
    # Seed BOTH generators: random drives the shuffle below, while overSample() draws
    # from numpy.random, which random.seed() does not affect.
    random.seed(42)
    numpy.random.seed(42)

    # Loop-invariant: total number of samples requested per class
    nTotalSamples = nTrnSamples + nValSamples + nTstSamples

    # Directory listings come back in arbitrary order, so they are sorted: otherwise the
    # seeded shuffle would still depend on the filesystem.
    listOfSubDir = sorted(entry for entry in os.listdir(tmpDir)
                          if os.path.isdir(os.path.join(tmpDir,entry)))
    for subDir in listOfSubDir:
        print("INFO> Processing folder (2nd pass): %s" % subDir)

        initialList=sorted(os.listdir(os.path.join(tmpDir,subDir)))
        nInitialList = len(initialList)
        randomInitialList = random.sample(initialList,k=nInitialList)

        # Split randomized list of images according to the training, validation & test ratio.
        # trnN / valN / tstN are CUMULATIVE split points; each is capped at the requested
        # count, and whatever lies beyond tstN ends up in rem.
        trnN = min(nTrnSamples,int(nInitialList*nTrnSamples/nTotalSamples))
        valN = min(nTrnSamples+nValSamples,int(nInitialList*(nTrnSamples+nValSamples)/nTotalSamples))
        tstN = min(nTotalSamples,nInitialList)
        trn, val, tst, rem = numpy.split(randomInitialList,[trnN, valN, tstN])

        # Oversampling scheme: bring each split up to its requested size
        trnLen = trnN
        valLen = valN-trnN
        tstLen = tstN-valN
        fullTrn = overSample(trn,trnLen,nTrnSamples)
        fullVal = overSample(val,valLen,nValSamples)
        fullTst = overSample(tst,tstLen,nTstSamples)
        fullRem = rem # No oversampling for rem

        # Copy images in folders
        copyImages(tmpDir, subDir, fullTrn, trnDir)
        copyImages(tmpDir, subDir, fullVal, valDir)
        copyImages(tmpDir, subDir, fullTst, tstDir)
        copyImages(tmpDir, subDir, fullRem, remDir)
    
# Optional sanity check: print the number of files per class in every folder of datasetDir
if checkDataset:
    for subDir in sorted(os.listdir(datasetDir)):
        print(f"INFO> Processing folder: {subDir}")
        splitPath = os.path.join(datasetDir, subDir)
        for myClass in sorted(os.listdir(splitPath)):
            classPath = os.path.join(splitPath, myClass)
            nImages = len(os.listdir(classPath))
            print(f"INFO>     Number of samples in class {myClass}: {nImages}")

Processing folder (1st pass): 0windsurf
Processing folder (1st pass): 1windsurf
Processing folder (2nd pass): 0windsurf
Processing folder (2nd pass): 1windsurf
INFO> Processing folder: rem
INFO>     Number of samples in class 0windsurf: 414
INFO>     Number of samples in class 1windsurf: 16
INFO> Processing folder: tmp
INFO>     Number of samples in class 0windsurf: 3756
INFO>     Number of samples in class 1windsurf: 3358
INFO> Processing folder: trn
INFO>     Number of samples in class 0windsurf: 2000
INFO>     Number of samples in class 1windsurf: 2000
INFO> Processing folder: tst
INFO>     Number of samples in class 0windsurf: 671
INFO>     Number of samples in class 1windsurf: 671
INFO> Processing folder: val
INFO>     Number of samples in class 0windsurf: 671
INFO>     Number of samples in class 1windsurf: 671


In [4]:
# Binary image classifier.
# Feature extractor: six identical stages of
#   Conv2D(3x3, relu) -> BatchNormalization -> MaxPooling2D(2x2)
# that differ only in their number of filters.
convFilters = (64, 128, 192, 192, 192, 128)

myModelInput = tensorflow.keras.layers.Input(shape=(imageHeight,imageWidth,3))
x = myModelInput
for nFilters in convFilters:
    x = tensorflow.keras.layers.Conv2D(nFilters, (3,3), activation="relu")(x)
    x = tensorflow.keras.layers.BatchNormalization()(x)
    x = tensorflow.keras.layers.MaxPooling2D((2,2))(x)

# Classification head: one hidden dense layer, dropout, then a single sigmoid unit
x = tensorflow.keras.layers.Flatten()(x)
x = tensorflow.keras.layers.Dense(128, activation="relu")(x)
x = tensorflow.keras.layers.BatchNormalization()(x)

x = tensorflow.keras.layers.Dropout(0.25)(x)
myModelOutput = tensorflow.keras.layers.Dense(1, activation="sigmoid")(x)

model = tensorflow.keras.models.Model(inputs=myModelInput, outputs=myModelOutput)

model.summary()

# Keep the optimizer instance and hand it to compile(). Passing the string 'rmsprop'
# makes Keras build a default-configured optimizer, so learningRate from config.yaml
# would be silently ignored.
optimizer = tensorflow.keras.optimizers.RMSprop(lr=learningRate)

# NOTE(review): the training cell expects this metric to be logged as 'acc' under
# TF 1.x and as 'accuracy' under TF 2.0 - confirm when upgrading.
model.compile(loss=tensorflow.keras.losses.BinaryCrossentropy(),
              optimizer=optimizer,
              metrics=['accuracy'])

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 360, 640, 3)]     0         
_________________________________________________________________
conv2d (Conv2D)              (None, 358, 638, 64)      1792      
_________________________________________________________________
batch_normalization (BatchNo (None, 358, 638, 64)      256       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 179, 319, 64)      0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 177, 317, 128)     73856     
_________________________________________________________________
batch_normalization_1 (Batch (None, 177, 317, 128)     512       
_________

In [None]:
# Training images: pixel values multiplied by 1/255, plus random shear / zoom / horizontal flip
trnDataGen = tensorflow.keras.preprocessing.image.ImageDataGenerator(
    rescale=1. / 255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True)

# Validation images: same rescaling, no augmentation
valDataGen = tensorflow.keras.preprocessing.image.ImageDataGenerator(rescale=1. / 255)

trnGenerator = trnDataGen.flow_from_directory(
    trnDir,
    target_size=(imageHeight,imageWidth),
    batch_size=batchSize,
    class_mode='binary')

valGenerator = valDataGen.flow_from_directory(
    valDir,
    target_size=(imageHeight,imageWidth),
    batch_size=batchSize,
    class_mode='binary')

# One time-stamped sub-folder per run, for both checkpoints and TensorBoard logs.
# makedirs (not mkdir) so that a missing checkpointDir is created as well.
timeNow = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
fullCheckpointDir = os.path.join(checkpointDir,timeNow)
os.makedirs(fullCheckpointDir)

# need to replace acc by accuracy below when moving to TF2.0
filePath = os.path.join(fullCheckpointDir,"{epoch:05d}_{loss:.6f}_{acc:.6f}_{val_loss:.6f}_{val_acc:.6f}.h5")
checkpoint = tensorflow.keras.callbacks.ModelCheckpoint(filePath, monitor='val_loss', verbose=0, save_best_only=False, save_weights_only=False, mode='auto', save_freq='epoch')

# profile_batch=0 required to solve a bug w/ Tensorboard according to
#   https://github.com/tensorflow/tensorboard/issues/2412
fullLogDir = os.path.join(logDir, timeNow)
tensorboardCallback = tensorflow.keras.callbacks.TensorBoard(log_dir=fullLogDir,profile_batch=0)

# nTrnSamples / nValSamples are PER-CLASS counts, whereas the generators iterate over the
# images of all classes. Taking the step counts from the generators makes one epoch equal
# one full pass over the training set, and evaluates the whole validation set every epoch.
# len(generator) is the number of batches per pass.
history = model.fit_generator(
    trnGenerator,
    steps_per_epoch=len(trnGenerator),
    epochs=nEpochs,
    validation_data=valGenerator,
    validation_steps=len(valGenerator),
    callbacks=[tensorboardCallback,checkpoint])

Found 4000 images belonging to 2 classes.
Found 1342 images belonging to 2 classes.
Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000


Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
Epoch 73/1000
Epoch 74/1000
Epoch 75/1000
Epoch 76/1000
Epoch 77/1000
Epoch 78/1000
Epoch 79/1000
Epoch 80/1000
Epoch 81/1000
Epoch 82/1000
Epoch 83/1000
Epoch 84/1000
Epoch 85/1000
Epoch 86/1000
Epoch 87/1000
Epoch 88/1000
Epoch 89/1000
Epoch 90/1000