In [1]:
import numpy as np
import tensorflow as tf
import cv2
import time
import os
import sys

  from ._conv import register_converters as _register_converters


In [2]:
class darknet19_core(object):
    """Convolutional backbone assembled from ``tf.layers`` inside one variable scope.

    Calling an instance on an image tensor applies the layer plan below
    (conv + batch-norm + activation units separated by max-pools) and returns
    the last feature map. After each call ``variables``, ``update_ops`` and
    ``saver`` are refreshed from the graph collections filtered by the scope
    name, and the instance switches to ``reuse=True`` for later calls.
    """
    # Layer plan walked by __call__: a (filters, kernel_size) pair is a conv unit,
    # None is a max-pool with the _maxPool defaults.
    _LAYER_PLAN = (
        (32, 3), None,
        (64, 3), None,
        (128, 3), (64, 1), (128, 3), None,
        (256, 3), (128, 1), (256, 3), None,
        (512, 3), (256, 1), (512, 3), (256, 1), (512, 3), None,
        (1024, 3), (512, 1), (1024, 3), (512, 1), (1024, 3),
    )
    def __init__(self, nameScope='dartnet19_core', trainable=True, bnPhase=True, reuse=False, activation = tf.nn.leaky_relu):
        """Store the build options; no graph ops are created until the instance is called.

        nameScope  -- variable scope that will hold every layer of the backbone
        trainable  -- forwarded to the conv and batch-norm layers
        bnPhase    -- forwarded as ``training`` to batch normalization
        reuse      -- initial ``reuse`` flag of the variable scope
        activation -- callable applied after every batch normalization
        """
        self._nameScope = nameScope
        self._trainable = trainable
        self._bnPhase = bnPhase
        self._reuse = reuse
        self._activation = activation
        # Filled in by __call__.
        self.variables = None
        self.update_ops = None
        self.saver = None
    def _conv(self, inputs, filters, kernel_size):
        """Bias-free 'same' convolution -> batch norm -> activation; prints the output shape."""
        convOut = tf.layers.conv2d(
            inputs=inputs, filters=filters, kernel_size=kernel_size, strides=1,
            padding='same', activation=None, trainable=self._trainable, use_bias=False)
        normed = tf.layers.batch_normalization(
            inputs=convOut, training=self._bnPhase, trainable=self._trainable)
        activated = self._activation(normed)
        print(activated.shape)
        return activated
    def _maxPool(self, inputs, pool_size=(2,2), strides=2, padding='same'):
        """Max-pool ``inputs`` (2x2 window, stride 2 by default); prints the output shape."""
        pooled = tf.layers.max_pooling2d(inputs, pool_size=pool_size, strides=strides, padding=padding)
        print(pooled.shape)
        return pooled
    def __call__(self, inputImg):
        """Build the backbone on ``inputImg`` and return the final feature map tensor."""
        with tf.variable_scope(self._nameScope, reuse=self._reuse):
            features = inputImg
            for spec in self._LAYER_PLAN:
                if spec is None:
                    features = self._maxPool(inputs=features)
                else:
                    filters, kernel_size = spec
                    features = self._conv(inputs=features, filters=filters, kernel_size=kernel_size)
        # Any later call shares the variables created above.
        self._reuse = True
        scope = self._nameScope
        self.variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=scope)
        self.update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope=scope)
        self.saver = tf.train.Saver(var_list=self.variables)
        return features
class darknet19_pretraining(object):
    """Classification head: 1x1 convolution to ``classNum`` channels, spatial mean, sigmoid.

    After each call ``variables``, ``update_ops`` and ``saver`` are refreshed
    from the graph collections filtered by the scope name, and the instance
    switches to ``reuse=True`` for later calls.
    """
    def __init__(self, classNum, nameScope='darknet19_pretraining', trainable=True, bnPhase=True, reuse=False, activation = tf.nn.leaky_relu):
        """Store the build options; no graph ops are created until the instance is called.

        classNum  -- number of output channels / per-class scores
        nameScope -- variable scope that will hold the head
        trainable -- forwarded to the convolution layer
        reuse     -- initial ``reuse`` flag of the variable scope
        bnPhase and activation are stored but not read by __call__.
        """
        self._classNum = classNum
        self._nameScope = nameScope
        self._reuse = reuse
        self._trainable = trainable
        self._bnPhase = bnPhase
        self._activation = activation
        # Filled in by __call__.
        self.variables = None
        self.update_ops = None
        self.saver = None
    def __call__(self, inputImg):
        """Build the head on ``inputImg`` and return the sigmoid scores (shape is printed)."""
        with tf.variable_scope(self._nameScope, reuse=self._reuse):
            classMaps = tf.layers.conv2d(
                inputs=inputImg, filters=self._classNum, kernel_size=1, strides=1,
                padding='same', activation=None, trainable=self._trainable, use_bias=True)
            # Average over axes 1 and 2, leaving one value per class channel.
            pooledScores = tf.reduce_mean(classMaps, axis=[1,2])
            classProbs = tf.nn.sigmoid(pooledScores)
            print(classProbs.shape)
        # Any later call shares the variables created above.
        self._reuse = True
        scope = self._nameScope
        self.variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=scope)
        self.update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope=scope)
        self.saver = tf.train.Saver(var_list=self.variables)
        return classProbs
class darknet_classifier(object):
    """Multi-label image classifier: ``darknet19_core`` backbone + ``darknet19_pretraining`` head.

    Constructing an instance loads ``imgList.npy`` / ``imgClassList.npy`` from
    ``dataPath``, builds the graph, creates the loss, accuracy metric and train
    op, and opens a session with all variables initialised.

    dataPath     -- directory containing the two ``.npy`` files
    imgSize      -- (rows, cols) of the single-channel network input
    batchSize    -- number of samples per training step
    learningRate -- Adam learning rate
    """
    def __init__(self, dataPath='./', imgSize = (416,416), batchSize = 64, learningRate = 0.001):
        self._imgList = None
        self._imgClassList = None
        self._dataPath = dataPath
        self._imgSize = imgSize
        self._batchSize = batchSize
        self._lr = learningRate
        self._classNum = None
        self.variables = None
        self.update_ops = None
        self._inputImg = None
        self._outputClass = None
        self._outputClassGT = None
        self._optimizer = None
        self._loss = None
        self._acc = None
        self._loadDataset()
        self._buildNetwork()
        self._createLossAndOptimizer()
        #init the session
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.90)
        self._sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
        #initialize variables
        init = tf.group(
            tf.global_variables_initializer(),
            tf.local_variables_initializer()
        )
        #launch the session
        self._sess.run(init)
    def _loadDataset(self):
        """Load images and labels from ``dataPath`` and resize every image to ``imgSize``.

        Sets ``_imgList`` (array of (rows, cols, 1) images), ``_imgClassList``
        and ``_classNum`` (second dimension of the label array).
        """
        print("load Dataset...")
        rawImages = np.load(os.path.join(self._dataPath, 'imgList.npy'))
        # Join the components properly so dataPath works with or without a trailing separator.
        self._imgClassList = np.load(os.path.join(self._dataPath, 'imgClassList.npy'))
        self._classNum = self._imgClassList.shape[1]
        rows, cols = self._imgSize[0], self._imgSize[1]
        # cv2.resize expects dsize as (width, height) and returns an array of
        # shape (height, width), so the (rows, cols) pair has to be swapped.
        self._imgList = np.array([
            cv2.resize(rawImg, (cols, rows)).reshape((rows, cols, 1))
            for rawImg in rawImages
        ])
        print("done!")
    def _buildNetwork(self):
        """Create the input/label placeholders and chain backbone and head."""
        print("build network...")
        self._inputImg = tf.placeholder(tf.float32, shape=(None, self._imgSize[0], self._imgSize[1], 1))
        self._outputClassGT = tf.placeholder(tf.float32, shape=(None, self._classNum))
        self._darknetCore = darknet19_core()
        self._pretraining = darknet19_pretraining(self._classNum)
        coreOutput = self._darknetCore(self._inputImg)
        self._outputClass = self._pretraining(coreOutput)
        print("done!")
    def _createLossAndOptimizer(self):
        """Create the loss, the accuracy metric and the Adam train op.

        ``_optimizer`` ends up holding the train op (the result of ``minimize``),
        which is what ``_fit`` runs.
        """
        print("create loss and optimizer...")
        optimizer = tf.train.AdamOptimizer(learning_rate=self._lr)
        def binaryLoss(xPred, xTarget, epsilon=1e-7):
            # Per-sample binary cross-entropy summed over classes; predictions
            # are clipped away from 0 and 1 so the logs stay finite.
            yTarget = xTarget
            yPred = tf.clip_by_value(xPred, clip_value_min=epsilon, clip_value_max=1.0-epsilon)
            bce_loss = - tf.reduce_sum(yTarget*tf.log(yPred) + (1.0-yTarget)*tf.log(1.0-yPred), axis=-1)
            return bce_loss
        self._loss = tf.reduce_mean(binaryLoss(xPred=self._outputClass, xTarget=self._outputClassGT))
        # Soft agreement between predicted scores and labels, normalised by the
        # number of label entries. Built once here: creating these ops inside
        # _fit would add new nodes to the graph on every training step.
        agreement = tf.reduce_sum((1-self._outputClass)*(1-self._outputClassGT)) \
            + tf.reduce_sum(self._outputClass*self._outputClassGT)
        labelCount = tf.reduce_sum(self._outputClassGT) + tf.reduce_sum(1-self._outputClassGT)
        self._acc = agreement / labelCount
        # Run the collected update ops (e.g. batch-norm statistics) with every step.
        with tf.control_dependencies(self._darknetCore.update_ops + self._pretraining.update_ops):
            self._optimizer = optimizer.minimize(
                self._loss, var_list = self._darknetCore.variables + self._pretraining.variables
            )
        print("done!")
    def _saveNetwork(self, savePath='./'):
        """Write ``dCore.ckpt`` and ``pretrain.ckpt`` checkpoints into ``savePath``."""
        # No leading '/' on the file names: os.path.join would otherwise drop savePath.
        dCorePath = os.path.join(savePath, 'dCore.ckpt')
        pretrainPath = os.path.join(savePath, 'pretrain.ckpt')
        # Saver.save needs the session that holds the variable values.
        self._darknetCore.saver.save(self._sess, dCorePath)
        self._pretraining.saver.save(self._sess, pretrainPath)
    def _restoreNetwork(self, restorePath='./'):
        """Load ``dCore.ckpt`` and ``pretrain.ckpt`` checkpoints from ``restorePath``."""
        dCorePath = os.path.join(restorePath, 'dCore.ckpt')
        pretrainPath = os.path.join(restorePath, 'pretrain.ckpt')
        self._darknetCore.saver.restore(self._sess, dCorePath)
        self._pretraining.saver.restore(self._sess, pretrainPath)
    def _fit(self, batchImg, batchClassIndex):
        """Run one optimisation step on a batch and return ``(accuracy, loss)``.

        Accuracy and loss are fetched in the same ``run`` call as the train op,
        so the metric does not cost a second forward pass.
        """
        feed_dict = {
            self._inputImg : batchImg,
            self._outputClassGT : batchClassIndex
        }
        _, lossResult, accResult = self._sess.run(
            [self._optimizer, self._loss, self._acc], feed_dict=feed_dict
        )
        return accResult, lossResult
    def train(self, epoch = 10000):
        """Train over the loaded dataset, printing running statistics to stdout.

        Every pass visits the whole dataset in order, including a final partial
        batch. Loss, accuracy and step time are running means over all steps.
        A checkpoint is written every 1000 steps.

        NOTE(review): the number of passes is ``int(epoch / batchSize)``, kept
        from the original implementation -- confirm this is intended.
        """
        stepCount = 0
        passesDone = 0
        loss = 0
        acc = 0
        runTime = 0
        sampleNum = len(self._imgList)
        # Ceiling division so a trailing partial batch is not silently skipped.
        batchNum = (sampleNum + self._batchSize - 1) // self._batchSize
        for _ in range(int(epoch/self._batchSize)):
            for batchIdx in range(batchNum):
                startTime = time.time()
                start = batchIdx * self._batchSize
                end = min(start + self._batchSize, sampleNum)
                accTemp, lossTemp = self._fit(self._imgList[start:end], self._imgClassList[start:end])
                runTimeTemp = time.time() - startTime
                # Incremental means over all steps so far.
                acc = float(acc*stepCount + accTemp)/float(stepCount+1.0)
                loss = float(loss*stepCount + lossTemp)/float(stepCount+1.0)
                runTime = float(runTime*stepCount + runTimeTemp)/(stepCount+1.0)
                sys.stdout.write('Epoch:{:05d} round:{:04d} runtime:{:.3f} '.format(int(stepCount+1), int(passesDone+1), runTime))
                sys.stdout.write('curr/total:{:05d}/{:05d} '.format(start, sampleNum))
                sys.stdout.write('loss:{:.3f} acc:{:.3f}\r'.format(loss, acc))
                stepCount += 1
                if stepCount%1000 == 0:
                    self._saveNetwork()
            passesDone +=1

In [None]:
# Build the classifier with a batch size of 32. The constructor loads the dataset,
# builds the network, creates the loss/optimizer and initialises a session.
darkClassifier = darknet_classifier(batchSize=32)

load Dataset...
done!
build network...
(?, 416, 416, 32)
(?, 208, 208, 32)
(?, 208, 208, 64)
(?, 104, 104, 64)
(?, 104, 104, 128)
(?, 104, 104, 64)
(?, 104, 104, 128)
(?, 52, 52, 128)
(?, 52, 52, 256)
(?, 52, 52, 128)
(?, 52, 52, 256)
(?, 26, 26, 256)
(?, 26, 26, 512)
(?, 26, 26, 256)
(?, 26, 26, 512)
(?, 26, 26, 256)
(?, 26, 26, 512)
(?, 13, 13, 512)
(?, 13, 13, 1024)
(?, 13, 13, 512)
(?, 13, 13, 1024)
(?, 13, 13, 512)
(?, 13, 13, 1024)
(?, 24)
done!
create loss and optimizer...
done!


In [None]:
# Start training with the default `epoch` argument; progress is written to stdout.
darkClassifier.train()

Epoch:0224 round:001 runtime:0.731 curr/total:07136/10291 loss:5.291 acc:0.8865

KeyboardInterrupt: 