In [1]:
import pandas as pd
import tensorflow as tf
import cv2 as cv
import os
import matplotlib.pyplot as plt
import xml.etree.ElementTree as ET #tree import 
import numpy as np

import matplotlib.pyplot as plt

In [2]:
tf.config.list_physical_devices('GPU')

[]

In [3]:
VALIDATION_IMAGES = 200
TRAIN_IMAGES = 2000

IMAGE_SIZE = 224
GRID_SIZE = 5
BOX_SIZE = 1 #originally 2 boxes -> choose the one with the highest iou => we will not do in here
NUM_CLASS = 2
BATCH_SIZE = 128

In [7]:
#annotations = 'C:\\Users\\INFOSTAT-19\\Desktop\\annotations'

In [4]:
classesNum = {'dog': 0, 'cat': 1}
def convertFunction(folder, name, file):
    path = folder + '/' + name
    path = os.path.normpath(path)
    tree = ET.parse(path)
    root = tree.getroot()

    file.write(path.replace('xml', 'png'))
    for obj in root.iter('object'):
        difficult = obj.find('difficult').text
        className = obj.find('name').text
        if className not in classesNum.keys() or int(difficult) == 1:
            continue

        #get bounding box
        box = ( int(obj.find('bndbox').find('xmin').text),
                int(obj.find('bndbox').find('ymin').text),
                int(obj.find('bndbox').find('xmax').text),
                int(obj.find('bndbox').find('ymax').text))

        id = list(classesNum.keys()).index(className)

        #write to file
        file.write(' ' + ','.join([str(a) for a in box]) + ',' + str(id))
    file.write('\n')

with open(os.path.join('%s.txt' % ('annotations')) , 'w') as f:
    for file in os.listdir('annotations'):
        if file.endswith('.xml'):
            convertFunction(folder = 'annotations', name=str(file), file=f)
            

In [6]:
train_datasets = []
with open(os.path.normpath('annotations.txt'), 'r') as f:
    train_datasets = f.readlines()

In [7]:
len(train_datasets)

3686

In [8]:
val_datasets = train_datasets[:VALIDATION_IMAGES]
train_datasets = train_datasets[VALIDATION_IMAGES : VALIDATION_IMAGES + TRAIN_IMAGES]

In [9]:
print('Train images: ', len(train_datasets), 'Val images: ', len(val_datasets))

Train images:  2000 Val images:  200


In [10]:
classArray  = list(classesNum.keys())

def annotationConverting(dataset):
    X, Y = [], []
    for item in dataset:
        item = item.replace("\n", "").split(" ")
        X.append(item[0])
        arr = []
        for i in range(1, len(item)):
            arr.append(item[i])
        Y.append(arr)
    return X,Y
#def testPrintItem(Image,target):
   #pass

In [11]:
X_train, Y_train = annotationConverting(train_datasets)
X_val, Y_val = annotationConverting(val_datasets)

In [31]:
Y_train[1]

['69,42,239,224,0']

In [12]:
class DataGenerator(tf.keras.utils.Sequence):
    def __init__(self, images, labels, shuffle=False):
        self.images = images
        self.labels = labels
        self.shuffle = shuffle
    
    def __len__(self):
        return (np.ceil(len(self.images) / int(BATCH_SIZE))).astype(int)
    
    def __getitem__(self, idx):
        batch_x = self.images[idx * BATCH_SIZE : (idx + 1) * BATCH_SIZE] # images path
        batch_y = self.labels[idx * BATCH_SIZE : (idx + 1) * BATCH_SIZE] # raw label

        train_image, train_label = [], []
        for i in range(0, len(batch_x)):
            img_path, label = batch_x[i], batch_y[i]
            image, label_matrix = self.read(img_path, label) #actual image array (IMAGE_SIZE, IMAGE_SIZE, 3) (GRID_SIZE, GRID_SIZE, 5 * BOX_SIZE + CLASS) 
            train_image.append(image)
            train_label.append(label_matrix)
        
        if self.shuffle:
            indices = tf.range(start=0, limit=tf.shape(train_image)[0], dtype=tf.int32)
            idx = tf.random.shuffle(indices)
            train_image = tf.gather(train_image, idx)
            train_label = tf.gather(train_label, idx)

        return np.array(train_image, dtype=np.float32), np.array(train_label, dtype=np.float32)
    
    def read(self, img_path, label):
        image = cv.imread(img_path)
        h, w = image.shape[0:2]
        #h, w = image.shape[0]
        image = cv.resize(image, (IMAGE_SIZE, IMAGE_SIZE))
        image = image / 255.

        label_matrix = np.zeros([GRID_SIZE, GRID_SIZE, 5 * BOX_SIZE + NUM_CLASS])
        for l in label:
            l = l.split(',')
            l = np.array(l, dtype=int)

            xmin, ymin, xmax, ymax = l[0] / w, l[1] / h, l[2] / w, l[3] / h # [0, 1]
            
            x, y = (xmin + xmax) / 2, (ymin + ymax) / 2
            w, h = xmax - xmin, ymax - ymin

            #convert x, y relative to the cell
            i, j = int(GRID_SIZE * y), int(GRID_SIZE * x)
            x = GRID_SIZE * x - j # 7 * [0,1] = [0, 7] e.g: 6.43 - int(6.43) = 0.43 => relative to the cell
            y = GRID_SIZE * y - i

            if l[4] == 0:
                label_matrix[i, j] = [x, y, w, h, 1, 1, 0]
            if l[4] == 1:
                label_matrix[i, j] = [x, y, w, h, 1, 0, 1]
        return image, label_matrix

In [None]:
#batch size 128 -> 1300 load all of them to our ram before they can go to the gpu for training -> memory expensive
#data generator gonna generator 128 images every step in the fit function -> not expensive!

In [33]:
print(Y_val)
#리스트로 나옴 -> 이걸 dataframe으로 만들어야 한다면..? 

[['83,29,197,142,1'], ['128,22,240,222,0'], ['157,56,264,116,0'], ['142,145,206,209,1'], ['36,14,289,312,1'], ['140,80,229,152,0'], ['178,107,294,223,1'], ['148,39,355,244,0'], ['53,27,239,121,0'], ['65,16,294,221,1'], ['95,20,299,290,0'], ['225,52,381,195,1'], ['108,149,199,229,1'], ['70,48,168,120,0'], ['134,79,297,234,1'], ['36,65,137,172,1'], ['112,64,217,164,1'], ['94,36,378,263,0'], ['130,38,237,132,0'], ['195,61,374,177,1'], ['94,35,235,136,0'], ['120,1,331,222,0'], ['336,15,486,158,1'], ['102,42,208,146,0'], ['160,6,416,265,1'], ['165,30,327,187,1'], ['194,121,295,228,0'], ['86,28,306,258,1'], ['85,43,210,164,0'], ['158,3,384,183,1'], ['65,36,199,166,0'], ['274,84,402,207,0'], ['1,42,328,308,1'], ['224,41,394,216,1'], ['83,1,217,147,0'], ['1,100,375,453,1'], ['95,17,258,167,0'], ['255,30,354,111,0'], ['100,15,241,135,0'], ['203,7,343,159,1'], ['87,26,288,249,0'], ['131,80,432,366,1'], ['118,10,292,217,1'], ['84,17,223,138,0'], ['88,85,266,231,0'], ['102,15,272,204,1'], ['126,10

In [13]:
trainingDataGenerator = DataGenerator(X_train, Y_train)
validationDataGenerator = DataGenerator(X_val, Y_val)

#datagenerator -> 

In [26]:
print(trainingDataGenerator)

<__main__.DataGenerator object at 0x000001C06AA12DF0>


In [14]:
x_train, y_train = trainingDataGenerator.__getitem__(0)

AttributeError: 'NoneType' object has no attribute 'shape'

In [15]:
classArray = list(classesNum.keys())
print(classArray)

['dog', 'cat']


In [16]:
def TestPrint(image, label):
    for i in range(GRID_SIZE):
        for j in range(GRID_SIZE):
            if label[i][j][4] > 0.5:
                print(label[i][j])
                x, y, w, h = label[i][j][:4]

                xmax = int(((x + j) / GRID_SIZE * IMAGE_SIZE) + (w * IMAGE_SIZE) / 2)
                xmin = int(((x + j) / GRID_SIZE * IMAGE_SIZE) - (w * IMAGE_SIZE) / 2)
                ymax = int(((y + i) / GRID_SIZE * IMAGE_SIZE) + (h * IMAGE_SIZE) / 2)
                ymin = int(((y + i) / GRID_SIZE * IMAGE_SIZE) - (h * IMAGE_SIZE) / 2)

                className = classArray[tf.argmax(label[i][j][5:], axis=-1)]
                cv.putText(image, className, (xmin, ymax + 10), cv.FONT_HERSHEY_COMPLEX_SMALL, 1, (255, 255, 255))
                cv.rectangle(image, (xmin, ymin), (xmax, ymax), (255, 255, 255), 1)
    #image = cv.cvtColor(image, cv.COLOR_BGR2RGB)
    cv.imshow('Visualize', image)
    cv.waitKey(0)
    cv.destroyWindow('Visualize')


In [17]:
x_train, y_train = trainingDataGenerator.__getitem__(0)
idx = 6
TestPrint(x_train[idx], y_train[idx])

AttributeError: 'NoneType' object has no attribute 'shape'

In [33]:
class YoloActivation(tf.keras.layers.Layer):
    def call(self, inputs):
        classes = tf.nn.softmax(inputs[..., 5:], axis=-1)
        coordinates = tf.sigmoid(inputs[..., :5])
        return tf.concat([coordinates, classes], axis=-1)

In [34]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten, Reshape, MaxPool2D, BatchNormalization

lrelu = tf.keras.layers.LeakyReLU(alpha=0.1)

featureExtractor = Sequential()
featureExtractor.add(tf.keras.applications.efficientnet_v2.EfficientNetV2M(
    include_top = False,
    weights = 'imagenet',
    input_shape = (IMAGE_SIZE, IMAGE_SIZE, 3),
))

featureExtractor.trainable = False

model = Sequential()
model.add(featureExtractor)
model.add(MaxPool2D((2, 2)))
model.add(BatchNormalization())

model.add(Flatten())
model.add(Dense(1024, activation=lrelu))
model.add(Dropout(0.5))
model.add(Dense(512, activation=lrelu))
model.add(Dropout(0.5))
model.add(BatchNormalization())

model.add(Dense(GRID_SIZE * GRID_SIZE * (BOX_SIZE * 5 + NUM_CLASS))) #total nodes we need -> reshape them into the grid (GRID_SIZE, GRID_SIZE, BOX_SIZE * 5 + NUM_CLASS)
model.add(Reshape((GRID_SIZE, GRID_SIZE, BOX_SIZE * 5 + NUM_CLASS))) # linear activation function (-inf, inf) -> takes longer to converge and not that good
model.add(YoloActivation()) #convert last two class probability into a softmax outputs

model.summary()

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/efficientnet_v2/efficientnetv2-m_notop.h5
Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 sequential (Sequential)     (None, 7, 7, 1280)        53150388  
                                                                 
 max_pooling2d (MaxPooling2D  (None, 3, 3, 1280)       0         
 )                                                               
                                                                 
 batch_normalization (BatchN  (None, 3, 3, 1280)       5120      
 ormalization)                                                   
                                                                 
 flatten (Flatten)           (None, 11520)             0         
                                                                 
 dense (Dense)               (None, 1024)              117975

In [35]:
#YOLO LOSS !!!
def yoloLoss(y_true, y_pred):
    coordLoss = CoordLoss(y_true, y_pred)
    confidenceLoss = ConfidenceLoss(y_true, y_pred)
    classLoss = ClassLoss(y_true, y_pred)

    return 25 * coordLoss + 15 * confidenceLoss + 5 * classLoss

def CoordLoss(y_true, y_pred):
    #find if it exist an object in the grid
    existsObject = tf.expand_dims(y_true[..., 4], -1)

    xy_pred = existsObject * y_pred[..., 0:2]
    xy_true = existsObject * y_true[..., 0:2]

    wh_pred = existsObject * tf.math.sign(y_pred[..., 2:4]) * tf.sqrt(tf.math.abs(y_pred[..., 2:4])) #if it's linear (-inf, inf)
    wh_true = existsObject * tf.sqrt(y_true[..., 2:4])

    coordLoss = tf.reduce_sum(tf.math.square(wh_pred - wh_true))
    coordLoss += tf.reduce_sum(tf.math.square(xy_pred - xy_true))

    return coordLoss / tf.cast(tf.math.count_nonzero(existsObject), dtype=tf.float32) #mean, but it's fine if we don't

def ConfidenceLoss(y_true, y_pred):
    existsObject = tf.expand_dims(y_true[..., 4], -1)

    confidenceLoss = tf.reduce_sum(tf.math.square(existsObject * (y_true[..., 4:5] - y_pred[..., 4:5])))
    confidenceLoss += 0.5*tf.reduce_sum(tf.math.square((1 - existsObject) * (y_true[..., 4:5] - y_pred[..., 4:5])))

    return confidenceLoss / tf.cast(tf.math.count_nonzero(existsObject), dtype=tf.float32) #mean, but it's fine if we don't

def ClassLoss(y_true, y_pred):
    existsObject = tf.expand_dims(y_true[..., 4], -1)

    classLoss = tf.reduce_sum(tf.math.square(existsObject * (y_true[..., 5:] - y_pred[..., 5:])))
    return classLoss / tf.cast(tf.math.count_nonzero(existsObject), dtype=tf.float32) #mean, but it's fine if we don't

In [36]:
model.compile(loss = yoloLoss, optimizer=tf.keras.optimizers.Adam(learning_rate=0.01), metrics=[CoordLoss, ConfidenceLoss, ClassLoss]) 

In [37]:
model.fit(  x = trainingDataGenerator,
            validation_data = (validationDataGenerator),
            epochs = 300,
            workers = 8,
            validation_freq = 5)

NameError: name 'np' is not defined

In [None]:
for i in range(9):
    image = cv.imread('annotations/Cats_Test20' + str(i) + '.png')
    image = cv.resize(image, (IMAGE_SIZE, IMAGE_SIZE))
    image = image / 255.
    TestPrint(image, model.predict(np.expand_dims(image, 0))[0])

In [None]:
model.save('yolov1')