#Preprocessing Main

##Drive Configuration

In [None]:
from google.colab import drive
# Mount Google Drive into the Colab runtime so the dataset paths under
# /content/drive used by the cells below can be opened.
drive.mount('/content/drive')

In [None]:
# IPython magic (only valid inside a notebook): make the dataset folder the
# current working directory for the following cells.
%cd /content/drive/My Drive/yeni_proje/data

##Preprocessing

In [None]:
import numpy as np
from PIL import Image

def preprocess():
    """Resize every dataset image to ``target_wh`` and rescale its boxes.

    Reads the category ids from ``category.txt`` under ``my_dataset_disk``,
    then walks each category folder's ``bb_info.txt``.  Images whose size
    differs from ``target_wh`` are resized and overwritten in place, and
    their box coordinates are rescaled with :func:`newbbox`; rows of images
    that already have the target size are copied through unchanged.  The
    resulting rows are written per category by :func:`rewrite_bbox`.

    Relies on the module-level globals ``my_dataset_disk``, ``category``,
    ``bbox_info`` and ``target_wh``.
    """
    target_size = tuple(target_wh)

    # First column of category.txt is the numeric id; the first line is a header.
    category_ids = []
    with open(my_dataset_disk + '/' + category, 'r') as category_list:
        next(category_list, None)
        for line in category_list:
            line = line.rstrip('\n')  # delete \n at the end of the line
            if not line:
                continue
            category_ids.append(int(line.split('\t')[0]))

    for category_id in category_ids:
        category_dir = my_dataset_disk + '/' + str(category_id)
        new_bbox_info = []
        with open(category_dir + '/' + bbox_info, 'r') as bbox_list:
            next(bbox_list, None)  # skip header
            for line in bbox_list:
                line = line.rstrip('\n')
                if not line:
                    continue
                fields = line.split(' ')
                img_path = category_dir + '/' + str(fields[0]) + '.jpg'

                # Open inside a context manager so the file handle is released
                # for every image instead of accumulating over the dataset.
                resized = None
                with Image.open(img_path) as ori_img:
                    ori_size = ori_img.size
                    if ori_size != target_size:
                        # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is
                        # the same filter under its current name.
                        resized = ori_img.resize(target_size, Image.LANCZOS)

                if resized is None:
                    new_bbox_info.append(fields)
                    continue

                old_bbox = list(map(int, fields[1:]))
                new_bbox = list(map(str, newbbox(ori_size, old_bbox, target_wh)))
                new_bbox.insert(0, str(fields[0]))
                new_bbox_info.append(new_bbox)
                resized.save(img_path)  # overwrite the original image
        rewrite_bbox(category_id, new_bbox_info)
    print('Done!')


def newbbox(ori_size, oldbbox, target_wh):
    """Rescale a bounding box from the original image size to ``target_wh``.

    :param ori_size: (width, height) of the original image
    :param oldbbox: [x1, y1, x2, y2] in original-image pixels
    :param target_wh: [width, height] of the resized image
    :return: float numpy array [x1, y1, x2, y2] in resized-image pixels, each
        value truncated to a whole number and capped at the target width
        (x values) or target height (y values)
    """
    wscale = target_wh[0] / ori_size[0]
    hscale = target_wh[1] / ori_size[1]

    new_bbox = np.zeros(4)
    new_bbox[0], new_bbox[2] = int(wscale * oldbbox[0]), int(wscale * oldbbox[2])
    new_bbox[1], new_bbox[3] = int(hscale * oldbbox[1]), int(hscale * oldbbox[3])

    # Cap against the actual target size (previously hard-coded to 800x600,
    # which made any other target_wh trip the sanity check below).
    new_bbox[[0, 2]] = np.minimum(new_bbox[[0, 2]], target_wh[0])
    new_bbox[[1, 3]] = np.minimum(new_bbox[[1, 3]], target_wh[1])

    # Sanity check covering all four coordinates.
    assert (new_bbox[[0, 2]] <= target_wh[0]).all()
    assert (new_bbox[[1, 3]] <= target_wh[1]).all()

    return new_bbox

def rewrite_bbox(id, new_bbox_info):
    """Write the rescaled boxes of one category to ``new_bb_info.txt``.

    :param id: category id; selects the folder ``<my_dataset_disk>/<id>``
    :param new_bbox_info: rows of five strings ``[img, x1, y1, x2, y2]``
    """
    print('rewriting category ' + str(id) + ' ...')
    out_path = my_dataset_disk + '/' + str(id) + '/' + 'new_bb_info.txt'
    # The context manager closes the file even if a malformed row raises.
    with open(out_path, 'w') as out_file:
        out_file.write('img x1 y1 x2 y2\n')  # header
        for row in new_bbox_info:
            # Index explicitly so a row with fewer than five fields still fails loudly.
            out_file.write(' '.join(row[k] for k in range(5)) + '\n')

# Module-level configuration read as globals by preprocess() and rewrite_bbox().
target_wh = [800, 600]  # target image size as [width, height]
my_dataset_disk = '/content/drive/My Drive/yeni_proje/data'  # dataset root: category.txt plus one folder per category id
category = 'category.txt'  # tab-separated category list (header line first)
bbox_info = 'bb_info.txt'  # per-category box file name (header line first)
# Runs immediately: resizes images in place and writes new_bb_info.txt per category.
preprocess()

##Train Test Validation Split

In [None]:
import random
import itertools
import numpy as np

def _write_split(list_path, images, occurrences):
    """Write one split file: a header plus one ``path id x1 y1 x2 y2`` line per box."""
    with open(list_path, 'w') as out_file:
        out_file.write('img category_id x1 y1 x2 y2\n')  # header
        for img in images:
            # One image may occur in several categories (or several times in one).
            for img_path, img_category, bbox in occurrences[img]:
                img_bbox = ' '.join(str(bbox[k]) for k in range(4))
                out_file.write(img_path + ' ' + img_category + ' ' + img_bbox + '\n')


def split_dataset(uecfood100_path='//content/drive/My Drive/proje/data',
                  split=(0.7, 0.2, 0.1), seed=None):
    """Split the dataset per category into train/val/test list files.

    Reads ``category.txt`` and every ``<id>/new_bb_info.txt`` under
    ``uecfood100_path``, shuffles the images of each category, and writes
    ``classes.txt``, ``train_uec100.txt``, ``val_uec100.txt`` and
    ``test_uec100.txt`` into ``uecfood100_path``.  An image that lands in
    more than one split (because it occurs in several categories) is kept in
    one split only, with priority val > test > train.

    :param uecfood100_path: dataset root directory (default: the original
        hard-coded location)
    :param split: (train, val, test) ratios; the test share is whatever is
        left after flooring the train and val counts
    :param seed: optional seed for a reproducible shuffle; ``None`` uses the
        global ``random`` state as before
    """
    category = 'category.txt'
    bbox_info = 'new_bb_info.txt'
    files_generated = ['train_uec100.txt', 'val_uec100.txt', 'test_uec100.txt']
    imgs_format = 'jpg'
    rng = random.Random(seed) if seed is not None else random

    # First column (id) and second column (name) of category.txt.
    category_ids = []
    category_names = []
    with open(uecfood100_path + '/' + category, 'r') as category_list:
        next(category_list, None)  # skip header
        for line in category_list:
            line = line.rstrip('\n')
            if not line:
                continue
            fields = line.split('\t')
            category_ids.append(int(fields[0]))
            category_names.append(fields[1])

    # Read the box file of every category.  `occurrences` maps an image name
    # to all of its (path, category id, bbox) rows, in category/file order.
    category_images = []
    occurrences = {}
    for category_id in category_ids:
        category_dir = uecfood100_path + '/' + str(category_id)
        images = []
        with open(category_dir + '/' + bbox_info, 'r') as bbox_list:
            next(bbox_list, None)  # skip header
            for line in bbox_list:
                line = line.rstrip('\n')
                if not line:
                    continue
                fields = line.split(' ')
                img_path = category_dir + '/' + fields[0] + '.' + imgs_format
                bbox = list(map(float, fields[1:]))
                images.append(fields[0])
                occurrences.setdefault(fields[0], []).append((img_path, str(category_id), bbox))
        category_images.append(images)

    # Divide each category with the requested ratios.
    train_imgs, val_imgs, test_imgs = set(), set(), set()
    for images in category_images:
        n_imgs = len(images)
        n_train = int(np.floor(n_imgs * split[0]))
        n_val = int(np.floor(n_imgs * split[1]))

        shuffled_imgs = rng.sample(images, n_imgs)
        train_imgs.update(shuffled_imgs[:n_train])
        val_imgs.update(shuffled_imgs[n_train:n_train + n_val])
        test_imgs.update(shuffled_imgs[n_train + n_val:])

    # Resolve overlaps: val wins over test, and both win over train.
    all_val_list = sorted(val_imgs)
    all_test_list = sorted(test_imgs - val_imgs)
    all_train_list = sorted(train_imgs - val_imgs - test_imgs)

    with open(uecfood100_path + '/classes.txt', 'w') as classes_file:
        for name in category_names:
            classes_file.write(name + '\n')

    # Each file gets its own image list (the val and test files used to be
    # written from the training list by mistake).
    _write_split(uecfood100_path + '/' + files_generated[0], all_train_list, occurrences)
    _write_split(uecfood100_path + '/' + files_generated[1], all_val_list, occurrences)
    _write_split(uecfood100_path + '/' + files_generated[2], all_test_list, occurrences)

    print('Done!')

# Runs immediately: writes classes.txt and the train/val/test list files.
split_dataset()

##Anchor Box (optional)

In [None]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
import random


def kmeans(wh, centroids, anchor_txt):
    """Cluster box sizes with k-means using ``1 - IoU`` as the distance.

    Iterates until the cluster assignment of every box stops changing, then
    saves the centroids with :func:`save_anchors` (using the module-level
    ``wh_in_mobilenet``) and returns them.

    :param wh: array of shape (num, 2) with one (w, h) pair per box
    :param centroids: array of shape (k, 2) with the initial centroids;
        updated in place
    :param anchor_txt: path of the anchor file to write on convergence
    :return: the final centroids
    """
    num = wh.shape[0]  # total number of different wh pairs
    k, dim = centroids.shape
    iteration = 0
    old_distances = np.zeros((num, k))
    prev_assignments = -np.ones(num)

    while True:
        iteration += 1
        # high IoU represents low distance
        new_distances = np.array([1 - IoU(wh[i], centroids) for i in range(num)])
        print('Iter {}: distances: {}'.format(iteration, np.sum((np.abs(old_distances - new_distances)))))

        # assign every box to its closest centroid
        assignments = np.argmin(new_distances, axis=1)
        if (assignments == prev_assignments).all():
            print('final centroids =', centroids)
            save_anchors(centroids, anchor_txt, wh_in_mobilenet)
            return centroids

        # `np.float` was removed in NumPy 1.24; the builtin float is the same dtype.
        centroid_sums = np.zeros((k, dim), float)
        for i in range(num):
            centroid_sums[assignments[i]] += wh[i]  # sum up attribute
        for j in range(k):
            members = np.sum(assignments == j)
            # An empty cluster (e.g. two identical initial centroids) would
            # divide 0 by 0 and poison the centroid with NaN; keep the old one.
            if members > 0:
                centroids[j] = centroid_sums[j] / members

        prev_assignments = assignments.copy()
        old_distances = new_distances.copy()

def save_anchors(centroids, anchor_txt, wh_in_mobilenet):
    """Scale the centroids to feature-map cells and write them to a text file.

    Each centroid width is multiplied by ``wh_in_mobilenet[0] / 32`` and each
    height by ``wh_in_mobilenet[1] / 32``.  The scaled anchors are printed
    and written one per line as ``w, h`` with two decimals, ordered by
    ascending width.  ``centroids`` itself is left untouched.

    :param centroids: array of shape (k, 2) holding (w, h) pairs
    :param anchor_txt: path of the output text file
    :param wh_in_mobilenet: [width, height] of the network input
    """
    cells_w = wh_in_mobilenet[0] / 32.
    cells_h = wh_in_mobilenet[1] / 32.
    with open(anchor_txt, 'w') as out_file:
        scaled = centroids.copy()  # work on a copy, not on the caller's array
        scaled[:, 0] *= cells_w
        scaled[:, 1] *= cells_h
        order = np.argsort(scaled[:, 0])  # indices that sort by width
        print('anchors = ', scaled[order])

        out_file.writelines(
            '%0.2f, %0.2f\n' % (scaled[idx, 0], scaled[idx, 1]) for idx in order
        )

def avgIoU(wh, centroids):
    """Return the mean, over all boxes, of each box's best IoU with any centroid.

    :param wh: array with one (w, h) pair per row
    :param centroids: array with one (w, h) centroid per row
    """
    n_boxes = wh.shape[0]
    total = 0.
    for row in range(n_boxes):
        best = max(IoU(wh[row], centroids))
        total += best
    return total / n_boxes

def IoU(whi, centroids):
    """Compute the IoU of one (w, h) box against every centroid.

    Boxes are compared as if they shared a corner, so only widths and
    heights matter.

    :param whi: (w, h) of the box
    :param centroids: iterable of (w, h) centroids
    :return: numpy array with one IoU value per centroid
    """
    def overlap(centroid):
        c_w, c_h = centroid
        w, h = whi
        if c_w >= w and c_h >= h:
            # centroid fully contains the box
            return w * h / (c_w * c_h)
        if c_w >= w and c_h <= h:
            # centroid is wider but not taller
            return w * c_h / (w * h + (c_w - w) * c_h)
        if c_w <= w and c_h >= h:
            # centroid is taller but not wider
            return c_w * h / (w * h + (c_h - h) * c_w)
        # box fully contains the centroid
        return c_w * c_h / (w * h)

    return np.array([overlap(centroid) for centroid in centroids])

def coordinate2wh(coordinates, uec100_dims):
    """Convert corner coordinates to a box size relative to the image size.

    :param coordinates: [x1, y1, x2, y2] as numbers or numeric strings
    :param uec100_dims: [width, height] of the dataset images
    :return: (w, h) with w as a fraction of the image width and h as a
        fraction of the image height
    """
    x1, y1, x2, y2 = list(map(float, coordinates))[:4]
    w = (x2 - x1) / uec100_dims[0]
    # Height is normalised by the image height (index 1); it used to be
    # divided by the width, which skews anchors for non-square images.
    h = (y2 - y1) / uec100_dims[1]
    return w, h

def _initial_centroids(wh, k):
    """Pick ``k`` rows of ``wh`` at random as starting centroids (distinct rows when possible)."""
    n_boxes = wh.shape[0]
    if k <= n_boxes:
        indices = random.sample(range(n_boxes), k)
    else:
        # more clusters than boxes: repeats are unavoidable
        indices = [random.randrange(n_boxes) for _ in range(k)]
    return wh[indices]


def gen_anchors(n_clusters, uec100_dims, dataset_disk='/content/drive/My Drive/proje/data/'):
    """Generate anchor boxes by running k-means over the training box sizes.

    Reads ``train_uec100.txt`` from ``dataset_disk`` and writes
    ``anchors_<k>.txt`` files into ``generated_anchors_mobilenet`` below it
    (the files themselves are written by :func:`kmeans`).

    :param n_clusters: number of anchors to generate; ``0`` tries 1 to 10
        clusters and plots the average IoU of each
    :param uec100_dims: [width, height] of the dataset images
    :param dataset_disk: dataset directory.
        NOTE(review): other cells use '.../yeni_proje/data/' - confirm which
        location is intended.
    :raises ValueError: if the training list contains no boxes
    """
    output_path = os.path.join(dataset_disk, 'generated_anchors_mobilenet')
    train_uec100 = os.path.join(dataset_disk, 'train_uec100.txt')

    # kmeans() opens the anchor file for writing, so the folder must exist.
    os.makedirs(output_path, exist_ok=True)

    wh = []
    with open(train_uec100, 'r') as file:
        next(file, None)  # skip header
        for line in file:
            line = line.rstrip('\n')
            if not line:
                continue
            # Take the last four fields: the image path at the start of the
            # line may itself contain spaces (e.g. ".../My Drive/...").
            coordinates = line.split(' ')[-4:]
            w, h = coordinate2wh(coordinates, uec100_dims)
            wh.append([w, h])
    wh = np.array(wh)
    if wh.shape[0] == 0:
        raise ValueError('no bounding boxes found in ' + train_uec100)

    if n_clusters == 0:  # make from 1 to 10 clusters and pick the best one
        avgIou = []
        for n_cluster in range(1, 11):
            anchor_txt = os.path.join(output_path, 'anchors_%d.txt' % (n_cluster))
            centroids = kmeans(wh, _initial_centroids(wh, n_cluster), anchor_txt)
            avgIou.append([n_cluster, avgIoU(wh, centroids)])
        avgIou = np.array(avgIou)
        plt.plot(avgIou[:, 0], avgIou[:, 1])
        plt.scatter(avgIou[:, 0], avgIou[:, 1], c='r')
        plt.xlabel('number of cluster')
        plt.ylabel('average IoU')
        plt.savefig('avg_iou')
        plt.show()
    else:
        anchor_txt = os.path.join(output_path, 'anchors_%d.txt' % (n_clusters))
        kmeans(wh, _initial_centroids(wh, n_clusters), anchor_txt)

    print('Done!')


# Network input size as [width, height]; read as a global by kmeans() when it saves anchors.
wh_in_mobilenet = [224, 224]
uec256_dims = [800, 600]  # dataset image width=800, height=600
# 0 makes gen_anchors() try 1..10 clusters and plot the average IoU of each.
n_clusters = 0

# Runs immediately: reads train_uec100.txt and writes the anchors_<k>.txt files.
gen_anchors(n_clusters, uec256_dims)

In [None]:
def visualize(wh_in_mobilenet):
    """Draw the anchors of ``anchors_1.txt`` .. ``anchors_10.txt`` for inspection.

    For every anchor file under ``dataset_disk + 'generated_anchors_mobilenet/'``
    (``dataset_disk`` is a module-level global) each anchor is drawn as a
    rectangle on a black canvas, the canvas is saved next to the text file
    as ``.png`` and shown with matplotlib.

    :param wh_in_mobilenet: [width, height] of the canvas in pixels
    """
    stride = 32  # anchors are stored in feature-map cells; one cell is 32 pixels

    colors = [(255, 0, 0), (255, 255, 0), (0, 255, 0), (0, 0, 255), (0, 255, 255), (55, 0, 0),
              (255, 55, 0), (0, 55, 0), (0, 0, 25), (0, 255, 55)]

    # per-anchor shift so the rectangles' top-left corners do not coincide
    stride_h = 10
    stride_w = 3

    anchor_path = dataset_disk + 'generated_anchors_mobilenet/'
    # Distinct loop names: the inner loops used to overwrite the outer index,
    # so the plot title was only correct by coincidence.
    for n_anchors in range(1, 11):
        window_name = '%d_anchors' % n_anchors
        cv2.namedWindow(window_name)
        cv2.moveWindow(window_name, 100, 100)
        # NumPy images are indexed (rows, cols), i.e. (height, width).
        blank_image = np.zeros((wh_in_mobilenet[1], wh_in_mobilenet[0], 3), np.uint8)

        anchor_txt = anchor_path + 'anchors_%d.txt' % n_anchors
        with open(anchor_txt, 'r') as f:
            anchors = [line.rstrip('\n').split(', ') for line in f]

        output_img = anchor_txt.replace('.txt', '.png')
        for idx, anchor in enumerate(anchors):
            (w, h) = map(float, anchor)

            w = int(w * stride)
            h = int(h * stride)
            print(w, h)

            offset_x = 10 + idx * stride_w
            offset_y = 10 + idx * stride_h

            # wrap around the palette instead of failing on an 11th anchor
            color = colors[idx % len(colors)]
            cv2.rectangle(blank_image, (offset_x, offset_y), (offset_x + w, offset_y + h), color, 2)

            cv2.waitKey(1000)
            cv2.imwrite(output_img, blank_image)
        plt.imshow(blank_image)
        plt.title(window_name)
        plt.show()
        print('')

    print('Done!')



# Canvas size passed to visualize().
wn_in_mobilenet = [224, 224]
# Read as a global by visualize(); must end with '/' because paths are built by concatenation.
dataset_disk = '/content/drive/My Drive/yeni_proje/data/'
# Runs immediately: renders anchors_1.txt .. anchors_10.txt.
visualize(wn_in_mobilenet)

##Generate Boxes

In [None]:
import os
import numpy as np
from PIL import Image

def process_data(img_size, boxes):
    """Convert corner-format boxes to relative centre/size format and pad them.

    Every entry of ``boxes`` is reshaped to rows of
    ``class, x_min, y_min, x_max, y_max`` and turned into rows of
    ``x_center, y_center, width, height, class`` where the first four values
    are divided by ``img_size``.  Entries with fewer rows than the largest
    one are padded with all-zero rows.

    :param img_size: array (width, height) used to normalise the coordinates
    :param boxes: sequence of arrays, one per image
    :return: numpy array stacking the padded per-image boxes
    """
    converted = []
    for raw in boxes:
        rows = raw.reshape((-1, 5))
        mins = rows[:, 1:3]
        maxes = rows[:, 3:5]
        centers = 0.5 * (maxes + mins) / img_size
        sizes = (maxes - mins) / img_size
        converted.append(np.concatenate((centers, sizes, rows[:, 0:1]), axis=1))

    # largest number of boxes found on a single image
    max_boxes = max((entry.shape[0] for entry in converted), default=0)

    # zero-pad so that every image carries the same number of rows
    padded = []
    for entry in converted:
        missing = max_boxes - entry.shape[0]
        if missing > 0:
            filler = np.zeros((missing, 5), dtype=np.float32)
            entry = np.vstack((entry, filler))
        padded.append(entry)

    return np.array(padded)

def create_label_dict(class_path):
    """Map every class name in a classes file to its zero-based line index.

    :param class_path: text file with one class name per line
    :return: dict ``{class_name: index}``
    """
    print('\n-> creating dictinary for labels...\n')
    with open(class_path) as f:
        # Strip only the newline: slicing off the last character would
        # truncate the final name when the file has no trailing newline.
        class_names = [line.rstrip('\n') for line in f]
    return {name: index for index, name in enumerate(class_names)}

def txt2data(txt_path, expected_size=(800, 600)):
    """Read a split list file and group its boxes per image.

    Every line after the header has the form ``path category_id x1 y1 x2 y2``.
    Category ids are shifted to start at 0 and coordinates are truncated to
    integers.

    :param txt_path: path of the list file, e.g. train_uec100.txt
    :param expected_size: (width, height) every listed image must have;
        ``None`` skips opening the images
    :return: list with one entry per image, in first-seen order, shaped like
        ``['Volumes/JS/UECFOOD100_JS/1/1.jpg', [0, 0, 143, 370, 486], ...]``
    :raises ValueError: if an image does not have ``expected_size``
    """
    print('\n-> converting txt info to data...\n')

    boxes_by_image = {}
    with open(txt_path, 'r') as f:
        next(f, None)  # skip header
        for raw in f:
            raw = raw.rstrip('\n')
            if not raw:
                continue
            # Split from the right: the image path may itself contain spaces
            # (e.g. ".../My Drive/..."), the five trailing fields never do.
            img_path, category_id, x1, y1, x2, y2 = raw.rsplit(' ', 5)

            if expected_size is not None:
                # after preprocessing every image should have exactly this size
                with Image.open(img_path) as img:
                    if img.size != tuple(expected_size):
                        raise ValueError('unexpected image size {} for {}'.format(img.size, img_path))

            # YOLO requires category ids starting from 0, not 1
            box = [int(category_id) - 1]
            box.extend(int(float(value)) for value in (x1, y1, x2, y2))
            boxes_by_image.setdefault(img_path, []).append(box)

    return [[img_path] + boxes for img_path, boxes in boxes_by_image.items()]

def load_images(image_data):
    """Load every image listed in ``image_data`` as a uint8 array.

    Uses the module-level globals ``img_size`` and ``anchors``.

    :param image_data: acquired from txt2data(); each entry is
        ``[path, box, box, ...]``
    :return: list of image arrays, one per entry
    :raises ValueError: if an image is not 800x600
    """
    print('\n -> Reading imgs and saving to array images...\n')

    # One (n_boxes, 5) array per image.  Kept as a plain list: the number of
    # boxes differs between images, and NumPy >= 1.24 refuses to build an
    # array from such ragged input.
    boxes = [np.array(entry[1:]) for entry in image_data]
    boxes = process_data(img_size, boxes)
    # NOTE(review): the masks are computed but not used or returned here;
    # the call is kept because it still exercises the boxes against the anchors.
    get_detector_mask(boxes, anchors)

    images = []
    for entry in image_data:
        # The context manager releases the file handle of every image.
        with Image.open(entry[0]) as img:
            if img.size != (800, 600):
                raise ValueError('unexpected image size {} for {}'.format(img.size, entry[0]))
            images.append(np.array(img, dtype=np.uint8))
    return images

def images2npv(images, image_data, shuffle=False):
    """Save the images and their boxes to ``UECFOOD100npv_JS.npz``.

    The archive is written to the current working directory with the keys
    ``image`` and ``boxes``.  When images carry different numbers of boxes,
    ``boxes`` is stored as an object array (load it with
    ``allow_pickle=True``).

    :param images: list of equally sized image arrays
    :param image_data: entries ``[path, box, box, ...]`` as returned by txt2data()
    :param shuffle: shuffle images and boxes together with a fixed seed
        (the data has already been shuffled during preprocessing)
    """
    print('\n -> converting image info to npv file...\n')
    images = np.array(images, dtype=np.uint8)
    box_arrays = [np.array(image_data[i][1:]) for i in range(images.shape[0])]

    if len({entry.shape for entry in box_arrays}) <= 1:
        # same number of boxes everywhere: a regular array, as before
        boxes = np.array(box_arrays)
    else:
        # Ragged input: NumPy >= 1.24 no longer builds this implicitly, so
        # fill an object array element by element.
        boxes = np.empty(len(box_arrays), dtype=object)
        for idx, entry in enumerate(box_arrays):
            boxes[idx] = entry

    # shuffle dataset
    if shuffle:
        np.random.seed(13)
        indices = np.arange(len(images))
        np.random.shuffle(indices)
        images, boxes = images[indices], boxes[indices]
    print('dataset contains {} images'.format(images.shape[0]))
    np.savez('UECFOOD100npv_JS', image=images, boxes=boxes)
    print('npz file has been generated and saved as UECFOOD100npv_JS.npz')


def get_detector_mask(boxes, anchors, image_size=(416, 416)):
    """Run preprocess_true_boxes() on the boxes of every image.

    :param boxes: per-image boxes; each item is passed as ``true_boxes``
    :param anchors: anchors passed through unchanged
    :param image_size: (height, width) handed to preprocess_true_boxes();
        defaults to the previously hard-coded 416x416
    :return: tuple ``(detectors_mask, matching_true_boxes)`` of stacked arrays
    """
    detectors_mask = []
    matching_true_boxes = []
    for box in boxes:
        mask, matched = preprocess_true_boxes(box, anchors, image_size)
        detectors_mask.append(mask)
        matching_true_boxes.append(matched)

    return np.array(detectors_mask), np.array(matching_true_boxes)


def preprocess_true_boxes(true_boxes, anchors, image_size):
    """Find detector in YOLO where ground truth box should appear

    Parameters
    ----------
    true_boxes : array
        List of ground truth boxes in form of relative x, y, w, h, class.
        Relative coordinates are in the range [0, 1] indicating a percentage
        of the original image dimensions.
    anchors : array
        List of anchors in form of w, h.
        Anchors are assumed to be in the range [0, conv_size] where conv_size
        is the spatial dimension of the final convolutional features.
    image_size : array-like
        List of image dimensions in form of h, w in pixels.

    Returns
    -------
    detectors_mask : array
        0/1 mask for detectors in [conv_height, conv_width, num_anchors, 1]
        that should be compared with a matching ground truth box.
    matching_true_boxes: array
        Same shape as detectors_mask with the corresponding ground truth box
        adjusted for comparison with predicted parameters at training time.
    """
    height, width = image_size
    num_anchors = len(anchors)

    # Downsampling factor of 5x 2-stride max_pools == 32.
    assert height % 32 == 0,    'Image sizes in YOLO_v2 must be multiples of 32.'
    assert width % 32 == 0,     'Image sizes in YOLO_v2 must be multiples of 32.'
    conv_height = height // 32
    conv_width = width // 32
    num_box_params = true_boxes.shape[1]
    detectors_mask = np.zeros(
        (conv_height, conv_width, num_anchors, 1), dtype=np.float32)
    matching_true_boxes = np.zeros(
        (conv_height, conv_width, num_anchors, num_box_params),
        dtype=np.float32)

    grid_scale = np.array([conv_width, conv_height, conv_width, conv_height])
    for box in true_boxes:
        # Take the class as a scalar: a one-element slice would make the
        # adjusted box below a ragged sequence, which current NumPy rejects.
        box_class = box[4]
        # scale box to convolutional feature spatial dimensions
        box = box[0:4] * grid_scale
        # Clamp to the grid so that a centre lying exactly on the right or
        # bottom image border falls into the last cell instead of past it.
        i = int(np.clip(np.floor(box[1]), 0, conv_height - 1))
        j = int(np.clip(np.floor(box[0]), 0, conv_width - 1))

        best_iou = 0
        best_anchor = 0
        for k, anchor in enumerate(anchors):
            # Find IOU between box shifted to origin and anchor box.
            box_maxes = box[2:4] / 2.
            box_mins = -box_maxes
            anchor_maxes = (anchor / 2.)
            anchor_mins = -anchor_maxes

            intersect_mins = np.maximum(box_mins, anchor_mins)
            intersect_maxes = np.minimum(box_maxes, anchor_maxes)
            intersect_wh = np.maximum(intersect_maxes - intersect_mins, 0.)
            intersect_area = intersect_wh[0] * intersect_wh[1]
            box_area = box[2] * box[3]
            anchor_area = anchor[0] * anchor[1]
            iou = intersect_area / (box_area + anchor_area - intersect_area)
            if iou > best_iou:
                best_iou = iou
                best_anchor = k

        # Zero-padded rows have zero area, hence IoU 0, and are skipped here.
        if best_iou > 0:
            print(i, j, best_anchor)
            detectors_mask[i, j, best_anchor] = 1
            adjusted_box = np.array(
                [
                    box[0] - j, box[1] - i,
                    np.log(box[2] / anchors[best_anchor][0]),
                    np.log(box[3] / anchors[best_anchor][1]), box_class
                ],
                dtype=np.float32)
            matching_true_boxes[i, j, best_anchor] = adjusted_box
    return detectors_mask, matching_true_boxes

# Load the 5-anchor set: one "w, h" pair per line. `anchors` is read as a
# global by load_images().
anchors = []
with open('/content/drive/My Drive/yeni_proje/data/generated_anchors_mobilenet/anchors_5.txt', 'r') as anchor_file:
    for i, line in enumerate(anchor_file):
        line = line.rstrip('\n')
        anchors.append(list(map(float, line.split(', '))))
anchors = np.array(anchors)
print('-> anchors acquired\n')
print(anchors)

# Relative path: resolved against the notebook's current working directory.
txt_path = './data/train_uec100.txt'
# Read as a global by load_images() and passed to process_data() for normalisation.
img_size = np.array([800, 600])

# Generate dictionary with labels and ids (not necessary)
label_dict = create_label_dict('/content/drive/My Drive/yeni_proje/data/classes.txt')
print(label_dict)

# Convert txt info to data
image_data = txt2data(txt_path)
images = load_images(image_data)
# Writes UECFOOD100npv_JS.npz into the current working directory.
images2npv(images, image_data)

print('Done!')

##Annotation (XML files)

In [None]:
from PIL import Image
from lxml import etree as ET

def gen_template():
    """Write ``template.xml``, a sample annotation with one hard-coded object.

    The file is created in the current working directory.
    """
    root = ET.Element('annotation', verified='yes')

    folder = ET.SubElement(root, 'folder')
    folder.text = 'data/1'

    filename = ET.SubElement(root, 'filename')
    filename.text = '1.jpg'

    path = ET.SubElement(root, 'path')
    path.text = '/content/drive/My Drive/proje/data/1.jpg'

    # Each dimension gets its own tag (height and depth were previously
    # emitted as two extra <width> elements), matching write_xml().
    size = ET.SubElement(root, 'size')
    width = ET.SubElement(size, 'width')
    width.text = '800'
    height = ET.SubElement(size, 'height')
    height.text = '600'
    depth = ET.SubElement(size, 'depth')
    depth.text = '3'

    obj = ET.SubElement(root, 'object')
    name = ET.SubElement(obj, 'name')
    name.text = 'rice'

    bndbox = ET.SubElement(obj, 'bndbox')
    xmin = ET.SubElement(bndbox, 'xmin')
    xmin.text = '0'
    ymin = ET.SubElement(bndbox, 'ymin')
    ymin.text = '143'
    xmax = ET.SubElement(bndbox, 'xmax')
    xmax.text = '370'
    ymax = ET.SubElement(bndbox, 'ymax')
    ymax.text = '486'

    tree = ET.ElementTree(root)
    tree.write('template.xml', pretty_print=True)



def write_xml(line):
    """Write the XML annotation for one row of new_bb_info.txt.

    :param line: ``[img, x1, y1, x2, y2]`` as strings
    :raises ValueError: if the referenced image is not 800x600
    """
    root = ET.Element('annotation', verified='yes')

    # NOTE(review): folder and class name are hard-coded to category 1 /
    # 'rice', and this folder is under '.../proje/' while the caller's
    # datapath is under '.../yeni_proje/' - confirm both.
    folder = ET.SubElement(root, 'folder')
    folder.text = '/content/drive/My Drive/proje/data/1'

    filename = ET.SubElement(root, 'filename')
    filename.text = line[0] + '.jpg'

    path = ET.SubElement(root, 'path')
    path.text = '/content/drive/My Drive/proje/data/1/' + line[0] + '.jpg'

    # Open the image once and release the handle right away (it used to be
    # opened three times and never closed).
    with Image.open(path.text) as img:
        img_width, img_height = img.size
    if (img_width, img_height) != (800, 600):
        raise ValueError('unexpected image size {} for {}'.format((img_width, img_height), path.text))

    size = ET.SubElement(root, 'size')
    width = ET.SubElement(size, 'width')
    width.text = str(img_width)
    height = ET.SubElement(size, 'height')
    height.text = str(img_height)
    depth = ET.SubElement(size, 'depth')
    depth.text = '3'

    obj = ET.SubElement(root, 'object')
    name = ET.SubElement(obj, 'name')
    name.text = 'rice'

    bndbox = ET.SubElement(obj, 'bndbox')
    xmin = ET.SubElement(bndbox, 'xmin')
    xmin.text = line[1]
    ymin = ET.SubElement(bndbox, 'ymin')
    ymin.text = line[2]
    xmax = ET.SubElement(bndbox, 'xmax')
    xmax.text = line[3]
    ymax = ET.SubElement(bndbox, 'ymax')
    ymax.text = line[4]

    tree = ET.ElementTree(root)
    # NOTE(review): this yields '<folder>/annotations<img>.xml' (a file-name
    # prefix, not an 'annotations/' sub-folder) - confirm that is intended.
    tree.write(folder.text + '/annotations' + line[0] + '.xml', pretty_print=True, encoding='utf-8')


def gen_xmls(outpath, datapath):
    """Write one XML annotation per row of ``<datapath>/new_bb_info.txt``.

    :param outpath: currently unused; write_xml() decides the output location
    :param datapath: category folder containing new_bb_info.txt
    """
    new_bb_info = 'new_bb_info.txt'
    # Read the file once; it used to be opened a second time just to count
    # the lines, and that second handle was never closed.
    with open(datapath + '/' + new_bb_info, 'r') as bbox_file:
        rows = [row.rstrip('\n') for row in bbox_file]

    num_items = len(rows) - 1  # number of rows without the header
    print(num_items)
    for row in rows[1:]:
        if not row:
            continue  # tolerate blank lines
        write_xml(row.split(' '))
    print('Done!')

# Category folder whose new_bb_info.txt is converted to XML annotations.
datapath = '/content/drive/My Drive/yeni_proje/data/1'
outpath = datapath
# Runs immediately: calls write_xml() for every row of new_bb_info.txt.
gen_xmls(outpath, datapath)

In [None]:
# Writes template.xml into the current working directory.
gen_template()

#YOLO Main

##Drive Configuration

In [None]:
from google.colab import drive 
# Mount Google Drive into the Colab runtime so the project files under
# /content/drive can be accessed.
drive.mount("/content/drive")

In [None]:
#!pip install tensorflow==2.3.0
#!pip install tf_nightly

In [None]:
import tensorflow as tf
# IPython magic (only valid inside a notebook): switch to the project folder.
%cd /content/drive/My Drive/yeni_proje/
# Show which TensorFlow version the runtime provides.
print(tf.__version__)

In [None]:
# Ask TensorFlow to run tf.function-decorated code eagerly.
tf.config.run_functions_eagerly(True)
# As the last expression of the cell, the GPU device name is displayed by the notebook.
tf.test.gpu_device_name()

##Utils

In [None]:
import cv2
import numpy as np


class BoundBox:
    """A detected box given by its corners, a confidence and per-class scores.

    ``label`` and ``score`` start at -1 and are filled in the first time
    get_label() / get_score() is called.
    """

    def __init__(self, xmin, ymin, xmax, ymax, c=None, classes=None):
        self.xmin, self.ymin = xmin, ymin
        self.xmax, self.ymax = xmax, ymax

        self.c, self.classes = c, classes

        # -1 marks "not computed yet"
        self.label = -1
        self.score = -1

    def get_label(self):
        """Return the index of the highest class score, computing it once."""
        if self.label != -1:
            return self.label
        self.label = np.argmax(self.classes)
        return self.label

    def get_score(self):
        """Return the class score at get_label(), computing it once."""
        if self.score != -1:
            return self.score
        self.score = self.classes[self.get_label()]
        return self.score

    def get_conf(self):
        """Return the confidence value ``c`` given at construction."""
        return self.c


class WeightReader:
    """Sequential reader over a weight file loaded as a flat float32 array.

    The read position starts at element 4, so the first four values of the
    file are never returned.
    """

    def __init__(self, weight_file):
        self.offset = 4
        self.all_weights = np.fromfile(weight_file, dtype='float32')

    def read_bytes(self, size):
        """Return the next ``size`` values and advance the read position."""
        start = self.offset
        stop = start + size
        self.offset = stop
        return self.all_weights[start:stop]

    def reset(self):
        """Move the read position back to element 4."""
        self.offset = 4


def bbox_iou(box1, box2):
    """Return the intersection over union of two boxes.

    Both arguments must expose ``xmin``, ``ymin``, ``xmax`` and ``ymax``.
    The overlap along each axis comes from ``_interval_overlap``.
    """
    overlap_w = _interval_overlap([box1.xmin, box1.xmax], [box2.xmin, box2.xmax])
    overlap_h = _interval_overlap([box1.ymin, box1.ymax], [box2.ymin, box2.ymax])
    intersect = overlap_w * overlap_h

    area1 = (box1.xmax - box1.xmin) * (box1.ymax - box1.ymin)
    area2 = (box2.xmax - box2.xmin) * (box2.ymax - box2.ymin)
    union = area1 + area2 - intersect

    return float(intersect) / union


def decode_netout(netout, anchors, nb_class, obj_threshold=0.3, nms_threshold=0.3):
    """Turn a raw network output grid into a list of ``BoundBox`` detections.

    Args:
        netout: array indexed as ``[row, col, box, value]`` where the last
            axis holds x, y, w, h, confidence and then the class scores.
            It is modified in place by this function.
        anchors: flat sequence of anchor sizes, read as ``(w, h)`` pairs at
            positions ``2 * b`` and ``2 * b + 1``.
        nb_class: number of classes to run non-maximum suppression over.
        obj_threshold: minimum class score for a box to be kept.
        nms_threshold: IoU at or above which a lower-scored box is suppressed.

    Returns:
        List of ``BoundBox`` whose coordinates are fractions of the image
        width/height.
    """
    grid_h, grid_w, nb_box = netout.shape[:3]

    # Activate the raw output in place: sigmoid on the confidence, then class
    # scores = confidence * softmax, with sub-threshold scores zeroed.
    netout[..., 4] = _sigmoid(netout[..., 4])
    netout[..., 5:] = netout[..., 4][..., np.newaxis] * _softmax(netout[..., 5:])
    netout[..., 5:] *= netout[..., 5:] > obj_threshold

    boxes = []
    for row in range(grid_h):
        for col in range(grid_w):
            for b in range(nb_box):
                cell = netout[row, col, b]

                # everything from index 5 onwards is a class score
                class_scores = cell[5:]
                if not np.sum(class_scores) > 0:
                    continue

                # the first four values are x, y, w and h
                x, y, w, h = cell[:4]

                x = (col + _sigmoid(x)) / grid_w  # center position, unit: image width
                y = (row + _sigmoid(y)) / grid_h  # center position, unit: image height
                w = anchors[2 * b] * np.exp(w) / grid_w  # unit: image width
                h = anchors[2 * b + 1] * np.exp(h) / grid_h  # unit: image height
                confidence = cell[4]

                boxes.append(
                    BoundBox(x - w / 2, y - h / 2, x + w / 2, y + h / 2, confidence, class_scores))

    # Non-maximum suppression, one class at a time: walk the boxes from the
    # highest to the lowest score and zero the score of every later box that
    # overlaps a surviving one too much.
    for cls in range(nb_class):
        ranking = np.argsort([box.classes[cls] for box in boxes])[::-1]

        for position, keep_index in enumerate(ranking):
            if boxes[keep_index].classes[cls] == 0:
                continue

            for other_index in ranking[position + 1:]:
                if bbox_iou(boxes[keep_index], boxes[other_index]) >= nms_threshold:
                    boxes[other_index].classes[cls] = 0

    # keep only the boxes whose best class score still clears the threshold
    return [box for box in boxes if box.get_score() > obj_threshold]


def draw_boxes(image, boxes, labels):
    """Draw every box and its "<label> <score>" caption onto ``image``.

    Args:
        image: array of shape ``(height, width, channels)``; drawn on in place.
        boxes: iterable of boxes whose coordinates are fractions of the image
            size and that provide ``get_label()`` / ``get_score()``.
        labels: sequence of label strings indexed by ``box.get_label()``.

    Returns:
        The same ``image`` object that was passed in.
    """
    image_h, image_w, _ = image.shape
    green = (0, 255, 0)

    for box in boxes:
        # scale the fractional coordinates to pixel positions
        left, top = int(box.xmin * image_w), int(box.ymin * image_h)
        right, bottom = int(box.xmax * image_w), int(box.ymax * image_h)

        cv2.rectangle(image, (left, top), (right, bottom), green, 3)

        caption = labels[box.get_label()] + ' ' + str(box.get_score())
        cv2.putText(image,
                    caption,
                    (left, top - 13),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    1e-3 * image_h,
                    green, 2)

    return image


def compute_overlap(a, b):
    """Pairwise IoU between two sets of ``[x1, y1, x2, y2]`` boxes.

    Code originally from https://github.com/rbgirshick/py-faster-rcnn.

    Parameters
    ----------
    a: (N, 4) ndarray of float
    b: (K, 4) ndarray of float

    Returns
    -------
    overlaps: (N, K) ndarray, entry ``[i, j]`` is the IoU of ``a[i]`` and ``b[j]``
    """
    area_a = (a[:, 2] - a[:, 0]) * (a[:, 3] - a[:, 1])
    area_b = (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1])

    # Broadcast every box of `a` (as a column) against every box of `b`.
    overlap_w = np.minimum(a[:, 2, np.newaxis], b[:, 2]) - np.maximum(a[:, 0, np.newaxis], b[:, 0])
    overlap_h = np.minimum(a[:, 3, np.newaxis], b[:, 3]) - np.maximum(a[:, 1, np.newaxis], b[:, 1])

    # Disjoint boxes give negative extents; clamp them to zero.
    overlap_w = np.maximum(overlap_w, 0)
    overlap_h = np.maximum(overlap_h, 0)
    intersection = overlap_w * overlap_h

    # Keep the denominator strictly positive so degenerate boxes yield 0.
    union = np.maximum(area_a[:, np.newaxis] + area_b - intersection, np.finfo(float).eps)

    return intersection / union


def compute_ap(recall, precision):
    """Compute the average precision from recall and precision curves.

    Code originally from https://github.com/rbgirshick/py-faster-rcnn.

    # Arguments
        recall:    The recall curve (list).
        precision: The precision curve (list).
    # Returns
        The area under the precision envelope, summed over the points where
        recall changes value.
    """
    # pad both curves with sentinel values at each end
    mrec = np.concatenate(([0.], recall, [1.]))
    mpre = np.concatenate(([0.], precision, [0.]))

    # precision envelope: each entry becomes the maximum of itself and
    # everything to its right (a running maximum taken from the end)
    mpre = np.maximum.accumulate(mpre[::-1])[::-1]

    # positions where the recall axis moves to a new value
    steps = np.flatnonzero(mrec[1:] != mrec[:-1])

    # sum (delta recall) * precision over those positions
    return np.sum((mrec[steps + 1] - mrec[steps]) * mpre[steps + 1])


def _interval_overlap(interval_a, interval_b):
    x1, x2 = interval_a
    x3, x4 = interval_b

    if x3 < x1:
        if x4 < x1:
            return 0
        else:
            return min(x2, x4) - x1
    else:
        if x2 < x3:
            return 0
        else:
            return min(x2, x4) - x3


def _sigmoid(x):
    return 1. / (1. + np.exp(-x))


def _softmax(x, axis=-1, t=-100.):
    x = x - np.max(x)

    if np.min(x) < t:
        x = x / np.min(x) * t

    e_x = np.exp(x)

    return e_x / e_x.sum(axis, keepdims=True)


def read_category(path='/content/drive/My Drive/yeni_proje/data/category.txt'):
    """Return the category names listed in a tab-separated category file.

    The first line of the file is treated as a header and skipped. Every
    following line is expected to hold at least two tab-separated fields;
    the second field is the category name.

    Args:
        path: location of the category file. Defaults to the project's
            category.txt on the mounted drive.

    Returns:
        List of category names in file order. Blank lines and lines without
        a second field are ignored instead of raising ``IndexError``.
    """
    category = []
    with open(path, 'r') as file:
        next(file, None)  # skip the header line (if the file has one)
        for line in file:
            # strip both LF and CRLF endings so names never end in '\r'
            fields = line.rstrip('\r\n').split('\t')
            if len(fields) < 2:
                continue
            category.append(fields[1])
    return category

import copy
import os
import xml.etree.ElementTree as ET


def parse_annotation(ann_dir, img_dir, labels=None):
    """Parse every XML annotation file found in ``ann_dir``.

    Args:
        ann_dir: directory holding the annotation files; a trailing path
            separator is optional.
        img_dir: prefix that is prepended as-is to each ``filename`` value
            read from the annotations.
        labels: optional collection of object names to keep. When empty or
            ``None`` every object is kept.

    Returns:
        ``(all_imgs, seen_labels)`` where ``all_imgs`` is a list of dicts with
        the keys ``'filename'``, ``'width'``, ``'height'`` and ``'object'``
        (a list of dicts with ``'name'``, ``'xmin'``, ``'ymin'``, ``'xmax'``,
        ``'ymax'``), restricted to images that have at least one kept object,
        and ``seen_labels`` maps every object name encountered (kept or not)
        to its number of occurrences.
    """
    # None instead of a mutable [] default; both mean "keep every label".
    labels = [] if labels is None else labels

    all_imgs = []
    seen_labels = {}

    for ann in sorted(os.listdir(ann_dir)):
        img = {'object': []}

        # os.path.join works whether or not ann_dir ends with a separator
        tree = ET.parse(os.path.join(ann_dir, ann))

        for elem in tree.iter():
            if 'filename' in elem.tag:
                img['filename'] = img_dir + elem.text
            if 'width' in elem.tag:
                img['width'] = int(elem.text)
            if 'height' in elem.tag:
                img['height'] = int(elem.text)
            if 'object' in elem.tag or 'part' in elem.tag:
                obj = {}

                for attr in list(elem):
                    if 'name' in attr.tag:
                        obj['name'] = attr.text
                        seen_labels[obj['name']] = seen_labels.get(obj['name'], 0) + 1

                        # an unwanted label drops the whole object
                        if len(labels) > 0 and obj['name'] not in labels:
                            break
                        # obj is registered now and filled in below through
                        # the same dict reference
                        img['object'] += [obj]

                    if 'bndbox' in attr.tag:
                        for dim in list(attr):
                            for key in ('xmin', 'ymin', 'xmax', 'ymax'):
                                if key in dim.tag:
                                    obj[key] = int(round(float(dim.text)))

        if len(img['object']) > 0:
            all_imgs += [img]

    return all_imgs, seen_labels


from imgaug import augmenters as iaa

from keras.utils import Sequence




class BatchGenerator(Sequence):
    """Keras ``Sequence`` producing ``([images, true_boxes], targets)`` batches.

    Args:
        images: list of annotation dicts; each one is read for its
            ``'filename'`` and ``'object'`` entries (objects carry ``'name'``,
            ``'xmin'``, ``'ymin'``, ``'xmax'``, ``'ymax'``). The list is
            shuffled in place when ``shuffle`` is true.
        config: dict providing ``IMAGE_H``, ``IMAGE_W``, ``GRID_H``,
            ``GRID_W``, ``BOX``, ``LABELS``, ``ANCHORS``, ``BATCH_SIZE`` and
            ``TRUE_BOX_BUFFER``.
        shuffle: shuffle the images at construction and after every epoch.
        jitter: apply random scale/translate/flip and the imgaug pipeline.
        norm: optional callable applied to each image before it is stored in
            the batch. When ``None`` the boxes are drawn onto the image
            instead, as a visual sanity check.
    """

    def __init__(self, images,
                 config,
                 shuffle=True,
                 jitter=True,
                 norm=None):
        self.generator = None

        self.images = images
        self.config = config

        self.shuffle = shuffle
        self.jitter = jitter
        self.norm = norm

        # One origin-anchored box per (w, h) pair of config['ANCHORS'].
        self.anchors = [BoundBox(0, 0, config['ANCHORS'][2 * i], config['ANCHORS'][2 * i + 1]) for i in
                        range(int(len(config['ANCHORS']) // 2))]

        ### augmentors by https://github.com/aleju/imgaug
        def sometimes(aug):
            # apply the wrapped augmenter to 50% of the images
            return iaa.Sometimes(0.5, aug)

        # Augmenters with per_channel=0.5 sample one value per image in 50% of
        # all cases and one value per channel otherwise.
        self.aug_pipe = iaa.Sequential(
            [
                # Affine without parameters: no geometric change is configured
                sometimes(iaa.Affine()),
                # execute 0 to 5 of the following augmenters per image;
                # running all of them would often be too strong
                iaa.SomeOf((0, 5),
                           [
                               iaa.OneOf([
                                   iaa.GaussianBlur((0, 3.0)),  # blur with a sigma between 0 and 3.0
                                   iaa.AverageBlur(k=(2, 7)),  # local means, kernel sizes 2 to 7
                                   iaa.MedianBlur(k=(3, 11)),  # local medians, kernel sizes 3 to 11
                               ]),
                               iaa.Sharpen(alpha=(0, 1.0), lightness=(0.75, 1.5)),  # sharpen images
                               # add gaussian noise to images
                               iaa.AdditiveGaussianNoise(loc=0, scale=(0.0, 0.05 * 255), per_channel=0.5),
                               iaa.OneOf([
                                   iaa.Dropout((0.01, 0.1), per_channel=0.5),  # randomly remove up to 10% of the pixels
                               ]),
                               # change brightness of images (by -10 to 10 of original value)
                               iaa.Add((-10, 10), per_channel=0.5),
                               # change brightness of images (50-150% of original value)
                               iaa.Multiply((0.5, 1.5), per_channel=0.5),
                               # improve or worsen the contrast
                               iaa.ContrastNormalization((0.5, 2.0), per_channel=0.5),
                           ],
                           random_order=True
                           )
            ],
            random_order=True
        )

        if shuffle:
            np.random.shuffle(self.images)

    def __len__(self):
        """Number of batches per epoch (the last one may re-use images)."""
        return int(np.ceil(float(len(self.images)) / self.config['BATCH_SIZE']))

    def num_classes(self):
        """Number of entries in ``config['LABELS']``."""
        return len(self.config['LABELS'])

    def size(self):
        """Number of images handled by this generator."""
        return len(self.images)

    def load_annotation(self, i):
        """Return the boxes of image ``i`` as rows ``[xmin, ymin, xmax, ymax, class_index]``.

        Returns ``np.array([[]])`` when the image has no objects. Raises
        ``ValueError`` when an object name is not in ``config['LABELS']``.
        """
        annots = []

        for obj in self.images[i]['object']:
            annot = [obj['xmin'], obj['ymin'], obj['xmax'], obj['ymax'], self.config['LABELS'].index(obj['name'])]
            annots += [annot]

        if len(annots) == 0:
            annots = [[]]

        return np.array(annots)

    def load_image(self, i):
        """Read image ``i`` from disk with ``cv2.imread`` (no augmentation)."""
        return cv2.imread(self.images[i]['filename'])

    def __getitem__(self, idx):
        """Build batch number ``idx``.

        Returns:
            ``([x_batch, b_batch], y_batch)``: the input images, the buffer of
            ground-truth boxes, and the desired network output.
        """
        l_bound = idx * self.config['BATCH_SIZE']
        r_bound = (idx + 1) * self.config['BATCH_SIZE']

        # The last batch is shifted back so it still spans BATCH_SIZE slots.
        if r_bound > len(self.images):
            r_bound = len(self.images)
            l_bound = r_bound - self.config['BATCH_SIZE']

        instance_count = 0

        x_batch = np.zeros((r_bound - l_bound, self.config['IMAGE_H'], self.config['IMAGE_W'], 3))  # input images
        b_batch = np.zeros((r_bound - l_bound, 1, 1, 1, self.config['TRUE_BOX_BUFFER'],
                            4))  # up to TRUE_BOX_BUFFER ground-truth boxes per image
        y_batch = np.zeros((r_bound - l_bound, self.config['GRID_H'], self.config['GRID_W'], self.config['BOX'],
                            4 + 1 + len(self.config['LABELS'])))  # desired network output

        for train_instance in self.images[l_bound:r_bound]:
            # augment input image and fix object's position and size
            img, all_objs = self.aug_image(train_instance, jitter=self.jitter)

            # construct output from object's x, y, w, h
            true_box_index = 0

            for obj in all_objs:
                if obj['xmax'] > obj['xmin'] and obj['ymax'] > obj['ymin'] and obj['name'] in self.config['LABELS']:
                    center_x = .5 * (obj['xmin'] + obj['xmax'])
                    center_x = center_x / (float(self.config['IMAGE_W']) / self.config['GRID_W'])
                    center_y = .5 * (obj['ymin'] + obj['ymax'])
                    center_y = center_y / (float(self.config['IMAGE_H']) / self.config['GRID_H'])

                    grid_x = int(np.floor(center_x))
                    grid_y = int(np.floor(center_y))

                    if grid_x < self.config['GRID_W'] and grid_y < self.config['GRID_H']:
                        obj_indx = self.config['LABELS'].index(obj['name'])

                        center_w = (obj['xmax'] - obj['xmin']) / (
                                float(self.config['IMAGE_W']) / self.config['GRID_W'])  # unit: grid cell
                        center_h = (obj['ymax'] - obj['ymin']) / (
                                float(self.config['IMAGE_H']) / self.config['GRID_H'])  # unit: grid cell

                        box = [center_x, center_y, center_w, center_h]

                        # find the anchor that best predicts this box
                        best_anchor = -1
                        max_iou = -1

                        # compare shapes only: both boxes start at the origin
                        shifted_box = BoundBox(0,
                                               0,
                                               center_w,
                                               center_h)

                        for i, anchor in enumerate(self.anchors):
                            iou = bbox_iou(shifted_box, anchor)

                            if max_iou < iou:
                                best_anchor = i
                                max_iou = iou

                        # assign ground truth x, y, w, h, confidence and class probs to y_batch
                        y_batch[instance_count, grid_y, grid_x, best_anchor, 0:4] = box
                        y_batch[instance_count, grid_y, grid_x, best_anchor, 4] = 1.
                        y_batch[instance_count, grid_y, grid_x, best_anchor, 5 + obj_indx] = 1

                        # assign the true box to b_batch
                        b_batch[instance_count, 0, 0, 0, true_box_index] = box

                        # wrap around so extra boxes overwrite the oldest slots
                        true_box_index += 1
                        true_box_index = true_box_index % self.config['TRUE_BOX_BUFFER']

            # assign input image to x_batch
            if self.norm is not None:
                x_batch[instance_count] = self.norm(img)
            else:
                # plot image and bounding boxes for sanity check
                for obj in all_objs:
                    if obj['xmax'] > obj['xmin'] and obj['ymax'] > obj['ymin']:
                        cv2.rectangle(img[:, :, ::-1], (obj['xmin'], obj['ymin']), (obj['xmax'], obj['ymax']),
                                      (255, 0, 0), 3)
                        cv2.putText(img[:, :, ::-1], obj['name'],
                                    (obj['xmin'] + 2, obj['ymin'] + 12),
                                    0, 1.2e-3 * img.shape[0],
                                    (0, 255, 0), 2)

                x_batch[instance_count] = img

            # increase instance counter in current batch
            instance_count += 1

        return [x_batch, b_batch], y_batch

    def on_epoch_end(self):
        """Reshuffle the images between epochs when shuffling is enabled."""
        if self.shuffle:
            np.random.shuffle(self.images)

    def aug_image(self, train_instance, jitter):
        """Load one image, optionally jitter it, and resize it to the network input.

        Args:
            train_instance: annotation dict with ``'filename'`` and ``'object'``.
            jitter: when true, apply random scale, translation, horizontal
                flip and the imgaug pipeline.

        Returns:
            ``(image, all_objs)``: the image with its channel order reversed
            and resized to ``IMAGE_W`` x ``IMAGE_H``, and a deep copy of the
            objects with coordinates mapped to that size.

        Raises:
            FileNotFoundError: if ``cv2.imread`` cannot read the image file.
        """
        image_name = train_instance['filename']
        image = cv2.imread(image_name)

        if image is None:
            # cv2.imread signals failure by returning None; fail here with the
            # file name instead of an AttributeError on the next line.
            raise FileNotFoundError('Cannot find ' + str(image_name))

        h, w = image.shape[:2]
        all_objs = copy.deepcopy(train_instance['object'])

        if jitter:
            ### scale the image (by a factor between 1.0 and 1.1)
            scale = np.random.uniform() / 10. + 1.
            image = cv2.resize(image, (0, 0), fx=scale, fy=scale)

            ### translate the image (crop a w x h window out of the upscaled image)
            max_offx = (scale - 1.) * w
            max_offy = (scale - 1.) * h
            offx = int(np.random.uniform() * max_offx)
            offy = int(np.random.uniform() * max_offy)

            image = image[offy: (offy + h), offx: (offx + w)]

            ### flip the image
            flip = np.random.binomial(1, .5)
            if flip > 0.5:
                image = cv2.flip(image, 1)

            image = self.aug_pipe.augment_image(image)

        # resize the image to standard size; cv2.resize expects (width, height)
        image = cv2.resize(image, (self.config['IMAGE_W'], self.config['IMAGE_H']))
        image = image[:, :, ::-1]

        # fix object's position and size
        for obj in all_objs:
            for attr in ['xmin', 'xmax']:
                if jitter:
                    obj[attr] = int(obj[attr] * scale - offx)

                # convert annotation from raw size to net input size
                obj[attr] = int(obj[attr] * float(self.config['IMAGE_W']) / w)
                obj[attr] = max(min(obj[attr], self.config['IMAGE_W']), 0)

            for attr in ['ymin', 'ymax']:
                if jitter:
                    obj[attr] = int(obj[attr] * scale - offy)

                # convert annotation from raw size to net input size
                obj[attr] = int(obj[attr] * float(self.config['IMAGE_H']) / h)
                obj[attr] = max(min(obj[attr], self.config['IMAGE_H']), 0)

            # mirror the box horizontally when the image was flipped
            if jitter and flip > 0.5:
                xmin = obj['xmin']
                obj['xmin'] = self.config['IMAGE_W'] - obj['xmax']
                obj['xmax'] = self.config['IMAGE_W'] - xmin

        return image, all_objs

##Neural Network Model

In [None]:
import warnings
import keras.backend as K
import keras.utils
from keras import layers
from keras import models


def relu6(x):
    """ReLU activation whose output is capped at 6."""
    ceiling = 6
    return K.relu(x, max_value=ceiling)


def MobileNetV1(input_shape, alpha=1.0, depth_multiplier=1, include_top=True,
                weights='imagenet', pooling=None):
    """Build a MobileNetV1 feature extractor and load its ImageNet weights.

    Args:
        input_shape: ``(rows, cols, channels)``; rows and cols must each be
            one of 224, 192, 160 or 128.
        alpha: width multiplier applied to every filter count.
        depth_multiplier: passed to each depthwise convolution.
        include_top: only ``False`` is supported.
        weights: only ``'imagenet'`` is supported.
        pooling: ``'avg'`` or ``'max'`` adds the matching global pooling
            layer; any other value leaves the feature map untouched.

    Returns:
        The Keras model with the downloaded weights loaded.

    Raises:
        NotImplementedError: if ``include_top`` is true or ``weights`` is not
            ``'imagenet'``.
    """
    if input_shape[-1] not in [1, 3]:
        warnings.warn('Images must have 3 channels (RGB) or 1 channel')
    supported_sizes = [224, 192, 160, 128]
    assert input_shape[0] in supported_sizes
    assert input_shape[1] in supported_sizes

    inputs = layers.Input(shape=input_shape)

    # Stem -- Conv / s2: filter shape (3 x 3 x 3 x 32)
    x = layers.ZeroPadding2D(padding=(1, 1), name='conv1_zeropad')(inputs)
    x = layers.Conv2D(filters=int(alpha * 32), kernel_size=(3, 3), padding='valid', use_bias=False,
                      strides=(2, 2), name='conv1')(x)
    x = layers.BatchNormalization(axis=-1)(x)
    x = layers.Activation(relu6, name='conv1_relu6')(x)

    # One entry per depthwise-separable block, in order (ids 1..13):
    # (pointwise filter count, stride of the depthwise convolution).
    unit, down = (1, 1), (2, 2)
    block_plan = (
        [(64, unit), (128, down), (128, unit), (256, down), (256, unit), (512, down)]
        + [(512, unit)] * 5
        + [(1024, down), (1024, unit)]
    )
    for block_id, (filters, dw_stride) in enumerate(block_plan, start=1):
        x = depthwise_block(x, kernel=(3, 3), stride=dw_stride, depth_multiplier=depth_multiplier, id=block_id)
        x = pointwise_block(x, num_filters=filters, kernel=(1, 1), stride=(1, 1), alpha=alpha, id=block_id)

    # classification head is not implemented; optional global pooling only
    if include_top:
        raise NotImplementedError
    if pooling == 'avg':
        x = layers.GlobalAveragePooling2D()(x)
    elif pooling == 'max':
        x = layers.GlobalMaxPooling2D()(x)

    rows = input_shape[0]
    model = models.Model(inputs, x, name='mobilenet_%0.2f_%s' % (alpha, rows))

    # load pre-trained weights on ImageNet
    if weights != 'imagenet':
        raise NotImplementedError

    # any alpha other than 1.0 / 0.75 / 0.50 falls back to the 0.25 weights
    alpha_text = {1.0: '1_0', 0.75: '7_5', 0.50: '5_0'}.get(alpha, '2_5')

    BASE_WEIGHT_PATH = ('https://github.com/fchollet/deep-learning-models/'
                        'releases/download/v0.6/')
    name_pattern = 'mobilenet_%s_%d_tf.h5' if include_top else 'mobilenet_%s_%d_tf_no_top.h5'
    model_name = name_pattern % (alpha_text, rows)
    weights_path = keras.utils.get_file(model_name,
                                        BASE_WEIGHT_PATH + model_name,
                                        cache_subdir='models')
    model.load_weights(weights_path)

    return model


def depthwise_block(inputs, kernel, stride, depth_multiplier, id):
    """Apply zero-padding, a depthwise conv, batch norm and ReLU6 to ``inputs``.

    ``id`` is only used to build the layer names (``convdw_*_<id>``).
    """
    stages = (
        layers.ZeroPadding2D(padding=(1, 1), name='convdw_zeropad_%d' % id),
        layers.DepthwiseConv2D(kernel_size=kernel, padding='valid', depth_multiplier=depth_multiplier,
                               strides=stride, use_bias=False, name='convdw_%d' % id),
        layers.BatchNormalization(axis=-1, name='convdw_bn_%d' % id),
        layers.Activation(relu6, name='convdw_relu6_%d' % id),
    )

    x = inputs
    for stage in stages:
        x = stage(x)
    return x


def pointwise_block(inputs, num_filters, kernel, stride, alpha, id):
    """Apply a convolution, batch norm and ReLU6 to ``inputs``.

    The convolution uses ``int(alpha * num_filters)`` filters; ``id`` is only
    used to build the layer names (``convpw_*_<id>``).
    """
    stages = (
        layers.Conv2D(filters=int(alpha * num_filters), padding='same', kernel_size=kernel,
                      strides=stride, use_bias=False, name='convpw_%d' % id),
        layers.BatchNormalization(axis=-1, name='convpw_bn_%d' % id),
        layers.Activation(relu6, name='convpw_relu6_%d' % id),
    )

    x = inputs
    for stage in stages:
        x = stage(x)
    return x

##Train

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard
from keras.layers import Reshape, Conv2D, Input, Lambda, UpSampling2D
from keras.models import Model
from keras.optimizers import Adam



def normalize(image):
    """Divide every value of ``image`` by 255."""
    max_pixel = 255.
    return image / max_pixel


def get_model():
    """Build the detection model: MobileNetV1 backbone plus a 1x1 detection conv.

    Reads the module-level ``input_image`` and ``true_boxes`` input tensors
    and the ``N_BOX``, ``CLASS``, ``GRID_H`` and ``GRID_W`` settings.

    Returns:
        A Keras ``Model`` taking ``[input_image, true_boxes]`` and producing
        an output reshaped to ``(GRID_H, GRID_W, N_BOX, 4 + 1 + CLASS)``.
    """
    print('=> Building MobileNetV1 model...')
    mobilenet = MobileNetV1(input_shape=(224, 224, 3), include_top=False)
    x = mobilenet(input_image)
    x = Conv2D(N_BOX * (4 + 1 + CLASS), (1, 1), strides=(1, 1), padding='same', name='conv_23')(x)
    output = Reshape((GRID_H, GRID_W, N_BOX, 4 + 1 + CLASS))(x)

    # small hack to allow true_boxes to be registered when Keras build the model
    # for more information: https://github.com/fchollet/keras/issues/2790
    output = Lambda(lambda args: args[0])([output, true_boxes])

    model = Model([input_image, true_boxes], output)
    # summary() prints the table itself and returns None, so it must not be
    # wrapped in print() (that would emit a stray "None" line).
    model.summary()
    return model


def train(model):
    """Re-initialise the detection layer, compile ``model`` and fit it.

    Reads the module-level ``train_batch`` / ``valid_batch`` generators,
    ``GRID_H`` / ``GRID_W`` and ``custom_loss``. The weights with the lowest
    validation loss are written to ``all_imgs_mobile_net_loss.h5``.

    Args:
        model: the Keras model to train (modified in place).
    """
    layer = model.layers[-4]            # the last convolutional layer
    weights = layer.get_weights()

    # small random kernel and bias, scaled down by the number of grid cells
    new_kernel = np.random.normal(size=weights[0].shape) / (GRID_H * GRID_W)
    new_bias = np.random.normal(size=weights[1].shape) / (GRID_H * GRID_W)

    layer.set_weights([new_kernel, new_bias])

    early_stop = EarlyStopping(monitor='val_loss',
                               min_delta=0.001,
                               patience=3,
                               mode='min',
                               verbose=1)

    # save_freq must be 'epoch': an integer counts batches, and val_loss is
    # not available at batch end, so save_best_only would never save anything.
    checkpoint = ModelCheckpoint('all_imgs_mobile_net_loss.h5',
                                 monitor='val_loss',
                                 verbose=1,
                                 save_best_only=True,
                                 mode='min',
                                 save_freq='epoch')

    # model.load_weights('./models/mobile_net_loss0_07.h5')

    # TODO: try different optimizer and tweak parameters (in MNv1 paper they used RMSprop)
    optimizer = Adam(lr=1e-4, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
    # optimizer = SGD(lr=1e-4, decay=0.0005, momentum=0.9)
    # optimizer = RMSprop(lr=1e-5, rho=0.9, epsilon=1e-08, decay=0.0)

    model.compile(loss=custom_loss, optimizer=optimizer)

    model.fit(train_batch,
              steps_per_epoch=len(train_batch),
              epochs=20,  # 100
              verbose=1,
              validation_data=valid_batch,
              validation_steps=len(valid_batch),
              callbacks=[early_stop, checkpoint],
              max_queue_size=3)


def custom_loss(y_true, y_pred):
    """Compute the detection loss: box position, box size, confidence, class.

    Relies on module-level configuration (``GRID_H``, ``GRID_W``, ``N_BOX``,
    ``BATCH_SIZE``, ``ANCHORS``, ``COORD_SCALE``, ``OBJECT_SCALE``,
    ``NO_OBJECT_SCALE``, ``CLASS_SCALE``, ``CLASS_WEIGHTS``,
    ``WARM_UP_BATCHES``) and on the global ``true_boxes`` input tensor.

    Args:
        y_true: Ground-truth tensor. Its last axis is read as
            ``[x, y, w, h, objectness, class scores...]``.
        y_pred: Raw network output, read with the same last-axis layout
            (the model reshapes it to
            ``(GRID_H, GRID_W, N_BOX, 4 + 1 + CLASS)`` per sample).

    Returns:
        Scalar tensor ``loss_xy + loss_wh + loss_conf + loss_class``.
    """
    mask_shape = tf.shape(y_true)[:4]

    # Column index of every grid cell, shaped (1, GRID_H, GRID_W, 1, 1).
    cell_x = tf.cast(tf.reshape(tf.tile(tf.range(GRID_W), [GRID_H]), (1, GRID_H, GRID_W, 1, 1)), dtype=tf.float32)
    # Row index, obtained by swapping the two grid axes; the concat below
    # therefore requires GRID_H == GRID_W.
    cell_y = tf.transpose(cell_x, (0, 2, 1, 3, 4))

    # Tile over the batch and over the anchor axis. The anchor count comes
    # from N_BOX (not a literal) so it always matches the ANCHORS reshape.
    # NOTE(review): BATCH_SIZE is fixed, so every batch fed to the loss must
    # contain exactly BATCH_SIZE samples - confirm against the generator.
    cell_grid = tf.tile(tf.concat([cell_x, cell_y], -1), [BATCH_SIZE, 1, 1, N_BOX, 1])

    conf_mask = tf.zeros(mask_shape)

    # NOTE(review): these variables are created inside the loss function;
    # whether `seen` persists across batches depends on how Keras traces
    # this function - confirm with the TensorFlow version in use.
    seen = tf.Variable(0.)
    total_recall = tf.Variable(0.)  # only used by the disabled debug code

    # ---- Adjust prediction ----
    # adjust x and y
    pred_box_xy = tf.sigmoid(y_pred[..., :2]) + cell_grid

    # adjust w and h
    pred_box_wh = tf.exp(y_pred[..., 2:4]) * np.reshape(ANCHORS, [1, 1, 1, N_BOX, 2])

    # adjust confidence
    pred_box_conf = tf.sigmoid(y_pred[..., 4])

    # adjust class probabilities (kept as logits for the cross-entropy below)
    pred_box_class = y_pred[..., 5:]

    # ---- Adjust ground truth ----
    # adjust x and y
    true_box_xy = y_true[..., 0:2]  # relative position to the containing cell

    # adjust w and h
    true_box_wh = y_true[..., 2:4]  # number of cells across, horizontally and vertically

    # adjust confidence: the target confidence is the IOU between the
    # predicted box and the ground-truth box of the same predictor
    true_wh_half = true_box_wh / 2.
    true_mins = true_box_xy - true_wh_half
    true_maxes = true_box_xy + true_wh_half

    pred_wh_half = pred_box_wh / 2.
    pred_mins = pred_box_xy - pred_wh_half
    pred_maxes = pred_box_xy + pred_wh_half

    intersect_mins = tf.maximum(pred_mins, true_mins)
    intersect_maxes = tf.minimum(pred_maxes, true_maxes)
    intersect_wh = tf.maximum(intersect_maxes - intersect_mins, 0.)
    intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]

    true_areas = true_box_wh[..., 0] * true_box_wh[..., 1]
    pred_areas = pred_box_wh[..., 0] * pred_box_wh[..., 1]

    union_areas = pred_areas + true_areas - intersect_areas
    iou_scores = tf.truediv(intersect_areas, union_areas)

    true_box_conf = iou_scores * y_true[..., 4]

    # adjust class probabilities
    true_box_class = tf.argmax(y_true[..., 5:], -1)

    # ---- Determine the masks ----
    # coordinate mask: simply the position of the ground truth boxes (the predictors)
    coord_mask = tf.expand_dims(y_true[..., 4], axis=-1) * COORD_SCALE

    # confidence mask: penalize predictors + penalize boxes with low IOU
    # penalize the confidence of the boxes, which have IOU with some ground truth box < 0.6
    true_xy = true_boxes[..., 0:2]
    true_wh = true_boxes[..., 2:4]

    true_wh_half = true_wh / 2.
    true_mins = true_xy - true_wh_half
    true_maxes = true_xy + true_wh_half

    # Extra axis so every prediction is compared against every buffered
    # ground-truth box by broadcasting.
    pred_xy = tf.expand_dims(pred_box_xy, 4)
    pred_wh = tf.expand_dims(pred_box_wh, 4)

    pred_wh_half = pred_wh / 2.
    pred_mins = pred_xy - pred_wh_half
    pred_maxes = pred_xy + pred_wh_half

    intersect_mins = tf.maximum(pred_mins, true_mins)
    intersect_maxes = tf.minimum(pred_maxes, true_maxes)
    intersect_wh = tf.maximum(intersect_maxes - intersect_mins, 0.)
    intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]

    true_areas = true_wh[..., 0] * true_wh[..., 1]
    pred_areas = pred_wh[..., 0] * pred_wh[..., 1]

    union_areas = pred_areas + true_areas - intersect_areas
    iou_scores = tf.truediv(intersect_areas, union_areas)

    best_ious = tf.reduce_max(iou_scores, axis=4)
    conf_mask = conf_mask + tf.cast(best_ious < 0.6, dtype=tf.float32) * (1 - y_true[..., 4]) * NO_OBJECT_SCALE

    # penalize the confidence of the boxes, which are responsible for corresponding ground truth box
    conf_mask = conf_mask + y_true[..., 4] * OBJECT_SCALE

    # class mask: simply the position of the ground truth boxes (the predictors)
    class_mask = y_true[..., 4] * tf.gather(CLASS_WEIGHTS, true_box_class) * CLASS_SCALE

    # ---- Warm-up training ----
    # While fewer than WARM_UP_BATCHES batches have been seen, predictors
    # without a ground-truth box are given a target at the cell centre with
    # the anchor's size, and every coordinate is weighted with 1.
    no_boxes_mask = tf.cast(coord_mask < COORD_SCALE / 2., dtype=tf.float32)
    seen = tf.compat.v1.assign_add(seen, 1.)

    true_box_xy, true_box_wh, coord_mask = tf.cond(tf.less(seen, WARM_UP_BATCHES),
                                                   lambda: [true_box_xy + (0.5 + cell_grid) * no_boxes_mask,
                                                            true_box_wh + tf.ones_like(true_box_wh) * np.reshape(
                                                                ANCHORS, [1, 1, 1, N_BOX, 2]) * no_boxes_mask,
                                                            tf.ones_like(coord_mask)],
                                                   lambda: [true_box_xy,
                                                            true_box_wh,
                                                            coord_mask])

    # ---- Finalize the loss ----
    # Each term is normalised by the number of entries with a non-zero mask;
    # the 1e-6 guards against division by zero.
    nb_coord_box = tf.reduce_sum(tf.cast(coord_mask > 0.0, dtype=tf.float32))
    nb_conf_box = tf.reduce_sum(tf.cast(conf_mask > 0.0, dtype=tf.float32))
    nb_class_box = tf.reduce_sum(tf.cast(class_mask > 0.0, dtype=tf.float32))

    loss_xy = tf.reduce_sum(tf.square(true_box_xy - pred_box_xy) * coord_mask) / (nb_coord_box + 1e-6) / 2.
    loss_wh = tf.reduce_sum(tf.square(true_box_wh - pred_box_wh) * coord_mask) / (nb_coord_box + 1e-6) / 2.
    loss_conf = tf.reduce_sum(tf.square(true_box_conf - pred_box_conf) * conf_mask) / (nb_conf_box + 1e-6) / 2.
    loss_class = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=true_box_class, logits=pred_box_class)
    loss_class = tf.reduce_sum(loss_class * class_mask) / (nb_class_box + 1e-6)

    loss = loss_xy + loss_wh + loss_conf + loss_class

    return loss

    # ---- Debugging code (disabled) ----
    # To re-enable, move the lines below above the `return` and uncomment.
    #
    # nb_true_box = tf.reduce_sum(y_true[..., 4])
    # nb_pred_box = tf.reduce_sum(tf.cast(true_box_conf > 0.5, dtype=tf.float32) * tf.cast(pred_box_conf > 0.3, dtype=tf.float32))
    #
    # current_recall = nb_pred_box / (nb_true_box + 1e-6)
    # total_recall = tf.compat.v1.assign_add(total_recall, current_recall)
    #
    # loss = tf.compat.v1.Print(loss, [tf.zeros((1))], message='\nDummy Line \t', summarize=1000)
    # loss = tf.compat.v1.Print(loss, [loss_xy], message='Loss XY \t', summarize=1000)
    # loss = tf.compat.v1.Print(loss, [loss_wh], message='Loss WH \t', summarize=1000)
    # loss = tf.compat.v1.Print(loss, [loss_conf], message='Loss Conf \t', summarize=1000)
    # loss = tf.compat.v1.Print(loss, [loss_class], message='Loss Class \t', summarize=1000)
    # loss = tf.compat.v1.Print(loss, [loss], message='Total Loss \t', summarize=1000)
    # loss = tf.compat.v1.Print(loss, [current_recall], message='Current Recall \t', summarize=1000)
    # loss = tf.compat.v1.Print(loss, [total_recall / seen], message='Average Recall \t', summarize=1000)
    


def read_category(path='/content/drive/My Drive/yeni_proje/data/category.txt'):
    """Return the category names listed in a tab-separated category file.

    The first line of the file is treated as a header and skipped. On every
    other line the second tab-separated field is taken as the name. Blank
    lines (for example a trailing empty line) are ignored.

    Args:
        path: Location of the category file. Defaults to the project's
            ``category.txt`` on the mounted drive.

    Returns:
        List of category names in file order.

    Raises:
        IndexError: If a non-blank data line has fewer than two fields.
    """
    category = []
    with open(path, 'r') as file:
        next(file, None)  # skip the header line, if there is one
        for line in file:
            line = line.rstrip('\n')
            if not line.strip():
                # A blank line carries no category; skipping it avoids an
                # IndexError on the field lookup below.
                continue
            category.append(line.split('\t')[1])
    return category


def plt_example_batch(batches, batch_size=16):
    """Show every image of the first batch in its own matplotlib figure.

    Args:
        batches: Indexable batch source; ``batches[0][0][0]`` must be an
            array of images whose first axis is the batch axis.
        batch_size: Expected number of images in that batch.

    Raises:
        AssertionError: If the first batch does not hold ``batch_size``
            images.
    """
    # Fetch the batch once: indexing the source inside the loop would
    # request batch 0 again for every single image.
    images = batches[0][0][0]       # in general 16x224x224x3
    assert images.shape[0] == batch_size
    for i in range(batch_size):
        plt.figure(i)
        plt.imshow(images[i].astype('uint8'))


if __name__ == '__main__':

    # ---- Initialize parameters ----
    LABELS = read_category()

    IMAGE_H, IMAGE_W = 224, 224  # must equal to GRID_H * 32  416, 416
    GRID_H, GRID_W = 7, 7        # 13, 13
    N_BOX = 5
    CLASS = len(LABELS)
    CLASS_WEIGHTS = np.ones(CLASS, dtype='float32')
    OBJ_THRESHOLD = 0.3
    NMS_THRESHOLD = 0.3

    # Read the generated anchors_5.txt into one flat list
    # [w0, h0, w1, h1, ...]; blank lines are ignored.
    ANCHORS = []
    with open('/content/drive/My Drive/yeni_proje/data/generated_anchors_mobilenet/anchors_5.txt', 'r') as anchor_file:
        for line in anchor_file:
            line = line.strip()
            if line:
                ANCHORS.extend(map(float, line.split(', ')))

    NO_OBJECT_SCALE = 1.0
    OBJECT_SCALE = 5.0
    COORD_SCALE = 1.0
    CLASS_SCALE = 1.0

    BATCH_SIZE = 32
    WARM_UP_BATCHES = 10
    TRUE_BOX_BUFFER = 10

    generator_config = {
        'IMAGE_H': IMAGE_H,
        'IMAGE_W': IMAGE_W,
        'GRID_H': GRID_H,
        'GRID_W': GRID_W,
        'BOX': N_BOX,
        'LABELS': LABELS,
        'CLASS': len(LABELS),
        'ANCHORS': ANCHORS,
        'BATCH_SIZE': BATCH_SIZE,
        'TRUE_BOX_BUFFER': TRUE_BOX_BUFFER,
    }

    # Collect the annotated images of every category folder (folders are
    # named 1..len(LABELS)).
    all_imgs = []
    for class_id in range(1, len(LABELS) + 1):
        image_path = '/content/drive/My Drive/yeni_proje/data/' + str(class_id) + '/'
        annot_path = image_path + '/annotations/'

        folder_imgs, seen_labels = parse_annotation(annot_path, image_path)
        all_imgs.extend(folder_imgs)
    print(np.array(all_imgs).shape)

    # all_imgs was filled folder by folder, so shuffle it explicitly before
    # the train/validation split below; otherwise the split would depend on
    # folder order.
    np.random.shuffle(all_imgs)

    print('=> Generate BatchGenerator.')
    batches = BatchGenerator(all_imgs, generator_config)

    # ---- Start training ----
    train_valid_split = int(0.8 * len(all_imgs))

    # Only the training generator uses jitter; validation batches are left
    # un-jittered so val_loss (which drives early stopping and checkpointing)
    # is measured on unmodified images.
    train_batch = BatchGenerator(all_imgs[:train_valid_split], generator_config, norm=normalize, jitter=True)
    valid_batch = BatchGenerator(all_imgs[train_valid_split:], generator_config, norm=normalize, jitter=False)

    # Module-level tensors picked up by get_model() and custom_loss().
    input_image = Input(shape=(IMAGE_H, IMAGE_W, 3))
    true_boxes = Input(shape=(1, 1, 1, TRUE_BOX_BUFFER, 4))

    model = get_model()

    train(model)

##Detecting Food

In [None]:
# Load the best checkpoint written during training and run detection on a
# single test image.
model.load_weights('all_imgs_mobile_net_loss.h5')
# The model has a second "true boxes" input that its output does not depend
# on (it is only read by the training loss), so zeros are fed at inference.
dummy_array = np.zeros((1, 1, 1, 1, TRUE_BOX_BUFFER, 4))

image_file = '/content/drive/My Drive/yeni_proje/test.jpg'
image = cv2.imread(image_file)
if image is None:
    # cv2.imread signals a missing or unreadable file by returning None
    # instead of raising; fail here with a clear message.
    raise FileNotFoundError('Could not read image: ' + image_file)

plt.figure(figsize=(10, 10))

# Same preprocessing the network was trained with: resize to the network
# input size, scale to [0, 1], reverse the channel order, add a batch axis.
# NOTE(review): this rebinds the global `input_image` that get_model() reads;
# re-run the training cell before calling get_model() again.
input_image = cv2.resize(image, (IMAGE_W, IMAGE_H))
input_image = input_image / 255.
input_image = input_image[:, :, ::-1]
input_image = np.expand_dims(input_image, 0)

netout = model.predict([input_image, dummy_array])

boxes = decode_netout(netout[0],
                      obj_threshold=OBJ_THRESHOLD,
                      nms_threshold=NMS_THRESHOLD,
                      anchors=ANCHORS,
                      nb_class=CLASS)
image = draw_boxes(image, boxes, labels=LABELS)

# The channel order is reversed again for matplotlib display and saving.
plt.imshow(image[:, :, ::-1])
plt.imsave('mn_result_0717.png', image[:, :, ::-1])
plt.show()