In [1]:
import cv2
import numpy as np
import pandas as pd
import os
import glob
import datetime
import time
import shutil
import matplotlib.pyplot as plt
%matplotlib inline

import warnings
warnings.filterwarnings("ignore")
from scipy import misc, ndimage
from PIL import Image

import imgaug as ia
from imgaug import augmenters as iaa

from yolo_utils_processing import *

In [None]:
def make_crops_train(cropped_images, dst, traindf, src = None, augment = False, normalize = False):
    """Cut the detected boxes out of the training images and save them resized.

    For every row of ``cropped_images`` the source image is read as RGB, cropped
    to ``[y1:y2, x1:x2]``, rotated by 90 degrees when it is taller than wide,
    resized to the notebook-level global ``size`` and written to
    ``dst + '<class>/id<img_name without extension>_crop_yolo_<row position>.jpg'``.
    Crops smaller than 30 px in both dimensions are skipped.

    Afterwards every ``*.jpg`` found under ``src/Type_{1,2,3}`` that has no row in
    ``cropped_images`` is resized as a whole and written to
    ``dst + '<class>/id<name>_original_yolo.jpg'``.

    Parameters
    ----------
    cropped_images : pandas.DataFrame
        Needs the columns ``filename``, ``img_name``, ``class``, ``x1``, ``x2``,
        ``y1`` and ``y2``. A ``class_filename`` column is added to it in place.
    dst : str
        Output prefix, concatenated as-is (so it should end with a separator).
        The class sub-directories are expected to exist already; a failed save
        is reported and skipped.
    traindf : object
        Unused; kept so existing calls keep working.
    src : str
        Directory holding the original ``Type_*`` folders. Required by the
        set-difference step even though it defaults to ``None``.
    augment : bool
        When true, ``augment_train(dst)`` is called at the end.
    normalize : bool
        When true, ``normalized`` is applied to every resized image, crops and
        uncropped originals alike.

    Notes
    -----
    ``size``, ``normalized`` and ``augment_train`` are not defined in this
    function; ``size`` is assigned in a later notebook cell and the other two
    presumably come from ``yolo_utils_processing`` - verify there.
    """
    def read_rgb(path):
        # PIL equivalent of ``scipy.ndimage.imread(path, mode = 'RGB')``, which
        # is no longer available in current SciPy releases.
        with Image.open(path) as pil_img:
            return np.array(pil_img.convert('RGB'))

    # Pull the columns out as plain lists so rows are addressed by position;
    # label-based ``column[i]`` breaks as soon as the index is not 0..n-1.
    paths = cropped_images['filename'].tolist()
    crop_filenames = cropped_images['img_name'].tolist()
    labels = cropped_images['class'].tolist()
    boxes = cropped_images[['x1', 'x2', 'y1', 'y2']].values.tolist()

    # Side effect kept from the original implementation: callers may rely on
    # the '<class>/<img_name>' column being present afterwards.
    cropped_images['class_filename'] = [lbl + '/' + name for lbl, name in zip(labels, crop_filenames)]

    for i, (path, name, label, box) in enumerate(zip(paths, crop_filenames, labels, boxes)):
        try:
            img = read_rgb(path)
            x1, x2, y1, y2 = (int(v) for v in box)
            crop_img = img[y1:y2, x1:x2]
            h, w = crop_img.shape[0], crop_img.shape[1]
            if h == 0 or w == 0:
                # An empty crop would make cv2.resize raise further down.
                raise ValueError('empty crop')
        except Exception:
            print('Failed for image:', path)
            continue
        if h < 30 and w < 30:
            continue
        if h > w:
            crop_img = np.rot90(crop_img)
        # Area interpolation when shrinking, cubic when enlarging. The flag has
        # to be passed by keyword: the third positional parameter of cv2.resize
        # is ``dst``, so a positional flag never reaches ``interpolation``.
        if h > size[1] and w > size[0]:
            interpolation = cv2.INTER_AREA
        else:
            interpolation = cv2.INTER_CUBIC
        res_img = cv2.resize(crop_img, size, interpolation = interpolation)
        if normalize:
            res_img = normalized(res_img)
        try:
            final = Image.fromarray((res_img).astype(np.uint8))
            final.save(dst + '{0}/id{1}_crop_yolo_{2}'.format(label, name[:-4], str(i) + '.jpg'))
        except Exception as err:
            print('Saving failed for image: ', name, err)

    def get_orig_imgnames():
        # Collect '<class>/<file>' for every jpg below src/Type_1..3.
        orig_imgnames = []
        for index, fld in enumerate(['Type_1', 'Type_2', 'Type_3']):
            print('Load folder {} (Index: {})'.format(fld, index))
            for fl in glob.glob(os.path.join(src, fld, '*.jpg')):
                orig_imgnames.append(fld + '/' + os.path.basename(fl))
        return orig_imgnames

    def set_diff():
        # Save a resized copy of every original image that produced no crop.
        print('Number of test data set difference images:', len(testsetdiff))
        for rel_name in testsetdiff:
            diff_img = read_rgb(src + rel_name)
            diff_img_resized = cv2.resize(diff_img, size, interpolation = cv2.INTER_AREA)
            print(rel_name)
            if normalize:
                diff_img_resized = normalized(diff_img_resized)
            final = Image.fromarray((diff_img_resized).astype(np.uint8))
            parts = rel_name.split('/')
            final.save(dst + '{0}/id{1}_original_yolo{2}'.format(parts[0], parts[1][:-4], '.jpg'))

    orig_images_imgnames = get_orig_imgnames()
    crop_set = set(cropped_images['class_filename'].tolist())
    test_set = set(orig_images_imgnames)
    # Sorted so the processing order is the same on every run.
    testsetdiff = sorted(test_set.difference(crop_set))
    set_diff()
    if augment:
        augment_train(dst)

    return

def make_crops_test(cropped_images, dst, test = False, src = None, normalize = False):
    """Cut the detected boxes out of the test images and save them resized.

    Every row of ``cropped_images`` is read with ``cv2.imread``, cropped to
    ``[y1:y2, x1:x2]``, rotated by 90 degrees when taller than wide, resized to
    the notebook-level global ``size`` and written below ``dst``. Crops smaller
    than 30 px in both dimensions, and empty crops, are reported and skipped.
    Afterwards every file listed in ``src`` whose name does not occur in
    ``cropped_images['filename']`` is resized as a whole and written to
    ``dst + '<name without extension>_origtest.jpg'``.

    Parameters
    ----------
    cropped_images : pandas.DataFrame
        Needs the columns ``filename``, ``x1``, ``x2``, ``y1`` and ``y2``.
    dst : str
        Output prefix, concatenated as-is (so it should end with a separator).
    test : bool
        When true the output is named
        ``<source name without extension>_yolo_<row position>.jpg``; otherwise
        ``<row position>.jpg``.
    src : str
        Directory with the raw test images, also concatenated as-is.
    normalize : bool
        When true, ``normalized`` is applied to every resized image, crops and
        uncropped originals alike.

    Notes
    -----
    ``size`` is assigned in a later notebook cell and ``normalized`` is not
    defined here; presumably it comes from ``yolo_utils_processing`` - verify.
    """
    testfiles = os.listdir(src)
    # Plain lists give positional access; label-based ``column[i]`` breaks as
    # soon as the frame's index is not 0..n-1.
    paths = cropped_images['filename'].tolist()
    boxes = cropped_images[['x1', 'x2', 'y1', 'y2']].values.tolist()
    crop_set = set(path.split('/')[-1] for path in paths)

    for i, (path, box) in enumerate(zip(paths, boxes)):
        img = cv2.imread(path)
        if img is None:
            # cv2.imread signals an unreadable file by returning None.
            print('Failed for image:', path)
            continue
        x1, x2, y1, y2 = (int(v) for v in box)
        crop_img = img[y1:y2, x1:x2]
        h, w = crop_img.shape[0], crop_img.shape[1]
        # Empty crops are dropped as well; cv2.resize cannot handle them.
        if (h < 30 and w < 30) or h == 0 or w == 0:
            print('Crop {} omitted'.format(path))
            continue
        if h > w:
            crop_img = np.rot90(crop_img)
        # Area interpolation when shrinking, cubic when enlarging. The flag has
        # to be passed by keyword: the third positional parameter of cv2.resize
        # is ``dst``, so a positional flag never reaches ``interpolation``.
        if h > size[1] and w > size[0]:
            interpolation = cv2.INTER_AREA
        else:
            interpolation = cv2.INTER_CUBIC
        res_img = cv2.resize(crop_img, size, interpolation = interpolation)
        if normalize:
            res_img = normalized(res_img)
        if test:
            out_name = path.split('/')[-1][:-4] + '_yolo_' + str(i) + '.jpg'
        else:
            # Previously the enlarging branch used the test-only file name
            # here and raised a NameError whenever ``test`` was false.
            out_name = str(i) + '.jpg'
        cv2.imwrite(dst + out_name, res_img)

    def set_diff():
        # Save a resized copy of every raw image that produced no crop.
        print('\n', 'Take set difference between raw images and crops')
        # Sorted so the processing order is the same on every run.
        testsetdiff = sorted(set(testfiles).difference(crop_set))
        print('Number of test data set difference images:', len(testsetdiff))
        for fname in testsetdiff:
            diff_img = cv2.imread(src + fname)
            if diff_img is None:
                # os.listdir may return sub-directories or non-image files.
                print('Failed for image:', src + fname)
                continue
            diff_img_resized = cv2.resize(diff_img, size, interpolation = cv2.INTER_LINEAR)
            if normalize:
                diff_img_resized = normalized(diff_img_resized)
            cv2.imwrite(dst + fname[:-4] + '_origtest.jpg', diff_img_resized)

    set_diff()

    return

In [None]:
# Storage roots. Every path below is derived from these, so moving the data
# to another disk or machine only requires editing this part of the cell.
DARKNET_TRAIN_DIR = '/home/w/Development/darknet/cervix_yolo/train_yolo/'
KAGGLE_ROOT = '/media/w/1c392724-ecf3-4615-8f3c-79368ec36380/DS Projects/Kaggle/'
DATA_ROOT = KAGGLE_ROOT + 'Intel_Cervix/data/'
YOLO_RES_ROOT = KAGGLE_ROOT + 'Old/NCF/darknet2/darknet/Cervix/'

# Training set: original images, crop output directory, detection result file.
tr_origpath = DARKNET_TRAIN_DIR
tr_croppath = DATA_ROOT + 'train_crops_yolo_299_normalized/'
tr_respath = YOLO_RES_ROOT + 'train_res/train_res100kvoc.txt'

# Test set.
te_origpath = DATA_ROOT + 'test/'
te_croppath = DATA_ROOT + 'test_crops_yolo_299_normalized/'
te_respath = YOLO_RES_ROOT + 'test_res/res100kvoc.txt'

# Additional training data.
tradd_origpath = DATA_ROOT + 'full_data_renamed/'
tradd_croppath = DATA_ROOT + 'train_additional_0.25_crops/'
tradd_respath = YOLO_RES_ROOT + 'train_additional_res/res100kvoc_combined_025.txt'


In [None]:
# Build the crop table for the test set from the images in te_origpath and the
# result file te_respath.
# NOTE(review): load_test, load_boxes and crop are not defined in this notebook;
# presumably they come from yolo_utils_processing - check their contracts there.
testdf = load_test(te_origpath)
bb_te, co_te = load_boxes(te_respath, testdf)
cropte = crop(bb_te)
# img_name is the last 13 characters of the path.
# NOTE(review): this assumes fixed-length file names - confirm for all test files.
cropte['img_name'] = cropte['filename'].str[-13:]

In [None]:
# Build the crop table for the training set from the images in tr_origpath and
# the result file tr_respath.
# NOTE(review): load_train, load_boxes and crop are not defined in this notebook;
# presumably they come from yolo_utils_processing. The meaning of the second
# argument to crop (True here, omitted for the test set) is defined there - verify.
traindf = load_train(tr_origpath)
bb_tr, co_tr = load_boxes(tr_respath, traindf)
croptr = crop(bb_tr, True)

In [None]:
# Inspect both crop tables before anything is written to disk.
# NOTE(review): print_crops is not defined in this notebook (presumably from
# yolo_utils_processing); the meaning of the trailing 5 is not visible here.
print_crops(croptr, len(croptr), 5)
print_crops(cropte, len(cropte), 5)

In [None]:
# Candidate output directories for the training crops. Only p2 is used in this
# cell; it is the same path as tr_croppath.
p1 = '/media/w/1c392724-ecf3-4615-8f3c-79368ec36380/DS Projects/Kaggle/Intel_Cervix/data/train_crops/'
p2 = '/media/w/1c392724-ecf3-4615-8f3c-79368ec36380/DS Projects/Kaggle/Intel_Cervix/data/train_crops_yolo_299_normalized/'

# Class names; make_crops_train writes into a sub-folder named after each class.
labels_set = ['Type_1', 'Type_2', 'Type_3']
# NOTE(review): make_dirs is not defined in this notebook; presumably it creates
# one sub-directory per label under p2 - verify in yolo_utils_processing.
make_dirs(p2, labels_set)

In [None]:
# Target size handed to cv2.resize, i.e. (width, height). make_crops_train and
# make_crops_test read it as a global, so this assignment must run before either
# of them is called.
size = (299, 299)

# Exactly one job is enabled per run; the alternatives stay commented out.
#make_crops_test(cropte, te_croppath, True, te_origpath, normalize = True)
make_crops_train(croptr, tr_croppath, traindf, tr_origpath, augment = True, normalize = True)
#make_crops_train(croptr, tradd_croppath, traindf, tradd_origpath, augment = False)