In [1]:
import cv2
import numpy as np
import pandas as pd
import os
import glob
import datetime
import time
import shutil
import matplotlib.pyplot as plt
%matplotlib inline

import warnings
warnings.filterwarnings("ignore")
from scipy import misc, ndimage
from PIL import Image

from processing_utils import *

In [2]:
def make_crops_train(cropped_images, path, traindf, test_path = None, augment = False, normalize = False, out_size = None):
    """Crop, resize and save one bounding-box crop per training image.

    For each row of ``cropped_images`` the source image is loaded, the box
    ``[y1:y2, x1:x2]`` is cut out, portrait crops are rotated to landscape,
    and the result is resized and written to
    ``<path>/<class>/<row index>_<img_name>``.

    Only one row per ``img_name`` is processed: as soon as an image has been
    loaded and cropped successfully, later rows with the same ``img_name`` are
    skipped (even if that first crop turned out too small to be saved).

    Args:
        cropped_images: table indexable as ``cropped_images[column][i]`` for
            ``i`` in ``range(len(cropped_images))`` with the columns
            'img_name', 'class', 'filename', 'x1', 'x2', 'y1', 'y2'.
            Presumably a pandas DataFrame with a default integer index;
            verify against the ``crop`` helper.
        path: output directory; must already contain one sub-folder per class.
        traindf: table with an 'img_name' column, used only to report how many
            training images have no crop row at all.
        test_path: unused, kept for backward compatibility.
        augment: unused, kept for backward compatibility.
        normalize: if True, pass the resized crop through ``normalized``
            (expected to come from ``processing_utils``) before saving.
        out_size: ``(width, height)`` of the saved crops. When None, the
            notebook-level global ``size`` is used, as before.

    Returns:
        None. Progress and failures are reported with ``print``.
    """
    # Fall back to the notebook-level `size` so existing calls keep working.
    target_size = tuple(size if out_size is None else out_size)
    crop_filenames = cropped_images['img_name']
    labels = cropped_images['class']
    seen_imgs = set()  # set instead of list: O(1) membership test

    for i in range(len(cropped_images)):
        img_name = crop_filenames[i]
        if img_name in seen_imgs:
            continue

        src_file = cropped_images['filename'][i]
        try:
            # scipy.ndimage.imread was removed from SciPy; it was a thin wrapper
            # around exactly this PIL call, so the pixel data is unchanged.
            with Image.open(src_file) as pil_img:
                img = np.array(pil_img.convert('RGB'))
            x1, x2 = int(cropped_images['x1'][i]), int(cropped_images['x2'][i])
            y1, y2 = int(cropped_images['y1'][i]), int(cropped_images['y2'][i])
            crop_img = img[y1:y2, x1:x2]
            h, w = crop_img.shape[0], crop_img.shape[1]
            seen_imgs.add(img_name)
        except Exception as exc:
            # Best effort: report the reason and carry on with the next row.
            print('Failed for image:', src_file, '-', repr(exc))
            continue

        # Skip empty crops (cv2.resize would raise) and crops that are tiny in
        # both dimensions.
        if h == 0 or w == 0 or (h < 30 and w < 30):
            continue

        if h > w:
            # Rotate portrait crops to landscape and keep h/w in sync so the
            # shrink/enlarge decision below looks at the real dimensions.
            crop_img = np.ascontiguousarray(np.rot90(crop_img))
            h, w = w, h

        # INTER_AREA when shrinking in both dimensions, INTER_CUBIC otherwise.
        shrinking = h > target_size[1] and w > target_size[0]
        interpolation = cv2.INTER_AREA if shrinking else cv2.INTER_CUBIC
        # `interpolation` must be passed by keyword: the third positional
        # argument of cv2.resize is `dst`, so a positional flag is ignored.
        res_img = cv2.resize(crop_img, target_size, interpolation=interpolation)
        if normalize:
            res_img = normalized(res_img)

        try:
            out_file = os.path.join(path, str(labels[i]), '{0}_{1}'.format(i, img_name))
            Image.fromarray(res_img.astype(np.uint8)).save(out_file)
        except (KeyError, ValueError, OSError) as exc:
            # KeyError: missing label / unknown extension on old PIL;
            # ValueError / OSError: unknown extension or I/O failure on Pillow.
            print('Saving failed for image: ', img_name, '-', repr(exc))

    # Report training images for which no crop row exists at all.
    crop_set = set(crop_filenames)
    train_set = set(traindf['img_name'])
    missing = train_set.difference(crop_set)
    print('Number of train data set difference images:', len(missing))
    return

def make_crops_test(cropped_images, path, test = False, test_path = None, normalize = False, out_size = None):
    """Crop, resize and save every bounding-box crop of the test images.

    Each row of ``cropped_images`` yields ``<path>/<image stem>_<row index>.jpg``.
    Afterwards every file in ``test_path`` whose name does not appear in
    ``cropped_images['filename']`` at all is resized as a whole and written to
    ``<path>/<image stem>_origtest.jpg``.

    Args:
        cropped_images: table indexable as ``cropped_images[column][i]`` for
            ``i`` in ``range(len(cropped_images))`` with the columns
            'filename', 'x1', 'x2', 'y1', 'y2'. Presumably a pandas DataFrame
            with a default integer index; verify against the ``crop`` helper.
        path: output directory for the crops.
        test: kept for backward compatibility. The output file stem is now
            always derived from the source file name; previously it was only
            defined when ``test`` was True and the function raised
            ``NameError`` otherwise.
        test_path: directory holding the raw test images. When None the
            whole-image fallback step has nothing to process.
        normalize: if True, pass each resized crop through ``normalized``
            (expected to come from ``processing_utils``) before saving.
        out_size: ``(width, height)`` of the saved images. When None, the
            notebook-level global ``size`` is used, as before.

    Returns:
        None. Progress and failures are reported with ``print``.
    """
    # Fall back to the notebook-level `size` so existing calls keep working.
    target_size = tuple(size if out_size is None else out_size)
    # Listed up front so files written below are never mistaken for raw
    # images when `path` and `test_path` are the same directory.
    testfiles = os.listdir(test_path) if test_path is not None else []
    filenames = cropped_images['filename']
    n_rows = len(cropped_images)
    crop_set = {os.path.basename(filenames[i]) for i in range(n_rows)}

    for i in range(n_rows):
        src_file = filenames[i]
        img = cv2.imread(src_file)
        if img is None:
            # cv2.imread signals unreadable/missing files by returning None.
            print('Failed for image:', src_file)
            continue

        stem = os.path.splitext(os.path.basename(src_file))[0]
        x1, x2 = int(cropped_images['x1'][i]), int(cropped_images['x2'][i])
        y1, y2 = int(cropped_images['y1'][i]), int(cropped_images['y2'][i])
        crop_img = img[y1:y2, x1:x2]
        h, w = crop_img.shape[0], crop_img.shape[1]

        # Skip empty crops (cv2.resize would raise) and crops that are tiny in
        # both dimensions.
        if h == 0 or w == 0 or (h < 30 and w < 30):
            print('Crop {} omitted'.format(src_file))
            continue

        if h > w:
            # Rotate portrait crops to landscape and keep h/w in sync so the
            # shrink/enlarge decision below looks at the real dimensions.
            crop_img = np.ascontiguousarray(np.rot90(crop_img))
            h, w = w, h

        # INTER_AREA when shrinking in both dimensions, INTER_CUBIC otherwise.
        shrinking = h > target_size[1] and w > target_size[0]
        interpolation = cv2.INTER_AREA if shrinking else cv2.INTER_CUBIC
        # `interpolation` must be passed by keyword: the third positional
        # argument of cv2.resize is `dst`, so a positional flag is ignored.
        res_img = cv2.resize(crop_img, target_size, interpolation=interpolation)
        if normalize:
            res_img = normalized(res_img)

        out_file = os.path.join(path, '{0}_{1}.jpg'.format(stem, i))
        if not cv2.imwrite(out_file, res_img):
            print('Saving failed for image: ', out_file)

    # Whole-image fallback for raw test files that have no crop row.
    # NOTE(review): these images are never normalized, matching the original
    # hard-coded `set_diff(False)` call; confirm that this is intended.
    # NOTE(review): an image whose crops were all omitted above is still in
    # `crop_set` and therefore gets no output file at all; confirm.
    print('\n', 'Take set difference between raw images and crops')
    testsetdiff = sorted(set(testfiles).difference(crop_set))
    print('Number of test data set difference images:', len(testsetdiff))
    for name in testsetdiff:
        diff_img = cv2.imread(os.path.join(test_path, name))
        if diff_img is None:
            # Not an image (or unreadable): nothing sensible to write.
            print('Failed for image:', name)
            continue
        diff_img_resized = cv2.resize(diff_img, target_size, interpolation=cv2.INTER_LINEAR)
        out_file = os.path.join(path, os.path.splitext(name)[0] + '_origtest.jpg')
        if not cv2.imwrite(out_file, diff_img_resized):
            print('Saving failed for image: ', out_file)
    return

In [3]:
# Hard-coded, machine-specific locations. For each data set there are three
# entries: *_origpath (source images, passed to load_train / load_test),
# *_croppath (output folder handed to make_crops_*) and *_respath (results
# file handed to load_boxes).
# NOTE(review): absolute local paths; adjust before running elsewhere.

# Original training set.
tr_origpath = '/home/w/Development/darknet/cervix_yolo/train_yolo/'
tr_croppath = '/media/w/1c392724-ecf3-4615-8f3c-79368ec36380/DS Projects/Kaggle/Intel_Cervix/data/train_crops/'
tr_respath = '/media/w/1c392724-ecf3-4615-8f3c-79368ec36380/DS Projects/Kaggle/Old/NCF/darknet2/darknet/Cervix/train_res/train_res100kvoc.txt'

# Additional training set.
tradd_origpath = '/media/w/1c392724-ecf3-4615-8f3c-79368ec36380/DS Projects/Kaggle/Intel_Cervix/data/full_data_renamed/'
tradd_croppath = '/media/w/1c392724-ecf3-4615-8f3c-79368ec36380/DS Projects/Kaggle/Intel_Cervix/data/train_additional_0.25_crops/'
tradd_respath = '/media/w/1c392724-ecf3-4615-8f3c-79368ec36380/DS Projects/Kaggle/Old/NCF/darknet2/darknet/Cervix/train_additional_res/res100kvoc_combined_025.txt'

# Test set.
te_origpath = '/media/w/1c392724-ecf3-4615-8f3c-79368ec36380/DS Projects/Kaggle/Intel_Cervix/data/test/'
te_croppath = '/media/w/1c392724-ecf3-4615-8f3c-79368ec36380/DS Projects/Kaggle/Intel_Cervix/data/test_crops/'
te_respath = '/media/w/1c392724-ecf3-4615-8f3c-79368ec36380/DS Projects/Kaggle/Old/NCF/darknet2/darknet/Cervix/test_res/res100kvoc.txt'

In [None]:
# Test set: load the images, load the bounding-box results for them and build
# the crop table. load_test / load_boxes / crop are not defined in this
# notebook (presumably they come from processing_utils).
testdf = load_test(te_origpath)
bb_te, co_te = load_boxes(te_respath, testdf)
cropte = crop(bb_te)
# Keep the last 13 characters of each path as the image name.
# NOTE(review): 13 is a magic number tied to one fixed file-name length;
# confirm it matches the test file names.
cropte['img_name'] = cropte['filename'].str[-13:]

In [None]:
# Original training set: load the images, load the bounding-box results for
# them and build the crop table. The helpers are not defined in this notebook
# (presumably they come from processing_utils).
# NOTE(review): the meaning of the second positional argument of crop (True)
# is not visible here; confirm in the helper.
traindf = load_train(tr_origpath)
bb_tr, co_tr = load_boxes(tr_respath, traindf)
croptr = crop(bb_tr, True)

In [5]:
# Additional training set: same three steps as for the original training set.
# NOTE: this rebinds traindf, bb_tr, co_tr and croptr, so whichever of the two
# training cells ran last determines what the later cells operate on.
traindf = load_train(tradd_origpath)
bb_tr, co_tr = load_boxes(tradd_respath, traindf)
croptr = crop(bb_tr, True)

Load folder Type_1 (Index: 0)
Failed for image: additional_0983.jpg
Load folder Type_2 (Index: 1)
Failed for image: additional_3059.jpg
Failed for image: additional_1573.jpg
Load folder Type_3 (Index: 2)
Time it took to load train data: 1008.1881549358368
Images loaded from: /media/w/1c392724-ecf3-4615-8f3c-79368ec36380/DS Projects/Kaggle/Intel_Cervix/data/full_data_renamed/ 
 
       0     1  2                    0
0  4128  3096  3  additional_0092.jpg
1  4128  3096  3  additional_0050.jpg
2  3264  2448  3  additional_0954.jpg
3  4160  3120  3  additional_0372.jpg
4  4160  3120  3  additional_0749.jpg 
 

[]
Bounding Boxes results loaded from: /media/w/1c392724-ecf3-4615-8f3c-79368ec36380/DS Projects/Kaggle/Old/NCF/darknet2/darknet/Cervix/train_additional_res/res100kvoc_combined_025.txt 
 
                                             filename      xmin      ymin  \
0  /media/w/1c392724-ecf3-4615-8f3c-79368ec36380/...  0.214765  0.428662   
1  /media/w/1c392724-ecf3-4615-8f3c-79368ec36

In [None]:
# Inspect the training crop table with the print_crops helper (not defined in
# this notebook). NOTE(review): the meaning of the last argument (20) is not
# visible here; confirm in the helper.
print_crops(croptr, len(croptr), 20)
#print_crops(cropte, len(cropte), 20)

In [4]:
# Candidate output folders. p1 and p2 hold the same strings as tr_croppath and
# tradd_croppath; p3 and p4 are "nondups" variants (p3 is unused in this cell,
# p4 only appears in a commented-out call).
p1 = '/media/w/1c392724-ecf3-4615-8f3c-79368ec36380/DS Projects/Kaggle/Intel_Cervix/data/train_crops/'
p2 = '/media/w/1c392724-ecf3-4615-8f3c-79368ec36380/DS Projects/Kaggle/Intel_Cervix/data/train_additional_0.25_crops/'
p3 = '/media/w/1c392724-ecf3-4615-8f3c-79368ec36380/DS Projects/Kaggle/Intel_Cervix/data/train_crops_nondups/'
p4 = '/media/w/1c392724-ecf3-4615-8f3c-79368ec36380/DS Projects/Kaggle/Intel_Cervix/data/trainadd_crops_nondups/'

# Class names; make_crops_train saves each crop under <path>/<class>/.
labels_set = ['Type_1', 'Type_2', 'Type_3']

# Presumably creates one sub-folder per label under p2; confirm in the
# make_dirs helper (not defined in this notebook).
make_dirs(p2, labels_set)
#make_dirs(p4, labels_set)

In [6]:
# Output size handed to cv2.resize as dsize, i.e. (width, height).
# make_crops_train and make_crops_test read this name as a global, so this
# assignment has to run before either function is called.
size = (299, 299)

# Only the additional training set is processed here; the test-set and
# original-training-set runs are left commented out.
#make_crops_test(cropte, te_croppath, True, te_origpath)
#make_crops_train(croptr, tr_croppath, traindf, tr_origpath, augment = False)
make_crops_train(croptr, tradd_croppath, traindf, tradd_origpath, augment = False)

Failed for image: /media/w/1c392724-ecf3-4615-8f3c-79368ec36380/DS Projects/Kaggle/Intel_Cervix/data/full_data_renamed/Type_1/additional_0612.jpg
Failed for image: /media/w/1c392724-ecf3-4615-8f3c-79368ec36380/DS Projects/Kaggle/Intel_Cervix/data/full_data_renamed/Type_1/additional_0612.jpg
Failed for image: /media/w/1c392724-ecf3-4615-8f3c-79368ec36380/DS Projects/Kaggle/Intel_Cervix/data/full_data_renamed/Type_1/original_0233.jpg
Number of train data set difference images: 200
