# Utilities for preparing data for cascade training

In [1]:
import cv2
import os
import msvcrt
import numpy as np
from skimage.feature import local_binary_pattern

In [2]:
def safe_write_image(img, dirname, filename, extention='.jpg'):
    """Save ``img`` into ``dirname`` without overwriting an existing file.

    The first candidate is ``dirname + filename + extention``. If that file
    already exists, ``filename_0``, ``filename_1`` ... ``filename_99`` are
    tried in order and the image is written under the first free name.

    Args:
        img: image handed to ``cv2.imwrite``.
        dirname: destination folder; it is concatenated as-is, so it must
            end with a path separator.
        filename: base file name without extension.
        extention: file extension including the leading dot.

    Returns:
        The path that was written, or ``None`` if every candidate name is
        already taken (in which case nothing is written).
    """
    MAX_TRY = 100
    base = dirname + filename
    candidates = [base + extention]
    # The suffix counter must advance on every collision; otherwise only
    # '<name>_0' is ever probed and later duplicates are silently dropped.
    candidates += [base + '_' + str(n) + extention for n in range(MAX_TRY)]
    for try_name in candidates:
        if not os.path.isfile(try_name):
            cv2.imwrite(try_name, img)
            return try_name
    return None
            

In [3]:
def cut_images_from_video(source_video_folder, dest_folder, extentions_list, scale_window = (1,1), wait = 20):
    """Play every video in a folder and save frames on demand.

    Keys while a video is shown: 'c' saves the current frame, 'n' skips to
    the next video, 'q' stops processing altogether.

    Args:
        source_video_folder: folder with the videos; concatenated with the
            file name as-is, so it must end with a path separator.
        dest_folder: folder for saved frames (created if missing); must end
            with a path separator because it is passed to ``safe_write_image``.
        extentions_list: file-name suffixes of the videos to process.
        scale_window: (y_scale, x_scale) factors applied to the frame size
            when sizing the preview window.
        wait: delay passed to ``cv2.waitKey`` between frames.

    Raises:
        OSError: if ``source_video_folder`` does not exist.
    """
    if not os.path.exists(source_video_folder):
        raise OSError('source path does not exist')

    if not os.path.exists(dest_folder):
        os.makedirs(dest_folder)

    suffixes = tuple(extentions_list)
    cntr = 0
    quit_requested = False

    for file in os.listdir(source_video_folder):
        if not os.fsdecode(file).endswith(suffixes):
            continue

        camera = cv2.VideoCapture(source_video_folder + file)
        try:
            while True:
                ret, img = camera.read()
                if not ret:
                    break

                height, width = img.shape[:2]
                cv2.namedWindow('image', cv2.WINDOW_NORMAL)
                cv2.resizeWindow('image', int(width*scale_window[1]), int(height*scale_window[0]))
                cv2.imshow('image', img)

                key = cv2.waitKey(wait) & 0xFF
                if key == ord('q'):
                    quit_requested = True
                    break
                if key == ord('n'):
                    break
                if key == ord('c'):
                    safe_write_image(img, dest_folder, 'img_'+str(cntr))
                    cntr += 1
        finally:
            # Release the capture on every exit path, including 'n' and 'q'.
            camera.release()

        # An explicit flag replaces the check of a key variable that did not
        # exist yet when the first file was skipped or had no frames.
        if quit_requested:
            break

    cv2.destroyAllWindows()
    


In [4]:
# Close all OpenCV windows.
cv2.destroyAllWindows()

In [None]:
# Arguments for cut_images_from_video; the call itself is left commented out.
source_video_folder = 'C:/my_cv_work/data/long/'
dest_folder = 'C:/my_cv_work/data/pics/long_file_scr/'
extentions_list = ['.avi']
scales = (1,1)
#cut_images_from_video(source_video_folder, dest_folder, extentions_list, scale_window = scales, wait = 10)

In [5]:
def cut_positive_sample(source_folder, dest_folder, name_template, extentions_list, ext, log_file = 'cut_log.txt'):
    """Interactively crop positive samples out of the source images.

    Each unprocessed image is shown in a ``cv2.selectROI`` window. Every
    selected rectangle is saved as ``sample_<n>``; an empty selection moves
    on to the next image. After each image a key press is awaited and 'q'
    stops the run. Processed file names are appended to ``log_file`` inside
    ``source_folder`` and skipped on later runs; remove a name from that
    file to process the image again.

    Args:
        source_folder: folder with source images (must end with a separator).
        dest_folder: folder for the crops (created if missing; must end with
            a separator).
        name_template: accepted for interface compatibility; not used here.
        extentions_list: suffixes of the files to process, e.g. ['.jpg'].
        ext: extension for the saved crops, e.g. '.jpg'; '.jpg' is used when
            it is empty.
        log_file: name of the processed-files log inside ``source_folder``.

    Raises:
        OSError: if ``source_folder`` does not exist.
    """
    if not os.path.exists(source_folder):
        raise OSError('source path does not exist')

    if not os.path.exists(dest_folder):
        os.makedirs(dest_folder)

    log_path = source_folder + log_file
    cut_files = set()
    if os.path.isfile(log_path):
        with open(log_path) as f:
            cut_files = {line.rstrip() for line in f}

    suffixes = tuple(extentions_list)
    save_ext = ext or '.jpg'
    fcounter = 0
    from_center = False

    with open(log_path, 'a') as f:
        for file in os.listdir(source_folder):
            if file in cut_files or not os.fsdecode(file).endswith(suffixes):
                continue

            img = cv2.imread(source_folder + file)
            if img is None:
                # Unreadable file: skip it without logging so it can be retried.
                continue
            f.write(file+'\n')

            while True:
                r = cv2.selectROI('win_roi', img, from_center)
                if r == (0,0,0,0):
                    break
                y0, y1 = int(r[1]), int(r[1]+r[3])
                x0, x1 = int(r[0]), int(r[0]+r[2])
                fcounter += 1
                safe_write_image(img[y0:y1, x0:x1], dest_folder, 'sample_'+str(fcounter), save_ext)

            # Wait for a key only after an image was actually shown, so that
            # skipped files do not each demand a key press.
            interrupt = cv2.waitKey()
            if interrupt & 0xFF == ord('q'):
                break

    cv2.destroyAllWindows()


In [6]:
def cut_sample_and_delete(
    background_img, source_folder, dest_folder, cut_background_folder,
    name_template, extentions_list, ext, log_file = 'cut_log.txt'
):
    """Crop samples interactively and patch the cropped areas with background.

    For every unprocessed image the user selects rectangles with
    ``cv2.selectROI``. Each rectangle is saved as ``sample_<n>`` in
    ``dest_folder``, and the same rectangle in a copy of the image is
    overwritten with the matching region of the background image. When at
    least one rectangle was cut, the patched copy is saved as
    ``del_img<k>`` in ``cut_background_folder``. An empty selection ends the
    current image; afterwards a key press is awaited and 'q' stops the run.
    Processed names are appended to ``log_file`` in ``source_folder`` and
    skipped on later runs.

    Args:
        background_img: path of the background image.
        source_folder: folder with source images (must end with a separator).
        dest_folder: folder for cropped samples (created if missing; must
            end with a separator).
        cut_background_folder: folder for the patched images (created if
            missing; must end with a separator).
        name_template: accepted for interface compatibility; not used here.
        extentions_list: suffixes of the files to process, e.g. ['.jpg'].
        ext: extension for the saved images; '.jpg' is used when it is empty.
        log_file: name of the processed-files log inside ``source_folder``.

    Raises:
        OSError: if ``source_folder`` does not exist or the background image
            cannot be read.
    """
    if not os.path.exists(source_folder):
        raise OSError('source path does not exist')

    for folder in (dest_folder, cut_background_folder):
        if not os.path.exists(folder):
            os.makedirs(folder)

    bg_img = cv2.imread(background_img)
    if bg_img is None:
        raise OSError('background image could not be read')

    log_path = source_folder + log_file
    cut_files = set()
    if os.path.isfile(log_path):
        with open(log_path) as f:
            cut_files = {line.rstrip() for line in f}

    suffixes = tuple(extentions_list)
    save_ext = ext or '.jpg'
    fcounter = 0
    dcntr = 0
    from_center = False

    with open(log_path, 'a') as f:
        for file in os.listdir(source_folder):
            if file in cut_files or not os.fsdecode(file).endswith(suffixes):
                continue

            img = cv2.imread(source_folder + file)
            if img is None:
                # Unreadable file: skip it without logging so it can be retried.
                continue
            mod_img = img.copy()
            f.write(file+'\n')

            cflag = False
            while True:
                r = cv2.selectROI('win_roi', img, from_center)
                if r == (0,0,0,0):
                    break
                y0, y1 = int(r[1]), int(r[1]+r[3])
                x0, x1 = int(r[0]), int(r[0]+r[2])
                # NOTE(review): assumes the background is at least as large
                # as the source image; a smaller one makes this assignment fail.
                mod_img[y0:y1, x0:x1] = bg_img[y0:y1, x0:x1]
                fcounter += 1
                cflag = True
                safe_write_image(img[y0:y1, x0:x1], dest_folder, 'sample_'+str(fcounter), save_ext)

            if cflag:
                safe_write_image(mod_img, cut_background_folder, 'del_img'+str(dcntr), save_ext)
            dcntr += 1

            # Wait for a key only after an image was actually shown.
            interrupt = cv2.waitKey()
            if interrupt & 0xFF == ord('q'):
                break

    cv2.destroyAllWindows()

In [8]:
# Arguments for cut_sample_and_delete, followed by the call.
background_img = 'C:/my_cv_work/data/pics/long_file_scr/img_115.jpg'
source_folder = 'C:/my_cv_work/data/pics/long_file_scr/'
dest_folder = 'C:/my_cv_work/data/pics/heads_high_quality/'
cut_background_folder = 'C:/my_cv_work/data/pics/heads_high_quality/for_bg/'
name_template = 'img_'
extentions_list = ['.jpg']
ext = '.jpg'

cut_sample_and_delete(
    background_img, source_folder, dest_folder, cut_background_folder, \
    name_template, extentions_list, ext, log_file = 'cut_log.txt'
)

In [None]:
# source_folder = 'C:/my_cv_work/data/pics/cam_1_src/'
# dest_folder = 'C:/my_cv_work/data/pics/heads/'
# name_template = 'img_'
# extentions_list = ['.jpg']
# ext = '.jpg'
# cut_positive_sample(source_folder, dest_folder, name_template, extentions_list, ext, log_file = 'cut_log.txt')

In [134]:
def cut_region_from_collection(source_folder, dest_folder, extentions_list, name_template, roi_file = 'roi.txt', ext='.jpg'):
    """Cut one user-selected rectangle out of every image in a folder.

    Matching images are shown one by one until a non-empty ROI is selected.
    The shape of that image and the ROI bounds (y0, y1, x0, x1) are written
    to ``roi_file`` in ``dest_folder``. The same rectangle is then cut from
    every matching image and saved as ``name_template<n><ext>``. If no ROI
    is selected, nothing is cut.

    Args:
        source_folder: folder with source images (must end with a separator).
        dest_folder: output folder (created if missing; must end with a
            separator).
        extentions_list: suffixes of the files to process.
        name_template: prefix of the output file names.
        roi_file: name of the text file that records the chosen ROI.
        ext: extension for the saved crops.
    """
    if not os.path.exists(dest_folder):
        os.makedirs(dest_folder)

    suffixes = tuple(extentions_list)
    names = [n for n in os.listdir(source_folder) if os.fsdecode(n).endswith(suffixes)]
    from_center = False

    # Choose the region on the first image where the user draws a rectangle.
    roi = None
    for file in names:
        img = cv2.imread(source_folder+file)
        if img is None:
            continue
        r = cv2.selectROI('roi', img, from_center)
        if r == (0,0,0,0):
            continue
        roi = (int(r[1]), int(r[1]+r[3]), int(r[0]), int(r[0]+r[2]))
        with open(dest_folder+roi_file, 'w') as f:
            f.write('{} \n'.format(img.shape))
            f.write('{} \n'.format(roi))
        cv2.destroyWindow('roi')
        break

    if roi is None:
        # No region was chosen, so there is nothing to cut.
        cv2.destroyAllWindows()
        return

    y0, y1, x0, x1 = roi
    fcounter = 0
    for file in names:
        img = cv2.imread(source_folder+file)
        if img is None:
            continue
        safe_write_image(img[y0:y1, x0:x1], dest_folder, name_template+str(fcounter), ext)
        fcounter += 1

In [135]:
# Arguments for cut_region_from_collection, followed by the call.
source_folder = 'C:/my_cv_work/data/pics/heads_2/noheads/'
dest_folder = 'C:/my_cv_work/data/pics/heads_2/noheads/model_roi/'
extentions_list = ['.jpg']
name_template = 'roi_'
cut_region_from_collection(source_folder, dest_folder, extentions_list, name_template)

In [101]:
# source_folder = 'C:/my_cv_work/data/pics/heads_2/noheads/test_cutter/'
# dest_folder = 'C:/my_cv_work/data/pics/heads_2/noheads/test_cutter/roi/'
# extentions_list = ['.jpg']
# name_template = 'roi_'
# cut_region_from_collection(source_folder, dest_folder, extentions_list, name_template)

In [193]:
def shift_x(img, shift):
    """Return a copy of ``img`` cyclically shifted along axis 1 (columns).

    Columns pushed past the right edge wrap around to the left. ``shift``
    may be zero, negative (shift left) or larger than the image width.
    """
    return np.roll(img, shift, axis=1)

def shift_y(img, shift):
    """Return a copy of ``img`` cyclically shifted along axis 0 (rows).

    Rows pushed past the bottom edge wrap around to the top. ``shift`` may
    be zero, negative (shift up) or larger than the image height.
    """
    return np.roll(img, shift, axis=0)

def cut_negative_samples(img, winsize, step_x, step_y, dest_dir, name_template, ext='.jpg'):
    """Tile an image into ``winsize`` samples and save them to ``dest_dir``.

    The image is cyclically shifted by every (y, x) offset in
    ``range(0, win + 1, step)`` and each shifted copy is cut into
    non-overlapping windows. A non-positive step disables shifting along
    that axis (offset 0 only).

    Args:
        img: source image.
        winsize: (height, width) of a sample; must not exceed the image size.
        step_x: shift step along x.
        step_y: shift step along y.
        dest_dir: output folder (must end with a separator).
        name_template: prefix of the sample file names; a running counter is
            appended.
        ext: extension of the saved samples.
    """
    win_y, win_x = winsize[0], winsize[1]
    height, width = img.shape[0], img.shape[1]

    # range() rejects a zero step and yields nothing for a negative one, so
    # the fallback has to be decided before the ranges are built.
    y_offsets = range(0, win_y + 1, step_y) if step_y > 0 else (0,)
    x_offsets = range(0, win_x + 1, step_x) if step_x > 0 else (0,)

    cc = 0
    for j in y_offsets:
        for i in x_offsets:
            curr_img = shift_x(shift_y(img, j), i)
            # The '+ 1' keeps the last window when the size is an exact
            # multiple of the window size.
            for b_bound in range(win_y, height + 1, win_y):
                u_bound = b_bound - win_y
                for r_bound in range(win_x, width + 1, win_x):
                    l_bound = r_bound - win_x
                    # Crop from the shifted copy, not from the original image.
                    cropped = curr_img[u_bound:b_bound, l_bound:r_bound]
                    safe_write_image(cropped, dest_dir, name_template + str(cc), ext)
                    cc += 1
                    
def serial_cut_negative_samples(source_folder, dest_folder, winsize, step_x, step_y, name_template, extentions_list):
    """Show each image in a folder and tile the ones the user accepts.

    Keys: 'c' cuts the shown image into samples with
    ``cut_negative_samples``, 'q' stops, any other key skips the image.
    Samples from the k-th accepted image are named
    ``<name_template><k>_<n>.jpg`` so that different images do not compete
    for the same file names.

    Args:
        source_folder: folder with source images (must end with a separator).
        dest_folder: output folder (must end with a separator).
        winsize: (height, width) of a sample.
        step_x: shift step along x passed to ``cut_negative_samples``.
        step_y: shift step along y passed to ``cut_negative_samples``.
        name_template: prefix of the sample file names.
        extentions_list: suffixes of the files to process.
    """
    suffixes = tuple(extentions_list)
    fcounter = 0
    for file in os.listdir(source_folder):
        if not os.fsdecode(file).endswith(suffixes):
            continue

        img = cv2.imread(source_folder+file)
        if img is None:
            continue
        cv2.imshow('ex', img)
        key = cv2.waitKey() & 0xFF

        if key == ord('q'):
            break

        if key == ord('c'):
            prefix = name_template + str(fcounter) + '_'
            cut_negative_samples(img, winsize, step_x, step_y, dest_folder, prefix, '.jpg')
            fcounter += 1

    cv2.destroyAllWindows()
    


In [103]:
def cut_image_into_winsize(img, winsize):
    """Cut ``img`` into non-overlapping tiles and return them as one array.

    Args:
        img: 2-D (grayscale) or 3-D (with channels) image array.
        winsize: (width, height) of a tile. Note that this order differs
            from the (height, width) order used by ``cut_negative_samples``.

    Returns:
        ``uint8`` array of shape ``(ny * nx, height, width[, channels])``
        with the tiles in row-major order, where ``nx`` and ``ny`` are the
        numbers of whole tiles that fit horizontally and vertically. Pixels
        beyond the last whole tile are discarded.
    """
    win_x, win_y = winsize[0], winsize[1]
    nx = img.shape[1] // win_x
    ny = img.shape[0] // win_y
    out_ary = np.zeros((nx*ny, win_y, win_x) + img.shape[2:], dtype=np.uint8)

    # Iterate over tile indices rather than pixel bounds so that the last
    # row/column is included when the image size is an exact multiple of the
    # tile size.
    cc = 0
    for row in range(ny):
        top = row * win_y
        for col in range(nx):
            left = col * win_x
            out_ary[cc] = img[top:top + win_y, left:left + win_x]
            cc += 1
    return out_ary

In [233]:
def kullback_leibler_divergence(p, q):
    """Return sum(p * log2(p / q)) over the positions where both are non-zero.

    ``p`` and ``q`` are converted with ``np.asarray``; positions where
    either value equals zero are left out of the sum.
    """
    p_arr, q_arr = np.asarray(p), np.asarray(q)
    both_nonzero = (p_arr != 0) & (q_arr != 0)
    p_kept = p_arr[both_nonzero]
    q_kept = q_arr[both_nonzero]
    return np.sum(p_kept * np.log2(p_kept / q_kept))

def lbp_similar(a, b, threshold=0.07, method='default', n_points=8, radius=1, verbose=True):
    """Tell whether two equally shaped images have similar LBP histograms.

    Local binary patterns are computed for both images, histogrammed into
    ``n_points + 2`` bins over ``(0, n_points + 2)`` and compared with the
    absolute Kullback-Leibler divergence.

    Args:
        a, b: images of identical shape.
        threshold: largest divergence still regarded as similar.
        method: ``local_binary_pattern`` method name.
        n_points: number of LBP neighbours.
        radius: LBP radius.
        verbose: accepted for interface compatibility; not used.

    Returns:
        True if the divergence is at most ``threshold``.

    Raises:
        TypeError: if the shapes differ.
    """
    if b.shape != a.shape:
        raise TypeError('a and b must have the same shape')

    n_bins = n_points + 2
    # NOTE(review): LBP codes outside (0, n_bins) are ignored by the
    # histogram; confirm this bin count suits the chosen LBP method.
    histograms = []
    for patch in (a, b):
        lbp = local_binary_pattern(patch, n_points, radius, method)
        # density= replaces the normed= keyword that NumPy removed.
        hist, _ = np.histogram(lbp, density=True, bins=n_bins, range=(0, n_bins))
        histograms.append(hist)

    score = abs(kullback_leibler_divergence(histograms[0], histograms[1]))
    return score <= threshold

In [162]:
def make_histogram_of_sobel_grad(img, n_bins=16, ksize_=3):
    """Return a histogram of Sobel gradient directions weighted by magnitude.

    Args:
        img: input image passed to ``cv2.Sobel``.
        n_bins: number of direction bins over ``[0, 2*pi)``.
        ksize_: Sobel kernel size.

    Returns:
        Array of ``n_bins`` density values.
    """
    grad_x = cv2.Sobel(img, cv2.CV_64F, dx=1, dy=0, ksize=ksize_)
    grad_y = cv2.Sobel(img, cv2.CV_64F, dx=0, dy=1, ksize=ksize_)
    img_g, img_theta = cv2.cartToPolar(grad_x, grad_y)
    # density= replaces the normed= keyword that NumPy removed.
    hist, _ = np.histogram(
        img_theta.flatten(), bins=n_bins, range=(0, 2*np.pi),
        weights=img_g.flatten(), density=True
    )
    return hist
    
def sobel_similar(a, b, threshold=0.1, n_bins_=16, ksize__=3):
    """Tell whether two equally shaped images have similar gradient histograms.

    Builds the Sobel direction histogram of each image and returns True if
    the absolute Kullback-Leibler divergence between them is at most
    ``threshold``. Raises TypeError if the shapes differ.
    """
    if a.shape != b.shape:
        raise TypeError

    histograms = [
        make_histogram_of_sobel_grad(patch, n_bins=n_bins_, ksize_=ksize__)
        for patch in (a, b)
    ]
    divergence = kullback_leibler_divergence(histograms[0], histograms[1])
    return abs(divergence) <= threshold

In [157]:
# Load two images, convert them to grayscale and cut each into 48x48 tiles.
img1 = cv2.imread('C:/my_cv_work/data/pics/heads_2/noheads/model_roi/main.jpg')
img2 = cv2.imread('C:/my_cv_work/data/pics/heads_2/noheads/model_roi/roi_0.jpg')
img1 = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY)
img2 = cv2.cvtColor(img2, cv2.COLOR_BGR2GRAY)
ary1 = cut_image_into_winsize(img1, (48,48))
ary2 = cut_image_into_winsize(img2, (48,48))

In [158]:
# Show matching tiles stacked vertically and print the sobel_similar verdict
# for each pair; press 'q' to stop, any other key to continue.
for i in range(ary1.shape[0]):
    cc = np.concatenate((ary1[i], ary2[i]))
    print(sobel_similar(ary1[i], ary2[i], n_bins_ = 16))
    cv2.imshow('ex', cc)
    interrupt = cv2.waitKey()

    if interrupt & 0xFF == ord('q'):
        cv2.destroyAllWindows()
        break

0.00784914943482
True
0.0138740032185
True
0.00968722056351
True
0.0183303255781
True
0.00916983032942
True
0.0185809606847
True
2.9413164956
False


In [173]:
def get_unique_samples(
    main_sample_ary, cut_img_ary, threshold_ = 0.1, method='sobel', sobel_bins=16, sobel_ksize=3,
    lbp_method='default', lbp_npoints=8, lbp_radius=1
):
    """Return the tiles of ``cut_img_ary`` that differ from the reference tiles.

    Tiles are compared pairwise by position with ``sobel_similar`` or
    ``lbp_similar``. A tile is kept when it is not similar to the reference
    tile at the same index.

    Args:
        main_sample_ary: array of reference tiles, first axis = tile index.
        cut_img_ary: array of tiles to test; must have the same shape.
        threshold_: similarity threshold passed to the comparison function.
        method: 'sobel' or 'lbp'.
        sobel_bins, sobel_ksize: parameters for the 'sobel' method.
        lbp_method, lbp_npoints, lbp_radius: parameters for the 'lbp' method.

    Returns:
        List of the tiles from ``cut_img_ary`` judged not similar (empty for
        empty input).

    Raises:
        TypeError: if the array shapes differ or ``method`` is unknown.
    """
    if main_sample_ary.shape != cut_img_ary.shape:
        raise TypeError('main_sample_ary and cut_img_ary must have the same shape')
    if method not in ('sobel', 'lbp'):
        raise TypeError("method must be 'sobel' or 'lbp'")

    uniques_ary = []
    for main_sample, test_sample in zip(main_sample_ary, cut_img_ary):
        if method == 'sobel':
            similar = sobel_similar(
                main_sample, test_sample, threshold=threshold_, n_bins_=sobel_bins, ksize__=sobel_ksize
            )
        else:
            similar = lbp_similar(
                main_sample, test_sample, threshold=threshold_, method=lbp_method,
                n_points=lbp_npoints, radius=lbp_radius
            )
        if not similar:
            uniques_ary.append(test_sample)

    return uniques_ary

In [164]:
# Compare the tiles of the second image against the reference tiles.
# get_unique_samples needs two tile arrays of equal shape, so the tiled
# array ary2 is passed rather than the whole image img2.
l = get_unique_samples(ary1, ary2)
len(l)

51

In [241]:
def serial_cut_no_duplicates(
    source_folder, dest_folder, main_file,  winsize, step_x, step_y, name_template, extentions_list,
    threshold_ = 0.1, method='sobel', sobel_bins=16, sobel_ksize=3,
    lbp_method='default', lbp_npoints=8, lbp_radius=1
):
    """Tile a reference image and save only the differing tiles of the others.

    For every (y, x) cyclic shift in ``range(0, win + 1, step)`` the
    reference image is tiled and all its tiles are saved as
    ``main_<name_template><n>``. Every other matching image in
    ``source_folder`` is shifted and tiled the same way, and only the tiles
    that ``get_unique_samples`` reports as different from the reference
    tiles are saved as ``<name_template><n>``. All images are read as
    grayscale. For each image the number of kept tiles, the file name and a
    running counter are printed.

    Args:
        source_folder: folder with the images (must end with a separator).
        dest_folder: output folder (created if missing; must end with a
            separator).
        main_file: file name of the reference image inside ``source_folder``.
        winsize: (height, width) of a tile.
        step_x, step_y: shift steps along x and y.
        name_template: prefix of the saved tile names.
        extentions_list: suffixes of the files to compare; '.jpg' is used
            when the list is empty.
        threshold_, method, sobel_bins, sobel_ksize, lbp_method, lbp_npoints,
            lbp_radius: forwarded to ``get_unique_samples``.

    Raises:
        OSError: if the reference image cannot be read.
    """
    os.makedirs(dest_folder, exist_ok=True)

    main_img = cv2.imread(source_folder+main_file, flags=cv2.IMREAD_GRAYSCALE)
    if main_img is None:
        raise OSError('main image could not be read')

    suffixes = tuple(extentions_list) or ('.jpg',)
    # List the folder once so that files written during the run are never
    # picked up as inputs.
    other_files = [
        name for name in os.listdir(source_folder)
        if os.fsdecode(name) != main_file and os.fsdecode(name).endswith(suffixes)
    ]

    win_y = winsize[0]
    win_x = winsize[1]
    # cut_image_into_winsize expects (width, height).
    tile_size = (win_x, win_y)

    cc = 0
    cfile = 0
    for j in range(0, win_y+1, step_y):
        for i in range(0, win_x+1, step_x):
            curr_img = shift_x(shift_y(main_img, j), i)
            main_img_ary = cut_image_into_winsize(curr_img, tile_size)
            for tile in main_img_ary:
                safe_write_image(tile, dest_folder, 'main_'+name_template+str(cc))
                cc += 1

            for file in other_files:
                cfile += 1
                img = cv2.imread(source_folder+file, flags=cv2.IMREAD_GRAYSCALE)
                if img is None:
                    continue
                ctimg = cut_image_into_winsize(shift_x(shift_y(img, j), i), tile_size)
                uniques = get_unique_samples(
                    main_img_ary, ctimg, threshold_=threshold_, method=method,
                    sobel_bins=sobel_bins, sobel_ksize=sobel_ksize,
                    lbp_method=lbp_method, lbp_npoints=lbp_npoints, lbp_radius=lbp_radius
                )
                print(len(uniques), file, cfile)
                for img_ in uniques:
                    safe_write_image(img_, dest_folder, name_template+str(cc))
                    cc += 1

In [None]:
# Arguments for serial_cut_no_duplicates (108x108 tiles, 24-pixel shift
# steps, LBP comparison with threshold 0.3), followed by the call.
source_folder = 'C:/my_cv_work/data/pics/heads_2/noheads/model_roi/'
dest_folder = 'C:/my_cv_work/data/pics/heads_2/noheads/model_roi/samples_108_108/'
main_file = 'main.jpg'
winsize = (108,108)
step_x = 24
step_y = 24
name_template = 'sample_'
extentions_list = ['.jpg']
serial_cut_no_duplicates(source_folder, dest_folder, main_file,  winsize, step_x, step_y, name_template, extentions_list, method='lbp',threshold_=0.3)

In [192]:
# main_sample_ary = cut_image_into_winsize(img1, (100,100))
# diff_ary = get_unique_samples(main_sample_ary, img2)
# print(len(diff_ary))
# for img in diff_ary:
#     cv2.imshow('ex', img)
#     interrupt=cv2.waitKey()
#     if interrupt & 0xFF == ord('q'):
#      cv2.destroyAllWindows()
#         break
# Close all OpenCV windows.
cv2.destroyAllWindows()

In [123]:
def del_heads(source_folder, dest_folder, log_file, extentions_list):
    """Interactively blank out rectangles in images and save the results.

    Every matching image is shown in a ``cv2.selectROI`` window. Each
    selected rectangle is filled with the value 255; an empty selection ends
    the current image. Images with at least one filled rectangle are saved
    as ``sample_<n>.jpg`` in ``dest_folder``. After each image a key press
    is awaited and 'n' stops the run. Processed names are appended to
    ``log_file`` in ``source_folder``.

    Args:
        source_folder: folder with source images (must end with a separator).
        dest_folder: output folder (must end with a separator).
        log_file: name of the log file inside ``source_folder``.
        extentions_list: suffixes of the files to process.
    """
    suffixes = tuple(extentions_list)
    from_center = False
    fcounter = 0
    with open(source_folder+log_file, 'a') as f:
        for file in os.listdir(source_folder):
            if not os.fsdecode(file).endswith(suffixes):
                continue

            img = cv2.imread(source_folder + file)
            if img is None:
                # Unreadable file: skip it instead of failing.
                continue
            f.write(file+'\n')

            write_flag = False
            while True:
                r = cv2.selectROI('win_roi', img, from_center)
                if r == (0,0,0,0):
                    break
                img[int(r[1]):int(r[1]+r[3]), int(r[0]):int(r[0]+r[2])] = 255
                write_flag = True

            if write_flag:
                safe_write_image(img, dest_folder, 'sample_'+str(fcounter), '.jpg')
                fcounter += 1

            interrupt = cv2.waitKey()
            if interrupt & 0xFF == ord('n'):
                break

    cv2.destroyAllWindows()

In [10]:
? cv2.resize

In [12]:
def img_resizer(source_folder, dest_folder, final_size, extentions_list):
    """Resize every matching image in a folder and write it under the same name.

    Args:
        source_folder: folder with source images.
        dest_folder: output folder (created if missing); may equal
            ``source_folder``, in which case files are overwritten in place.
        final_size: target size passed to ``cv2.resize`` as ``dsize``, which
            OpenCV documents as (width, height).
        extentions_list: suffixes of the files to process.
    """
    # cv2.imwrite does not create missing folders, so create it up front.
    os.makedirs(dest_folder, exist_ok=True)

    suffixes = tuple(extentions_list)
    for file in os.listdir(source_folder):
        if not os.fsdecode(file).endswith(suffixes):
            continue

        img = cv2.imread(os.path.join(source_folder, file))
        if img is None:
            # Unreadable file: skip it instead of failing.
            continue
        cv2.imwrite(os.path.join(dest_folder, file), cv2.resize(img, final_size))

In [21]:
# Resize every image of the 'rest' folder in place to final_size.
# A raw string keeps the backslashes literal without relying on invalid
# escape sequences; the resulting path is unchanged.
source_folder = r'C:\my_cv_work\data\pics\people_from_long_sorted\rest'
extentions_list=['.jpg', '.jpeg']
final_size = (60,120)
#for i in range(1,31):
src = source_folder #os.path.join(source_folder, str(i))
dst=src
img_resizer(src,dst,final_size,extentions_list)

In [8]:
def safe_renamer(source_folder, name_template, extentions_list, ext):
    """Rename matching files to ``<name_template><n><ext>`` without overwriting.

    Files are numbered in ``os.listdir`` order starting from 0. If the
    target name already exists, the file keeps its current name; the counter
    still advances.

    Args:
        source_folder: folder whose files are renamed.
        name_template: prefix of the new names.
        extentions_list: suffixes of the files to rename.
        ext: extension of the new names, including the dot.
    """
    suffixes = tuple(extentions_list)
    cntr = 0
    for file in os.listdir(source_folder):
        name = os.fsdecode(file)
        if not name.endswith(suffixes):
            continue

        src_name = os.path.join(source_folder, name)
        out_name = os.path.join(source_folder, name_template + str(cntr) + ext)
        cntr += 1

        # os.rename silently replaces an existing file on POSIX, so check
        # explicitly instead of relying on FileExistsError alone.
        if os.path.exists(out_name):
            continue
        try:
            os.rename(src_name, out_name)
        except FileExistsError:
            pass

In [15]:
# Arguments for safe_renamer, followed by the call.
source_folder = 'C:/my_cv_work/data/pics/people_from_long_sorted/rest'
extentions_list = ['.jpg', '.jpeg']
ext = '.jpg'
#for i in range(1,31):
name_template = 'pers_rest_'
safe_renamer(source_folder, name_template, extentions_list, ext)

In [138]:
def grayscaler(source_folder, dest_folder, extentions_list, ext):
    """Convert every matching image in a folder to grayscale.

    Each image is read, converted with ``cv2.COLOR_BGR2GRAY`` and written to
    ``dest_folder`` under its original file name.

    Args:
        source_folder: folder with source images (must end with a separator).
        dest_folder: output folder (must end with a separator); may equal
            ``source_folder``.
        extentions_list: suffixes of the files to process.
        ext: accepted for interface compatibility; not used.
    """
    suffixes = tuple(extentions_list)
    for file in os.listdir(source_folder):
        if not os.fsdecode(file).endswith(suffixes):
            continue

        img = cv2.imread(source_folder+file)
        if img is None:
            # Unreadable file: skip it instead of failing in cvtColor.
            continue
        cv2.imwrite(dest_folder+file, cv2.cvtColor(img, cv2.COLOR_BGR2GRAY))

In [246]:
# Convert the '.jpg' files of the positives folder to grayscale in place.
source_folder = 'D:/cv_data/cascade_training/try_3/positives/'
dest_folder = source_folder
grayscaler(source_folder, dest_folder, ['.jpg'], '')

In [250]:
def negative_samples_file_creator(current_dir, neg_dir, neg_fname):
    """Write a listing of the entries of ``neg_dir`` to a text file.

    The file ``current_dir + neg_fname`` is (re)created and receives one
    line per entry of ``neg_dir``: ``neg_dir`` followed by the entry name.
    Both folder arguments are concatenated as-is, so they must end with a
    path separator.
    """
    listing_path = current_dir + neg_fname
    with open(listing_path, 'w') as listing:
        listing.writelines(neg_dir + entry + '\n' for entry in os.listdir(neg_dir))

In [251]:
# Write negatives.txt listing every entry of the negatives folder.
current_dir = 'D:/cv_data/cascade_training/try_3/'
neg_dir = 'D:/cv_data/cascade_training/try_3/negatives/'
neg_fname = 'negatives.txt'
negative_samples_file_creator(current_dir, neg_dir, neg_fname)

In [252]:
def positive_samples_file_creator(current_dir, pos_dir, pos_fname = 'positives.txt'):
    """Write a positives description file covering each whole image.

    For every readable image in ``pos_dir`` one line is written to
    ``current_dir + pos_fname``: ``<path> 1 0 0 <width> <height>``. Entries
    that ``cv2.imread`` cannot read are skipped. Both folder arguments are
    concatenated as-is, so they must end with a path separator.
    """
    with open(current_dir+pos_fname, 'w') as f:
        for file in os.listdir(pos_dir):
            img = cv2.imread(pos_dir+file)
            if img is None:
                # Sub-folders and non-image files have no shape to report.
                continue
            height, width = img.shape[:2]
            f.write(pos_dir+file + ' 1 0 0 '+str(width)+' '+str(height)+'\n')

In [253]:
# Write positives.txt for the positives folder; current_dir comes from an
# earlier cell.
pos_dir = 'D:/cv_data/cascade_training/try_3/positives/'
positive_samples_file_creator(current_dir, pos_dir)

In [None]:
# Resize the '.jpg' negatives to (48, 48) and write them to the 'resized' sub-folder.
img_resizer('C:/my_cv_work/neural_classifier/try_1/negatives/','C:/my_cv_work/neural_classifier/try_1/negatives/resized/' , (48,48), ['.jpg'])

In [255]:
? dict.setdefault

In [265]:
def resolution_counter(source_dir):
    """Print how many '.jpg' images in ``source_dir`` have each shape.

    Images are read as grayscale, so each shape is a (height, width) tuple.
    The resulting ``{shape: count}`` dict is pretty-printed. Files that
    cannot be read are skipped. ``source_dir`` is concatenated with the file
    name as-is, so it must end with a path separator.
    """
    # Imported here because no pprint import is visible among the file's
    # top-level imports.
    import pprint

    diff_shape = {}
    for file in os.listdir(source_dir):
        if not os.fsdecode(file).endswith('.jpg'):
            continue
        img = cv2.imread(source_dir+file, flags=cv2.IMREAD_GRAYSCALE)
        if img is None:
            continue
        diff_shape[img.shape] = diff_shape.get(img.shape, 0) + 1
    pprint.pprint(diff_shape)

In [266]:
# Print the shape statistics of the positives folder.
source_dir = 'D:/cv_data/cascade_training/try_3/positives/'
resolution_counter(source_dir)

{(21, 21): 1,
 (23, 21): 1,
 (23, 22): 1,
 (23, 25): 1,
 (24, 20): 1,
 (24, 22): 1,
 (24, 23): 1,
 (24, 26): 1,
 (25, 20): 1,
 (25, 21): 3,
 (25, 23): 1,
 (25, 24): 2,
 (26, 19): 1,
 (26, 21): 3,
 (26, 22): 2,
 (26, 23): 4,
 (26, 24): 8,
 (26, 25): 3,
 (26, 26): 1,
 (26, 28): 2,
 (26, 33): 1,
 (27, 22): 3,
 (27, 23): 1,
 (27, 24): 4,
 (27, 25): 3,
 (27, 26): 5,
 (27, 27): 2,
 (27, 29): 1,
 (27, 30): 1,
 (27, 32): 1,
 (28, 21): 4,
 (28, 22): 4,
 (28, 23): 5,
 (28, 24): 14,
 (28, 25): 8,
 (28, 26): 6,
 (28, 27): 4,
 (28, 28): 5,
 (28, 30): 1,
 (28, 31): 1,
 (28, 32): 1,
 (28, 33): 1,
 (29, 21): 2,
 (29, 22): 3,
 (29, 23): 6,
 (29, 24): 6,
 (29, 25): 8,
 (29, 26): 9,
 (29, 27): 12,
 (29, 28): 9,
 (29, 29): 3,
 (29, 31): 1,
 (29, 32): 3,
 (29, 35): 1,
 (30, 22): 4,
 (30, 23): 8,
 (30, 24): 7,
 (30, 25): 14,
 (30, 26): 15,
 (30, 27): 13,
 (30, 28): 14,
 (30, 29): 6,
 (30, 30): 6,
 (30, 31): 3,
 (30, 32): 1,
 (30, 33): 1,
 (30, 34): 3,
 (30, 35): 3,
 (30, 36): 1,
 (31, 23): 5,
 (31, 24): 8,


In [264]:
? pprint

In [271]:
# Tuples compare element by element from the left, so this is False (31 > 30).
(31,29) < (30,30)

False

In [30]:
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img

# Random augmentation settings used for the preview below.
datagen = ImageDataGenerator(
        rotation_range=10,
        width_shift_range=0.1,
        height_shift_range=0.1,
        shear_range=0.1,
        zoom_range=0.1,
        horizontal_flip=True,
        fill_mode='nearest')

img = load_img('C:/my_cv_work/data/pics/people_from_long_sorted/1/pers_1_0.jpeg')  # presumably a PIL image (per Keras load_img) - confirm
x = img_to_array(img)  # image as an array; exact shape / axis order is not shown here
x = x.reshape((1,) + x.shape)  # prepend a batch axis of length 1

# the .flow() command below generates batches of randomly transformed images
# and saves the results to the `preview/` directory
i = 0
for batch in datagen.flow(x, batch_size=32, save_prefix = 'pers_1',
                          save_to_dir='C:/my_cv_work/data/pics/people_from_long_sorted/1/preview', save_format='jpeg'):
    i += 1
    if i > 1:
        break  # otherwise the generator would loop indefinitely

In [45]:
# Source folder and output folder used by the flow_from_directory cell.
data_path = 'C:\\my_cv_work\\data\\pics\\people_from_long_sorted\\1'
save_dir = 'C:\\my_cv_work\\data\\pics\\people_from_long_sorted\\1\\aug'

In [46]:
# Random augmentation settings (same values as the earlier datagen cell).
datagen = ImageDataGenerator(
        rotation_range=10,
        width_shift_range=0.1,
        height_shift_range=0.1,
        shear_range=0.1,
        zoom_range=0.1,
        horizontal_flip=True,
        fill_mode='nearest')

In [47]:
# Pull two batches from flow_from_directory so that augmented images get
# written to save_dir.
# NOTE(review): the recorded output shows "Found 0 images" followed by a
# ZeroDivisionError - presumably flow_from_directory expects one sub-folder
# per class under data_path; confirm against the Keras documentation.
i=0
for batch in datagen.flow_from_directory(
        directory=data_path,  # this is the target directory
        target_size=(120, 60),  # size passed to Keras for the loaded images
        batch_size=16,
        save_to_dir=save_dir):
    i+=1
    if i>1:
        break


Found 0 images belonging to 1 classes.


ZeroDivisionError: integer division or modulo by zero

In [49]:
import shutil
# Copy every class folder (1..30) that holds fewer than 100 files into
# little_examples/<class>.
src = 'C:\\my_cv_work\\data\\pics\\people_from_long_sorted'
root_dest = 'C:\\my_cv_work\\data\\pics\\people_from_long_sorted\\little_examples'
for i in range (1,31):
    csrc = os.path.join(src,str(i))
    nfiles = len(os.listdir(csrc))
    if nfiles <100:
        dest = os.path.join(root_dest, str(i))
        # exist_ok lets the cell be re-run without failing on existing folders.
        os.makedirs(dest, exist_ok=True)
        for file in os.listdir(csrc):
            shutil.copy2(os.path.join(csrc, file),dest)

In [53]:
def gen_samples(source_dir, dest_dir, target_number_of_samples, datagen):
    """Generate augmented copies of every image in ``source_dir``.

    For each file, ``datagen.flow`` is asked for
    ``target_number_of_samples // number_of_files`` batches of one image (at
    least one batch is always drawn), and the generator saves each of them
    to ``dest_dir`` as JPEG with the prefix 'pers'. Nothing is done for an
    empty folder.

    Args:
        source_dir: folder with the source images.
        dest_dir: folder passed to the generator as ``save_to_dir``.
        target_number_of_samples: approximate total number of images wanted.
        datagen: object providing ``flow`` (an ``ImageDataGenerator`` in
            this notebook).
    """
    files = os.listdir(source_dir)
    if not files:
        # Avoid dividing by zero for an empty folder.
        return

    nepoch = target_number_of_samples // len(files)
    for file in files:
        img = load_img(os.path.join(source_dir, file))
        x = img_to_array(img)
        x = x.reshape((1,) + x.shape)  # prepend a batch axis of length 1
        i = 1
        for batch in datagen.flow(x, batch_size=1, save_prefix = 'pers',
            save_to_dir=dest_dir, save_format='jpeg'):
            i += 1
            if i > nepoch:
                break  # otherwise the generator would loop indefinitely

In [54]:
# Random augmentation settings passed to gen_samples below.
datagen = ImageDataGenerator(
        rotation_range=10,
        width_shift_range=0.1,
        height_shift_range=0.1,
        shear_range=0.1,
        zoom_range=0.1,
        horizontal_flip=True,
        fill_mode='nearest')

In [70]:
# Augment every sub-folder of little_examples in place, with 120 as the
# target number of samples per folder.
root = 'C:\\my_cv_work\\data\\pics\\people_from_long_sorted_120x60\\little_examples'
for _dir in os.listdir(root):
    cdir = os.path.join(root,_dir)
    gen_samples(cdir,cdir,120,datagen)

In [57]:
# Scratch value; not referenced by any other cell shown here.
a=[1,2,3]

In [60]:
def keep_only_n(src_dir, nkeep):
    """Randomly delete entries of ``src_dir`` until at most ``nkeep`` remain.

    Args:
        src_dir: folder to thin out; every entry is removed with
            ``os.remove``, so it is expected to contain only files.
        nkeep: number of entries to keep.
    """
    flist = os.listdir(src_dir)
    excess = len(flist) - nkeep
    if excess <= 0:
        return

    # Draw all victims at once, uniformly and without replacement. The
    # previous randint(0, len - 1) call excluded the last entry (its upper
    # bound is exclusive) and failed when a single file was left.
    for idx in np.random.choice(len(flist), size=excess, replace=False):
        os.remove(os.path.join(src_dir, flist[int(idx)]))

In [71]:
# Thin every sub-folder of people_for_model down to at most 120 entries.
root='C:\\my_cv_work\\data\\pics\\people_for_model'
for d in os.listdir(root):
    keep_only_n(os.path.join(root,d),120)

In [72]:
# Print the number of entries in every sub-folder of people_for_model.
root='C:\\my_cv_work\\data\\pics\\people_for_model'
for d in os.listdir(root):
    print(d,len(os.listdir(os.path.join(root,d))))

1 120
10 120
11 120
12 120
13 120
14 120
15 120
16 120
17 120
18 120
19 120
2 120
20 120
21 120
22 120
23 120
24 120
25 120
26 120
27 120
28 120
29 120
3 120
30 120
4 120
5 120
6 120
7 120
8 120
9 120


In [77]:
# Move randomly chosen files from each train/<class> folder to test/<class>
# until 100 files remain in the train folder.
train_dir='C:\\my_cv_work\\data\\pics\\people_for_model\\train'
test_dir='C:\\my_cv_work\\data\\pics\\people_for_model\\test'
ftrain=os.listdir(train_dir)
for f in ftrain:
    dest_f=os.path.join(test_dir,f)
    # exist_ok lets the cell be re-run after an interrupted split.
    os.makedirs(dest_f, exist_ok=True)
    while(len(os.listdir(os.path.join(train_dir,f)))>100):
        flist=os.listdir(os.path.join(train_dir,f))
        # randint's upper bound is exclusive; len(flist) makes every file eligible.
        n=np.random.randint(0,len(flist))
        file=os.path.join(train_dir,f,flist[n])
        shutil.copy2(file,dest_f)
        os.remove(file)
        