In [232]:
import os
import numpy as np
import cv2
import scipy.io
import matplotlib.pyplot as plt
import shutil
from PIL import Image, ImageOps

In [233]:
def load_dict(image_dir, gt_dir):
    """
    Map every ``.jpg`` file in ``image_dir`` to the count stored in its
    ground-truth ``.mat`` file.

    For an image ``NAME.jpg`` the file ``GT_NAME.mat`` is loaded from
    ``gt_dir`` and the count is the length of
    ``mat['image_info'][0][0][0][0][0]``.  The images themselves are not
    opened; only their file names are used, so no image decoding is done.

    Parameters
    ----------
    image_dir : str
        Directory that is scanned for ``.jpg`` files.
    gt_dir : str
        Directory holding the matching ``GT_<name>.mat`` files.

    Returns
    -------
    dict
        ``{image file name: count}`` ordered by ascending count.  Ties keep
        the alphabetical file-name order (the sort is stable).

    Raises
    ------
    ValueError
        If ``image_dir`` contains no ``.jpg`` files.
    """
    image_files = sorted(f for f in os.listdir(image_dir) if f.endswith('.jpg'))
    if not image_files:
        raise ValueError("Brak plików .jpg w podanym katalogu!")

    counts = {}
    for img_file in image_files:
        mat_path = os.path.join(gt_dir, 'GT_' + img_file.replace('.jpg', '.mat'))
        mat = scipy.io.loadmat(mat_path)
        # NOTE(review): presumably the first struct field is the list of
        # annotated points (ShanghaiTech layout) -- confirm against the data.
        counts[img_file] = len(mat['image_info'][0][0][0][0][0])

    # Order the mapping by count so callers can walk it from sparse to dense.
    return dict(sorted(counts.items(), key=lambda item: item[1]))

In [37]:
# Training split: image file name -> count, ordered by ascending count.
train_d = load_dict(
    image_dir='ShanghaiTechClass/part_B/train_data/images/',
    gt_dir='ShanghaiTechClass/part_B/train_data/ground-truth/',
)

In [39]:
# Test split: image file name -> count, ordered by ascending count.
test_d = load_dict(
    image_dir='ShanghaiTechClass/part_B/test_data/images/',
    gt_dir='ShanghaiTechClass/part_B/test_data/ground-truth/',
)

In [130]:
def count_thresholds(dictionary, classes):
    """
    Compute class boundaries for a dictionary sorted by ascending count.

    The items are cut into consecutive groups of ``ceil(len / classes)``
    elements.  The count of the last element of every full group becomes a
    boundary, and the largest count (last element) is always the final
    boundary.

    Parameters
    ----------
    dictionary : dict
        ``{file name: count}``, expected to be ordered by ascending count.
    classes : int
        Requested number of classes; must be at least 1.

    Returns
    -------
    list
        Boundary counts in ascending order, at most ``classes`` entries.
        The last entry is the count of the last element.

    Raises
    ------
    ValueError
        If ``dictionary`` is empty or ``classes`` is smaller than 1.
    """
    if classes < 1:
        raise ValueError("classes must be at least 1")
    counts = list(dictionary.values())
    if not counts:
        raise ValueError("dictionary must not be empty")

    border = np.ceil(len(counts) / classes)
    th = []
    class_num = 1
    # The last element is handled separately below; excluding it here keeps
    # the maximum from being appended twice when a group boundary falls
    # exactly on the final element (e.g. len divisible by classes).
    for position, count in enumerate(counts[:-1], start=1):
        if position >= class_num * border:
            th.append(count)
            class_num += 1
    th.append(counts[-1])
    return th

In [175]:
def estimate_optimal_thresholds(d1, d2, classes):
    """
    Combine the class boundaries of two data sets into one shared list.

    Boundaries are computed for ``d1`` and ``d2`` with ``count_thresholds``.
    Every inner boundary is the rounded-up mean of the two corresponding
    boundaries; the final boundary is the larger of the two maxima plus one,
    so every count present in either data set lies strictly below it.

    If the two boundary lists differ in length, only the inner boundaries
    available in both are averaged (previously a shorter second list raised
    ``IndexError``).

    Parameters
    ----------
    d1, d2 : dict
        ``{file name: count}`` mappings ordered by ascending count.
    classes : int
        Requested number of classes, forwarded to ``count_thresholds``.

    Returns
    -------
    list of int
        Shared upper (exclusive) boundaries in ascending order.
    """
    th1 = count_thresholds(d1, classes)
    th2 = count_thresholds(d2, classes)

    # Plain ints keep the list homogeneous (np.ceil yields numpy floats).
    final_thresholds = [
        int(np.ceil((a + b) / 2)) for a, b in zip(th1[:-1], th2[:-1])
    ]
    final_thresholds.append(int(max(th1[-1], th2[-1])) + 1)
    return final_thresholds

In [200]:
def get_count_of_files_in_folders(d1, d2, for_which, classes):
    """
    Count how many items of ``for_which`` fall into each class when the
    thresholds estimated from ``d1`` and ``d2`` are applied.

    The estimated thresholds are printed before counting.

    Parameters
    ----------
    d1, d2 : dict
        ``{file name: count}`` mappings used to estimate the thresholds.
    for_which : dict
        ``{file name: count}`` mapping to be counted; expected to be ordered
        by ascending count.
    classes : int
        Requested number of classes.

    Returns
    -------
    list of int
        Number of items per class, from the first class up to the last class
        that was reached.  A class that is skipped because no count falls
        into it is reported as 0.
    """
    th = estimate_optimal_thresholds(d1, d2, classes)
    print(th)

    count_of_files = []
    counter = 0
    class_num = 1
    for count in for_which.values():
        # Advance past every threshold the count has reached; one item may
        # skip several (empty) classes.  Counts at or above the final
        # threshold stay in the last class instead of indexing past `th`.
        while class_num < len(th) and count >= th[class_num - 1]:
            count_of_files.append(counter)
            counter = 0
            class_num += 1
        counter += 1
    count_of_files.append(counter)
    return count_of_files

In [216]:
def move_files(d1, d2, for_which, src_dir, classes):
    """
    Move the image files listed in ``for_which`` into per-class sub-folders.

    Sub-folders ``class1`` .. ``class<classes>`` are created inside
    ``src_dir`` and every file is moved into the folder of the class its
    count falls into, using the thresholds estimated from ``d1`` and ``d2``.

    Parameters
    ----------
    d1, d2 : dict
        ``{file name: count}`` mappings used to estimate the thresholds.
    for_which : dict
        ``{file name: count}`` mapping of the files to move; expected to be
        ordered by ascending count.  The files must exist in ``src_dir``.
    src_dir : str
        Directory containing the files (with or without a trailing slash).
    classes : int
        Number of class folders to create.

    Returns
    -------
    None
    """
    for class_idx in range(1, classes + 1):
        os.makedirs(os.path.join(src_dir, f'class{class_idx}'), exist_ok=True)

    th = estimate_optimal_thresholds(d1, d2, classes)
    # Never target a folder that was not created or a threshold that does
    # not exist; anything beyond goes into the last available class.
    last_class = min(classes, len(th))
    class_num = 1
    for img_file, count in for_which.items():
        # One item may skip several (empty) classes, so keep advancing until
        # the count is below the current class threshold.
        while class_num < last_class and count >= th[class_num - 1]:
            class_num += 1
        source_file = os.path.join(src_dir, img_file)
        destination_file = os.path.join(src_dir, f'class{class_num}', img_file)
        shutil.move(source_file, destination_file)

In [230]:
def move_gt(d1, d2, for_which, src_dir, classes):
    """
    Move the ground-truth ``.mat`` files into per-class sub-folders.

    For every image name ``NAME.jpg`` in ``for_which`` the file
    ``GT_NAME.mat`` is moved from ``src_dir`` into ``class<k>`` inside
    ``src_dir``, where ``k`` is the class the image count falls into under
    the thresholds estimated from ``d1`` and ``d2``.  Sub-folders
    ``class1`` .. ``class<classes>`` are created first.

    Parameters
    ----------
    d1, d2 : dict
        ``{image file name: count}`` mappings used to estimate thresholds.
    for_which : dict
        ``{image file name: count}`` mapping whose ground-truth files are
        moved; expected to be ordered by ascending count.
    src_dir : str
        Directory containing the ``GT_*.mat`` files (with or without a
        trailing slash).
    classes : int
        Number of class folders to create.

    Returns
    -------
    None
    """
    for class_idx in range(1, classes + 1):
        os.makedirs(os.path.join(src_dir, f'class{class_idx}'), exist_ok=True)

    th = estimate_optimal_thresholds(d1, d2, classes)
    # Never target a folder that was not created or a threshold that does
    # not exist; anything beyond goes into the last available class.
    last_class = min(classes, len(th))
    class_num = 1
    for img_file, count in for_which.items():
        # One item may skip several (empty) classes, so keep advancing until
        # the count is below the current class threshold.
        while class_num < last_class and count >= th[class_num - 1]:
            class_num += 1
        mat_file = f'GT_{img_file.replace(".jpg", ".mat")}'
        source_file = os.path.join(src_dir, mat_file)
        destination_file = os.path.join(src_dir, f'class{class_num}', mat_file)
        shutil.move(source_file, destination_file)

In [227]:
# Sort the training ground-truth files into 6 class folders.
move_gt(
    d1=train_d,
    d2=test_d,
    for_which=train_d,
    src_dir='ShanghaiTechClass/part_B/train_data/ground-truth/',
    classes=6,
)

In [229]:
# Sort the test ground-truth files into 6 class folders.
move_gt(
    d1=train_d,
    d2=test_d,
    for_which=test_d,
    src_dir='ShanghaiTechClass/part_B/test_data/ground-truth/',
    classes=6,
)

In [214]:
# Sort the training images into 6 class folders.
move_files(
    d1=train_d,
    d2=test_d,
    for_which=train_d,
    src_dir='ShanghaiTechClass/part_B/train_data/images/',
    classes=6,
)

In [217]:
# Sort the test images into 6 class folders.
move_files(
    d1=train_d,
    d2=test_d,
    for_which=test_d,
    src_dir='ShanghaiTechClass/part_B/test_data/images/',
    classes=6,
)