In [1]:
import random
import time
import os
import shutil
import json

from tqdm.notebook import tqdm

import cv2
import numpy as np

import torch
import torchvision
import torch.nn as nn
import torch.optim as optim

from PIL import Image
from IPython import display

from torchvision.transforms import transforms
import torch.utils.data
from torchsummary import summary

import matplotlib.pyplot as plt
import itertools

from pytorchCycleGANandpix2pix.models import networks as offnets
from pytorchCycleGANandpix2pix.util.image_pool import ImagePool
from pytorchCycleGANandpix2pix.util.util import tensor2im

import cyclegan_models

In [2]:
# --- Experiment configuration -------------------------------------------------

# Requested number of images for the train split; the test split is 20% of that.
train_target_size = 1200
test_target_size = int(0.2 * train_target_size)

# One source folder per domain, each holding one sub-folder per pose class.
domain_folders = [
    "./output/pose_classified_game",
    "./output/pose_classified_movie",
]

# Pose classes to sample from. Alternatives noted by the author:
# "Full Body Sitting", "Full Body Standing", "Head Only", "Other".
class_folders = ["Half Body"]

# Only files whose name ends with this suffix are treated as images.
img_file_ext = ".jpg"

# Output location; the timestamp suffix gives every run its own folder.
store_folder = f"./selected_data/experiment_random_half_only_{time.time()}"

# Target side length in pixels for the saved images.
width = 128

In [3]:
def get_class_images(domain_folder, class_folders, file_ext=None):
    """Collect the image file names found in each class sub-folder of a domain.

    Args:
        domain_folder: Directory that contains one sub-directory per class.
        class_folders: Names of the class sub-directories to scan.
        file_ext: Suffix a file name must end with to be counted as an image.
            When omitted, the notebook-level ``img_file_ext`` setting is used,
            which is what the function always did before this parameter existed.

    Returns:
        A ``(images, total)`` tuple. ``images`` maps each class name to the
        sorted list of matching file names (names only, no directory part);
        ``total`` is the number of matching files summed over all classes.

    Raises:
        OSError: If a class sub-directory cannot be listed (propagated from
            ``os.listdir``).
    """
    if file_ext is None:
        file_ext = img_file_ext

    images = {}
    total = 0

    for class_folder in class_folders:
        # os.listdir returns entries in arbitrary order; sorting makes the
        # result (and any seeded sampling done on it later) stable across runs.
        entries = sorted(os.listdir(os.path.join(domain_folder, class_folder)))
        image_files = [entry for entry in entries if entry.endswith(file_ext)]
        images[class_folder] = image_files
        total += len(image_files)

    return images, total

In [4]:
# Index the first domain folder (domain X) and compute each class's share of its images.
domain_X_imgs, domain_X_total = get_class_images(domain_folders[0], class_folders)
domain_X_dist = {
    class_name: len(file_names) / domain_X_total
    for class_name, file_names in domain_X_imgs.items()
}

In [5]:
# Index the second domain folder (domain Y) and compute each class's share of its images.
domain_Y_imgs, domain_Y_total = get_class_images(domain_folders[1], class_folders)
domain_Y_dist = {
    class_name: len(file_names) / domain_Y_total
    for class_name, file_names in domain_Y_imgs.items()
}

In [6]:
# Uniform weighting: every selected class gets the same share of the sample.
random_class_dist = {
    class_name: 1 / len(class_folders)
    for class_name in class_folders
}

In [7]:
# Choose which per-class distribution drives the sampling; change the index to switch.
selected_sampling_dist_type = (
    "stratified_domain_X",
    "stratified_domain_Y",
    "random",
)[2]

# Resolve the chosen name to the distribution computed earlier in the notebook.
selected_sampling_dist = {
    "stratified_domain_X": domain_X_dist,
    "stratified_domain_Y": domain_Y_dist,
    "random": random_class_dist,
}[selected_sampling_dist_type]

In [8]:
# Resize mode applied when the selected images are saved; change the index to switch.
#   "scale_smallest_side": scale the smallest side to <width> while keeping the
#       aspect ratio. Per the author's note, the dataset then crops these images
#       for variation before they go into the model during training.
#   "force_resize": the alternative mode.
resize_strategy = ("scale_smallest_side", "force_resize")[0]

In [9]:
# What to do when a class is asked for more images than it holds; change the index to switch.
excess_allocation_strategy = ("random_reallocation", "ignore")[1]

In [10]:
def sample_images(class_folders, sampling_distribution, class_imgs, train_size, test_size, excess_allocation_strategy):
    """Randomly pick images per class and split them into train and test sets.

    Each class is first allotted ``round(share * (train_size + test_size))``
    images. Classes that hold fewer images than their allotment are handled
    according to ``excess_allocation_strategy``.

    Args:
        class_folders: Class names to sample from.
        sampling_distribution: Mapping of class name to its share of the sample.
        class_imgs: Mapping of class name to the list of available image names.
        train_size: Requested number of training images over all classes.
        test_size: Requested number of test images over all classes.
        excess_allocation_strategy: ``"random_reallocation"`` hands the shortfall
            of under-populated classes to classes that still have room, split
            evenly with integer division. Any other value simply caps each
            class at the number of images it actually has.

    Returns:
        A ``(train_split, test_split)`` tuple of dicts mapping each class name
        to its list of sampled image names. The two lists of a class are disjoint.
    """
    total_size = train_size + test_size
    available = {name: len(class_imgs[name]) for name in class_folders}

    # Start by figuring out how much of each class we want to actually sample.
    expected_allocations = {
        name: round(sampling_distribution[name] * total_size) for name in class_folders
    }

    def shortfalls():
        # Number of images each class is currently asked for beyond what it holds.
        return {name: max(0, expected_allocations[name] - available[name]) for name in available}

    if excess_allocation_strategy == "random_reallocation":
        excess_allocations = shortfalls()
        excluded_classes = set()

        while sum(excess_allocations.values()) != 0 and len(excluded_classes) != len(available):
            classes_with_space = [
                name for name in available
                if excess_allocations[name] == 0 and name not in excluded_classes
            ]

            # When every remaining class is short there is nobody to hand the
            # shortfall to; skipping the division avoids a ZeroDivisionError and
            # the capping step below then ends the loop.
            if classes_with_space:
                excess_amount_per_class = sum(excess_allocations.values()) // len(classes_with_space)
                for name in classes_with_space:
                    expected_allocations[name] += excess_amount_per_class

            # Cap every class that could not take extra images and never
            # consider it again.
            for name in available:
                if name not in classes_with_space:
                    expected_allocations[name] = available[name]
                    excluded_classes.add(name)

            # The classes that just received extra images may now be short themselves.
            excess_allocations = shortfalls()
    else:
        for name, count in available.items():
            expected_allocations[name] = min(expected_allocations[name], count)

    # Guard against an empty request (train_size + test_size == 0).
    train_fraction = train_size / total_size if total_size else 0.0

    train_split = {}
    test_split = {}

    for name in class_folders:
        total_allocation = expected_allocations[name]
        train_allocation = round(total_allocation * train_fraction)

        # A single draw without replacement, then cut in two, keeps the
        # train and test lists disjoint.
        sampled_class_imgs = random.sample(class_imgs[name], total_allocation)
        train_split[name] = sampled_class_imgs[:train_allocation]
        test_split[name] = sampled_class_imgs[train_allocation:]

    return train_split, test_split

In [11]:
# Draw the train/test image names for domain X.
domain_X_train_imgs, domain_X_test_imgs = sample_images(
    class_folders,
    selected_sampling_dist,
    domain_X_imgs,
    train_target_size,
    test_target_size,
    excess_allocation_strategy,
)

In [12]:
# Draw the train/test image names for domain Y.
domain_Y_train_imgs, domain_Y_test_imgs = sample_images(
    class_folders,
    selected_sampling_dist,
    domain_Y_imgs,
    train_target_size,
    test_target_size,
    excess_allocation_strategy,
)

In [13]:
# Best-effort removal of any previous copy of the experiment folder;
# a failure here (for instance because the folder does not exist) is ignored.
try:
    shutil.rmtree(store_folder)
except OSError:
    pass

# Create the root first, then one train/test directory per domain.
os.makedirs(store_folder)
for _split_dir in ("X/train", "X/test", "Y/train", "Y/test"):
    os.makedirs(f"{store_folder}/{_split_dir}")

In [14]:
# Record the configuration of this run and the size of every split next to the data.
with open(f"{store_folder}/info.json", "w+") as fp:
    json.dump(
        {
            # Settings chosen in the configuration cells.
            "class_folders": class_folders,
            "domain_folders": domain_folders,
            "selected_sampling_dist_type": selected_sampling_dist_type,
            "selected_sampling_dist": selected_sampling_dist,
            "resize_strategy": resize_strategy,
            "excess_allocation_strategy": excess_allocation_strategy,
            "img_file_ext": img_file_ext,
            "store_folder": store_folder,
            "width": width,
            "train_target_size": train_target_size,
            "test_target_size": test_target_size,
            # Per domain: the class distribution of the source folder and the
            # number of images sampled per class for each split.
            "X": {
                "org_dist": domain_X_dist,
                "train": {name: len(domain_X_train_imgs[name]) for name in class_folders},
                "test": {name: len(domain_X_test_imgs[name]) for name in class_folders},
            },
            "Y": {
                "org_dist": domain_Y_dist,
                "train": {name: len(domain_Y_train_imgs[name]) for name in class_folders},
                "test": {name: len(domain_Y_test_imgs[name]) for name in class_folders},
            },
        },
        fp,
        indent=2,
    )

In [15]:
def _scaled_size(width, height, desired_size, resize_strategy):
    """Return the ``(width, height)`` an image should be resized to."""
    if resize_strategy != "scale_smallest_side":
        # Any other strategy forces a square of desired_size.
        return desired_size, desired_size

    # The smallest side becomes desired_size; the other side keeps the aspect ratio.
    if width > height:
        return round(desired_size * (width / height)), desired_size
    return desired_size, round(desired_size * (height / width))


def save_images(save_folder, original_folder, class_images, resize_strategy, img_file_ext, desired_size):
    """Resize the selected images and write them into ``save_folder``.

    Args:
        save_folder: Existing directory the resized images are written to.
        original_folder: Domain directory holding one sub-directory per class.
        class_images: Mapping of class name to the image file names to process.
        resize_strategy: ``"scale_smallest_side"`` scales the smallest side to
            ``desired_size`` and keeps the aspect ratio; any other value
            resizes to ``desired_size`` x ``desired_size``.
        img_file_ext: Extension appended to every output file name.
        desired_size: Target side length in pixels.

    Images whose smallest side is below ``desired_size`` are reported with a
    ``Skipped`` line and not written. Output files are named
    ``<class_name_with_underscores>_<index><img_file_ext>``, where the index is the
    position in the class list; skipped images therefore leave gaps in the numbering.
    """
    for class_name, image_names in class_images.items():
        save_class_name = "_".join(class_name.lower().split(" "))

        for i, image_name in enumerate(tqdm(image_names)):
            original_loc = f"{original_folder}/{class_name}/{image_name}"
            save_loc = f"{save_folder}/{save_class_name}_{i}{img_file_ext}"

            # The context manager closes the source file even when the image is
            # skipped; previously every opened image leaked its file handle.
            with Image.open(original_loc) as image:
                width, height = image.width, image.height

                if min(width, height) < desired_size:
                    print("Skipped", image_name)
                    continue

                new_size = _scaled_size(width, height, desired_size, resize_strategy)
                image.resize(new_size).save(save_loc)

In [16]:
# Write every split to disk, in order: X/train, X/test, Y/train, Y/test.
for _split_dir, _source_folder, _split_imgs in (
    ("X/train", domain_folders[0], domain_X_train_imgs),
    ("X/test", domain_folders[0], domain_X_test_imgs),
    ("Y/train", domain_folders[1], domain_Y_train_imgs),
    ("Y/test", domain_folders[1], domain_Y_test_imgs),
):
    save_images(f"{store_folder}/{_split_dir}", _source_folder, _split_imgs, resize_strategy, img_file_ext, width)

  0%|          | 0/1200 [00:00<?, ?it/s]

Skipped Segmented_Train_Game_05_00029_7.jpg
Skipped Segmented_Train_Game_06_00188_4.jpg
Skipped Segmented_Train_Game_02_00058_6.jpg
Skipped Segmented_Train_Game_05_00029_3.jpg
Skipped Segmented_Train_Game_06_00403_5.jpg
Skipped Segmented_Train_Game_06_00430_4.jpg
Skipped Segmented_Train_Game_09_00559_0.jpg
Skipped Segmented_Train_Game_05_00011_8.jpg
Skipped Segmented_Train_Game_05_00108_5.jpg
Skipped Segmented_Train_Game_06_00434_5.jpg
Skipped Segmented_Train_Game_06_00422_4.jpg
Skipped Segmented_Train_Game_06_00376_3.jpg
Skipped Segmented_Train_Game_09_00547_1.jpg
Skipped Segmented_Train_Game_05_00156_9.jpg
Skipped Segmented_Train_Game_02_00173_2.jpg
Skipped Segmented_Train_Game_05_00040_7.jpg
Skipped Segmented_Train_Game_01_01048_1.jpg
Skipped Segmented_Train_Game_05_00030_2.jpg
Skipped Segmented_Train_Game_09_00166_1.jpg
Skipped Segmented_Train_Game_06_00408_4.jpg
Skipped Segmented_Train_Game_02_00300_4.jpg
Skipped Segmented_Train_Game_05_00042_8.jpg
Skipped Segmented_Train_Game_02_

  0%|          | 0/240 [00:00<?, ?it/s]

Skipped Segmented_Train_Game_06_00396_6.jpg
Skipped Segmented_Train_Game_06_00450_5.jpg
Skipped Segmented_Train_Game_06_00414_4.jpg
Skipped Segmented_Train_Game_05_00034_9.jpg
Skipped Segmented_Train_Game_02_00202_4.jpg
Skipped Segmented_Train_Game_01_01050_1.jpg
Skipped Segmented_Train_Game_05_00127_6.jpg
Skipped Segmented_Train_Game_06_00438_4.jpg
Skipped Segmented_Train_Game_02_00189_6.jpg
Skipped Segmented_Train_Game_06_00445_4.jpg
Skipped Segmented_Train_Game_02_00179_4.jpg
Skipped Segmented_Train_Game_06_00439_5.jpg
Skipped Segmented_Train_Game_05_00107_4.jpg
Skipped Segmented_Train_Game_02_00215_4.jpg
Skipped Segmented_Train_Game_05_00017_9.jpg


  0%|          | 0/1200 [00:00<?, ?it/s]

Skipped Segmented_Train_Movie_02_00062_0.jpg
Skipped Segmented_Train_Movie_03_00631_0.jpg
Skipped Segmented_Train_Movie_01_01443_0.jpg
Skipped Segmented_Train_Movie_03_00627_1.jpg
Skipped Segmented_Train_Movie_03_02200_0.jpg
Skipped Segmented_Train_Movie_02_00042_1.jpg
Skipped Segmented_Train_Movie_03_01358_1.jpg
Skipped Segmented_Train_Movie_03_00644_0.jpg
Skipped Segmented_Train_Movie_01_00703_1.jpg
Skipped Segmented_Train_Movie_01_00720_3.jpg
Skipped Segmented_Train_Movie_03_01275_4.jpg
Skipped Segmented_Train_Movie_03_00622_0.jpg
Skipped Segmented_Train_Movie_03_00315_0.jpg
Skipped Segmented_Train_Movie_01_01427_0.jpg
Skipped Segmented_Train_Movie_03_00617_1.jpg
Skipped Segmented_Train_Movie_03_00998_1.jpg
Skipped Segmented_Train_Movie_03_02262_1.jpg
Skipped Segmented_Train_Movie_01_01440_0.jpg
Skipped Segmented_Train_Movie_01_00687_1.jpg
Skipped Segmented_Train_Movie_02_00069_2.jpg
Skipped Segmented_Train_Movie_03_01296_2.jpg
Skipped Segmented_Train_Movie_03_00607_0.jpg
Skipped Se

  0%|          | 0/240 [00:00<?, ?it/s]

Skipped Segmented_Train_Movie_03_01006_2.jpg
Skipped Segmented_Train_Movie_03_01028_0.jpg
Skipped Segmented_Train_Movie_02_00060_1.jpg
Skipped Segmented_Train_Movie_02_00536_0.jpg
Skipped Segmented_Train_Movie_01_01346_0.jpg
Skipped Segmented_Train_Movie_03_01014_2.jpg
Skipped Segmented_Train_Movie_03_01381_3.jpg
Skipped Segmented_Train_Movie_03_01302_0.jpg
Skipped Segmented_Train_Movie_03_02206_0.jpg
Skipped Segmented_Train_Movie_03_00625_1.jpg
Skipped Segmented_Train_Movie_01_00136_2.jpg
Skipped Segmented_Train_Movie_01_01388_0.jpg
Skipped Segmented_Train_Movie_01_01411_0.jpg
Skipped Segmented_Train_Movie_03_00613_0.jpg
Skipped Segmented_Train_Movie_03_00626_2.jpg
Skipped Segmented_Train_Movie_01_00130_1.jpg
Skipped Segmented_Train_Movie_03_01389_0.jpg
Skipped Segmented_Train_Movie_02_00551_1.jpg
Skipped Segmented_Train_Movie_03_00615_0.jpg
Skipped Segmented_Train_Movie_03_01275_2.jpg
Skipped Segmented_Train_Movie_02_00054_2.jpg
Skipped Segmented_Train_Movie_03_00628_0.jpg
Skipped Se