# Generating a modified dataset for new training

In [1]:
import os, json
import pandas as pd
import tqdm
import numpy as np
from PIL import Image
import torch
import random

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Metadata file names. All of them are resolved against data_dir except
# descriptions_file, which is opened exactly as written.
data_dir = "./data_mod/FSC/FSC_147"
split_images_file = "Train_Test_Val_FSC_147.json"
dot_annotations_file = "annotation_FSC147_384.json"
split_classes_file = "Split_Classes_FSC147.json"
descriptions_file = "./FSC-147-D.json"

img_class_txt = "ImageClasses_FSC147.txt"


def _read_json(path):
    """Return the parsed contents of the JSON file at *path*."""
    with open(path, 'r') as handle:
        return json.load(handle)


split_images = _read_json(os.path.join(data_dir, split_images_file))
dot_annotations = _read_json(os.path.join(data_dir, dot_annotations_file))
descriptions = _read_json(descriptions_file)
split_classes = _read_json(os.path.join(data_dir, split_classes_file))

# Image file name -> class label, parsed from a tab-separated text file.
# Lines that do not split into exactly two fields are ignored; if a name
# occurs more than once, the last occurrence wins.
with open(os.path.join(data_dir, img_class_txt), 'r') as file:
    rows = (raw_line.strip().split('\t') for raw_line in file)
    img_classes = {row[0]: row[1] for row in rows if len(row) == 2}

In [3]:
# train_list / val_list are the very list objects stored in split_images
# (not copies), so appending to split_images['train'] also grows train_list.
train_list, val_list = split_images['train'], split_images['val']

# A new list holding every train image followed by every val image; it is
# independent of the two lists above and does not grow when they do.
train_val_images = [*train_list, *val_list]

In [4]:
# Directory the source images are read from and the generated copies are
# written to.
img_directory = "./data_mod/FSC/images_384_VarV2"

# Directory holding one .npy density map per image; generated maps are
# written here as well.
density_directory = "./data_mod/FSC/gt_density_map_adaptive_384_VarV2"

In [5]:
# For every train/val image, create a companion sample made of:
#   * a re-saved copy of the image under a new numeric file name,
#   * an all-zero density map of the same shape as the source map,
#   * a class label / text description drawn from the same split but
#     different from the image's real class,
#   * an annotation entry with no points and no example boxes.
# All in-memory tables (split_images, img_classes, descriptions,
# dot_annotations) are updated in place.
#
# NOTE: re-running this cell without reloading split_images appends the
# same generated names to the split lists a second time.
random.seed(123)  # fixed seed so the class reassignment is repeatable
new_im_id = 7715  # numeric id used for the first generated file

# Resolve every image's split once, up front. The split lists grow while the
# loop runs, so testing `name in train_list` per image is a linear scan over
# an ever longer list. "train" is written last so it takes precedence, which
# matches the original train-before-val if/elif order.
split_by_image = {name: "val" for name in val_list}
split_by_image.update((name, "train") for name in train_list)

for img_filename in tqdm.tqdm(train_val_images):
    split_name = split_by_image[img_filename]
    class_pool = split_classes[split_name]
    cur_img_class = img_classes[img_filename]

    # The rejection loop below can only terminate if the pool offers at
    # least one class other than the current one; fail loudly before any
    # file is written instead of spinning forever.
    if all(candidate == cur_img_class for candidate in class_pool):
        raise ValueError(
            f"no alternative class for {img_filename!r} in split "
            f"{split_name!r} (current class: {cur_img_class!r})"
        )

    new_filename = f"{new_im_id}.jpg"
    density_filename = f"{new_im_id}.npy"

    # Re-save the source image under the new name; the context manager
    # releases the underlying file handle deterministically.
    with Image.open(os.path.join(img_directory, img_filename)) as img:
        img.load()
        img.save(os.path.join(img_directory, new_filename))

    # The source density map is only consulted for its shape; the generated
    # map is all zeros.
    source_density = np.load(
        os.path.join(density_directory, img_filename.split('.')[0] + '.npy')
    )
    np.save(
        os.path.join(density_directory, density_filename),
        np.zeros(source_density.shape, dtype=np.float32),
    )

    split_images[split_name].append(new_filename)

    # Draw classes from the split's pool until one differs from the real one.
    # NOTE(review): random.choice replaces randint(0, len - 1) indexing; if
    # bit-for-bit reproduction of an earlier run matters, confirm the drawn
    # classes are unchanged on the Python version in use.
    new_img_class = cur_img_class
    while new_img_class == cur_img_class:
        new_img_class = random.choice(class_pool)

    img_classes[new_filename] = new_img_class

    descriptions[new_filename] = {
        "data_split": split_name,
        "text_description": f"the {new_img_class}",
    }

    # Size-related fields are copied from the source annotation; everything
    # that refers to object locations or file paths is left empty.
    source_annotation = dot_annotations[img_filename]
    dot_annotations[new_filename] = {
        "H": source_annotation["H"],
        "W": source_annotation["W"],
        "box_examples_coordinates": [],
        "box_examples_path": '',
        "density_path": '',
        "density_path_fixed": '',
        "img_path": '',
        "points": [],
        "r": source_annotation["r"],
        "ratio_h": source_annotation["ratio_h"],
        "ratio_w": source_annotation["ratio_w"],
    }

    new_im_id += 1

100%|██████████| 4945/4945 [01:23<00:00, 58.93it/s] 


In [6]:
# Output file names; each differs from the corresponding input name by a
# "_mod" suffix.
split_images_mod_file = "Train_Test_Val_FSC_147_mod.json"
img_class_mod_txt = "ImageClasses_FSC147_mod.txt"
descriptions_mod_file = "./FSC-147-D_mod.json"
dot_annotations_mod_file = "annotation_FSC147_384_mod.json"


def _write_json(payload, path):
    """Serialize *payload* to *path* as JSON with 4-space indentation."""
    with open(path, 'w') as handle:
        json.dump(payload, handle, indent=4)


_write_json(split_images, os.path.join(data_dir, split_images_mod_file))

# One "<image name>\t<class label>" line per entry, in dict order.
with open(os.path.join(data_dir, img_class_mod_txt), 'w') as file:
    file.writelines(
        f"{key}\t{value}\n" for key, value in img_classes.items()
    )

_write_json(descriptions, descriptions_mod_file)
_write_json(dot_annotations, os.path.join(data_dir, dot_annotations_mod_file))

### Check on created density maps

In [7]:
# Load the density map saved for the first generated sample (id 7715).
loaded_density_map = np.load(os.path.join(density_directory, '7715.npy'))

In [8]:
# Total mass of the loaded density map; the trailing expression is the
# cell's displayed value.
tensor = torch.Tensor(loaded_density_map)
tensor.sum().item()

0.0

In [14]:
# Number of annotated points stored for the first generated sample.
generated_annotation = dot_annotations['7715.jpg']
points = generated_annotation["points"]
len(points)

0