In [1]:
import json
import pandas as pd
import urllib.parse
import os
import hashlib
import random
from collections import defaultdict

In [2]:
def short_hash(input_string, len_hash=12):
    """Derive an integer id from a string.

    The string is encoded to bytes, hashed with MD5, and the first
    ``len_hash`` hexadecimal digits of the digest are interpreted as a
    base-16 integer.

    Args:
        input_string: Text to hash.
        len_hash: Number of leading hex digits of the digest to keep.

    Returns:
        The truncated digest as an ``int``.
    """
    full_digest = hashlib.md5(input_string.encode()).hexdigest()
    truncated = full_digest[:len_hash]
    return int(truncated, base=16)

## Build the metadata for each domain

In [21]:
# Top-level container that the following cells fill in and that is
# eventually serialised to JSON.
json_data = dict()

## categories

In [22]:
## Get the category ids first.
#
# Each non-empty line of the listing is "<file_path> <label>". The class name
# is the second "/"-separated component of the path, and the label is shifted
# down by one to give the category id used in this notebook.

id_to_classname = {}
with open("photo_crossval_kfold.txt") as fh:
    # Iterate the file lazily instead of materialising it with readlines().
    for line in fh:
        line = line.strip()
        if not line:
            # Tolerate blank lines instead of failing on the tuple unpack.
            continue
        # Split on the last run of whitespace so the label is always the
        # final token, even if the path itself contains spaces.
        file_path, label = line.rsplit(None, 1)
        classname = file_path.split("/")[1]
        id_to_classname[int(label) - 1] = classname

# Inverse lookup: class name -> category id.
classname_to_id = {v: k for k, v in id_to_classname.items()}

In [23]:
# Display the class-name -> category-id mapping built in the previous cell.
classname_to_id

{'dog': 0,
 'elephant': 1,
 'giraffe': 2,
 'guitar': 3,
 'horse': 4,
 'house': 5,
 'person': 6}

In [24]:
# One record per class, following the iteration order of ``id_to_classname``,
# stored under the "categories" key of the output structure.
categories = [
    dict(category_name=class_name, category_id=class_id)
    for class_id, class_name in id_to_classname.items()
]
json_data['categories'] = categories

In [27]:

# For every (domain, split) pair, read the matching "*_kfold.txt" listing and
# store three parallel record lists (images, annotations, metadata) in
# ``json_data`` under the key "<domain>_<split>", where the "val" split is
# published as "test".
for domain in ["art", "cartoon", "sketch", "photo"]:
    for split in ["train", "val"]:
        print("Creating for {}/{}".format(domain, split))

        # The listing files use "art_painting" for the art domain and
        # "crossval" for the validation split.
        extra_str = "_painting" if domain == "art" else ""
        split_file = "cross{}".format(split) if split == "val" else split

        file_name = "{}_{}_kfold.txt".format(domain+extra_str, split_file)
        print(file_name)

        # fid -> (file_path, zero-based label, class name). A dict keeps the
        # first-seen position of each id while letting a repeated line for
        # the same path overwrite the earlier entry.
        fid_to_record = {}
        with open(file_name) as fh:
            for line in fh:
                line = line.strip()
                if not line:
                    # Tolerate blank lines instead of failing on the unpack.
                    continue
                # The label is the last whitespace-separated token.
                file_path, label = line.rsplit(None, 1)
                label = int(label) - 1

                fid = short_hash(file_path)
                previous = fid_to_record.get(fid)
                if previous is not None and previous[0] != file_path:
                    # The id is a truncated hash, so two different paths can
                    # map to the same id; fail loudly instead of silently
                    # dropping one of the images.
                    raise ValueError(
                        "Hash collision in {}: {!r} and {!r} both map to id {}".format(
                            file_name, previous[0], file_path, fid
                        )
                    )
                fid_to_record[fid] = (file_path, label, id_to_classname[label])

        ## images
        images = [
            {"filename": path, "id": fid}
            for fid, (path, _label, _cname) in fid_to_record.items()
        ]

        ## annotations
        anns = [
            {"image_id": fid, "category": label, 'class_name': cname}
            for fid, (_path, label, cname) in fid_to_record.items()
        ]

        ## metadata
        # The file tag is the full relative path as listed in the split file.
        meta = [
            {'image_id': fid, 'file_tag': path}
            for fid, (path, _label, _cname) in fid_to_record.items()
        ]

        # Use a separate name so the loop variable is not rebound mid-loop.
        out_split = "test" if split == "val" else split
        json_data["{}_{}".format(domain, out_split)] = {
            "images" : images,
            "annotations" : anns,
            "metadata"    : meta
        }

Creating for art/train
art_painting_train_kfold.txt
Creating for art/val
art_painting_crossval_kfold.txt
Creating for cartoon/train
cartoon_train_kfold.txt
Creating for cartoon/val
cartoon_crossval_kfold.txt
Creating for sketch/train
sketch_train_kfold.txt
Creating for sketch/val
sketch_crossval_kfold.txt
Creating for photo/train
photo_train_kfold.txt
Creating for photo/val
photo_crossval_kfold.txt


In [28]:
# Report how many images each domain/split section holds; the "categories"
# entry is a flat list rather than a split section, so it is skipped.
for section_name, section in json_data.items():
    if section_name == "categories":
        continue
    print(section_name, len(section['images']))

art_train 1840
art_test 208
cartoon_train 2107
cartoon_test 237
sketch_train 3531
sketch_test 398
photo_train 1499
photo_test 171


In [29]:
# Spot-check a single metadata record of the photo training split.
json_data['photo_train']['metadata'][1000]

{'image_id': 166792350274328, 'file_tag': 'photo/house/pic_205.jpg'}

In [30]:
# Serialise the assembled structure as 4-space-indented JSON.
with open("../../metadata/pacs.json", mode="w") as out_file:
    json.dump(json_data, fp=out_file, indent=4)

## Sanity check: reload a metadata file and inspect it

In [None]:
import json
import os

In [None]:
# Load a metadata file for inspection. The context manager closes the file
# handle, which the bare ``open(...)`` call never did.
# NOTE(review): this reads visda2017.json, not the pacs.json written earlier
# in this notebook — confirm that this is the intended file.
with open("../../metadata/visda2017.json") as fh:
    data = json.load(fh)

In [None]:
# List the top-level keys of the loaded metadata.
data.keys()

In [None]:
# Display the annotations stored under the 'syn_train' section.
# NOTE(review): 'syn_train' is not a key produced by the loop in this
# notebook — presumably it belongs to the VisDA file loaded above; verify.
# NOTE(review): this displays the whole annotations object, which may be large.
data['syn_train']["annotations"]