In [1]:
import os
import glob
import json
from pathlib import Path
import pandas as pd

In [2]:
def parse_animal_json_files(animal_folder, animal_name):
    """
    Build one record per label JSON file found under an animal folder.

    Every immediate sub-directory of ``animal_folder`` is treated as a lesion
    folder, and every ``*.json`` file directly inside it is parsed with
    ``parse_json``.

    Args:
        animal_folder: Directory path up to (and including) 유증상; its
            sub-directories are the lesion folders.
        animal_name: Value stored under the ``"animal"`` key of every record.

    Returns:
        A list of dicts, one per JSON file: the ``parse_json`` result plus the
        keys ``"lesion_folder_name"`` and ``"animal"``. Records are ordered by
        lesion folder name and then by file path, so repeated runs over the
        same data give the same row order.

    Raises:
        OSError: Propagated from ``os.listdir`` when ``animal_folder`` cannot
            be listed (for example, when it does not exist).
    """
    dataset_list = []
    # os.listdir and glob.glob return entries in arbitrary order; sorting keeps
    # the exported row order (and therefore the CSV index) reproducible.
    for lesion_folder in sorted(os.listdir(animal_folder)):
        lesion_folder_path = os.path.join(animal_folder, lesion_folder)
        if not os.path.isdir(lesion_folder_path):
            # Stray files next to the lesion folders are not lesion folders.
            continue
        # Escape the directory part so glob metacharacters in a folder name
        # are matched literally; only "*.json" acts as a pattern.
        pattern = os.path.join(glob.escape(lesion_folder_path), "*.json")
        for filename in sorted(glob.glob(pattern)):
            data_row = parse_json(filename)
            data_row["lesion_folder_name"] = lesion_folder
            data_row["animal"] = animal_name
            dataset_list.append(data_row)
    return dataset_list

In [3]:
def parse_json(full_path):
    """
    Parse one label JSON file into a flat record.

    Args:
        full_path: Path of the label JSON file (read as UTF-8).

    Returns:
        A dict with, in this order:
        - ``breed``, ``age``, ``gender``, ``region``, ``species``, ``lesions``,
          ``identifier``: copied from the file's ``"metaData"`` object.
        - ``image_name``: the JSON file name with its final extension replaced
          by ``.jpg``.
        - ``image_full_path``: ``image_name`` joined onto the JSON file's
          directory, with backslashes converted to forward slashes.
        - ``annotations``: the file's ``"labelingInfo"`` value, unchanged.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If ``"metaData"``, one of its expected fields, or
            ``"labelingInfo"`` is missing.
    """
    # Use a context manager so the handle is closed even if parsing fails.
    with open(full_path, encoding="utf-8") as json_file:
        json_obj = json.load(json_file)

    json_meta = json_obj["metaData"]
    meta_fields = ("breed", "age", "gender", "region", "species", "lesions", "identifier")
    parsed_data = {field: json_meta[field] for field in meta_fields}

    # Image sits next to its label: folder/a.json -> folder/a.jpg.
    # Only the final extension is stripped, so dotted names such as
    # "a.b.json" map to "a.b.jpg" rather than being cut at the first dot.
    json_path = Path(full_path)
    parsed_data["image_name"] = json_path.stem + ".jpg"

    # Store the path with forward slashes so it reads the same on every OS.
    image_full_path = os.path.join(json_path.parent, parsed_data["image_name"])
    parsed_data["image_full_path"] = image_full_path.replace("\\", "/")

    parsed_data["annotations"] = json_obj["labelingInfo"]
    return parsed_data

In [4]:
# Training split: each root path below stops at the 유증상 directory.
dogs_folder = "data/152.반려동물 피부질환 데이터/01.데이터/1.Training/2_라벨링데이터/TL01/반려견/피부/일반카메라/유증상/"
cats_folder = "data/152.반려동물 피부질환 데이터/01.데이터/1.Training/2_라벨링데이터/TL01/반려묘/피부/일반카메라/유증상"

# Parse each animal's label files separately, then merge (dogs first, cats second).
parsed_dogs_data = parse_animal_json_files(dogs_folder, "반려견")
parsed_cats_data = parse_animal_json_files(cats_folder, "반려묘")
full_data = [*parsed_dogs_data, *parsed_cats_data]

# Persist the merged records twice: as a CSV table and as a JSON list.
df = pd.DataFrame(full_data)
df.to_csv("data/training_dataset.csv")
with open('data/training_dataset.json', 'w') as fout:
    fout.write(json.dumps(full_data))

# Validation

In [5]:
# Validation split: each root path below stops at the 유증상 directory.
val_dogs_folder = "data/152.반려동물 피부질환 데이터/01.데이터/2.Validation/2_라벨링데이터/VL01/반려견/피부/일반카메라/유증상/"
val_cats_folder = "data/152.반려동물 피부질환 데이터/01.데이터/2.Validation/2_라벨링데이터/VL01/반려묘/피부/일반카메라/유증상"

# Parse each animal's label files separately, then merge (dogs first, cats second).
val_parsed_dogs_data = parse_animal_json_files(val_dogs_folder, "반려견")
val_parsed_cats_data = parse_animal_json_files(val_cats_folder, "반려묘")
val_full_data = [*val_parsed_dogs_data, *val_parsed_cats_data]

# Persist the merged records twice: as a CSV table and as a JSON list.
val_df = pd.DataFrame(val_full_data)
val_df.to_csv("data/validation_dataset.csv")
with open('data/validation_dataset.json', 'w') as fout:
    fout.write(json.dumps(val_full_data))