In [1]:
import os
os.environ["FIFTYONE_REQUIREMENT_ERROR_LEVEL"]='1'
import fiftyone as fo
import fiftyone.brain as fob
import pandas as pd
import ast
import cv2
import numpy as np
from tqdm import tqdm
import random
import json
from PIL import Image

In [2]:
original_dataset = "../../datasets/ball_pythons/20230319_dataset_metadata.csv"
original_dataset_df = pd.read_csv(original_dataset, low_memory=False)

In [3]:
dataset_folder = "./../../datasets/ball_pythons/"

main_folder_list = os.listdir(dataset_folder)
morph_folder_list = [f"{dataset_folder}{i}/" for i in main_folder_list if "." not in i and "Pos" not in i]

morph_img_folder_dict = {}
for morph_folder in morph_folder_list:
    morph_img_folder_list = os.listdir(morph_folder)
    morph_img_folder_list = [f"{morph_folder}{i}/" for i in morph_img_folder_list]
    num_imgs = 0
    for morph_img_folder in morph_img_folder_list:
        num_imgs += len([i for i in os.listdir(morph_img_folder) if i.endswith(".png")])
    morph_img_folder_dict[morph_folder.split("/")[-2]] = {"img_folders": morph_img_folder_list,
    "num_imgs": num_imgs}

total = 0
json_paths_list = []
for morph, morph_dict in morph_img_folder_dict.items():
    img_folder_list = morph_dict["img_folders"]
    image_paths = [[i+k for k in os.listdir(i) if k.endswith(".png")] for i in img_folder_list]
    for folder in img_folder_list:
        json_paths = [folder+k for k in os.listdir(folder) if k.endswith(".json")]
        json_paths_list.extend(json_paths)
    # rand_samples = random.sample(image_paths,2)
    # fig, ax = plt.subplots(ncols=2, figsize=(18,9))
    # image = Image.open(rand_samples[0])
    # ax[0].imshow(image)
    # image = Image.open(rand_samples[1])
    # ax[1].imshow(image)
    # fig.suptitle(f"{morph} - 2/{morph_dict['num_imgs']}", fontsize=20)
    total += morph_dict["num_imgs"]
print(f"total imgs in dict: {total} - {len(morph_img_folder_dict)} morphs")

morph_details_dict = {}
all_image_details_dict = {}
for path in tqdm(json_paths_list):
    image_details_dict = {}
    cur_morph = path.split("/")[-3]
    if not morph_details_dict.get(cur_morph):
        morph_details_dict[cur_morph] = {}
    with open(path, "r") as f:
        image_details_dict = json.load(f)
        del image_details_dict["raw_details"]
    early_path = "/".join(path.split("/")[:-1])
    image_path = path.replace("_metadata.json", ".png")
    try:
        image = Image.open(image_path)
        image_details_dict["width"] = image.width
        image_details_dict["height"] = image.height
    except Exception as e:
        image_details_dict = {}
    if image_details_dict:
        morph_details_dict[cur_morph][image_path] = image_details_dict
        all_image_details_dict[image_path] = image_details_dict

metadata_df = pd.DataFrame.from_dict(all_image_details_dict, orient="index")

total imgs in dict: 27468 - 281 morphs


100%|██████████| 27468/27468 [00:17<00:00, 1585.19it/s]


In [4]:
metadata_df["image_path"] = metadata_df.index
metadata_df.head()

Unnamed: 0,sex,traits,dob,weight,width,height,price,proven_breeder,image_path
./../../datasets/ball_pythons/Black Belly/Black Belly_898263a1b8724361922d888d3cb8b436/Black Belly_d81a098b1cf9016a3e2b82faee52f735.png,female,[Black Belly],19th May 2021,1310g,844,1500,,,./../../datasets/ball_pythons/Black Belly/Blac...
./../../datasets/ball_pythons/Black Belly/Black Belly_898263a1b8724361922d888d3cb8b436/Black Belly_9d1df06772db6209796ba1fb97b803bd.png,female,[Black Belly],19th May 2021,1310g,1500,844,,,./../../datasets/ball_pythons/Black Belly/Blac...
./../../datasets/ball_pythons/66% Het Puzzle/66% Het Puzzle_1bbce1cdda524ecfbbe4aaa689b204ab/66% Het Puzzle_247138f9a2136da513419fd6343ed2ec.png,female,[66% Het Puzzle],16th December 2022,,1500,1500,,,./../../datasets/ball_pythons/66% Het Puzzle/6...
./../../datasets/ball_pythons/Fire/Fire_bb1b7d9bd2d39f46e5de2f2a202d1bf4/Fire_bb1b7d9bd2d39f46e5de2f2a202d1bf4.png,female,[Fire],,200g,550,540,US$125.00,,./../../datasets/ball_pythons/Fire/Fire_bb1b7d...
./../../datasets/ball_pythons/Fire/Fire_96ae567f0e404167a26c2d196d4e5f1a/Fire_06e4035580bd0306d51b698604a57c13.png,female,[Fire],,425g,1125,1500,US$50.00,,./../../datasets/ball_pythons/Fire/Fire_96ae56...


In [6]:
# samples = []
# for ind, row in original_dataset_df[:].iterrows():
#     image_path = row['image_path']
#     trait_combo = ast.literal_eval(row['traits'])
#     trait_combo.sort()
#     sex = row['sex']
    
#     sample = fo.Sample(filepath=image_path)
#     sample['trait_combo'] = fo.Metadata(values = trait_combo)

#     samples.append(sample)

# dataset = fo.Dataset("original-bp-20230319-dataset-test-full")
# dataset.add_samples(samples)

samples = []
for ind, row in metadata_df[:].iterrows():
    image_path = row['image_path']
    trait_combo = row['traits']
    trait_combo.sort()
    sex = row['sex']
    
    sample = fo.Sample(filepath=image_path)
    sample['trait_combo'] = trait_combo
    sample['sex'] = sex

    samples.append(sample)

dataset = fo.Dataset("expanded-bp-dataset-test-full-v4")
dataset.add_samples(samples)

 100% |█████████████| 27388/27388 [6.3s elapsed, 0s remaining, 4.3K samples/s]      


['6429fafe9fa9577d02da680e',
 '6429fafe9fa9577d02da680f',
 '6429fafe9fa9577d02da6810',
 '6429fafe9fa9577d02da6811',
 '6429fafe9fa9577d02da6812',
 '6429fafe9fa9577d02da6813',
 '6429fafe9fa9577d02da6814',
 '6429fafe9fa9577d02da6815',
 '6429fafe9fa9577d02da6816',
 '6429fafe9fa9577d02da6817',
 '6429fafe9fa9577d02da6818',
 '6429fafe9fa9577d02da6819',
 '6429fafe9fa9577d02da681a',
 '6429fafe9fa9577d02da681b',
 '6429fafe9fa9577d02da681c',
 '6429fafe9fa9577d02da681d',
 '6429fafe9fa9577d02da681e',
 '6429fafe9fa9577d02da681f',
 '6429fafe9fa9577d02da6820',
 '6429fafe9fa9577d02da6821',
 '6429fafe9fa9577d02da6822',
 '6429fafe9fa9577d02da6823',
 '6429fafe9fa9577d02da6824',
 '6429fafe9fa9577d02da6825',
 '6429fafe9fa9577d02da6826',
 '6429fafe9fa9577d02da6827',
 '6429fafe9fa9577d02da6828',
 '6429fafe9fa9577d02da6829',
 '6429fafe9fa9577d02da682a',
 '6429fafe9fa9577d02da682b',
 '6429fafe9fa9577d02da682c',
 '6429fafe9fa9577d02da682d',
 '6429fafe9fa9577d02da682e',
 '6429fafe9fa9577d02da682f',
 '6429fafe9fa9

In [7]:
session = fo.launch_app(dataset=dataset)

In [8]:
# pixels = embeddings
# img_pixels = []
# for f in tqdm(dataset.values('filepath')):
#     img = cv2.imread(f, cv2.IMREAD_UNCHANGED)
#     img = cv2.resize(img, (168, 168), cv2.INTER_CUBIC)
#     img_pixels.append(img.ravel())
# embeddings = np.array(img_pixels)

# create real embeddings
# img_samples = fo.core.collections.SampleCollection(dataset)
test_model = fo.zoo.load_zoo_model("vgg19-bn-imagenet-torch")
embeddings = fo.core.models.compute_embeddings(dataset, model=test_model, batch_size = 24, num_workers=12)



Model does not support batching
  15% |█------------|  4005/27388 [3.9m elapsed, 24.0m remaining, 11.9 samples/s]  

In [None]:
fob.compute_similarity(
    dataset,
    embeddings=embeddings,
    seed=51,
    progress_bar=True,
    brain_key='bp_sim',
)

In [None]:
results = fob.compute_visualization(
    dataset,
    embeddings=embeddings,
    num_dims=2,
    method="umap",
    verbose=True,
    seed=51,
    progress_bar=True,
    brain_key='bp_vis',
    batch_size=24,
)

In [None]:

plot = results.visualize(labels='sex')
plot.show(height=720)
session.plots.attach(plot)