In [1]:
import os
os.environ["FIFTYONE_REQUIREMENT_ERROR_LEVEL"]='1'
import fiftyone as fo
import fiftyone.brain as fob
import pandas as pd
import ast
import cv2
import numpy as np
from tqdm import tqdm
import random
import json
from PIL import Image

In [2]:
original_dataset = "../../datasets/ball_pythons/20230319_dataset_metadata.csv"
original_dataset_df = pd.read_csv(original_dataset, low_memory=False)

In [3]:
dataset_folder = "./../../datasets/ball_pythons/"

main_folder_list = os.listdir(dataset_folder)
morph_folder_list = [f"{dataset_folder}{i}/" for i in main_folder_list if "." not in i and "Pos" not in i]

morph_img_folder_dict = {}
for morph_folder in morph_folder_list:
    morph_img_folder_list = os.listdir(morph_folder)
    morph_img_folder_list = [f"{morph_folder}{i}/" for i in morph_img_folder_list]
    num_imgs = 0
    for morph_img_folder in morph_img_folder_list:
        num_imgs += len([i for i in os.listdir(morph_img_folder) if i.endswith(".png")])
    morph_img_folder_dict[morph_folder.split("/")[-2]] = {"img_folders": morph_img_folder_list,
    "num_imgs": num_imgs}

total = 0
json_paths_list = []
for morph, morph_dict in morph_img_folder_dict.items():
    img_folder_list = morph_dict["img_folders"]
    image_paths = [[i+k for k in os.listdir(i) if k.endswith(".png")] for i in img_folder_list]
    for folder in img_folder_list:
        json_paths = [folder+k for k in os.listdir(folder) if k.endswith(".json")]
        json_paths_list.extend(json_paths)
    # rand_samples = random.sample(image_paths,2)
    # fig, ax = plt.subplots(ncols=2, figsize=(18,9))
    # image = Image.open(rand_samples[0])
    # ax[0].imshow(image)
    # image = Image.open(rand_samples[1])
    # ax[1].imshow(image)
    # fig.suptitle(f"{morph} - 2/{morph_dict['num_imgs']}", fontsize=20)
    total += morph_dict["num_imgs"]
print(f"total imgs in dict: {total} - {len(morph_img_folder_dict)} morphs")

morph_details_dict = {}
all_image_details_dict = {}
for path in tqdm(json_paths_list):
    image_details_dict = {}
    cur_morph = path.split("/")[-3]
    if not morph_details_dict.get(cur_morph):
        morph_details_dict[cur_morph] = {}
    with open(path, "r") as f:
        image_details_dict = json.load(f)
        image_details_dict["raw_details"]
    early_path = "/".join(path.split("/")[:-1])
    image_path = path.replace("_metadata.json", ".png")
    try:
        image = Image.open(image_path)
        image_details_dict["width"] = image.width
        image_details_dict["height"] = image.height
    except Exception as e:
        image_details_dict = {}
    if image_details_dict:
        morph_details_dict[cur_morph][image_path] = image_details_dict
        all_image_details_dict[image_path] = image_details_dict

metadata_df = pd.DataFrame.from_dict(all_image_details_dict, orient="index")

total imgs in dict: 27468 - 281 morphs


100%|██████████| 27468/27468 [00:08<00:00, 3098.88it/s]


In [4]:
metadata_df["image_path"] = metadata_df.index
metadata_df.head()

Unnamed: 0,raw_details,sex,traits,dob,weight,width,height,price,proven_breeder,image_path
./../../datasets/ball_pythons/Black Belly/Black Belly_898263a1b8724361922d888d3cb8b436/Black Belly_d81a098b1cf9016a3e2b82faee52f735.png,"<div class=""base-mm-design-card snake-info-car...",female,[Black Belly],19th May 2021,1310g,844,1500,,,./../../datasets/ball_pythons/Black Belly/Blac...
./../../datasets/ball_pythons/Black Belly/Black Belly_898263a1b8724361922d888d3cb8b436/Black Belly_9d1df06772db6209796ba1fb97b803bd.png,"<div class=""base-mm-design-card snake-info-car...",female,[Black Belly],19th May 2021,1310g,1500,844,,,./../../datasets/ball_pythons/Black Belly/Blac...
./../../datasets/ball_pythons/66% Het Puzzle/66% Het Puzzle_1bbce1cdda524ecfbbe4aaa689b204ab/66% Het Puzzle_247138f9a2136da513419fd6343ed2ec.png,"<div class=""base-mm-design-card snake-info-car...",female,[66% Het Puzzle],16th December 2022,,1500,1500,,,./../../datasets/ball_pythons/66% Het Puzzle/6...
./../../datasets/ball_pythons/Fire/Fire_bb1b7d9bd2d39f46e5de2f2a202d1bf4/Fire_bb1b7d9bd2d39f46e5de2f2a202d1bf4.png,"<div class=""details"">\n <dl class=""dl-horizont...",female,[Fire],,200g,550,540,US$125.00,,./../../datasets/ball_pythons/Fire/Fire_bb1b7d...
./../../datasets/ball_pythons/Fire/Fire_96ae567f0e404167a26c2d196d4e5f1a/Fire_06e4035580bd0306d51b698604a57c13.png,"<div class=""base-mm-design-card snake-info-car...",female,[Fire],,425g,1125,1500,US$50.00,,./../../datasets/ball_pythons/Fire/Fire_96ae56...


In [22]:
# samples = []
# for ind, row in original_dataset_df[:].iterrows():
#     image_path = row['image_path']
#     trait_combo = ast.literal_eval(row['traits'])
#     trait_combo.sort()
#     sex = row['sex']
    
#     sample = fo.Sample(filepath=image_path)
#     sample['trait_combo'] = fo.Metadata(values = trait_combo)

#     samples.append(sample)

# dataset = fo.Dataset("original-bp-20230319-dataset-test-full")
# dataset.add_samples(samples)

samples = []
for ind, row in metadata_df[:].iterrows():
    image_path = row['image_path']
    trait_combo = row['traits']
    trait_combo.sort()
    sex = row['sex']
    price = row['price']
    if type(price) == str:
        try:
            price = float(price.split('$')[-1])
        except ValueError:
            price = None
    else:
        price = None
    weight = row['weight']
    if type(weight) == str:
        weight = float(weight.replace("g",""))
    else:
        weight = None
    raw_details = row['raw_details']
    is_ball_python = True
    if type(raw_details) == str:
        if 'Ball Python' in raw_details or '/ball-pythons/trait' in raw_details:
            is_ball_python = True
        else:
            is_ball_python = False
    else:
        is_ball_python = None
    
    sample = fo.Sample(filepath=image_path)
    sample['trait_combo'] = trait_combo
    sample['sex'] = sex
    sample['price'] = price
    sample['weight'] = weight
    sample['is_ball_python'] = is_ball_python


    samples.append(sample)

dataset = fo.Dataset("expanded-bp-dataset-final-full-v1dot1")
dataset.add_samples(samples)

 100% |█████████████| 27388/27388 [7.2s elapsed, 0s remaining, 4.0K samples/s]       


['642a0b97d1b8e493aef6b73d',
 '642a0b97d1b8e493aef6b73e',
 '642a0b97d1b8e493aef6b73f',
 '642a0b97d1b8e493aef6b740',
 '642a0b97d1b8e493aef6b741',
 '642a0b97d1b8e493aef6b742',
 '642a0b97d1b8e493aef6b743',
 '642a0b97d1b8e493aef6b744',
 '642a0b97d1b8e493aef6b745',
 '642a0b97d1b8e493aef6b746',
 '642a0b97d1b8e493aef6b747',
 '642a0b97d1b8e493aef6b748',
 '642a0b97d1b8e493aef6b749',
 '642a0b97d1b8e493aef6b74a',
 '642a0b97d1b8e493aef6b74b',
 '642a0b97d1b8e493aef6b74c',
 '642a0b97d1b8e493aef6b74d',
 '642a0b97d1b8e493aef6b74e',
 '642a0b97d1b8e493aef6b74f',
 '642a0b97d1b8e493aef6b750',
 '642a0b97d1b8e493aef6b751',
 '642a0b97d1b8e493aef6b752',
 '642a0b97d1b8e493aef6b753',
 '642a0b97d1b8e493aef6b754',
 '642a0b97d1b8e493aef6b755',
 '642a0b97d1b8e493aef6b756',
 '642a0b97d1b8e493aef6b757',
 '642a0b97d1b8e493aef6b758',
 '642a0b97d1b8e493aef6b759',
 '642a0b97d1b8e493aef6b75a',
 '642a0b97d1b8e493aef6b75b',
 '642a0b97d1b8e493aef6b75c',
 '642a0b97d1b8e493aef6b75d',
 '642a0b97d1b8e493aef6b75e',
 '642a0b97d1b8

In [23]:
session = fo.launch_app(dataset=dataset)

In [31]:
# pixels = embeddings
# img_pixels = []
# for f in tqdm(dataset.values('filepath')):
#     img = cv2.imread(f, cv2.IMREAD_UNCHANGED)
#     img = cv2.resize(img, (168, 168), cv2.INTER_CUBIC)
#     img_pixels.append(img.ravel())
# embeddings = np.array(img_pixels)

# create real embeddings
# img_samples = fo.core.collections.SampleCollection(dataset)
test_model = fo.zoo.load_zoo_model("vgg19-bn-imagenet-torch")
embeddings = fo.core.models.compute_embeddings(dataset, model=test_model, batch_size = 24, num_workers=12)


The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead.


Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=VGG19_BN_Weights.IMAGENET1K_V1`. You can also use `weights=VGG19_BN_Weights.DEFAULT` to get the most up-to-date weights.



Model does not support batching
 100% |█████████████| 27388/27388 [26.6m elapsed, 0s remaining, 21.1 samples/s]      


In [32]:
fob.compute_similarity(
    dataset,
    embeddings=embeddings,
    seed=51,
    progress_bar=True,
    brain_key='bp_sim_vgg19bn',
)

Ignoring unsupported parameters {'seed', 'progress_bar'} for <class 'fiftyone.brain.internal.core.sklearn.SklearnSimilarityConfig'>


<fiftyone.brain.internal.core.sklearn.SklearnSimilarityIndex at 0x7fbe73f7e050>

In [33]:
results = fob.compute_visualization(
    dataset,
    embeddings=embeddings,
    num_dims=2,
    method="umap",
    verbose=True,
    seed=51,
    progress_bar=True,
    brain_key='bp_vis_vgg19bn',
    batch_size=24,
)

Ignoring unsupported parameters {'progress_bar'} for <class 'fiftyone.brain.visualization.UMAPVisualizationConfig'>
Generating visualization...
UMAP(random_state=51, verbose=True)
Mon Apr  3 00:06:20 2023 Construct fuzzy simplicial set
Mon Apr  3 00:06:20 2023 Finding Nearest Neighbors
Mon Apr  3 00:06:20 2023 Building RP forest with 13 trees
Mon Apr  3 00:06:24 2023 NN descent for 15 iterations
	 1  /  15
	 2  /  15
	 3  /  15
	 4  /  15
	 5  /  15
	Stopping threshold met -- exiting after 5 iterations
Mon Apr  3 00:06:28 2023 Finished Nearest Neighbor Search
Mon Apr  3 00:06:28 2023 Construct embedding


Epochs completed:   0%|            0/200 [00:00]

Mon Apr  3 00:06:53 2023 Finished embedding


In [28]:
for sample in dataset.iter_samples(progress=True):
    sample['is_bp_str'] = str(sample['is_ball_python'])
    sample.save()

 100% |█████████████| 27388/27388 [31.9s elapsed, 0s remaining, 913.0 samples/s]      


In [34]:

plot = results.visualize(labels='is_bp_str')
plot.show(height=960)
session.plots.attach(plot)

The requested operation requires that 'ipywidgets<8,>=7.5' is installed on your machine, but found 'ipywidgets==8.0.4'.




FigureWidget({
    'data': [{'customdata': array(['642a0b97d1b8e493aef6b7a7', '642a0b97d1b8e493aef6b7a8',
                                   '642a0b97d1b8e493aef6b7b1', ..., '642a0b9ed1b8e493aef721d6',
                                   '642a0b9ed1b8e493aef721d7', '642a0b9ed1b8e493aef721d8'], dtype=object),
              'hovertemplate': ('<b>is_bp_str: %{text}</b><br>x' ... ': %{customdata}<extra></extra>'),
              'line': {'color': '#3366CC'},
              'mode': 'markers',
              'name': 'False',
              'showlegend': True,
              'text': array(['False', 'False', 'False', ..., 'False', 'False', 'False'], dtype='<U5'),
              'type': 'scattergl',
              'uid': 'a03139a2-ba03-4bb0-adb3-4c9812b4008e',
              'x': array([ 0.18858184, -0.48532426, -1.612891  , ...,  1.41624   ,  1.3258724 ,
                           1.3600205 ], dtype=float32),
              'y': array([4.2800746, 2.2468097, 0.5398853, ..., 2.935968 , 3.049349 , 2.911366

In [None]:
dataset.save()