In [None]:
import fiftyone as fo
import fiftyone.utils.openimages as openimages
import random
import pandas as pd
import numpy as np

In [5]:
# Load the Open Images V6 validation split from the FiftyOne dataset zoo,
# downloading whatever is not already present on disk.
dataset = fo.zoo.load_zoo_dataset(
    "open-images-v6",
    split="validation",
    download_if_necessary=True,
)

Downloading split 'validation' to '/Users/louisfouquet/fiftyone/open-images-v6/Raw/validation' if necessary
Downloading 'https://storage.googleapis.com/openimages/v6/oidv6-attributes-description.csv' to '/Users/louisfouquet/fiftyone/open-images-v6/Raw/validation/metadata/attributes.csv'
Downloading 'https://storage.googleapis.com/openimages/v5/classes-segmentation.txt' to '/Users/louisfouquet/fiftyone/open-images-v6/Raw/validation/metadata/segmentation_classes.csv'
Downloading 'https://storage.googleapis.com/openimages/v5/validation-annotations-human-imagelabels-boxable.csv' to '/Users/louisfouquet/fiftyone/open-images-v6/Raw/validation/labels/classifications.csv'
Downloading 'https://storage.googleapis.com/openimages/v6/oidv6-validation-annotations-vrd.csv' to '/Users/louisfouquet/fiftyone/open-images-v6/Raw/validation/labels/relationships.csv'
Downloading 'https://storage.googleapis.com/openimages/v5/validation-annotations-object-segmentation.csv' to '/Users/louisfouquet/fiftyone/ope

In [7]:
# Load the validation box annotations, keeping only the three columns used
# below. Selecting them by name avoids parsing the unused columns of the CSV
# and does not rely on them being the first three columns of the file.
df = pd.read_csv(
    './fiftyone/open-images-v6/Raw/validation/labels/detections.csv',
    usecols=['ImageID', 'Source', 'LabelName'],
)
# Lookup table from the machine label id (LabelName) to its DisplayName.
df_label = pd.read_csv('./fiftyone/open-images-v6/oidv7-class-descriptions.csv')
# Attach the display name to every annotation row (inner join on LabelName),
# then drop the annotation source, which is not needed afterwards.
df_tot = df.merge(df_label, on='LabelName').drop(columns=['Source'])
df_tot

Unnamed: 0,ImageID,LabelName,DisplayName
0,0001eeaf4aed83f9,/m/0cmf2,Fixed-wing aircraft
1,0009bad4d8539bb4,/m/0cmf2,Fixed-wing aircraft
2,0019e544c79847f5,/m/0cmf2,Fixed-wing aircraft
3,0019e544c79847f5,/m/0cmf2,Fixed-wing aircraft
4,007384da2ed0464f,/m/0cmf2,Fixed-wing aircraft
...,...,...,...
303975,ef11d2f491f7af3f,/m/05bm6,Nail (Construction)
303976,dea86ba2060f08c2,/m/0f571,Diaper
303977,f3e368998a146c69,/m/076lb9,Training bench
303978,f3e368998a146c69,/m/076lb9,Training bench


In [8]:
# A class is kept only if it appears in more than this many distinct images.
MIN_IMAGES_PER_CLASS = 200

# Image x class table: each cell holds the number of annotations of that
# class in that image (0 when the class is absent from the image).
pivot = pd.pivot_table(
    df_tot,
    values='LabelName',
    index='ImageID',
    columns='DisplayName',
    aggfunc='count',
    fill_value=0,
)
# Count, for every class, the images containing it at least once. The
# reduction axis is spelled out: np.sum(DataFrame) forwards axis=None to
# DataFrame.sum, which is deprecated and will eventually return a scalar.
images_per_class = (pivot > 0).sum(axis=0)
pivot = pivot.loc[:, images_per_class > MIN_IMAGES_PER_CLASS]
pivot

DisplayName,Animal,Auto part,Baked goods,Ball (Object),Bicycle,Bicycle wheel,Bird,Boat,Bottle,Boy,...,Toy,Tree,Trousers,Truck,Vegetable,Vehicle,Vehicle registration plate,Wheel,Window,Woman
ImageID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0001eeaf4aed83f9,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
000595fe6fee6369,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
00075905539074f2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
0007cebe1b2ba653,0,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
0007d6cf88afaa4a,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
fff3ce694bc02a09,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
fff50186c03c8474,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
fff820866f567015,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,2,0,1,0,0
fffc2f36b181a4fb,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [9]:
# Compute the class co-occurrence matrix: entry (a, b) is the sum, over all
# images, of the product of the annotation counts of classes a and b.
cocc = pivot.T.dot(pivot)
# Zero the diagonal so that a class is never counted as co-occurring with
# itself. The diagonal is cleared on an explicit, writable copy and the frame
# is rebuilt from it: writing through `cocc.values` only works when pandas
# happens to return a writable view (it is read-only under Copy-on-Write).
cocc_values = cocc.to_numpy(copy=True)
np.fill_diagonal(cocc_values, 0)
cocc = pd.DataFrame(cocc_values, index=cocc.index, columns=cocc.columns)
cocc

DisplayName,Animal,Auto part,Baked goods,Ball (Object),Bicycle,Bicycle wheel,Bird,Boat,Bottle,Boy,...,Toy,Tree,Trousers,Truck,Vegetable,Vehicle,Vehicle registration plate,Wheel,Window,Woman
DisplayName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Animal,0,0,0,0,0,0,26,0,1,2,...,25,133,0,0,0,6,0,13,3,5
Auto part,0,0,0,0,22,30,0,35,0,2,...,19,450,1,262,0,452,680,6956,1789,49
Baked goods,0,0,0,0,0,0,0,0,4,1,...,26,0,0,0,29,0,0,0,0,83
Ball (Object),0,0,0,0,1,2,0,0,0,89,...,31,5,2,0,0,22,0,4,0,145
Bicycle,0,22,0,1,0,1624,0,2,1,20,...,0,204,1,0,0,132,24,1526,4,38
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Vehicle,6,452,0,22,132,212,6,534,12,49,...,61,1026,20,100,0,0,245,4007,830,181
Vehicle registration plate,0,680,0,0,24,28,0,4,0,1,...,0,198,0,23,0,245,0,2803,853,15
Wheel,13,6956,0,4,1526,3390,0,35,2,89,...,142,2422,19,1057,0,4007,2803,0,5529,344
Window,3,1789,0,0,4,8,2,13,0,4,...,47,6358,4,145,0,830,853,5529,0,72


In [10]:
def create_classes_lists(cocc, n_lists, n_classes, random_state=None):
    """Draw random groups of classes that tend to appear in the same images.

    Each group starts from a seed class picked uniformly at random among the
    eligible columns of ``cocc``. Its ``n_classes - 1`` companions are then
    sampled without replacement among the *other* classes, with probability
    proportional to their co-occurrence count with the seed class.

    Parameters
    ----------
    cocc : pandas.DataFrame
        Square co-occurrence table carrying the same class labels on its
        index and its columns. The diagonal is ignored: a seed class is never
        drawn as its own companion, whether or not the diagonal was zeroed.
    n_lists : int
        Number of groups to generate.
    n_classes : int
        Number of classes per group (seed class included).
    random_state : int, numpy.random.RandomState or None, default None
        Seed or generator making the draw reproducible. With ``None`` the
        global NumPy random state is used, so ``np.random.seed`` is honoured.

    Returns
    -------
    list of list
        ``n_lists`` lists of ``n_classes`` distinct labels each; the seed
        class is the last element of its list.

    Raises
    ------
    ValueError
        If ``n_classes`` is smaller than 1, or if no class co-occurs with at
        least ``n_classes - 1`` other classes.
    """
    if n_classes < 1:
        raise ValueError("n_classes must be at least 1")
    n_companions = n_classes - 1

    if random_state is None:
        # Legacy behaviour: rely on NumPy's global random state.
        rng = np.random
        sample_state = None
    elif isinstance(random_state, np.random.RandomState):
        rng = sample_state = random_state
    else:
        rng = sample_state = np.random.RandomState(random_state)

    # Co-occurrence weights of every class with all the *other* classes.
    companion_weights = {
        label: cocc[label].drop(labels=label, errors="ignore")
        for label in cocc.columns
    }
    # A class can seed a group only if enough other classes co-occur with it;
    # weighted sampling without replacement cannot return zero-weight entries.
    eligible = [
        label
        for label, weights in companion_weights.items()
        if (weights > 0).sum() >= n_companions
    ]
    if n_lists > 0 and not eligible:
        raise ValueError(
            f"no class co-occurs with at least {n_companions} other classes"
        )

    classes_list = []
    for _ in range(n_lists):
        init_class = eligible[rng.randint(len(eligible))]
        weights = companion_weights[init_class]
        if n_companions:
            all_classes = weights.sample(
                n_companions, weights=weights, random_state=sample_state
            ).index.tolist()
        else:
            all_classes = []
        all_classes.append(init_class)  # the seed class closes the list
        classes_list.append(all_classes)
    return classes_list

In [12]:
# Seed for the random draw of the class lists.
SEED = 42
# More candidate lists are drawn than datasets are needed, because the export
# loop further down only keeps the candidates that yield a full-size dataset.
N_CANDIDATE_LISTS = 30
CLASSES_PER_LIST = 5

# Fix NumPy's global random state so that the same candidate lists come out
# after a kernel restart (np.random.choice and pandas' sampling both use it).
np.random.seed(SEED)
classes_list = create_classes_lists(cocc, N_CANDIDATE_LISTS, CLASSES_PER_LIST)
classes_list

[['Snack', 'Person', 'Salad', 'Vegetable', 'Fast food'],
 ['Vehicle', 'Human hair', 'Mammal', 'Auto part', 'Boat'],
 ['Wheel', 'Bicycle wheel', 'Clothing', 'Sports equipment', 'Bicycle'],
 ['Human leg', 'Human hand', 'Clothing', 'Human hair', 'Human arm'],
 ['Human mouth', 'Mammal', 'Human head', 'Girl', 'Human hair'],
 ['Plant', 'Window', 'Wheel', 'Person', 'Houseplant'],
 ['Human hair', 'Human arm', 'Person', 'Sports equipment', 'Footwear'],
 ['Food', 'Fish', 'Salad', 'Snack', 'Seafood'],
 ['Human head', 'Sports equipment', 'Human body', 'Human hand', 'Girl'],
 ['Footwear', 'Boy', 'Mammal', 'Sports equipment', 'Sports uniform'],
 ['Human leg', 'Mammal', 'Woman', 'Tree', 'Boat'],
 ['Wheel', 'Person', 'Clothing', 'Man', 'Fixed-wing aircraft'],
 ['Plant', 'Clothing', 'Tire', 'Car', 'Land vehicle'],
 ['Dog', 'Animal', 'Human face', 'Clothing', 'Carnivore'],
 ['Clothing', 'Dog', 'Human hair', 'Mammal', 'Animal'],
 ['Person', 'Human hair', 'Drink', 'Woman', 'Tableware'],
 ['Tree', 'Mammal'

In [None]:
# Wipe every dataset currently registered in FiftyOne.
# Caution: this is not limited to the datasets created by this notebook.
for dataset_name in fo.list_datasets():
    dataset = fo.load_dataset(dataset_name)
    dataset.delete()

In [14]:
# Number of datasets to export, and the exact number of images each one must
# contain to be accepted.
N_DATASETS = 20
SAMPLES_PER_DATASET = 1200

# Available Open Images V6 classes (not referenced again in this cell).
class_list = openimages.get_classes(version='v6', dataset_dir=None)
classes_saved = []
num_trial = 0  # index of the candidate class list being tried
i = 1          # 1-based index of the next dataset to export
# Try the candidate class lists in order until N_DATASETS have been exported.
# The bound on num_trial ends the loop cleanly when the candidates run out,
# instead of failing with an IndexError on classes_list.
while i <= N_DATASETS and num_trial < len(classes_list):
    classes = classes_list[num_trial]
    print(classes)

    dataset = fo.zoo.load_zoo_dataset(
        "open-images-v6",
        split="validation",
        label_types=["detections"],
        only_matching=True,
        classes=classes,
        download_if_necessary=False,
        dataset_dir="./fiftyone/open-images-v6/Raw/",
        max_samples=SAMPLES_PER_DATASET,
        dataset_name=f'dataset_{num_trial}',
    )
    n_samples = len(dataset)
    print(n_samples)
    # Only candidates that fill the whole sample budget are exported.
    if n_samples == SAMPLES_PER_DATASET:
        dataset.export(
            export_dir=f'./fiftyone/open-images-v6/Export/dataset_{i}/',
            dataset_type=fo.types.VOCDetectionDataset,
        )
        classes_saved.append(classes)
        i += 1
        print('i :', i)
    num_trial += 1

if len(classes_saved) < N_DATASETS:
    raise RuntimeError(
        f"Only {len(classes_saved)} of the {N_DATASETS} requested datasets could be "
        f"exported from {len(classes_list)} candidate class lists; "
        "generate more candidate lists and run this cell again."
    )

Downloading 'https://storage.googleapis.com/openimages/v5/class-descriptions-boxable.csv' to '/var/folders/yx/jwys6z8s7kdfb76vn404_81c0000gp/T/tmprmnb59r1/metadata/classes.csv'
['Snack', 'Person', 'Salad', 'Vegetable', 'Fast food']
Loading 'open-images-v6' split 'validation'
 100% |███████████████| 1200/1200 [4.9s elapsed, 0s remaining, 251.5 samples/s]      
Dataset 'dataset_0' created
1200
 100% |███████████████| 1200/1200 [16.5s elapsed, 0s remaining, 75.5 samples/s]      
i : 2
['Vehicle', 'Human hair', 'Mammal', 'Auto part', 'Boat']
Loading 'open-images-v6' split 'validation'
 100% |███████████████| 1200/1200 [6.9s elapsed, 0s remaining, 168.3 samples/s]      
Dataset 'dataset_1' created
1200
 100% |███████████████| 1200/1200 [14.6s elapsed, 0s remaining, 88.6 samples/s]      
i : 3
['Wheel', 'Bicycle wheel', 'Clothing', 'Sports equipment', 'Bicycle']
Loading 'open-images-v6' split 'validation'
 100% |███████████████| 1200/1200 [8.4s elapsed, 0s remaining, 139.9 samples/s]      
D

In [15]:
# Class lists whose dataset reached the required sample count and was exported.
classes_saved

[['Snack', 'Person', 'Salad', 'Vegetable', 'Fast food'],
 ['Vehicle', 'Human hair', 'Mammal', 'Auto part', 'Boat'],
 ['Wheel', 'Bicycle wheel', 'Clothing', 'Sports equipment', 'Bicycle'],
 ['Human leg', 'Human hand', 'Clothing', 'Human hair', 'Human arm'],
 ['Human mouth', 'Mammal', 'Human head', 'Girl', 'Human hair'],
 ['Plant', 'Window', 'Wheel', 'Person', 'Houseplant'],
 ['Human hair', 'Human arm', 'Person', 'Sports equipment', 'Footwear'],
 ['Food', 'Fish', 'Salad', 'Snack', 'Seafood'],
 ['Human head', 'Sports equipment', 'Human body', 'Human hand', 'Girl'],
 ['Footwear', 'Boy', 'Mammal', 'Sports equipment', 'Sports uniform'],
 ['Human leg', 'Mammal', 'Woman', 'Tree', 'Boat'],
 ['Wheel', 'Person', 'Clothing', 'Man', 'Fixed-wing aircraft'],
 ['Plant', 'Clothing', 'Tire', 'Car', 'Land vehicle'],
 ['Dog', 'Animal', 'Human face', 'Clothing', 'Carnivore'],
 ['Clothing', 'Dog', 'Human hair', 'Mammal', 'Animal'],
 ['Person', 'Human hair', 'Drink', 'Woman', 'Tableware'],
 ['Tree', 'Mammal'