In [1]:
import numpy as np
import pandas as pd
import os

import random

import shutil

from skimage.transform import resize   # Image Processing Library
import matplotlib.pyplot as plt
from tqdm import tqdm
import gc    # Garbage Collector
gc.collect()

import json

from collections import defaultdict

from PIL import Image

In [2]:
# Input data files are available in the "../../fiftyone/caltech101" directory.
# Print its entries: one sub-folder per class plus the info.json metadata file.
dataset_entries = os.listdir("../../fiftyone/caltech101")
print(dataset_entries)

['accordion', 'airplanes', 'anchor', 'ant', 'BACKGROUND_Google', 'barrel', 'bass', 'beaver', 'binocular', 'bonsai', 'brain', 'brontosaurus', 'buddha', 'butterfly', 'camera', 'cannon', 'car_side', 'ceiling_fan', 'cellphone', 'chair', 'chandelier', 'cougar_body', 'cougar_face', 'crab', 'crayfish', 'crocodile', 'crocodile_head', 'cup', 'dalmatian', 'dollar_bill', 'dolphin', 'dragonfly', 'electric_guitar', 'elephant', 'emu', 'euphonium', 'ewer', 'Faces', 'Faces_easy', 'ferry', 'flamingo', 'flamingo_head', 'garfield', 'gerenuk', 'gramophone', 'grand_piano', 'hawksbill', 'headphone', 'hedgehog', 'helicopter', 'ibis', 'info.json', 'inline_skate', 'joshua_tree', 'kangaroo', 'ketch', 'lamp', 'laptop', 'Leopards', 'llama', 'lobster', 'lotus', 'mandolin', 'mayfly', 'menorah', 'metronome', 'minaret', 'Motorbikes', 'nautilus', 'octopus', 'okapi', 'pagoda', 'panda', 'pigeon', 'pizza', 'platypus', 'pyramid', 'revolver', 'rhino', 'rooster', 'saxophone', 'schooner', 'scissors', 'scorpion', 'sea_horse',

In [3]:
# Load the dataset metadata written next to the class folders.
# The encoding is pinned to UTF-8 (the JSON interchange encoding) so decoding
# does not depend on the platform's default locale encoding.
with open("../../fiftyone/caltech101/info.json", 'r', encoding="utf-8") as file:
    info = json.load(file)

In [4]:
# Display the full metadata dict that was loaded from info.json.
info

{'name': 'caltech101',
 'zoo_dataset': 'fiftyone.zoo.datasets.base.Caltech101Dataset',
 'dataset_type': 'fiftyone.types.dataset_types.ImageClassificationDirectoryTree',
 'num_samples': 9145,
 'classes': ['BACKGROUND_Google',
  'Faces',
  'Faces_easy',
  'Leopards',
  'Motorbikes',
  'accordion',
  'airplanes',
  'anchor',
  'ant',
  'barrel',
  'bass',
  'beaver',
  'binocular',
  'bonsai',
  'brain',
  'brontosaurus',
  'buddha',
  'butterfly',
  'camera',
  'cannon',
  'car_side',
  'ceiling_fan',
  'cellphone',
  'chair',
  'chandelier',
  'cougar_body',
  'cougar_face',
  'crab',
  'crayfish',
  'crocodile',
  'crocodile_head',
  'cup',
  'dalmatian',
  'dollar_bill',
  'dolphin',
  'dragonfly',
  'electric_guitar',
  'elephant',
  'emu',
  'euphonium',
  'ewer',
  'ferry',
  'flamingo',
  'flamingo_head',
  'garfield',
  'gerenuk',
  'gramophone',
  'grand_piano',
  'hawksbill',
  'headphone',
  'hedgehog',
  'helicopter',
  'ibis',
  'inline_skate',
  'joshua_tree',
  'kangaroo',

In [5]:
# Display the list of class names recorded in the dataset metadata.
info['classes']

['BACKGROUND_Google',
 'Faces',
 'Faces_easy',
 'Leopards',
 'Motorbikes',
 'accordion',
 'airplanes',
 'anchor',
 'ant',
 'barrel',
 'bass',
 'beaver',
 'binocular',
 'bonsai',
 'brain',
 'brontosaurus',
 'buddha',
 'butterfly',
 'camera',
 'cannon',
 'car_side',
 'ceiling_fan',
 'cellphone',
 'chair',
 'chandelier',
 'cougar_body',
 'cougar_face',
 'crab',
 'crayfish',
 'crocodile',
 'crocodile_head',
 'cup',
 'dalmatian',
 'dollar_bill',
 'dolphin',
 'dragonfly',
 'electric_guitar',
 'elephant',
 'emu',
 'euphonium',
 'ewer',
 'ferry',
 'flamingo',
 'flamingo_head',
 'garfield',
 'gerenuk',
 'gramophone',
 'grand_piano',
 'hawksbill',
 'headphone',
 'hedgehog',
 'helicopter',
 'ibis',
 'inline_skate',
 'joshua_tree',
 'kangaroo',
 'ketch',
 'lamp',
 'laptop',
 'llama',
 'lobster',
 'lotus',
 'mandolin',
 'mayfly',
 'menorah',
 'metronome',
 'minaret',
 'nautilus',
 'octopus',
 'okapi',
 'pagoda',
 'panda',
 'pigeon',
 'pizza',
 'platypus',
 'pyramid',
 'revolver',
 'rhino',
 'rooste

In [6]:
# Report how many directory entries each class folder holds, one count per
# line, in the order the classes appear in info['classes'].
for cla in info['classes']:
    class_dir = f"../../fiftyone/caltech101/{cla}"
    entry_count = len(os.listdir(class_dir))
    print(entry_count)

468
435
435
200
798
55
800
42
42
47
54
46
33
128
98
43
85
91
50
43
123
47
59
62
107
47
69
73
70
50
51
57
67
52
65
68
75
64
53
64
85
67
67
45
34
34
51
99
100
42
54
88
80
31
64
86
114
61
81
78
41
66
43
40
87
32
76
55
35
39
47
38
45
53
34
57
82
59
49
40
63
39
84
57
35
64
45
86
59
64
35
85
49
86
75
239
37
59
34
56
39
60


In [7]:
# NOTE(review): defaultdict() is created without a default_factory, so a
# missing key still raises KeyError, exactly like a plain dict. Nothing in the
# visible cells reads or writes this variable - confirm whether it is needed.
splitted_dataset = defaultdict()

In [8]:
# Split every class folder into train / valid / test copies (50% / 25% / 25%).
#
# Fixes over the previous version of this cell:
#   * the file list itself is shuffled (before, a separate index list was
#     shuffled and never used, so the split followed the os.listdir order);
#   * the shuffle uses a fixed seed on a sorted listing, so re-running the
#     cell reproduces the same split instead of mixing files between splits;
#   * the split boundaries are named fractions instead of a 1.3333 divisor;
#   * the three copy loops are one helper.
#
# NOTE: files are only ever added to the split folders. If those folders
# already hold output from an earlier, different split, delete them first,
# otherwise the same image can end up in more than one split.
SPLIT_SEED = 42          # fixed seed -> reproducible split
TRAIN_FRACTION = 0.5     # first 50% of the shuffled files
VALID_FRACTION = 0.25    # next 25%; the remainder becomes the test split
DATASET_ROOT = "../../fiftyone/caltech101"


def _copy_files(file_names, source_dir, target_dir):
    """Copy each named file from source_dir into target_dir.

    target_dir is created if it does not exist yet (also when file_names is
    empty, so every class gets a folder in every split).
    """
    os.makedirs(target_dir, exist_ok=True)
    for file_name in file_names:
        shutil.copy(
            os.path.join(source_dir, file_name),
            os.path.join(target_dir, file_name),
        )


split_rng = random.Random(SPLIT_SEED)

for cla in info['classes']:
    old_path = f"{DATASET_ROOT}/{cla}/"

    # Keep regular files only and sort them: os.listdir returns entries in
    # arbitrary order, and a stable starting order is what makes the seeded
    # shuffle reproducible.
    data = sorted(
        name for name in os.listdir(old_path)
        if os.path.isfile(os.path.join(old_path, name))
    )
    split_rng.shuffle(data)

    num_data = len(data)
    train_end = int(num_data * TRAIN_FRACTION)
    valid_end = int(num_data * (TRAIN_FRACTION + VALID_FRACTION))

    splits = {
        "train": data[:train_end],
        "valid": data[train_end:valid_end],
        "test": data[valid_end:],
    }
    for split_name, file_names in splits.items():
        # Produces e.g. ../../fiftyone/caltech101_train/<class>/
        _copy_files(file_names, old_path, f"{DATASET_ROOT}_{split_name}/{cla}/")