### This notebook only provides the code for creating an openset ImageNet protocol

In [1]:
# https://github.com/ipython/ipython/issues/10493#issuecomment-392549088
%config IPCompleter.use_jedi = False

In [2]:
from itertools import zip_longest
from glob import glob
import csv
import random
random.seed(0)

### Set some parameters

In [3]:
Images_root_diectory="/net/kato/store2/ImageNet/ILSVRC_2012" # Do Not include slash at the end
Output_directory = "/home/adhamija/ImageNet_Dali/protocol_corrected/"
no_of_knowns=300
no_of_known_unknowns=350
no_of_unknowns=350
percentage_of_training_samples=0.8
percentage_of_validation_samples=1-percentage_of_training_samples

In [4]:
assert (no_of_knowns+no_of_known_unknowns+no_of_unknowns) == 1000

##### Find all KNowns, Known Unknowns & Unknown unknowns

In [5]:
all_classes=glob(f"{Images_root_diectory}/train/*/")
all_classes = [_.split('/')[-2] for _ in all_classes]
random.shuffle(all_classes)

In [6]:
known_classes = all_classes[:no_of_knowns]
KU_classes = all_classes[no_of_knowns:(no_of_known_unknowns+no_of_knowns)]
unknown_classes = all_classes[(-1*no_of_unknowns):]

In [7]:
len(known_classes),len(KU_classes),len(unknown_classes)

(300, 350, 350)

In [8]:
assert len(set(known_classes).intersection(set(KU_classes)))==0
assert len(set(known_classes).intersection(set(unknown_classes)))==0
assert len(set(unknown_classes).intersection(set(KU_classes)))==0

In [9]:
class_mappings=dict(zip(known_classes,range(len(known_classes))))

Create CSV for Knowns training and validation set 

In [10]:
knowns_training_list=[]
knowns_validation_list=[]
for cls_name in known_classes:
    image_names=glob(f"{Images_root_diectory}/train/{cls_name}/*")
    image_names=[i.split(f"{Images_root_diectory}/")[-1] for i in image_names]
    image_rows=list(zip_longest(image_names,[],fillvalue=class_mappings[cls_name]))    
    knowns_training_list.extend(image_rows[:int(len(image_rows)*percentage_of_training_samples)])
    knowns_validation_list.extend(image_rows[int(len(image_rows)*percentage_of_training_samples):])
    
with open(f"{Output_directory}/train_knowns.csv", "w") as f:
    writer = csv.writer(f)
    writer.writerows(knowns_training_list)
with open(f"{Output_directory}/val_knowns.csv", "w") as f:
    writer = csv.writer(f)
    writer.writerows(knowns_validation_list)

In [11]:
len(knowns_training_list),len(knowns_validation_list)

(306776, 76714)

Create CSV for Known Unknowns training and validation set 

In [12]:
knowns_training_list=[]
knowns_validation_list=[]
for cls_name in KU_classes:
    image_names=glob(f"{Images_root_diectory}/train/{cls_name}/*")
    image_names=[i.split(f"{Images_root_diectory}/")[-1] for i in image_names]
    image_rows=list(zip_longest(image_names,[],fillvalue=-1))
    knowns_training_list.extend(image_rows[:int(len(image_rows)*percentage_of_training_samples)])
    knowns_validation_list.extend(image_rows[int(len(image_rows)*percentage_of_training_samples):])
    
with open(f"{Output_directory}/train_knownUnknowns.csv", "w") as f:
    writer = csv.writer(f)
    writer.writerows(knowns_training_list)
with open(f"{Output_directory}/val_knownUnknowns.csv", "w") as f:
    writer = csv.writer(f)
    writer.writerows(knowns_validation_list)

In [13]:
len(knowns_training_list),len(knowns_validation_list)

(359515, 89894)

Create CSV for Knowns, Known Unknowns and Unknown Unknowns for testing

In [14]:
knowns_test_list=[]
for cls_name in known_classes:
    image_names=glob(f"{Images_root_diectory}/val_in_folders/{cls_name}/*")
    image_names=[i.split(f"{Images_root_diectory}/")[-1] for i in image_names]
    image_rows=list(zip_longest(image_names,[],fillvalue=class_mappings[cls_name]))
    knowns_test_list.extend(image_rows)

with open(f"{Output_directory}/test_knowns.csv", "w") as f:
    writer = csv.writer(f)
    writer.writerows(knowns_test_list)

known_unknowns_test_list=[]
for cls_name in KU_classes:
    image_names=glob(f"{Images_root_diectory}/val_in_folders/{cls_name}/*")
    image_names=[i.split(f"{Images_root_diectory}/")[-1] for i in image_names]
    image_rows=list(zip_longest(image_names,[],fillvalue=-1))
    known_unknowns_test_list.extend(image_rows)
    
with open(f"{Output_directory}/test_knownUnknowns.csv", "w") as f:
    writer = csv.writer(f)
    writer.writerows(known_unknowns_test_list)

unknowns_test_list=[]
for cls_name in unknown_classes:
    image_names=glob(f"{Images_root_diectory}/val_in_folders/{cls_name}/*")
    image_names=[i.split(f"{Images_root_diectory}/")[-1] for i in image_names]
    image_rows=list(zip_longest(image_names,[],fillvalue=-2))
    unknowns_test_list.extend(image_rows)
with open(f"{Output_directory}/test_unknowns.csv", "w") as f:
    writer = csv.writer(f)
    writer.writerows(unknowns_test_list)