In [1]:
import json
from utils import data_utils
from concept_generation import conceptset_utils
import os

In [2]:
"""
CLASS_SIM_CUTOFF: Concepts with cosine similarity higher than this to any class will be removed
OTHER_SIM_CUTOFF: Concepts with cosine similarity higher than this to another concept will be removed
MAX_LEN: max number of characters in a concept

PRINT_PROB: probability (between 0 and 1) with which each filtered concept is printed; 1 prints all of them
"""
# --- What to process --------------------------------------------------------
# Dataset whose concept set is being filtered, the model that produced the
# initial concepts, and the device string handed to the similarity filters.
dataset = "cub"
model = "gemini"
device = "cuda"

# --- Filtering thresholds ---------------------------------------------------
CLASS_SIM_CUTOFF = 0.85
OTHER_SIM_CUTOFF = 0.9
MAX_LEN = 30
PRINT_PROB = 1

# --- Paths --------------------------------------------------------------------
# Concept sets live in data/concept_sets under the parent of the current
# working directory, so the notebook must be run from a sub-folder of the repo.
dir_name = os.path.join(os.path.dirname(os.getcwd()), "data/concept_sets")
# Destination of the filtered concept list written by the last cell.
save_name = dir_name + "/{}_filtered_new.txt".format(dataset)

In [3]:
# EDIT these to use the initial concept sets you want.

def _load_init_concepts(kind):
    """Load one initial concept-set JSON file.

    Reads ``<dir_name>/<model>_init/<model>_<dataset>_<kind>.json`` using the
    ``dir_name``, ``model`` and ``dataset`` values from the config cell.

    Args:
        kind: suffix of the file to load ("important", "superclass" or "around").

    Returns:
        The parsed JSON content (the next cell iterates over its ``.values()``).
    """
    path = os.path.join(
        dir_name, "{}_init/{}_{}_{}.json".format(model, model, dataset, kind)
    )
    # Explicit UTF-8: JSON text is UTF-8, and the platform default encoding
    # (e.g. cp1252 on Windows) can fail on non-ASCII concept text.
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)


important_dict = _load_init_concepts("important")
superclass_dict = _load_init_concepts("superclass")
around_dict = _load_init_concepts("around")

# One class name per line. splitlines() is used instead of split("\n") so a
# trailing newline at the end of the label file does not produce an empty
# class name that would then be compared against every concept.
with open(
    os.path.join(os.path.dirname(os.getcwd()), data_utils.LABEL_FILES[dataset]),
    "r",
    encoding="utf-8",
) as f:
    classes = f.read().splitlines()

In [4]:
# Pool every concept string from the three initial concept dictionaries into a
# single set, so duplicates across classes and across prompt types collapse.
concepts = set()
for concept_source in (important_dict, superclass_dict, around_dict):
    for concept_list in concept_source.values():
        concepts |= set(concept_list)

# Show how many unique concepts there are, followed by the concepts themselves.
print(len(concepts))
print(concepts)

2718
{'Medium-sized body', 'Reddish iris (eye color)', 'often stays low in dense vegetation', 'Here are the superclasses for "Great_Crested_Flycatcher":', 'distinctive red eye-ring', 'distinctive streaking on the breast and flanks', 'burrows or crevices (nesting sites)', 'Drab brown/grey upperparts', 'Other Parakeet Auklets (pairs or small flocks)', 'Olive-green or yellowish-green back and head', 'sturdy, hooked bill', 'distinctive black and white head pattern (black crown, black eye stripe, white eyebrow)', 'the Arctic', 'insects (their food source)', 'a field', 'a prominent, pale eye-ring', 'water (swamp, creek, slow river)', 'a thicket', 'rusty or rufous colored breast and belly (especially in males)', 'leaf', 'Mottled brown, gray, and black plumage: Excellent camouflage for resting on the forest floor or tree branches during the day.', 'black back, wings, and head', 'wetlands', 'striking head pattern with white or yellowish cap and dark stripes', 'black streaks on flanks', 'organis

In [5]:
# Length filter: MAX_LEN is the maximum number of characters allowed in a
# concept, and PRINT_PROB controls how many of the removed concepts are printed
# (each printed line below is "<length> <concept>").
# Rebinds `concepts` to whatever the helper returns.
concepts = conceptset_utils.remove_too_long(concepts, MAX_LEN, PRINT_PROB)

35 often stays low in dense vegetation
57 Here are the superclasses for "Great_Crested_Flycatcher":
46 distinctive streaking on the breast and flanks
35 burrows or crevices (nesting sites)
46 Other Parakeet Auklets (pairs or small flocks)
44 Olive-green or yellowish-green back and head
87 distinctive black and white head pattern (black crown, black eye stripe, white eyebrow)
32 water (swamp, creek, slow river)
62 rusty or rufous colored breast and belly (especially in males)
125 Mottled brown, gray, and black plumage: Excellent camouflage for resting on the forest floor or tree branches during the day.
66 striking head pattern with white or yellowish cap and dark stripes
50 brown, streaky plumage (for females and juveniles)
50 A stocky, somewhat plump body shape for a warbler.
64 A somewhat plain head, sometimes with a weak pale eyebrow stripe
101 Here are things most commonly seen around a "Sayornis" (a genus of birds, specifically Phoebe birds):
32 Predominantly white body plumage
72

In [6]:
# Class-similarity filter: concepts whose cosine similarity to any class name
# in `classes` is higher than CLASS_SIM_CUTOFF are removed, so a concept cannot
# simply restate a class label.
# `device` is passed through to the helper - presumably where the similarity
# model runs; confirm in conceptset_utils.
concepts = conceptset_utils.filter_too_similar_to_cls(concepts, classes, CLASS_SIM_CUTOFF, device, PRINT_PROB)

1500
1500
Class:Least Auklet - Concept:auklet, sim:0.906 - Deleting auklet

Class:Brandt Cormorant - Concept:Cormorant, sim:0.850 - Deleting Cormorant

Class:Brandt Cormorant - Concept:cormorant, sim:0.850 - Deleting cormorant

Class:American Crow - Concept:crow, sim:0.882 - Deleting crow

Class:Acadian Flycatcher - Concept:Medium-sized flycatcher, sim:0.852 - Deleting Medium-sized flycatcher

Class:Least Flycatcher - Concept:flycatcher, sim:0.912 - Deleting flycatcher

Class:Least Flycatcher - Concept:large size for a flycatcher, sim:0.863 - Deleting large size for a flycatcher

Class:Frigatebird - Concept:other frigatebirds, sim:0.886 - Deleting other frigatebirds

Class:California Gull - Concept:medium-sized gull, sim:0.866 - Deleting medium-sized gull

Class:Western Gull - Concept:Large gull size, sim:0.858 - Deleting Large gull size

Class:Western Gull - Concept:Large size for a gull, sim:0.851 - Deleting Large size for a gull

Class:Western Gull - Concept:large size for a gull, s

In [7]:
# Near-duplicate filter: when two concepts have cosine similarity higher than
# OTHER_SIM_CUTOFF, one of the pair is deleted (the printed log shows which);
# the choice of which one to keep is made inside the helper.
concepts = conceptset_utils.filter_too_similar(concepts, OTHER_SIM_CUTOFF, device, PRINT_PROB)

A dark eye - dark eye , sim:0.9460 - Deleting dark eye
A dark eye - dark eyes , sim:0.9007 - Deleting dark eyes
A garden - Gardens , sim:0.9099 - Deleting A garden
A long, pointed, dark beak - A long, thin, straight beak , sim:0.9223 - Deleting A long, thin, straight beak
A long, pointed, dark beak - Pointed beak , sim:0.9089 - Deleting Pointed beak
A long, pointed, dark beak - a dark beak , sim:0.9384 - Deleting A long, pointed, dark beak
A long, thin, serrated bill - a long, thin, serrated bill , sim:0.9990 - Deleting a long, thin, serrated bill
A noticeable, pale eye-ring - A prominent pale eye-ring , sim:0.9566 - Deleting A prominent pale eye-ring
A noticeable, pale eye-ring - A subtle pale eye-ring. , sim:0.9525 - Deleting A subtle pale eye-ring.
A noticeable, pale eye-ring - a pale eye-ring , sim:0.9583 - Deleting a pale eye-ring
A noticeable, pale eye-ring - a prominent, pale eye-ring , sim:0.9708 - Deleting a prominent, pale eye-ring
A noticeable, pale eye-ring - a subtle pale 

In [8]:
# Save the filtered concepts to `save_name`, one per line, with no trailing
# newline after the last concept.
# "\n".join writes exactly what the manual first-element-plus-loop version did,
# and also copes with an empty result: indexing concepts[0] used to raise
# IndexError after the output file had already been truncated.
# The encoding is explicit so the file does not depend on the platform default.
with open(save_name, "w", encoding="utf-8") as f:
    f.write("\n".join(concepts))