In [54]:
from nltk.corpus import wordnet as wn
import networkx as nx
import matplotlib.pyplot as plt
import numpy

In [55]:
def closure_graph(synset, fn):
    """Build the directed graph reachable from ``synset`` by repeatedly applying ``fn``.

    ``fn`` is called once for every distinct item reached and must return an
    iterable of related items (callers in this file pass
    ``lambda s: s.hypernyms()``).  For each (item, related) pair an edge
    ``item.name -> related.name`` is added.

    Note: nodes are keyed by the ``name`` attribute exactly as found on the
    item; it is not called here.  Other cells in this file read node labels
    through ``node.im_self.name()``, so they depend on that.

    Returns the populated ``nx.DiGraph``.
    """
    visited = set()
    digraph = nx.DiGraph()

    def _expand(current):
        # Guard clause: every item is expanded at most once, so shared
        # ancestors are not walked repeatedly.
        if current in visited:
            return
        visited.add(current)
        digraph.add_node(current.name)
        for related in fn(current):
            digraph.add_node(related.name)
            digraph.add_edge(current.name, related.name)
            _expand(related)

    _expand(synset)
    return digraph

#label = 'kit_fox'

#graph = closure_graph(wn.synset(label + '.n.01'), lambda s: s.hypernyms())
#nx.draw_graphviz(graph, with_labels = True)
#plt.show()
#plt.savefig("plot.png")

In [56]:
# Load the class labels, one per line, from labels.txt.
# The context manager closes the file handle (it used to stay open), and only
# a trailing newline is stripped, so a final line that lacks one is no longer
# truncated by a blind `[:-1]`.
with open('./labels.txt', 'r') as f:
    labels = [line.rstrip('\n') for line in f]

labels = numpy.array(labels)

In [57]:
print 'Number of labels : ', len(labels)

Number of labels :  1000


In [58]:
# For every label, count the synsets WordNet returns for it
# (no part-of-speech filter is passed to wn.synsets).
number_of_syns = numpy.array([len(wn.synsets(label_text)) for label_text in labels])

In [59]:
max_number_of_syns = numpy.max(number_of_syns)
min_number_of_syns = numpy.min(number_of_syns)

print 'Maximum number of syns in the labels :', max_number_of_syns
print 'Minimum number of syns in the labels :', min_number_of_syns

plt.hist(number_of_syns, 
         bins = max_number_of_syns - min_number_of_syns + 1, 
         range = (min_number_of_syns, max_number_of_syns + 1), 
         align = 'left')

plt.show()

Maximum number of syns in the labels : 29
Minimum number of syns in the labels : 1


In [60]:
one_syn_indexes = numpy.where(number_of_syns == 1)[0]
multi_syn_indexes = numpy.where(number_of_syns != 1)[0]

print 'Number of single syns :', len(one_syn_indexes)
print 'Number of multi syns  :', len(multi_syn_indexes)

Number of single syns : 665
Number of multi syns  : 335


# First attempt at picking one synset per label.  The three result names
# (`graphs`, `definitions`, `which_synset`) are assigned again by a later cell
# in this file.
#
# Every slot starts as None so that labels for which no sense is accepted
# below keep a None placeholder, exactly as before.
graphs = [None] * len(labels)
definitions = [None] * len(labels)
which_synset = [None] * len(labels)

hypernyms_of = lambda s: s.hypernyms()

# Labels with exactly one synset: use noun sense 1.  Iterating the index array
# directly avoids the former `i in one_syn_indexes` scan of a numpy array on
# every label, and each synset is now looked up once instead of twice.
for i in one_syn_indexes:
    sense = wn.synset(labels[i] + '.n.01')
    graphs[i] = closure_graph(sense, hypernyms_of)
    definitions[i] = sense.definition()
    which_synset[i] = 1

# Ambiguous labels: try noun senses 1..4 in order and keep the first one whose
# hypernym closure has more than 5 nodes.
# NOTE(review): wn.synset presumably raises when the requested sense number
# does not exist for a label -- confirm before relying on this cell.
for i in multi_syn_indexes:
    for k in xrange(1, 5):
        sense = wn.synset(labels[i] + '.n.0' + str(k))
        g = closure_graph(sense, hypernyms_of)

        if len(g.nodes()) > 5:
            graphs[i] = g
            definitions[i] = sense.definition()
            which_synset[i] = k
            break

graphs = numpy.array(graphs)
definitions = numpy.array(definitions)
which_synset = numpy.array(which_synset)

In [61]:
# Labels with exactly one synset: that synset is the only candidate, so take
# element 0 of wn.synsets(...) for each of them.
only_senses = [wn.synsets(labels[i])[0] for i in one_syn_indexes]

# Hypernym closure graph, gloss and synset name, in one_syn_indexes order.
graphs_one_syn = numpy.array([closure_graph(sense, lambda s: s.hypernyms())
                              for sense in only_senses])
definitions_one_syn = numpy.array([sense.definition() for sense in only_senses])
which_synset_one_syn = numpy.array([sense.name() for sense in only_senses])

In [62]:
# Labels with several synsets: for each one pick the first noun synset (in
# WordNet order) that is named after the label itself and whose hypernym
# closure contains 'physical_entity.n.01'.
#
# The result lists must end up with exactly one entry per element of
# multi_syn_indexes, because the merge cell walks them in lock-step with that
# index array.  Previously a label with no qualifying sense was skipped
# silently, which shifted every later entry onto the wrong label; now it fails
# loudly instead.
#
# (An alternative heuristic that was tried earlier -- choosing the sense whose
# closure has the most nodes -- is not used.)
graphs_multi_syn = []
definitions_multi_syn = []
which_synset_multi_syn = []

for i in multi_syn_indexes:
    label = labels[i].lower()

    # Candidate senses: synsets whose own name starts with '<label>.n'.
    candidates = [one_synset for one_synset in wn.synsets(label)
                  if one_synset.name().startswith(label + '.n')]

    chosen = None
    for one_synset in candidates:
        g = closure_graph(one_synset, lambda s: s.hypernyms())
        # closure_graph stores the bound `name` method as the node, so the
        # synset is reached through the method's owner.  `__self__` is the
        # same object as `im_self` on Python 2.6+.
        node_names_of_g = [nog.__self__.name() for nog in g.nodes()]

        if u'physical_entity.n.01' in node_names_of_g:
            chosen = (g, one_synset)
            break

    if chosen is None:
        raise ValueError('no noun synset of %r reaches physical_entity.n.01' % label)

    g, one_synset = chosen
    graphs_multi_syn.append(g)
    definitions_multi_syn.append(one_synset.definition())
    which_synset_multi_syn.append(one_synset.name())

graphs_multi_syn = numpy.array(graphs_multi_syn)
definitions_multi_syn = numpy.array(definitions_multi_syn)
which_synset_multi_syn = numpy.array(which_synset_multi_syn)

In [63]:
# Interleave the single-synset and multi-synset results back into label order.
#
# The per-group results are consumed positionally, so they must line up with
# their index arrays.  Check that up front: a shorter result list would
# otherwise attach graphs to the wrong labels before failing near the end.
if len(graphs_one_syn) != len(one_syn_indexes):
    raise ValueError('graphs_one_syn is not aligned with one_syn_indexes')
if len(graphs_multi_syn) != len(multi_syn_indexes):
    raise ValueError('graphs_multi_syn is not aligned with multi_syn_indexes')

graphs = []
definitions = []
which_synset = []

# Sets give constant-time membership; `i in <numpy array>` scanned the whole
# array for every label.
one_syn_lookup = set(one_syn_indexes)
multi_syn_lookup = set(multi_syn_indexes)

# Cursors into the per-group result arrays.
onek = 0
multik = 0

for i in xrange(len(labels)):

    if i in one_syn_lookup:
        graphs.append(graphs_one_syn[onek])
        definitions.append(definitions_one_syn[onek])
        which_synset.append(which_synset_one_syn[onek])
        onek += 1

    elif i in multi_syn_lookup:
        graphs.append(graphs_multi_syn[multik])
        definitions.append(definitions_multi_syn[multik])
        which_synset.append(which_synset_multi_syn[multik])
        multik += 1

graphs = numpy.array(graphs)
definitions = numpy.array(definitions)
which_synset = numpy.array(which_synset)

In [64]:
# The chosen synset names end in '.<sense number>'; keep that number as an int.
selected_synset_no = numpy.array([int(synset_name.rsplit('.', 1)[-1])
                                  for synset_name in which_synset])

In [65]:
max_selected_synset = numpy.max(selected_synset_no)
min_selected_synset = numpy.min(selected_synset_no)

print 'Max selected synset :', max_selected_synset
print 'Min selected synset :', min_selected_synset

plt.hist(selected_synset_no, 
         bins = max_selected_synset - min_selected_synset + 1, 
         range = (min_selected_synset, max_selected_synset + 1), 
         align = 'left')

plt.show()

Max selected synset : 12
Min selected synset : 1


In [66]:
any_graph_length_1 = False
any_none_in_graphs = False

for g in graphs:
    if g == None:
        any_none_in_graphs = True
    elif len(g.nodes()) <= 1:
        any_graph_length_1 = True


print 'Any None     :', any_none_in_graphs
print 'Any Length 1 :', any_graph_length_1

Any None     : False
Any Length 1 : False


In [67]:
# Size of every label's hypernym graph: node count and edge count.
number_of_nodes = numpy.array([len(g.nodes()) for g in graphs])
number_of_edges = numpy.array([len(g.edges()) for g in graphs])

In [68]:
min_number_of_nodes = numpy.min(number_of_nodes)
max_number_of_nodes = numpy.max(number_of_nodes)

print 'Max number of nodes :', max_number_of_nodes
print 'Min number of nodes :', min_number_of_nodes

plt.hist(number_of_nodes, 
         bins = max_number_of_nodes - min_number_of_nodes + 1, 
         range = (min_number_of_nodes, max_number_of_nodes + 1), 
         align = 'left')

plt.show()

Max number of nodes : 20
Min number of nodes : 5


In [69]:
# For every graph, collect the lemma part (the text before the first '.') of
# each node's synset name.
#
# The outer container is an explicit object array with one row per graph.
# Rows have different lengths, and letting numpy.array() infer a shape from a
# ragged list is fragile: it yields a 2-D array when all rows happen to be
# equally long, and newer NumPy releases reject ragged input.  Indexing
# `node_names[i]` gives the row's 1-D array either way.
node_names = numpy.empty(len(graphs), dtype=object)

for graph_index, g in enumerate(graphs):
    # Iterate the nodes directly; `g.nodes()[i]` called g.nodes() again for
    # every node and relied on positional indexing of the result.
    # closure_graph stores the bound `name` method as the node, so the synset
    # is the method's owner (`__self__`, same object as `im_self` on 2.6+).
    node_names_g = numpy.array([node.__self__.name().split('.')[0] for node in g.nodes()])
    node_names[graph_index] = node_names_g

In [70]:
# Per label: does its hypernym graph contain an 'entity' node, and does it
# contain a 'physical_entity' node?
entity_exists = numpy.array([any(names == u'entity') for names in node_names])
physical_entity_exists = numpy.array([any(names == u'physical_entity') for names in node_names])

In [71]:
for i in numpy.where(physical_entity_exists == False)[0]:
    print labels[i]
    print node_names[i]
    print '\n'

traffic_light
[u'signal' u'visual_signal' u'abstraction' u'entity' u'communication'
 u'traffic_light' u'light']


street_sign
[u'abstraction' u'sign' u'communication' u'street_sign' u'entity']




In [72]:
print 'Entity does not exists in nodes          :', numpy.where(entity_exists == False)[0]
print 'Physical Entity does not exists in nodes :', numpy.where(physical_entity_exists == False)[0]

Entity does not exists in nodes          : []
Physical Entity does not exists in nodes : [860 931]


In [126]:
analyze =  numpy.where(number_of_nodes <= 5)[0]

for i in analyze:
    print labels[i]
    print node_names[i]
    print '\n'

cliff
[u'geological_formation' u'object' u'entity' u'physical_entity' u'cliff']


chain
[u'unit' u'thing' u'entity' u'physical_entity' u'chain']


street_sign
[u'abstraction' u'sign' u'communication' u'street_sign' u'entity']




In [127]:
for i in xrange(500, 502):
    print labels[i]
    print node_names[i]
    #nx.draw_graphviz(graphs[i], with_labels = True)
    #plt.show()
    print '\n'

tailed_frog
[u'chordate' u'amphibian' u'physical_entity' u'animal' u'tailed_frog'
 u'vertebrate' u'entity' u'object' u'living_thing' u'organism' u'frog'
 u'whole']


whistle
[u'physical_entity' u'artifact' u'device' u'entity' u'wind_instrument'
 u'musical_instrument' u'object' u'instrumentality' u'whole' u'whistle']




# Draw the hypernym graph of the label at position 22, with node labels shown.
nx.draw_graphviz(graphs[22], with_labels = True)
plt.show()

# TREE

## After physical entity node - l2

In [122]:
object_indexes = set([i for i in xrange(len(labels)) if any(node_names[i] == u'object')])
thing_indexes = set([i for i in xrange(len(labels)) if any(node_names[i] == u'thing')])
substance_indexes = set([i for i in xrange(len(labels)) if any(node_names[i] == u'substance')])
process_indexes = set([i for i in xrange(len(labels)) if any(node_names[i] == u'process')])

print 'Object    :', len(object_indexes)
print 'Thing     :', len(thing_indexes)
print 'Substance :', len(substance_indexes)
print 'Process   :', len(process_indexes)

print '\nTotal     :', len(object_indexes) + len(thing_indexes) + len(substance_indexes) + len(process_indexes)

print '\no0 :', len(object_indexes.intersection(thing_indexes))
print 'o1 :', len(object_indexes.intersection(substance_indexes))
print 'o2 :', len(object_indexes.intersection(process_indexes))

print 't0 :', len(thing_indexes.intersection(substance_indexes))
print 't1 :', len(thing_indexes.intersection(process_indexes))

print 's0 :', len(substance_indexes.intersection(process_indexes))

covered = object_indexes.union(thing_indexes).union(substance_indexes).union(process_indexes)
uncovered = set(range(1000)) - covered

for i in list(uncovered):
    print '\n'
    print labels[i]
    print node_names[i]
    print '\n'
    
print '------------------'
print 'Covered    :', len(covered)
print 'Uncovered   :', len(uncovered)

Object    : 943
Thing     : 9
Substance : 36
Process   : 1

Total     : 989

o0 : 0
o1 : 1
o2 : 0
t0 : 0
t1 : 0
s0 : 0


eel
[u'fish' u'food' u'physical_entity' u'entity' u'solid' u'matter' u'eel']




street_sign
[u'abstraction' u'sign' u'communication' u'street_sign' u'entity']




Dungeness_crab
[u'shellfish' u'food' u'physical_entity' u'dungeness_crab' u'crab' u'solid'
 u'matter' u'seafood' u'entity']




sorrel
[u'vegetable' u'food' u'physical_entity' u'entity' u'solid' u'matter'
 u'greens' u'produce' u'sorrel']




American_lobster
[u'shellfish' u'food' u'physical_entity' u'lobster' u'entity' u'solid'
 u'matter' u'seafood' u'american_lobster']




spiny_lobster
[u'shellfish' u'food' u'physical_entity' u'spiny_lobster' u'entity'
 u'solid' u'matter' u'seafood']




crayfish
[u'shellfish' u'food' u'physical_entity' u'crayfish' u'entity' u'solid'
 u'matter' u'seafood']




racer
[u'physical_entity' u'operator' u'entity' u'racer' u'causal_agent'
 u'driver']




quail
[u'physical_entit

In [125]:
instrumentation_indexes = set([i for i in xrange(len(labels)) if any(node_names[i] == u'instrumentality')])

print len(instrumentation_indexes)

338


In [23]:
#entity_indexes = numpy.array([i for i in xrange(len(labels)) if any(node_names[i] == u'entity')])
#object_indexes = numpy.array([i for i in xrange(len(labels)) if any(node_names[i] == u'object')])

# instrumentality and covering overlapped (for the 5 graphs that form a loop), so their superclass, artifact, was used instead.

In [24]:
def _category_indexes(node_name):
    """Return the positions of the labels whose node names include ``node_name``.

    The result is always an integer array.  Without an explicit dtype an empty
    match would become a float64 array, which cannot be used to index
    ``labels``.
    """
    return numpy.array([i for i in xrange(len(labels)) if any(node_names[i] == node_name)],
                       dtype=int)


# One index array per top-level category.
animal_indexes = _category_indexes(u'animal')
plant_indexes = _category_indexes(u'plant')
food_indexes = _category_indexes(u'food')
artifact_indexes = _category_indexes(u'artifact')
person_indexes = _category_indexes(u'person')
fungus_indexes = _category_indexes(u'fungus')

In [25]:
#dog_indexes = numpy.array([i for i in xrange(len(labels)) if any(node_names[i] == u'dog')])
#cat_indexes = numpy.array([i for i in xrange(len(labels)) if any(node_names[i] == u'cat')])

In [26]:
# Set versions of the six category index arrays, used for the overlap checks.
# (The entity/object and dog/cat variants are disabled along with their index
# arrays.)
(animal_indexes_set,
 plant_indexes_set,
 food_indexes_set,
 artifact_indexes_set,
 person_indexes_set,
 fungus_indexes_set) = [set(index_array) for index_array in (animal_indexes,
                                                             plant_indexes,
                                                             food_indexes,
                                                             artifact_indexes,
                                                             person_indexes,
                                                             fungus_indexes)]

In [27]:
#print len(entity_indexes)
#print len(object_indexes)
#print '---'
print 'animal    :', len(animal_indexes)
print 'plant     :', len(plant_indexes)
print 'food      :', len(food_indexes)
print 'artifact  :', len(artifact_indexes)
print 'person    :', len(person_indexes)
print 'fungus    :', len(fungus_indexes)

print 'Total :', len(animal_indexes) + len(plant_indexes) + len(food_indexes) + \
                 len(artifact_indexes) + len(person_indexes) + len(fungus_indexes)
#print '---'
#print len(dog_indexes)
#print len(cat_indexes)

print '------------------'

#print len(entity_indexes) == len(entity_indexes_set)
#print len(object_indexes) == len(object_indexes_set)
#print '---'
print len(animal_indexes) == len(animal_indexes_set)
print len(plant_indexes) == len(plant_indexes_set)
print len(food_indexes) == len(food_indexes_set)
print len(artifact_indexes) == len(artifact_indexes_set)
print len(person_indexes) == len(person_indexes_set)
print len(fungus_indexes) == len(fungus_indexes_set)

#print '---'
#print len(dog_indexes_set) == len(dog_indexes)
#print len(cat_indexes_set) == len(cat_indexes)

print '-----------------'

print 'a0 :', animal_indexes_set.intersection(plant_indexes_set)
print 'a1 :', animal_indexes_set.intersection(food_indexes_set)
print 'a2 :', animal_indexes_set.intersection(artifact_indexes_set)
print 'a3 :', animal_indexes_set.intersection(person_indexes_set)
print 'a4 :', animal_indexes_set.intersection(fungus_indexes_set)

print 'p0 :', plant_indexes_set.intersection(food_indexes_set)
print 'p1 :', plant_indexes_set.intersection(artifact_indexes_set)
print 'p2 :', plant_indexes_set.intersection(person_indexes_set)
print 'p3 :', plant_indexes_set.intersection(fungus_indexes_set)

print 'f0 :', food_indexes_set.intersection(artifact_indexes_set)
print 'f1 :', food_indexes_set.intersection(person_indexes_set)
print 'f2 :', food_indexes_set.intersection(fungus_indexes_set)

print 'i0 :', artifact_indexes_set.intersection(person_indexes_set)
print 'i1 :', artifact_indexes_set.intersection(fungus_indexes_set)

print 'pe0 :', person_indexes_set.intersection(fungus_indexes_set)

#print animal_indexes_set.intersection(dog_indexes_set) == dog_indexes_set
#print animal_indexes_set.intersection(cat_indexes_set) == cat_indexes_set

#print dog_indexes_set.intersection(cat_indexes_set)

animal    : 365
plant     : 23
food      : 39
artifact  : 508
person    : 19
fungus    : 8
Total : 962
------------------
True
True
True
True
True
True
-----------------
a0 : set([])
a1 : set([])
a2 : set([])
a3 : set([])
a4 : set([])
p0 : set([])
p1 : set([696])
p2 : set([])
p3 : set([])
f0 : set([])
f1 : set([])
f2 : set([])
i0 : set([])
i1 : set([])
pe0 : set([])


In [28]:
print '\nNumber of animal labels : ', len(animal_indexes), '\n'
print labels[animal_indexes]

print '\nNumber of plant labels : ', len(plant_indexes), '\n'
print labels[plant_indexes]

print '\nNumber of food labels : ', len(food_indexes), '\n'
print labels[food_indexes]

print '\nNumber of instrumentation labels : ', len(artifact_indexes), '\n'
print labels[artifact_indexes]

print '\nNumber of person labels : ', len(person_indexes), '\n'
print labels[person_indexes]

print '\nNumber of fungus labels : ', len(fungus_indexes), '\n'
print labels[fungus_indexes]


Number of animal labels :  365 

['kit_fox' 'English_setter' 'Siberian_husky' 'Australian_terrier'
 'English_springer' 'grey_whale' 'lesser_panda' 'Egyptian_cat' 'ibex'
 'Persian_cat' 'cougar' 'gazelle' 'porcupine' 'sea_lion' 'malamute'
 'badger' 'Great_Dane' 'Walker_hound' 'Welsh_springer_spaniel' 'whippet'
 'Scottish_deerhound' 'killer_whale' 'African_elephant' 'Weimaraner'
 'soft-coated_wheaten_terrier' 'Dandie_Dinmont' 'red_wolf'
 'Old_English_sheepdog' 'jaguar' 'otterhound' 'bloodhound' 'Airedale'
 'hyena' 'meerkat' 'giant_schnauzer' 'three-toed_sloth'
 'black-footed_ferret' 'black-and-tan_coonhound' 'papillon' 'skunk'
 'Staffordshire_bullterrier' 'Mexican_hairless' 'Bouvier_des_Flandres'
 'miniature_poodle' 'malinois' 'bighorn' 'fox_squirrel' 'colobus'
 'tiger_cat' 'Lhasa' 'impala' 'coyote' 'Yorkshire_terrier' 'Newfoundland'
 'brown_bear' 'Norwegian_elkhound' 'Rottweiler' 'hartebeest' 'Saluki'
 'grey_fox' 'schipperke' 'Pekinese' 'Brabancon_griffon'
 'West_Highland_white_terrier'

In [29]:
#print '\nNumber of dog labels : ', len(dog_indexes), '\n'
#print labels[dog_indexes]

#print '\nNumber of cat labels : ', len(cat_indexes), '\n'
#print labels[cat_indexes]

In [30]:
# Label positions that belong to at least one of the six categories.
covered = animal_indexes_set.union(plant_indexes_set,
                                   food_indexes_set,
                                   artifact_indexes_set,
                                   person_indexes_set,
                                   fungus_indexes_set)