In [1]:
import os
import pandas as pd
import nltk
wn = nltk.corpus.wordnet
from digital_manuscript import BnF

In [2]:
path = os.getcwd() + '/../manuscript-object/thesaurus/animal.csv'

df = pd.read_csv(path)
terms = list(set(df['prefLabel_en']))
simple_terms = [t for t in terms if ' ' not in t]
for i in range(len(simple_terms)) :
    if simple_terms[i] in ['weasel','turtle','cuckoo','codfish','mussel','chicken','goldfinch','turtledofe','linnet', 'water-dog']: #first meaning is not the animal. Birds tend to have another bird as first meaning
        if simple_terms[i]=='turtledofe':
            simple_terms[i]='turtledove'
        elif simple_terms[i]=='water-dog':
            simple_terms[i]='water_dog'
        lit = [simple_terms[i],1]
    elif simple_terms[i] in ['ewe','swallow','crayfish','partridge','hart',"calendra",'calandra','hog']:#have to go to third meaning to get the animal or the right bird
        if simple_terms[i] in ["calendra",'calandra']:
            simple_terms[i]='lark'
        lit = [simple_terms[i],2]
    elif simple_terms[i] in ['dragon']:#the mythical creature does not have a link to animal. This is "any of several small tropical Asian lizards capable of gliding by spreading winglike membranes on each side of the body"
        lit = [simple_terms[i],3]
    else :
        lit = [simple_terms[i],0]
    simple_terms[i]=lit
simple_terms

[['hen', 0],
 ['bird', 0],
 ['cat', 0],
 ['fish', 0],
 ['goat', 0],
 ['mouse', 0],
 ['crow', 0],
 ['hart', 2],
 ['viper', 0],
 ['pig', 0],
 ['fly', 0],
 ['dormouse', 0],
 ['spider', 0],
 ['animal', 0],
 ['water_dog', 1],
 ['lizard', 0],
 ['ant', 0],
 ['petit-gri', 0],
 ['oyster', 0],
 ['grasshopper', 0],
 ['codfish', 1],
 ['butterfly', 0],
 ['lark', 0],
 ['nightingale', 0],
 ['partridge', 2],
 ['porcupine', 0],
 ['cuckoo', 1],
 ['ortolan', 0],
 ['rabbit', 0],
 ['rat', 0],
 ['bittern', 0],
 ['crayfish', 2],
 ['sheep', 0],
 ['squirrel', 0],
 ['mealworm', 0],
 ['toad', 0],
 ['beef', 0],
 ['ox', 0],
 ['cuttlefish', 0],
 ['snail', 0],
 ['mule', 0],
 ['turtle', 1],
 ['linnet', 1],
 ['swallow', 2],
 ['mussel', 1],
 ['snake', 0],
 ['verdaule', 0],
 ['bat', 0],
 ['bombicum', 0],
 ['calf', 0],
 ['pigeon', 0],
 ['aucupio', 0],
 ['chaffinch', 0],
 ['dog', 0],
 ['duck', 0],
 ['wolf', 0],
 ['shell', 0],
 ['ewe', 2],
 ['tellin', 0],
 ['crab', 0],
 ['barbel', 0],
 ['earthworm', 0],
 ['silkworm', 0],
 

In [3]:
df = pd.DataFrame(simple_terms)
df.to_csv('./simple_terms.csv',sep=',',index=False)

In [4]:
term = 'dog'
dog = wn.synsets(term, pos=wn.NOUN)
dog
# hyper = dog[0].hypernyms()
# for x in hyper:
#     print(x, x.hypernyms())

# from nltk.corpus import wordnet as wn
# for ss in wn.synsets(term):
#     print (ss, ss.hypernyms())

[Synset('dog.n.01'),
 Synset('frump.n.01'),
 Synset('dog.n.03'),
 Synset('cad.n.01'),
 Synset('frank.n.02'),
 Synset('pawl.n.01'),
 Synset('andiron.n.01')]

In [5]:
animal = wn.synset('animal.n.01')

class r_node:
    def __init__(self, lemma, children=[]):
        self.lemma = lemma
        
#         if lemma == animal:
#             print(children + [lemma,])
        
        self.children = children
        self.hypernyms = lemma.hypernyms()
        self.parents = [r_node(h, children + [lemma,]) for h in self.hypernyms]
        
    def return_animal_path(self):
        if self.lemma == animal:
            return [[self.lemma,],]
        else:
            parent_paths = [n.return_animal_path() for n in self.parents]
            parent_paths = [p for p in parent_paths if len(p) > 0]
            paths = []
            for p in parent_paths:
                paths += p
            for p in paths:
                p.append(self.lemma)
            return paths

In [6]:
dog = wn.synsets('dog', pos=wn.NOUN)[0]
dn = r_node(dog)
dn.return_animal_path()
# print([p.parents[0].lemma for p in dn.parents])

# animal


[[Synset('animal.n.01'),
  Synset('chordate.n.01'),
  Synset('vertebrate.n.01'),
  Synset('mammal.n.01'),
  Synset('placental.n.01'),
  Synset('carnivore.n.01'),
  Synset('canine.n.02'),
  Synset('dog.n.01')],
 [Synset('animal.n.01'), Synset('domestic_animal.n.01'), Synset('dog.n.01')]]

In [7]:
squirrel = wn.synsets('squirrel')[0]
sn = r_node(squirrel)
sn.return_animal_path()

[[Synset('animal.n.01'),
  Synset('chordate.n.01'),
  Synset('vertebrate.n.01'),
  Synset('mammal.n.01'),
  Synset('placental.n.01'),
  Synset('rodent.n.01'),
  Synset('squirrel.n.01')]]

In [8]:
fish = wn.synsets('fish', pos=wn.NOUN)[0]
fn = r_node(fish)
fn.return_animal_path()

[[Synset('animal.n.01'),
  Synset('chordate.n.01'),
  Synset('vertebrate.n.01'),
  Synset('aquatic_vertebrate.n.01'),
  Synset('fish.n.01')]]

In [9]:
louse = wn.synsets('louse')[0]
ln = r_node(louse)
ln.return_animal_path()

[[Synset('animal.n.01'),
  Synset('invertebrate.n.01'),
  Synset('arthropod.n.01'),
  Synset('insect.n.01'),
  Synset('louse.n.01')]]

In [10]:
oyster = wn.synsets('oyster')[0]
on = r_node(oyster)
on.return_animal_path()

[[Synset('animal.n.01'),
  Synset('invertebrate.n.01'),
  Synset('mollusk.n.01'),
  Synset('bivalve.n.01'),
  Synset('oyster.n.01')]]

In [11]:
def get_animal_paths(lemma):
    paths = []
    node = r_node(lemma)
    
    
    
    

In [12]:
squirrel = wn.synsets('squirrel')[0]
weasel = wn.synsets('weasel')[1] # index 0 is a sneaky person
squirrel.lowest_common_hypernyms(weasel)

snake = wn.synsets('snake')[0]
print(snake.lowest_common_hypernyms(weasel))
print(snake.lowest_common_hypernyms(squirrel))

turtle = wn.synsets('turtle')[1] # index 0 is a turtleneck (lmao)
print(turtle.lowest_common_hypernyms(snake))
print(turtle.lowest_common_hypernyms(squirrel))

fish = wn.synsets('fish', pos=wn.NOUN)[0] # pos = part of speech
swan = wn.synsets('swan')[0]
print(fish.lowest_common_hypernyms(turtle))
print(fish.lowest_common_hypernyms(snake))

louse = wn.synsets('louse')[0]
oyster = wn.synsets('oyster')[0]
print(louse.lowest_common_hypernyms(snake))
print(louse.lowest_common_hypernyms(oyster))

[Synset('vertebrate.n.01')]
[Synset('vertebrate.n.01')]
[Synset('reptile.n.01')]
[Synset('vertebrate.n.01')]
[Synset('vertebrate.n.01')]
[Synset('vertebrate.n.01')]
[Synset('animal.n.01')]
[Synset('invertebrate.n.01')]


In [13]:
class Node(object):

    def __init__(self, value, parent):
        self.value = value
        self.children = []
        if parent is not None:
            parent.children.append(self)

In [14]:
from print_tree import print_tree   #actually using package print-tree2 from pypi

class print_custom_tree(print_tree):

    def get_children(self, node):
        return node.children

    def get_node_str(self, node):
        return str(node.value)

In [15]:
animaln = Node('animal', None)
visited = []
skipped=0

for animals in simple_terms :
    if animals[0] not in ['bombicum','og','verdaule','tellin','petit-gri','aucupio','daot','shell','mutton','barbel','pork']:   #not in wordnet or not in tree
        ani = wn.synsets(animals[0])[animals[1]]
        an = r_node(ani)
        lis = an.return_animal_path()[0]
        for i in range (1,len(lis)):
            name = lis[i].name().split('.')[0]
            if name in ['even-toed_ungulate','odd-toed_ungulate','orthopterous_insect','chordate','placental','ungulate','thrush','oscine','leporid','lagomorph','decapod_crustacean','chordate','diapsid','anapsid','chelonian','bovid','bovine',"anseriform_bird",'wading_bird','columbiform_bird','corvine_bird','cuculiform_bird','aquatic_vertebrate','bony_fish','teleost_fish','gadoid','ganoid','annelid','oligochaete','decapod','soft-finned_fish','phasianid','ambystomid','red_deer','hominid','agamid', 'sporting_dog']: #- make everything go boom also things I don't care about
                skipped+=1
                visited.append(name)
            elif name not in visited :
                name_prec = lis[i-1-skipped].name().split('.')[0]
                skipped=0
                string = name+'n'+' = Node(name,'+ name_prec+'n)'
                exec(string)
                visited.append(name)
            else :
                skipped=0

print_custom_tree(animaln) 


                                                              ┌hen
                                        ┌domestic_fowl─chicken┤
                                        │                     └chick
                      ┌gallinaceous_bird┼game_bird─partridge
                      │                 └pigeon─dove─turtledove
                      │         ┌crow
                      │         ├new_world_oriole─meadowlark
                      │         ├nightingale
                      ├passerine┤
                      │         │     ┌bunting─ortolan
                      │         │     ├linnet
                      │         ├finch┼chaffinch
                      │         │     ├siskin
                      │         │     └goldfinch
                      │         ├swallow
                      │         └lark
                 ┌bird┼cuckoo
                 │    │            ┌heron─bittern
                 │    ├aquatic_bird┼waterfowl─duck
                 │    │            └swan


<__main__.print_custom_tree at 0x1af00b634c8>