In [1]:
# Load the GermaNet interface; `gn` is the handle the following cells query.
from pygermanet import load_germanet
gn = load_germanet()

In [13]:
# List every synset GermaNet returns for the word form 'gehen'.
gn.synsets('gehen')

[Synset(auseinandergehen.v.3),
 Synset(funktionieren.v.1),
 Synset(funktionieren.v.2),
 Synset(gehen.v.1),
 Synset(gehen.v.4),
 Synset(gehen.v.5),
 Synset(gehen.v.6),
 Synset(gehen.v.7),
 Synset(gehen.v.9),
 Synset(gehen.v.10),
 Synset(gehen.v.11),
 Synset(gehen.v.12),
 Synset(gehen.v.13),
 Synset(gehen.v.14),
 Synset(handeln.v.1)]

In [9]:
# Lemmatise the inflected form 'ginge'.
# NOTE(review): the recorded output is ['diejenigen'], which does not look
# like a lemma of 'ginge' ('gehen' would be expected) -- confirm the
# lemmatiser data before relying on this call.
gn.lemmatise(u'ginge')

['diejenigen']

In [14]:
# Fetch a single synset by its identifier string and keep it in
# `funktionieren` for the cells that inspect it; the bare name on the last
# line displays the object.
funktionieren = gn.synset(u'funktionieren.v.2')
funktionieren

Synset(funktionieren.v.2)

In [15]:
# Show the synsets stored in this synset's `hyponyms` attribute.
funktionieren.hyponyms

[Synset(vorgehen.v.1), Synset(leerlaufen.v.2)]

In [16]:
# Show all hypernym paths of 'Husky.n.1'; in the recorded output each path
# is a list of synsets that starts at GNROOT.n.1.
gn.synset('Husky.n.1').hypernym_paths

[[Synset(GNROOT.n.1),
  Synset(Entität.n.2),
  Synset(Objekt.n.4),
  Synset(Ding.n.2),
  Synset(Teil.n.2),
  Synset(Teilmenge.n.2),
  Synset(Gruppe.n.1),
  Synset(biologische Gruppe.n.1),
  Synset(Spezies.n.1),
  Synset(Rasse.n.1),
  Synset(Tierrasse.n.1),
  Synset(Hunderasse.n.1),
  Synset(Husky.n.1)],
 [Synset(GNROOT.n.1),
  Synset(Entität.n.2),
  Synset(kognitives Objekt.n.1),
  Synset(Kategorie.n.1),
  Synset(Art.n.1),
  Synset(Spezies.n.1),
  Synset(Rasse.n.1),
  Synset(Tierrasse.n.1),
  Synset(Hunderasse.n.1),
  Synset(Husky.n.1)],
 [Synset(GNROOT.n.1),
  Synset(Entität.n.2),
  Synset(Objekt.n.4),
  Synset(natürliches Objekt.n.1),
  Synset(Kreatur.n.1),
  Synset(Organismus.n.1),
  Synset(höheres Lebewesen.n.1),
  Synset(Tier.n.1),
  Synset(Gewebetier.n.1),
  Synset(Chordatier.n.1),
  Synset(Wirbeltier.n.1),
  Synset(Säugetier.n.1),
  Synset(Plazentatier.n.1),
  Synset(Raubtier.n.1),
  Synset(Landraubtier.n.1),
  Synset(hundeartiges Landraubtier.n.1),
  Synset(Hund.n.2),
  Synset(

In [17]:
# Show the lemmas attached to the `funktionieren` synset fetched earlier.
funktionieren.lemmas

[Lemma(funktionieren.v.2.funktionieren),
 Lemma(funktionieren.v.2.funzen),
 Lemma(funktionieren.v.2.gehen),
 Lemma(funktionieren.v.2.laufen),
 Lemma(funktionieren.v.2.arbeiten)]

In [18]:
# List the Lemma objects for the word form 'brennen'; the recorded output
# shows they can belong to differently named synsets (e.g. verbrennen.v.1).
gn.lemmas('brennen')

[Lemma(brennen.v.1.brennen),
 Lemma(verbrennen.v.1.brennen),
 Lemma(brennen.v.3.brennen),
 Lemma(brennen.v.4.brennen),
 Lemma(brennen.v.5.brennen),
 Lemma(destillieren.v.1.brennen),
 Lemma(brennen.v.7.brennen),
 Lemma(brennen.v.8.brennen)]

In [9]:
from pygermanet import load_germanet, Synset
from scipy.stats.stats import pearsonr
#import codecs
import numpy as np

GUR65_FILENAME = '../data/corpora/GermanRelatednessDatasets/gurevych_datasets/wortpaare65.gold.pos.txt'

def load_gurevych(filename=GUR65_FILENAME):
    """Load the Gur65 word-pair file into a NumPy record array.

    The file is colon-separated.  Its first non-blank line is the header
    (all '#' characters are stripped) and supplies the field names; every
    later non-blank line is one record: two words, a numeric gold score and
    two short string columns (presumably part-of-speech tags -- confirm
    against the data file).

    Parameters
    ----------
    filename : str, optional
        Path of the file to read.  Defaults to ``GUR65_FILENAME``.

    Returns
    -------
    numpy.recarray
        One record per data line with field dtypes U30, U30, float64, U8, U8
        and field names taken from the header line.

    Raises
    ------
    ValueError
        If the file has no header line (e.g. it is empty), or if a gold
        score cannot be parsed as a float.
    """
    header = None
    rows = []
    with open(filename, 'r') as input_file:
        for line in input_file:
            line = line.strip()
            if not line:
                # tolerate blank / trailing empty lines
                continue
            fields = line.replace('#', '').split(':')
            if header is None:
                header = fields
                continue
            # fix typo in gur65
            fields[1] = {'Reis': 'Reise'}.get(fields[1], fields[1])
            fields[2] = float(fields[2])
            # Records must be tuples: a list of lists only works through a
            # deprecated fallback in NumPy that emits a FutureWarning.
            rows.append(tuple(fields))
    if header is None:
        raise ValueError('no header line found in {0!r}'.format(filename))
    # np.rec is the public home of the record-array helpers; np.core is
    # private.
    return np.rec.array(
        rows,
        dtype=np.dtype({'formats': ['U30', 'U30', '<f8', 'U8', 'U8'],
                        'names': header}))

# Gold-standard word pairs and the GermaNet handle used for the evaluation.
gur65 = load_gurevych()
gn = load_germanet()


def pred(w1, w2):
    """Return True if both words have GermaNet synsets and neither is 'jung'.

    The adjective "jung" is excluded from the evaluation on purpose.
    """
    if not (gn.synsets(w1) and gn.synsets(w2)):
        return False
    return bool(w1 != 'jung' and w2 != 'jung')


# Report how many of the word pairs pass the filter.
n_usable_pairs = sum(1 for first, second in zip(gur65['Word1'], gur65['Word2'])
                     if pred(first, second))
print('Semantic similarity computed on {0} of {1} word pairs'.format(
    n_usable_pairs, len(gur65)))

# (label, pairwise synset measure, reducer over all sense combinations).
# The three sim_* measures are reduced with np.max and dist_jcn with np.min,
# i.e. each word pair is scored by its best-matching pair of senses.
sim_funcs = [('lch', Synset.sim_lch,  np.max),
             ('res', Synset.sim_res,  np.max),
             ('jcn', Synset.dist_jcn, np.min),
             ('lin', Synset.sim_lin,  np.max)]

# Which word pairs are usable, and what their synsets are, does not depend
# on the metric, so filter and look them up once instead of once per metric.
sense_pairs = [(gn.synsets(word1), gn.synsets(word2), human)
               for word1, word2, human, _pos1, _pos2 in gur65
               if pred(word1, word2)]

print()
print('metric   r')
print('---------------')
for sim_name, sim_func, comb_func in sim_funcs:
    # One row per word pair: [combined metric score, human judgement].
    scores = np.array([
        [comb_func(np.array([sim_func(ss1, ss2)
                             for ss1 in synsets1
                             for ss2 in synsets2])),
         human]
        for synsets1, synsets2, human in sense_pairs])
    # Pearson correlation between the metric and the human scores.
    r, _p = pearsonr(scores[:, 0], scores[:, 1])
    print('{0}      {1:.3f}'.format(sim_name, r))

  return fromrecords(obj, dtype=dtype, shape=shape, **kwds)


Semantic similarity computed on 65 of 65 word pairs

metric   r
---------------
lch      0.778
res      0.768
jcn      -0.809
lin      0.787
