# FreeLing

* *Página web:* http://nlp.cs.upc.edu/freeling/
* *Consejos de instalación:* https://medium.com/@cristhian.fuertes/installation-of-freeling-with-python-7407797f5afd#.r0meg9dg0
* *Lista de clases:* http://nlp.lsi.upc.edu/freeling/doc/refman/annotated.html
* *Tagset:* http://nlp.lsi.upc.edu/freeling-old/doc/tagsets/tagset-es.html

In [24]:
#! /usr/bin/python3



import freeling
import sys

## ------------  output a parse tree ------------
def printTree(ptree, depth):
    """Recursively print the parse tree rooted at ``ptree`` to stdout.

    Every node is written on its own line, indented two spaces per level.
    A node whose info reports ``is_head()`` is prefixed with ``+``.
    Leaves are rendered as ``(form lemma tag)``; inner nodes as
    ``label_[`` followed by their children and a closing ``]`` line.

    Args:
        ptree: tree object whose ``begin()`` yields the root node; the node
            must offer ``get_info()``, ``num_children()`` and
            ``nth_child_ref(i)``.
        depth: nesting level of ``ptree``, used only for indentation.

    Returns:
        None.
    """
    root = ptree.begin()
    indent = ' ' * (depth * 2)

    root_info = root.get_info()
    head_mark = '+' if root_info.is_head() else ''
    child_count = root.num_children()

    # Leaf: the whole node fits on a single line.
    if child_count == 0:
        word = root_info.get_word()
        print('{0}{1}({2} {3} {4})'.format(
            indent, head_mark,
            word.get_form(), word.get_lemma(), word.get_tag()))
        return

    # Inner node: opening line, children one level deeper, closing bracket.
    print('{0}{1}{2}_['.format(indent, head_mark, root_info.get_label()))
    for position in range(child_count):
        printTree(root.nth_child_ref(position), depth + 1)
    print(indent + ']')

## ------------  output a dependency tree ------------
def printDepTree(dtree, depth):
    """Recursively print the dependency tree rooted at ``dtree`` to stdout.

    Each node is written as ``<link label>/<node label>/(form lemma tag)``,
    indented two spaces per level. When the node has children they are
    wrapped in ``[`` ... ``]``: children whose info is not a chunk are
    printed first, in their stored order, followed by the chunk children
    sorted by ``get_chunk_ord()``.

    Args:
        dtree: tree object whose ``begin()`` yields the root node; the node
            must offer ``get_info()``, ``num_children()`` and
            ``nth_child_ref(i)``.
        depth: nesting level of ``dtree``, used only for indentation.

    Returns:
        None.
    """
    node = dtree.begin()
    indent = ''.rjust(depth * 2)

    info = node.get_info()
    word = info.get_word()
    print('{0}{1}/{2}/({3} {4} {5})'.format(
        indent,
        info.get_link().get_info().get_label(), info.get_label(),
        word.get_form(), word.get_lemma(), word.get_tag()), end='')

    nch = node.num_children()
    if nch > 0:
        print(' [')

        # Single pass over the children: non-chunks are printed right away
        # (keeping their stored order), chunks are set aside keyed by their
        # ordinal so they can be emitted in sorted order afterwards.
        chunks = {}
        for i in range(nch):
            child = node.nth_child_ref(i)
            child_info = child.begin().get_info()
            if child_info.is_chunk():
                chunks[child_info.get_chunk_ord()] = child
            else:
                printDepTree(child, depth + 1)

        for order in sorted(chunks):
            printDepTree(chunks[order], depth + 1)

        print(indent + ']', end='')

    print('')





## Modify this line to be your FreeLing installation directory
FREELINGDIR = "/usr/local"

# Language data files are looked up under <FREELINGDIR>/share/freeling/
DATA = FREELINGDIR + "/share/freeling/"
LANG = "es"

freeling.util_init_locale("default")

# create language identifier
la = freeling.lang_ident(DATA + "common/lang_ident/ident.dat")

# create options set for maco analyzer. Default values are OK, except for data files.
# LANG is used here (instead of a second hard-coded "es") so that changing LANG
# above switches the options and the data files together.
op = freeling.maco_options(LANG)
# The first and fifth data-file arguments are deliberately passed as empty strings.
op.set_data_files("",
                  DATA + "common/punct.dat",
                  DATA + LANG + "/dicc.src",
                  DATA + LANG + "/afixos.dat",
                  "",
                  DATA + LANG + "/locucions.dat",
                  DATA + LANG + "/np.dat",
                  DATA + LANG + "/quantities.dat",
                  DATA + LANG + "/probabilitats.dat")

# create analyzers
tk = freeling.tokenizer(DATA + LANG + "/tokenizer.dat")
sp = freeling.splitter(DATA + LANG + "/splitter.dat")
# splitter session handle; it is passed to every sp.split() call in this notebook
sid = sp.open_session()
mf = freeling.maco(op)

# activate morpho modules to be used in next call
mf.set_active_options(False, True, True, True,   # select which among created
                      True, True, False, True,   # submodules are to be used.
                      True, True, True, True)    # default: all created submodules are used

# create tagger, sense annotator, and parsers
tg = freeling.hmm_tagger(DATA + LANG + "/tagger.dat", True, 2)
sen = freeling.senses(DATA + LANG + "/senses.dat")
parser = freeling.chart_parser(DATA + LANG + "/chunker/grammar-chunk.dat")
# the dependency parser is given the chart parser's start symbol
dep = freeling.dep_txala(DATA + LANG + "/dep_txala/dependences.dat", parser.get_start_symbol())

In [25]:
# Text to analyze (two Spanish sentences); every later cell works on this string

text = "El perro ha saltado la valla. Estoy preocupada, quizás se pierda o ataque a alguien."

In [26]:
# Split the text into tokens and print one word form per line

l = tk.tokenize(text)
for token in l:
    print(token.get_form())
print()

El
perro
ha
saltado
la
valla
.
Estoy
preocupada
,
quizás
se
pierda
o
ataque
a
alguien
.



In [27]:
# Split the token list into sentences and print the words of each one

ls = sp.split(sid, l, False)
for idx, s in enumerate(ls, start=1):
    print('***sentence {0}***'.format(idx))
    for w in s.get_words():
        print(w.get_form())
    print()
print()

***sentence 1***
El
perro
ha
saltado
la
valla
.

***sentence 2***
Estoy
preocupada
,
quizás
se
pierda
o
ataque
a
alguien
.




In [28]:
# Morphological analysis
# (this step is supposed to return every possible lemma/tag option for each
#  word, but the original author could not find a way to display them all)
# NOTE(review): only the single lemma/tag returned by get_lemma()/get_tag()
# is printed below; the `word` class presumably exposes an accessor for the
# complete analysis list - confirm against the FreeLing class reference.

ls = mf.analyze(ls)
for idx, s in enumerate(ls, start=1):
    print('***sentence {0}***'.format(idx))
    for w in s.get_words():
        print(" ".join([w.get_form(), w.get_lemma(), w.get_tag()]))
    print()
print()

***sentence 1***
El el DA0MS0
perro perro NCMS000
ha haber VAIP3S0
saltado saltar VMP00SM
la el DA0FS0
valla valla NCFS000
. . Fp

***sentence 2***
Estoy estar VMIP1S0
preocupada preocupar VMP00SF
, , Fc
quizás quizás RG
se se P00CN00
pierda perder VMM03S0
o o CC
ataque ataque NCMS000
a a SP
alguien alguien PI0CS00
. . Fp




In [29]:
# POS tagging
# Disambiguates, i.e. selects the correct lemma and tag according to context

ls = tg.analyze(ls)
for idx, s in enumerate(ls, start=1):
    print('***sentence {0}***'.format(idx))
    for w in s.get_words():
        print(" ".join([w.get_form(), w.get_lemma(), w.get_tag()]))
    print()
print()

***sentence 1***
El el DA0MS0
perro perro NCMS000
ha haber VAIP3S0
saltado saltar VMP00SM
la el DA0FS0
valla valla NCFS000
. . Fp

***sentence 2***
Estoy estar VMIP1S0
preocupada preocupar VMP00SF
, , Fc
quizás quizás RG
se se P00CN00
pierda perder VMSP3S0
o o CC
ataque atacar VMSP3S0
a a SP
alguien alguien PI0CS00
. . Fp




In [30]:
# Senses: annotate each word and print its sense string after form, lemma and tag

ls = sen.analyze(ls)
for idx, s in enumerate(ls, start=1):
    print('***sentence {0}***'.format(idx))
    for w in s.get_words():
        # the sense string may be empty, which leaves a trailing space on the line
        print(" ".join([w.get_form(), w.get_lemma(), w.get_tag(), w.get_senses_string()]))
    print()
print()

***sentence 1***
El el DA0MS0 
perro perro NCMS000 02084071-n:0/10539715-n:0
ha haber VAIP3S0 02603699-v:0/02655135-v:0
saltado saltar VMP00SM 00256369-v:0/00616498-v:0/01236941-v:0/01892608-v:0/01910373-v:0/01963942-v:0/01965654-v:0/02081946-v:0/02094922-v:0/02095060-v:0/02095211-v:0
la el DA0FS0 
valla valla NCFS000 
. . Fp 

***sentence 2***
Estoy estar VMIP1S0 02655135-v:0/02729963-v:0
preocupada preocupar VMP00SF 01765908-v:0/01767163-v:0/01783394-v:0/02678438-v:0
, , Fc 
quizás quizás RG 
se se P00CN00 
pierda perder VMSP3S0 00059769-v:0/01099592-v:0/01113806-v:0/02022659-v:0/02127853-v:0/02197091-v:0/02287618-v:0/02287789-v:0/02288155-v:0/02288828-v:0/02303331-v:0
o o CC 
ataque atacar VMSP3S0 00019792-v:0/00862683-v:0/01118449-v:0/01119169-v:0/01120069-v:0
a a SP 
alguien alguien PI0CS00 
. . Fp 




In [31]:
# Syntactic analysis: run the chart parser and print each sentence's parse tree

ls = parser.analyze(ls)
for sentence in ls:
    printTree(sentence.get_parse_tree(), 0)

S_[
  sn_[
    espec-ms_[
      +j-ms_[
        +(El el DA0MS0)
      ]
    ]
    +grup-nom-ms_[
      +n-ms_[
        +(perro perro NCMS000)
      ]
    ]
  ]
  grup-verb_[
    +verb_[
      vaux_[
        +(ha haber VAIP3S0)
      ]
      +parti_[
        +(saltado saltar VMP00SM)
      ]
    ]
  ]
  sn_[
    espec-fs_[
      +j-fs_[
        +(la el DA0FS0)
      ]
    ]
    +grup-nom-fs_[
      +n-fs_[
        +(valla valla NCFS000)
      ]
    ]
  ]
  F-term_[
    +(. . Fp)
  ]
]
S_[
  grup-verb_[
    +verb_[
      +(Estoy estar VMIP1S0)
    ]
  ]
  parti-flex_[
    +parti-fs_[
      +(preocupada preocupar VMP00SF)
    ]
  ]
  (, , Fc)
  sadv_[
    +(quizás quizás RG)
  ]
  grup-verb_[
    morfema-verbal_[
      +(se se P00CN00)
    ]
    +grup-verb_[
      +verb_[
        +(pierda perder VMSP3S0)
      ]
    ]
  ]
  coord_[
    +(o o CC)
  ]
  grup-verb_[
    +verb_[
      +(ataque atacar VMSP3S0)
    ]
  ]
  grup-sp_[
    +prep_[
      +(a a SP)
    ]
    sn_[
      +pron-ms_[
  

In [32]:
# Serialize the analyzed sentences in `ls` with FreeLing's JSON output handler.
# The recorded cell output is a string repr, so PrintResults evidently returns
# the serialized text (shown as the cell's value) rather than printing it.
output = freeling.output_json()
output.PrintResults(ls)

'      { "id":"1",\n        "tokens" : [\n           { "id" : "t1.1", "begin" : "0", "end" : "2", "form" : "El", "lemma" : "el", "tag" : "DA0MS0"},\n           { "id" : "t1.2", "begin" : "3", "end" : "8", "form" : "perro", "lemma" : "perro", "tag" : "NCMS000", "wn" : "02084071-n"},\n           { "id" : "t1.3", "begin" : "9", "end" : "11", "form" : "ha", "lemma" : "haber", "tag" : "VAIP3S0", "wn" : "02603699-v"},\n           { "id" : "t1.4", "begin" : "12", "end" : "19", "form" : "saltado", "lemma" : "saltar", "tag" : "VMP00SM", "wn" : "00256369-v"},\n           { "id" : "t1.5", "begin" : "20", "end" : "22", "form" : "la", "lemma" : "el", "tag" : "DA0FS0"},\n           { "id" : "t1.6", "begin" : "23", "end" : "28", "form" : "valla", "lemma" : "valla", "tag" : "NCFS000"},\n           { "id" : "t1.7", "begin" : "28", "end" : "29", "form" : ".", "lemma" : ".", "tag" : "Fp"}],\n        "constituents" : [\n          {"label" : "S", "children" : [\n            {"label" : "sn", "children" : [\

In [33]:
# Same sentences serialized with the XML output handler; as with the JSON
# handler, the result is displayed as the cell's value (a string repr).
output = freeling.output_xml()
output.PrintResults(ls)

'<sentence id="1">\n  <token id="t1.1" begin="0" end="2" form="El" lemma="el" tag="DA0MS0" >\n  </token>\n  <token id="t1.2" begin="3" end="8" form="perro" lemma="perro" tag="NCMS000" wn="02084071-n" >\n  </token>\n  <token id="t1.3" begin="9" end="11" form="ha" lemma="haber" tag="VAIP3S0" wn="02603699-v" >\n  </token>\n  <token id="t1.4" begin="12" end="19" form="saltado" lemma="saltar" tag="VMP00SM" wn="00256369-v" >\n  </token>\n  <token id="t1.5" begin="20" end="22" form="la" lemma="el" tag="DA0FS0" >\n  </token>\n  <token id="t1.6" begin="23" end="28" form="valla" lemma="valla" tag="NCFS000" >\n  </token>\n  <token id="t1.7" begin="28" end="29" form="." lemma="." tag="Fp" >\n  </token>\n  <constituents>\n    <node label="S" >\n      <node label="sn" >\n        <node label="espec-ms" >\n          <node head="1" label="j-ms" >\n            <node leaf="1" head="1" token="t1.1" word="El" />\n          </node>\n        </node>\n        <node head="1" label="grup-nom-ms" >\n          <n

In [34]:
# Dependency analysis: run the dependency parser and print each sentence's tree

ls = dep.analyze(ls)

for sentence in ls:
    printDepTree(sentence.get_dep_tree(), 0)

grup-verb/top/(saltado saltar VMP00SM) [
  vaux/aux/(ha haber VAIP3S0)
  sn/subj/(perro perro NCMS000) [
    espec-ms/spec/(El el DA0MS0)
  ]
  sn/dobj/(valla valla NCFS000) [
    espec-fs/spec/(la el DA0FS0)
  ]
  F-term/punc/(. . Fp)
]
coor-vb/top/(o o CC) [
  grup-verb/coor/(Estoy estar VMIP1S0) [
    subord-part/attr/(preocupada preocupar VMP00SF)
  ]
  Fc/punc/(, , Fc)
  grup-verb/coor/(pierda perder VMSP3S0) [
    morfema-verbal/mphes/(se se P00CN00)
    sadv/adjt/(quizás quizás RG)
  ]
  grup-verb/coor/(ataque atacar VMSP3S0) [
    grup-sp/adjt/(a a SP) [
      sn/comp/(alguien alguien PI0CS00)
    ]
  ]
  F-term/punc/(. . Fp)
]


In [35]:
# XML dump of `ls` again, this time after it has been through dep.analyze().
# In the recorded output the top constituent node is now labelled "grup-verb"
# (marked as head) instead of "S".
output = freeling.output_xml()
output.PrintResults(ls)

'<sentence id="1">\n  <token id="t1.1" begin="0" end="2" form="El" lemma="el" tag="DA0MS0" >\n  </token>\n  <token id="t1.2" begin="3" end="8" form="perro" lemma="perro" tag="NCMS000" wn="02084071-n" >\n  </token>\n  <token id="t1.3" begin="9" end="11" form="ha" lemma="haber" tag="VAIP3S0" wn="02603699-v" >\n  </token>\n  <token id="t1.4" begin="12" end="19" form="saltado" lemma="saltar" tag="VMP00SM" wn="00256369-v" >\n  </token>\n  <token id="t1.5" begin="20" end="22" form="la" lemma="el" tag="DA0FS0" >\n  </token>\n  <token id="t1.6" begin="23" end="28" form="valla" lemma="valla" tag="NCFS000" >\n  </token>\n  <token id="t1.7" begin="28" end="29" form="." lemma="." tag="Fp" >\n  </token>\n  <constituents>\n    <node head="1" label="grup-verb" >\n      <node label="sn" >\n        <node label="espec-ms" >\n          <node head="1" label="j-ms" >\n            <node leaf="1" head="1" token="t1.1" word="El" />\n          </node>\n        </node>\n        <node head="1" label="grup-nom-ms

In [36]:
# Everything together: tokenize, split, then run every analyzer in order

l = tk.tokenize(text)
ls = sp.split(sid, l, False)

# morphology -> tagger -> senses -> chart parser -> dependency parser
for stage in (mf, tg, sen, parser, dep):
    ls = stage.analyze(ls)

## output results
for s in ls:
    for w in s.get_words():
        print(" ".join([w.get_form(), w.get_lemma(), w.get_tag(), w.get_senses_string()]))
    print()

    printTree(s.get_parse_tree(), 0)
    printDepTree(s.get_dep_tree(), 0)

El el DA0MS0 
perro perro NCMS000 02084071-n:0/10539715-n:0
ha haber VAIP3S0 02603699-v:0/02655135-v:0
saltado saltar VMP00SM 00256369-v:0/00616498-v:0/01236941-v:0/01892608-v:0/01910373-v:0/01963942-v:0/01965654-v:0/02081946-v:0/02094922-v:0/02095060-v:0/02095211-v:0
la el DA0FS0 
valla valla NCFS000 
. . Fp 

+grup-verb_[
  sn_[
    espec-ms_[
      +j-ms_[
        +(El el DA0MS0)
      ]
    ]
    +grup-nom-ms_[
      +n-ms_[
        +(perro perro NCMS000)
      ]
    ]
  ]
  +verb_[
    vaux_[
      +(ha haber VAIP3S0)
    ]
    +parti_[
      +(saltado saltar VMP00SM)
    ]
  ]
  sn_[
    espec-fs_[
      +j-fs_[
        +(la el DA0FS0)
      ]
    ]
    +grup-nom-fs_[
      +n-fs_[
        +(valla valla NCFS000)
      ]
    ]
  ]
  F-term_[
    +(. . Fp)
  ]
]
grup-verb/top/(saltado saltar VMP00SM) [
  vaux/aux/(ha haber VAIP3S0)
  sn/subj/(perro perro NCMS000) [
    espec-ms/spec/(El el DA0MS0)
  ]
  sn/dobj/(valla valla NCFS000) [
    espec-fs/spec/(la el DA0FS0)
  ]
  F-term/p