In [16]:
#*************************************************************************
# Setup: import the NLTK library
#*************************************************************************
import nltk


#*************************************************************************
# 1. Input text fed into the NLP pipeline
#    (banner numbers follow the labels printed at runtime)
#*************************************************************************
text = "I didn't notice my animals were uglier than yours! I'm sorry..."
print("\n\n1. Texto:", text)


#*************************************************************************
# 2. Sentence splitting: break the raw text into individual sentences
#*************************************************************************
sentences = nltk.sent_tokenize(text)
print("\n\n2. Frases:", sentences)


#*****************************************************************************
# 3. Tokenization: split the whole text into word-level tokens
#*****************************************************************************
tokens = nltk.word_tokenize(text)
print("\n\n3. Tokens:", tokens)

# Contracted forms produced by the tokenizer, mapped to their full words.
# Per the original author's note, WordNet lacks these abbreviated forms, so
# they are expanded by hand to help lemmatization.
contraction_map = {"'m": "am", "'s": "is", "n't": "not"}

# Punctuation tokens that are left out of the cleaned token list.
skipped_tokens = ("!", "...")

tokensLimpio = []
for tok in tokens:
    # Expand a contraction when there is one; otherwise keep the token as is.
    tok = contraction_map.get(tok, tok)
    if tok in skipped_tokens:
        continue
    tokensLimpio.append(tok)
print("\n\n3. Tokens limpio:", tokensLimpio)
        
#*************************************************************************
# 4. Morphological analysis: attach a part-of-speech tag to every token
#    of the cleaned token list
#*************************************************************************
tagged = nltk.pos_tag(tokensLimpio)
print("\n\n4. Analisis Morfologico:", tagged)


#*******************************************************************
# 7. Syntactic analysis
#*******************************************************************
# Hand-written context-free grammar (CFG). Its terminals are the words that
# appear in the cleaned token list.
grammar = nltk.CFG.fromstring("""
S -> NP VP
PP -> P NP
NP -> Det N | Det N PP | 'I'
VP -> V NP | VP PP
Det -> 'an' | 'my' | 'yours' | 'sorry' | 'uglier'
N -> 'animals'
V -> 'notice' | 'did' | 'were' | 'am'
P -> 'in' | 'not' | 'than'
""")

# Token sequence handed to the parser (same list object as tokensLimpio).
# NOTE(review): tokensLimpio holds the tokens of the whole text, i.e. both
# sentences back to back, while the start rule S describes a single NP VP
# clause -- confirm that parsing them as one sequence is intended.
sent = tokensLimpio

# Chart parser that recognises the grammar above; with trace=1 it prints the
# chart edges while it works (see the captured cell output).
parser = nltk.ChartParser(grammar, trace=1)
print('\n\n\n7. Analisis sintactico:\n')
for tree in parser.parse(sent):
    # Print every parse tree found, then render it graphically.
    print(tree, '\n')
    tree.draw()
# Alternative left disabled: NLTK's built-in chart-parser demo.
# nltk.parse.chart.demo(2, print_times=False, trace=1, sent='I saw a dog', numparses=1)



1. Texto: I didn't notice my animals were uglier than yours! I'm sorry...


2. Frases: ["I didn't notice my animals were uglier than yours!", "I'm sorry..."]


3. Tokens: ['I', 'did', "n't", 'notice', 'my', 'animals', 'were', 'uglier', 'than', 'yours', '!', 'I', "'m", 'sorry', '...']


3. Tokens limpio: ['I', 'did', 'not', 'notice', 'my', 'animals', 'were', 'uglier', 'than', 'yours', 'I', 'am', 'sorry']


4. Analisis Morfologico: [('I', 'PRP'), ('did', 'VBD'), ('not', 'RB'), ('notice', 'VB'), ('my', 'PRP$'), ('animals', 'NNS'), ('were', 'VBD'), ('uglier', 'JJR'), ('than', 'IN'), ('yours', 'JJR'), ('I', 'PRP'), ('am', 'VBP'), ('sorry', 'JJ')]



7. Analisis sintactico:

|.I .di.no.no.my.an.we.ug.th.yo.I .am.so.|
|[--]  .  .  .  .  .  .  .  .  .  .  .  .| [0:1] 'I'
|.  [--]  .  .  .  .  .  .  .  .  .  .  .| [1:2] 'did'
|.  .  [--]  .  .  .  .  .  .  .  .  .  .| [2:3] 'not'
|.  .  .  [--]  .  .  .  .  .  .  .  .  .| [3:4] 'notice'
|.  .  .  .  [--]  .  .  .  .  .  .  .  .| [4:5] 'my'
|.