In [130]:
from pattern.vector import count, words
from pattern.vector import stem, PORTER, LEMMA
from pattern.vector import Document, Model, TFIDF, IG, BINARY
from pattern.vector import Vector, distance, tfidf

from pattern.en import parse, Sentence, parsetree
from pattern.en import wordnet, NOUN, VERB, ADJECTIVE, ADVERB
from pattern.en import sentiment

from pandas import Series, DataFrame
import pandas as pd
import numpy as np

# from nltk.corpus import stopwords
# from nltk.corpus import wordnet as wn

from textblob import TextBlob
# from textblob.wordnet import VERB, Synset

PA_WORDS  = ['buy', 'recommend', 'hire', 'have', 'suggest', 'advise', 'want', 'need', 'purchase', 'wish', 'pay']
VERB_TAGS = ['VB', 'VBD', 'VBG', 'VBN', 'VBP', 'VBZ']
NOUN_TAGS = ['NN', 'NNS', 'NNP', 'NNPS']

In [2]:
data = [
  {"text": "I need silence, and to be alone and to go out, and to save one hour…", "class": "non-pi"},
  {"text": "I need more tiaras. I haven't bought a new one in over a year and that's a shame. Send an Amazon gift card so I can fix this today! goddessjoules@gmail.com", "class": "pi"},  
  {"text": "Hi, recently I was wondering if it pays to buy gtx 1070 or wait for new this year's nvidia graphics cards", "class": "pi"},
  {"text": "I have been planning for years to immigrate to Canada, the heaven of justice! and now it’s over a year/years that most of #OutlandIranianApplicants are stuck in the assessment limbo of @CitImmCanada. @AhmedDHussen @JustinTrudeau @RalphGoodale @ICCongress", "class": "non-pi"}
]

In [3]:
d1 = Document(data[0]['text'], type=data[0]['class'], stemmer=LEMMA)
d2 = Document(data[1]['text'], type=data[1]['class'], stemmer=LEMMA)
d3 = Document(data[2]['text'], type=data[2]['class'], stemmer=LEMMA)
d4 = Document(data[3]['text'], type=data[3]['class'], stemmer=LEMMA)

In [4]:
m = Model(documents=[d1, d2, d3, d4], weight=TFIDF)

In [5]:
m.vectors

[{'silence': 1.3862952936151878},
 {'tiara': 0.1540328104016875,
  't': 0.1540328104016875,
  'bought': 0.1540328104016875,
  'shame': 0.1540328104016875,
  'send': 0.1540328104016875,
  'amazon': 0.1540328104016875,
  'gift': 0.1540328104016875,
  'card': 0.07701640520084375,
  'fix': 0.1540328104016875},
 {'hi': 0.12602684487410798,
  'recently': 0.12602684487410798,
  'wondering': 0.12602684487410798,
  'pay': 0.12602684487410798,
  'buy': 0.12602684487410798,
  'gtx': 0.12602684487410798,
  '1070': 0.12602684487410798,
  'wait': 0.12602684487410798,
  'nvidium': 0.12602684487410798,
  'graphic': 0.12602684487410798,
  'card': 0.06301342243705399},
 {'planning': 0.09902109240108484,
  'immigrate': 0.09902109240108484,
  'canada': 0.09902109240108484,
  'heaven': 0.09902109240108484,
  'justice': 0.09902109240108484,
  'outlandiranianapplicant': 0.09902109240108484,
  'stuck': 0.09902109240108484,
  'assessment': 0.09902109240108484,
  'limbo': 0.09902109240108484,
  'citimmcanada': 

In [6]:
m.classes

['non-pi', 'pi']

In [7]:
m.weight

'tf-idf'

In [8]:
m.df('buy')

0.25

In [9]:
m.idf('buy')

1.3862952936151878

In [10]:
m.similarity(d1, d2)

0.0

In [11]:
m.similarity(d1, d3)

0.0

In [12]:
m.similarity(d1, d4)

0.0

In [13]:
m.similarity(d2, d3)

0.027186362391351835

In [14]:
m.similarity(d2, d4)

0.0

In [15]:
m.similarity(d3, d4)

0.0

In [16]:
d3.vector

{'hi': 0.12602684487410798,
 'recently': 0.12602684487410798,
 'wondering': 0.12602684487410798,
 'pay': 0.12602684487410798,
 'buy': 0.12602684487410798,
 'gtx': 0.12602684487410798,
 '1070': 0.12602684487410798,
 'wait': 0.12602684487410798,
 'nvidium': 0.12602684487410798,
 'graphic': 0.12602684487410798,
 'card': 0.06301342243705399}

In [17]:
tfidf([d1.vector, d2.vector, d3.vector, d4.vector])

[{'silence': 1.9218146410996197},
 {'tiara': 0.21353496012217993,
  't': 0.21353496012217993,
  'bought': 0.21353496012217993,
  'shame': 0.21353496012217993,
  'send': 0.21353496012217993,
  'amazon': 0.21353496012217993,
  'gift': 0.21353496012217993,
  'card': 0.05338374003054498,
  'fix': 0.21353496012217993},
 {'hi': 0.17471042191814726,
  'recently': 0.17471042191814726,
  'wondering': 0.17471042191814726,
  'pay': 0.17471042191814726,
  'buy': 0.17471042191814726,
  'gtx': 0.17471042191814726,
  '1070': 0.17471042191814726,
  'wait': 0.17471042191814726,
  'nvidium': 0.17471042191814726,
  'graphic': 0.17471042191814726,
  'card': 0.043677605479536814},
 {'planning': 0.13727247436425855,
  'immigrate': 0.13727247436425855,
  'canada': 0.13727247436425855,
  'heaven': 0.13727247436425855,
  'justice': 0.13727247436425855,
  'outlandiranianapplicant': 0.13727247436425855,
  'stuck': 0.13727247436425855,
  'assessment': 0.13727247436425855,
  'limbo': 0.13727247436425855,
  'citimm

In [18]:
def remove_stop_words(word_list):
    if not isinstance(word_list, list):
        raise (TypeError, '`word_list` argument must be a list')
    return [
        word for word in word_list if word not in set(stopwords.words('english'))
        ]

def extract_pos(blob, POS_TAGS):
    if not isinstance(blob, TextBlob):
        raise (TypeError, '`blob` argument must be a TextBlob')
    if not isinstance(POS_TAGS, list):
        raise (TypeError, '`POS_TAGS` argument must be a list')        
    return filter(lambda x: x[1] in POS_TAGS, blob.tags)

def extract_terms(blob, pos):
    if pos.lower() == 'verb' or pos.lower() == 'vb':
        verbs = extract_pos(blob, VERB_TAGS)
        return [x[0] for x in verbs]
    elif pos.lower() == 'noun' or pos.lower() == 'nn':
        nouns = extract_pos(blob, NOUN_TAGS)
        return [x[0] for x in nouns]
    elif pos.lower() == 'pronoun' or pos.lower() == 'pn':
        raise(Exception('Not implemented'))
    elif pos.lower() == 'adjective' or pos.lower() == 'adj':
        raise(Exception('Not implemented'))
    elif pos.lower() == 'adverb' or pos.lower() == 'adv':
        raise(Exception('Not implemented'))
    elif pos.lower() == 'preposition' or pos.lower() == 'pre':
        raise(Exception('Not implemented'))
    elif pos.lower() == 'conjunction' or pos.lower() == 'cn':
        raise(Exception('Not implemented'))
    elif pos.lower() == 'interjection' or pos.lower() == 'in':
        raise(Exception('Not implemented'))
    else:
        raise(Exception('Please provide valid pos. verb, noun, pronoun, adjective')) 

In [19]:
b3 = TextBlob(data[2]['text'])

In [20]:
b3

TextBlob("Hi, recently I was wondering if it pays to buy gtx 1070 or wait for new this year's nvidia graphics cards")

In [21]:
verbs = extract_terms(b3, 'verb')

In [22]:
verbs

['was', 'wondering', 'pays', 'buy', 'wait']

In [23]:
s = wordnet.synsets('bird')[0]

In [24]:
s

Synset('bird.n.01')

In [25]:
bird_synsets = wordnet.synsets('bird')
bird_synsets

[Synset('bird.n.01'),
 Synset('bird.n.02'),
 Synset('dame.n.01'),
 Synset('boo.n.01'),
 Synset('shuttlecock.n.01')]

In [26]:
synset_1 = wordnet.synsets('buy', pos=VERB)[0]

In [27]:
synset_2 = wordnet.synsets('buy', pos=VERB)[0]

In [28]:
synset_1.gloss

'obtain by purchase; acquire by means of a financial transaction'

In [29]:
synset_1.synonyms

['buy', 'purchase']

In [30]:
synset_1.similarity(synset_2)

1.0

In [31]:
sentiment(data[2]['text'])[0]

0.06818181818181818

In [32]:
doc3 = data[2]['text']

In [33]:
parsed = parse(doc3)

In [34]:
parsed.tags

['word', 'part-of-speech', 'chunk', 'preposition']

In [35]:
doc3

"Hi, recently I was wondering if it pays to buy gtx 1070 or wait for new this year's nvidia graphics cards"

In [36]:
parse(doc3).split()

[[['Hi', 'UH', 'O', 'O'],
  [',', ',', 'O', 'O'],
  ['recently', 'RB', 'B-ADVP', 'O'],
  ['I', 'PRP', 'B-NP', 'O'],
  ['was', 'VBD', 'B-VP', 'O'],
  ['wondering', 'VBG', 'I-VP', 'O'],
  ['if', 'IN', 'B-PP', 'B-PNP'],
  ['it', 'PRP', 'B-NP', 'I-PNP'],
  ['pays', 'VBZ', 'B-VP', 'O'],
  ['to', 'TO', 'I-VP', 'O'],
  ['buy', 'VB', 'I-VP', 'O'],
  ['gtx', 'NN', 'B-NP', 'O'],
  ['1070', 'CD', 'I-NP', 'O'],
  ['or', 'CC', 'I-NP', 'O'],
  ['wait', 'NN', 'I-NP', 'O'],
  ['for', 'IN', 'B-PP', 'O'],
  ['new', 'JJ', 'B-ADJP', 'O'],
  ['this', 'DT', 'B-NP', 'O'],
  ['year', 'NN', 'I-NP', 'O'],
  ["'s", 'POS', 'O', 'O'],
  ['nvidia', 'NN', 'B-NP', 'O'],
  ['graphics', 'NNS', 'I-NP', 'O'],
  ['cards', 'NNS', 'I-NP', 'O']]]

In [37]:
s = parsetree(doc3, relations=True, lemmata=True)

In [38]:
chunks = s[0].chunks

In [40]:
for chunk in chunks:
    print("String :", chunk.string)
    print("Type   :", chunk.type)
    print("Role   :", chunk.role)
    print("Related:", chunk.related)
    print("Head   :", chunk.related)
    print()

String : recently
Type   : ADVP
Role   : None
Related: []
Head   : []

String : I
Type   : NP
Role   : SBJ
Related: [Chunk('was wondering/VP-1')]
Head   : [Chunk('was wondering/VP-1')]

String : was wondering
Type   : VP
Role   : None
Related: [Chunk('I/NP-SBJ-1')]
Head   : [Chunk('I/NP-SBJ-1')]

String : if
Type   : PP
Role   : None
Related: []
Head   : []

String : it
Type   : NP
Role   : SBJ
Related: [Chunk('pays to buy/VP-2'), Chunk('gtx 1070 or wait/NP-OBJ-2')]
Head   : [Chunk('pays to buy/VP-2'), Chunk('gtx 1070 or wait/NP-OBJ-2')]

String : pays to buy
Type   : VP
Role   : None
Related: [Chunk('it/NP-SBJ-2'), Chunk('gtx 1070 or wait/NP-OBJ-2')]
Head   : [Chunk('it/NP-SBJ-2'), Chunk('gtx 1070 or wait/NP-OBJ-2')]

String : gtx 1070 or wait
Type   : NP
Role   : OBJ
Related: [Chunk('it/NP-SBJ-2'), Chunk('pays to buy/VP-2')]
Head   : [Chunk('it/NP-SBJ-2'), Chunk('pays to buy/VP-2')]

String : for
Type   : PP
Role   : None
Related: []
Head   : []

String : new
Type   : ADJP
Role   : N

In [133]:
def find_ado(document):
    s = parsetree(document, relations=True, lemmata=True)
    for sent in s:
        for chunk in sent.chunks:
            if chunk.type == 'VP':
                print("Subject          : ", chunk.subject)            
                print("[VP]             : ", chunk.string)
                print("[VP] [WORDS]     : ", chunk.words)       
                if chunk.object is not None:
                    print("Object [WORDS]   : ", chunk.object.words)
                    nouns = filter(lambda x: x.type in NOUN_TAGS, chunk.object.words)        
                    nouns = set([noun.string for noun in nouns])
                    print("Extracted Nouns  :", nouns)
                else:
                    print("Object           : ", chunk.object)            
                print('-'*80)

In [46]:
def _get_verbs(document):        
    blob = TextBlob(document)        
    tags = filter(lambda x: x[1] in VERB_TAGS, blob.tags)            
    return [verb[0] for verb in tags]

In [47]:
_get_verbs(doc3)

['was', 'wondering', 'pays', 'buy', 'wait']

In [56]:
def _get_pa_similarity( word ):
    s = wordnet.synsets(word, pos=VERB)[0]

    similarities = []
    append_sim = similarities.append

    for paword in PA_WORDS:
        pa_s = wordnet.synsets(paword, pos=VERB)[0]
        append_sim( s.similarity(pa_s) )

    print(similarities)
    return max(similarities)

In [58]:
_get_pa_similarity('buy')

[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0]


1.0

In [57]:
_get_pa_similarity('purchase')

[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0]


1.0

In [59]:
_get_pa_similarity('pay')

[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]


0.0

In [60]:
_get_pa_similarity('obtain')

[0.5654954205996426, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5654954205996426, 0.0]


0.5654954205996426

In [65]:
wordnet.synsets('obtain', pos=VERB)

[Synset('obtain.v.01'), Synset('receive.v.02'), Synset('prevail.v.02')]

In [67]:
extracted_verbs = _get_verbs(doc3)

In [69]:
count_rows = len(extracted_verbs)
count_columns = len(PA_WORDS)
frame = DataFrame(np.empty((count_rows, count_columns)), index=extracted_verbs, columns=PA_WORDS)

In [70]:
frame

Unnamed: 0,buy,recommend,hire,have,suggest,advise,want,need,purchase,wish,pay
was,6.931999e-310,1.122473e-316,4.207069e-310,7.764465000000001e+69,1.685826e-312,2.561371e-300,1.333605e+241,1.735326e-309,1.807805e+60,1.855586e-312,5.216755e-307
wondering,-3.384608e+125,3.6902890000000003e+180,1.199333e-302,1.094038e-303,1.433734e-298,1.184992e-303,1.8231529999999998e-303,1.184994e-303,1.09385e-303,1.00271e-303,8.204348e-304
pays,4.681025e-188,5.99793e+19,2.710567e-24,3.796058e+77,1.634523e+87,2.657113e+82,3.2122630000000002e+106,5.038551e-111,4.720251e+154,9.249271e-304,9.251051e-304
buy,1.2502070000000001e+63,8.298721e-304,1.272151e-178,5.5161169999999995e-294,4.262554e+24,9.714609e+303,3.738515e-217,5.545157e-294,9.604798e+303,3.7700470000000004e-265,3.5944920000000003e-289
wait,1.093077e+145,8.245236999999999e-304,9.11571e-304,7.16865e-299,5.5673409999999996e-294,3.882846e+130,5.6334309999999995e+255,1.391582e+116,3.2476510000000002e+106,3.311374e-183,2.213e-321


In [71]:
for verb in extracted_verbs:
    s = wordnet.synsets(verb, pos=VERB)[0]               
    for paword in PA_WORDS:
        pa_s = wordnet.synsets(paword, pos=VERB)[0]                
        frame[paword][verb] = s.similarity(pa_s)

In [72]:
frame

Unnamed: 0,buy,recommend,hire,have,suggest,advise,want,need,purchase,wish,pay
was,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
wondering,0.0,0.0,0.0,0.0,0.0,0.360471,0.0,0.0,0.0,0.0,0.0
pays,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
buy,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
wait,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [73]:
wpa_max_columns = frame.max()

In [74]:
wpa_max_columns

buy          1.000000
recommend    0.000000
hire         0.000000
have         0.000000
suggest      0.000000
advise       0.360471
want         0.000000
need         0.000000
purchase     1.000000
wish         0.000000
pay          1.000000
dtype: float64

In [75]:
sentiment('What are some good places to have lunch in Palo Alto?')

(0.7, 0.6000000000000001)

In [83]:
for i in range(8):
    if i == 3:
        break
    else:
        print(i)

0
1
2


In [101]:
doc5 = "I want to purchase a new phone, should i go for the iphone x or google pixel?"

In [135]:
find_ado(data[2]['text'])

Subject          :  I
[VP]             :  was wondering
[VP] [WORDS]     :  [Word('was/VBD'), Word('wondering/VBG')]
Object           :  None
--------------------------------------------------------------------------------
Subject          :  it
[VP]             :  pays to buy
[VP] [WORDS]     :  [Word('pays/VBZ'), Word('to/TO'), Word('buy/VB')]
Object [WORDS]   :  [Word('gtx/NN'), Word('1070/CD'), Word('or/CC'), Word('wait/NN')]
Extracted Nouns  : {'gtx', 'wait'}
--------------------------------------------------------------------------------


In [134]:
find_ado(doc5)

Subject          :  I
[VP]             :  want to purchase
[VP] [WORDS]     :  [Word('want/VBP'), Word('to/TO'), Word('purchase/VB')]
Object [WORDS]   :  [Word('a/DT'), Word('new/JJ'), Word('phone/NN')]
Extracted Nouns  : {'phone'}
--------------------------------------------------------------------------------
Subject          :  None
[VP]             :  should i go
[VP] [WORDS]     :  [Word('should/MD'), Word('i/VB'), Word('go/VB')]
Object           :  None
--------------------------------------------------------------------------------


In [136]:
find_ado(data[3]['text'])

Subject          :  I
[VP]             :  have been planning
[VP] [WORDS]     :  [Word('have/VBP'), Word('been/VBN'), Word('planning/VBG')]
Object           :  None
--------------------------------------------------------------------------------
Subject          :  years
[VP]             :  to immigrate
[VP] [WORDS]     :  [Word('to/TO'), Word('immigrate/VB')]
Object           :  None
--------------------------------------------------------------------------------
Subject          :  OutlandIranianApplicants
[VP]             :  are stuck
[VP] [WORDS]     :  [Word('are/VBP'), Word('stuck/VBN')]
Object           :  None
--------------------------------------------------------------------------------
Subject          :  None
[VP]             :  @AhmedDHussen
[VP] [WORDS]     :  [Word('@AhmedDHussen/VBN')]
Object [WORDS]   :  [Word('@JustinTrudeau/NN'), Word('@RalphGoodale/NN'), Word('@ICCongress/NN')]
Extracted Nouns  : {'@ICCongress', '@RalphGoodale', '@JustinTrudeau'}
------------------

In [137]:
data[3]['text']

'I have been planning for years to immigrate to Canada, the heaven of justice! and now it’s over a year/years that most of #OutlandIranianApplicants are stuck in the assessment limbo of @CitImmCanada. @AhmedDHussen @JustinTrudeau @RalphGoodale @ICCongress'

In [138]:
find_ado(data[1]['text'])

Subject          :  I
[VP]             :  need
[VP] [WORDS]     :  [Word('need/VBP')]
Object [WORDS]   :  [Word('more/JJR'), Word('tiaras/NNS')]
Extracted Nouns  : {'tiaras'}
--------------------------------------------------------------------------------
Subject          :  I
[VP]             :  have n't bought
[VP] [WORDS]     :  [Word('have/VBP'), Word("n't/RB"), Word('bought/VBN')]
Object           :  None
--------------------------------------------------------------------------------
Subject          :  None
[VP]             :  's
[VP] [WORDS]     :  [Word("'s/VBZ")]
Object [WORDS]   :  [Word('a/DT'), Word('shame/NN')]
Extracted Nouns  : {'shame'}
--------------------------------------------------------------------------------
Subject          :  None
[VP]             :  Send
[VP] [WORDS]     :  [Word('Send/VB')]
Object [WORDS]   :  [Word('an/DT'), Word('Amazon/NNP'), Word('gift/NN'), Word('card/NN')]
Extracted Nouns  : {'card', 'gift', 'Amazon'}
---------------------------------

In [139]:
data[1]['text']

"I need more tiaras. I haven't bought a new one in over a year and that's a shame. Send an Amazon gift card so I can fix this today! goddessjoules@gmail.com"

In [140]:
doc6 = "Can you please recommend some good place to have lunch in Bangalore?"

In [141]:
b6 = TextBlob(doc6)

In [142]:
b6.tags

[('Can', 'MD'),
 ('you', 'PRP'),
 ('please', 'VB'),
 ('recommend', 'VB'),
 ('some', 'DT'),
 ('good', 'JJ'),
 ('place', 'NN'),
 ('to', 'TO'),
 ('have', 'VB'),
 ('lunch', 'NN'),
 ('in', 'IN'),
 ('Bangalore', 'NNP')]

In [143]:
find_ado(doc6)

Subject          :  None
[VP]             :  Can
[VP] [WORDS]     :  [Word('Can/MD')]
Object [WORDS]   :  [Word('you/PRP')]
Extracted Nouns  : set()
--------------------------------------------------------------------------------
Subject          :  you
[VP]             :  please recommend
[VP] [WORDS]     :  [Word('please/VB'), Word('recommend/VB')]
Object [WORDS]   :  [Word('some/DT'), Word('good/JJ'), Word('place/NN')]
Extracted Nouns  : {'place'}
--------------------------------------------------------------------------------
Subject          :  some good place
[VP]             :  to have
[VP] [WORDS]     :  [Word('to/TO'), Word('have/VB')]
Object [WORDS]   :  [Word('lunch/NN')]
Extracted Nouns  : {'lunch'}
--------------------------------------------------------------------------------


In [151]:
s = parsetree(doc6, relations=True, lemmata=True)
for sent in s:
    for chunk in sent.chunks:
        print('TYPE     : ', chunk.type)
        print('PNP      : ', chunk.pnp)
        print("STRING   : ", chunk.string)
        print("WORDS    : ", chunk.words)       
        print('-'*80)

TYPE     :  VP
PNP      :  None
STRING   :  Can
WORDS    :  [Word('Can/MD')]
--------------------------------------------------------------------------------
TYPE     :  NP
PNP      :  None
STRING   :  you
WORDS    :  [Word('you/PRP')]
--------------------------------------------------------------------------------
TYPE     :  VP
PNP      :  None
STRING   :  please recommend
WORDS    :  [Word('please/VB'), Word('recommend/VB')]
--------------------------------------------------------------------------------
TYPE     :  NP
PNP      :  None
STRING   :  some good place
WORDS    :  [Word('some/DT'), Word('good/JJ'), Word('place/NN')]
--------------------------------------------------------------------------------
TYPE     :  VP
PNP      :  None
STRING   :  to have
WORDS    :  [Word('to/TO'), Word('have/VB')]
--------------------------------------------------------------------------------
TYPE     :  NP
PNP      :  None
STRING   :  lunch
WORDS    :  [Word('lunch/NN')]
----------------------

In [153]:
s = parsetree(doc5, relations=True, lemmata=True)
for sent in s:
    for chunk in sent.chunks:
        print('TYPE     : ', chunk.type)
        print('PNP      : ', chunk.pnp)
        print("STRING   : ", chunk.string)
        print("WORDS    : ", chunk.words)       
        print('-'*80)

TYPE     :  NP
PNP      :  None
STRING   :  I
WORDS    :  [Word('I/PRP')]
--------------------------------------------------------------------------------
TYPE     :  VP
PNP      :  None
STRING   :  want to purchase
WORDS    :  [Word('want/VBP'), Word('to/TO'), Word('purchase/VB')]
--------------------------------------------------------------------------------
TYPE     :  NP
PNP      :  None
STRING   :  a new phone
WORDS    :  [Word('a/DT'), Word('new/JJ'), Word('phone/NN')]
--------------------------------------------------------------------------------
TYPE     :  VP
PNP      :  None
STRING   :  should i go
WORDS    :  [Word('should/MD'), Word('i/VB'), Word('go/VB')]
--------------------------------------------------------------------------------
TYPE     :  PP
PNP      :  for the iphone x
STRING   :  for
WORDS    :  [Word('for/IN')]
--------------------------------------------------------------------------------
TYPE     :  NP
PNP      :  for the iphone x
STRING   :  the iphone x
W

In [165]:
def extract_psd(doc):
    s = parsetree(doc, relations=True, lemmata=True)
    for sent in s:
        for chunk in sent.chunks:
            print('TYPE     : ', chunk.type)
            print('PNP      : ', chunk.pnp)
            print("STRING   : ", chunk.string)
            print("WORDS    : ", chunk.words)       
            print('-'*80)
        print(sent.chunks)
        print('-'*80)

In [166]:
extract_psd(doc6)

TYPE     :  VP
PNP      :  None
STRING   :  Can
WORDS    :  [Word('Can/MD')]
--------------------------------------------------------------------------------
TYPE     :  NP
PNP      :  None
STRING   :  you
WORDS    :  [Word('you/PRP')]
--------------------------------------------------------------------------------
TYPE     :  VP
PNP      :  None
STRING   :  please recommend
WORDS    :  [Word('please/VB'), Word('recommend/VB')]
--------------------------------------------------------------------------------
TYPE     :  NP
PNP      :  None
STRING   :  some good place
WORDS    :  [Word('some/DT'), Word('good/JJ'), Word('place/NN')]
--------------------------------------------------------------------------------
TYPE     :  VP
PNP      :  None
STRING   :  to have
WORDS    :  [Word('to/TO'), Word('have/VB')]
--------------------------------------------------------------------------------
TYPE     :  NP
PNP      :  None
STRING   :  lunch
WORDS    :  [Word('lunch/NN')]
----------------------

In [157]:
extract_psd(doc5)

TYPE     :  NP
PNP      :  None
STRING   :  I
WORDS    :  [Word('I/PRP')]
--------------------------------------------------------------------------------
TYPE     :  VP
PNP      :  None
STRING   :  want to purchase
WORDS    :  [Word('want/VBP'), Word('to/TO'), Word('purchase/VB')]
--------------------------------------------------------------------------------
TYPE     :  NP
PNP      :  None
STRING   :  a new phone
WORDS    :  [Word('a/DT'), Word('new/JJ'), Word('phone/NN')]
--------------------------------------------------------------------------------
TYPE     :  VP
PNP      :  None
STRING   :  should i go
WORDS    :  [Word('should/MD'), Word('i/VB'), Word('go/VB')]
--------------------------------------------------------------------------------
TYPE     :  PP
PNP      :  for the iphone x
STRING   :  for
WORDS    :  [Word('for/IN')]
--------------------------------------------------------------------------------
TYPE     :  NP
PNP      :  for the iphone x
STRING   :  the iphone x
W

In [158]:
doc3 = data[2]['text']

In [159]:
doc4 = data[3]['text']

In [160]:
doc2 = data[1]['text']

In [161]:
doc1 = data[0]['text']

In [167]:
extract_psd(doc3)

TYPE     :  ADVP
PNP      :  None
STRING   :  recently
WORDS    :  [Word('recently/RB')]
--------------------------------------------------------------------------------
TYPE     :  NP
PNP      :  None
STRING   :  I
WORDS    :  [Word('I/PRP')]
--------------------------------------------------------------------------------
TYPE     :  VP
PNP      :  None
STRING   :  was wondering
WORDS    :  [Word('was/VBD'), Word('wondering/VBG')]
--------------------------------------------------------------------------------
TYPE     :  PP
PNP      :  if it
STRING   :  if
WORDS    :  [Word('if/IN')]
--------------------------------------------------------------------------------
TYPE     :  NP
PNP      :  if it
STRING   :  it
WORDS    :  [Word('it/PRP')]
--------------------------------------------------------------------------------
TYPE     :  VP
PNP      :  None
STRING   :  pays to buy
WORDS    :  [Word('pays/VBZ'), Word('to/TO'), Word('buy/VB')]
--------------------------------------------------

In [163]:
doc3

"Hi, recently I was wondering if it pays to buy gtx 1070 or wait for new this year's nvidia graphics cards"

In [168]:
extract_psd("I want to buy a phone")

TYPE     :  NP
PNP      :  None
STRING   :  I
WORDS    :  [Word('I/PRP')]
--------------------------------------------------------------------------------
TYPE     :  VP
PNP      :  None
STRING   :  want to buy
WORDS    :  [Word('want/VBP'), Word('to/TO'), Word('buy/VB')]
--------------------------------------------------------------------------------
TYPE     :  NP
PNP      :  None
STRING   :  a phone
WORDS    :  [Word('a/DT'), Word('phone/NN')]
--------------------------------------------------------------------------------
[Chunk('I/NP-SBJ-1'), Chunk('want to buy/VP-1'), Chunk('a phone/NP-OBJ-1')]
--------------------------------------------------------------------------------


In [183]:
s = parsetree(doc6, relations=True, lemmata=True)
for sent in s:
    for chunk in sent.chunks:
        if chunk.type == 'VP' and chunk.object is not None:
            print("TYPE     : ", chunk.type)            
            print("PNP      : ", chunk.pnp)
            print("STRING   : ", chunk.string)            
            print("OBJECT   : ", chunk.object)            
            print('-'*80)            
        if chunk.type == "PP" or chunk.type == "NP":
            print("TYPE     : ", chunk.type)            
            print("PNP      : ", chunk.pnp)
            print("STRING   : ", chunk.string)
            print("WORDS    : ", chunk.words)
            print("OBJECT   : ", chunk.object)
            print('-'*80)
    print(sent.chunks)

TYPE     :  VP
PNP      :  None
STRING   :  Can
OBJECT   :  you
--------------------------------------------------------------------------------
TYPE     :  NP
PNP      :  None
STRING   :  you
WORDS    :  [Word('you/PRP')]
OBJECT   :  None
--------------------------------------------------------------------------------
TYPE     :  VP
PNP      :  None
STRING   :  please recommend
OBJECT   :  some good place
--------------------------------------------------------------------------------
TYPE     :  NP
PNP      :  None
STRING   :  some good place
WORDS    :  [Word('some/DT'), Word('good/JJ'), Word('place/NN')]
OBJECT   :  None
--------------------------------------------------------------------------------
TYPE     :  VP
PNP      :  None
STRING   :  to have
OBJECT   :  lunch
--------------------------------------------------------------------------------
TYPE     :  NP
PNP      :  None
STRING   :  lunch
WORDS    :  [Word('lunch/NN')]
OBJECT   :  None
-------------------------------------

In [184]:
s = parsetree("What are some good places to have lunch in Bangalore?", relations=True, lemmata=True)
for sent in s:
    for chunk in sent.chunks:
        if chunk.type == 'VP' and chunk.object is not None:
            print("TYPE     : ", chunk.type)            
            print("PNP      : ", chunk.pnp)
            print("STRING   : ", chunk.string)            
            print("OBJECT   : ", chunk.object)            
            print('-'*80)            
        if chunk.type == "PP" or chunk.type == "NP":
            print("TYPE     : ", chunk.type)            
            print("PNP      : ", chunk.pnp)
            print("STRING   : ", chunk.string)
            print("WORDS    : ", chunk.words)
            print("OBJECT   : ", chunk.object)
            print('-'*80)
    print(sent.chunks)

TYPE     :  VP
PNP      :  None
STRING   :  are
OBJECT   :  some good places
--------------------------------------------------------------------------------
TYPE     :  NP
PNP      :  None
STRING   :  some good places
WORDS    :  [Word('some/DT'), Word('good/JJ'), Word('places/NNS')]
OBJECT   :  None
--------------------------------------------------------------------------------
TYPE     :  VP
PNP      :  None
STRING   :  to have
OBJECT   :  lunch
--------------------------------------------------------------------------------
TYPE     :  NP
PNP      :  None
STRING   :  lunch
WORDS    :  [Word('lunch/NN')]
OBJECT   :  None
--------------------------------------------------------------------------------
TYPE     :  PP
PNP      :  in Bangalore
STRING   :  in
WORDS    :  [Word('in/IN')]
OBJECT   :  None
--------------------------------------------------------------------------------
TYPE     :  NP
PNP      :  in Bangalore
STRING   :  Bangalore
WORDS    :  [Word('Bangalore/NNP')]
OBJECT 