# Explore syntax parsing for detection of opinion holders and targets

In [1]:
import pandas as pd
import numpy as np
from tqdm.notebook import tqdm

In [2]:
import spacy
nlp = spacy.load('en_core_web_lg')

In [3]:
import json

In [4]:
review_data_file = '../lexicon/data/yelp_sample.json'
# Decode explicitly as UTF-8 (the standard JSON encoding) so that loading the
# reviews does not depend on the platform's locale default encoding.
with open(review_data_file, 'r', encoding='utf-8') as infile:
    # R is used below as a sequence of review dicts (e.g. R[0]['content']).
    R = json.load(infile)

In [5]:
def spacy_sentences(nlp, text):
    """Return the sentences of *text* as a list.

    ``nlp`` is called on ``text`` and the ``sents`` attribute of the result
    is materialised, in order, into a new list.
    """
    return list(nlp(text).sents)

In [6]:
R[0]

{'content': "Red, white and bleu salad was super yum and a great addition to the menu! This location was clean with great service and food served at just the right temps! Kids pizza is always a hit too with lots of great side dish options for the kiddos! When I'm on this side of town, this will definitely be a spot I'll hit up again!",
 'date': '2014-02-17',
 'stars': 4,
 'useful': 1,
 'funny': 0,
 'cool': 0,
 'business': 'Ue6-WhXvI-_1xUIuapl0zQ',
 'id': '----X0BIDP9tA49U3RvdSQ',
 'categories': ['American (Traditional)', 'Burgers', 'Restaurants']}

In [7]:
sample = spacy_sentences(nlp, R[0]['content'])

In [8]:
sample

[Red, white and bleu salad was super yum and a great addition to the menu!,
 This location was clean with great service and food served at just the right temps!,
 Kids pizza is always a hit too with lots of great side dish options for the kiddos!,
 When I'm on this side of town, this will definitely be a spot I'll hit up again!]

In [9]:
type(sample[0])

spacy.tokens.span.Span

## Build a graph of the sentences

In [10]:
import networkx as nx
from spacy.displacy import render

In [11]:
test = sample[0]

In [12]:
render(test)

In [15]:
# One row per token of `test`. `token` and `head` hold the `idx` of the token
# and of its syntactic head respectively; `token_id` is the position in `test`.
data = pd.DataFrame([
    dict(
        token_id=position,
        token=tok.idx,
        text=tok.text,
        pos=tok.pos_,
        lemma=tok.lemma_,
        dep=tok.dep_,
        head=tok.head.idx,
    )
    for position, tok in enumerate(test)
])

In [16]:
data

Unnamed: 0,token_id,token,text,pos,lemma,dep,head
0,0,0,Red,ADJ,red,amod,20
1,1,3,",",PUNCT,",",punct,0
2,2,5,white,ADJ,white,conj,0
3,3,11,and,CCONJ,and,cc,5
4,4,15,bleu,PROPN,bleu,conj,5
5,5,20,salad,NOUN,salad,nsubj,26
6,6,26,was,AUX,be,ROOT,26
7,7,30,super,ADJ,super,amod,36
8,8,36,yum,NOUN,yum,attr,26
9,9,40,and,CCONJ,and,cc,36


## Explore the graph for detecting the target of adjectives and verbs
- Hint: compute statistics of the most common dependencies of adjectives and verbs

In [17]:
def token_map(span):
    """Map each token's ``idx`` to the token itself, for every token in *span*."""
    return {tok.idx: tok for tok in span}

In [22]:
def explore(token, children=None, level=0, order=None):
    """Collect every descendant of *token* in depth-first, pre-order sequence.

    Each descendant is recorded as ``(node, depth, is_left)`` where ``depth``
    starts at *level* for the direct children of *token* and grows by one per
    generation, and ``is_left`` is ``node.idx < order``.

    Args:
        token: Root of the walk; must expose ``idx`` and ``children``.
        children: Optional list to append results to (it is mutated and
            returned). A fresh list is created when ``None``.
        level: Depth value assigned to the direct children of *token*.
        order: Reference ``idx`` used for the left/right flag. Defaults to
            ``token.idx``.

    Returns:
        The list of ``(node, depth, is_left)`` tuples.
    """
    collected = [] if children is None else children
    anchor = token.idx if order is None else order

    # Explicit stack of (depth, child-iterator) frames; advancing the top
    # iterator one step at a time reproduces the pre-order of a recursive walk.
    exhausted = object()
    pending = [(level, iter(token.children))]
    while pending:
        depth, siblings = pending[-1]
        node = next(siblings, exhausted)
        if node is exhausted:
            pending.pop()
            continue
        collected.append((node, depth, node.idx < anchor))
        pending.append((depth + 1, iter(node.children)))
    return collected

In [36]:
def search_adjectives(nlp_text):
    """Associate each noun in *nlp_text* with adjectives found beneath it.

    For every token tagged ``NOUN`` or ``PROPN`` the subtree returned by
    ``explore`` is scanned. An ``ADJ`` descendant is kept only when no
    noun/proper-noun descendant sits at a strictly shallower depth than the
    adjective within that same subtree.

    Returns:
        A dict mapping each noun token to the list of adjective tokens kept
        for it (possibly empty), in subtree order.
    """
    noun_tags = ('NOUN', 'PROPN')
    nouns_map = {tok: [] for tok in nlp_text if tok.pos_ in noun_tags}
    for head, adjectives in nouns_map.items():
        descendants = explore(head)
        # Depth of the shallowest nested noun; None when the subtree has none.
        shallowest_noun = min(
            (depth for node, depth, _ in descendants if node.pos_ in noun_tags),
            default=None,
        )
        for node, depth, _ in descendants:
            if node.pos_ != 'ADJ':
                continue
            # Keep the adjective unless some nested noun is strictly shallower.
            if shallowest_noun is None or shallowest_noun >= depth:
                adjectives.append(node)
    return nouns_map

In [37]:
m = token_map(test)

In [38]:
# `m` is keyed by Token.idx; in the token table above, 20 is the `token` value
# of "salad", so this walks the dependency subtree rooted at that noun.
subtokens = explore(m[20])

In [39]:
for stok, level, left in subtokens:
    print(stok.text, stok.pos_, level, left)

Red ADJ 0 True
, PUNCT 1 True
white ADJ 1 True
and CCONJ 2 True
bleu PROPN 2 True


In [40]:
test

Red, white and bleu salad was super yum and a great addition to the menu!

In [41]:
nm = search_adjectives(test)

In [42]:
for k, v in nm.items():
    print(k, v)

bleu []
salad [Red, white]
yum [super]
addition [great]
menu []


## Explore the graph for detecting the subjects of verbs

## Create communities of nodes

## Find meaningful ADJ-NOUN pairs per business category