In [None]:
'''This script finds discussion subjects in a review and the descriptors 
used by the reviewers to express their views on the subject'''

In [None]:
import pandas as pd
from itertools import chain
from collections import Counter

import spacy
import en_core_web_sm

# Load spaCy's small English pipeline once, at module level, so every call to
# get_descriptors reuses it. get_descriptors reads the part-of-speech (`pos_`)
# and dependency (`dep_`) tags of the tokens this pipeline produces.
nlp = spacy.load("en_core_web_sm")

In [None]:
def get_descriptors(text, pipeline=None):
    '''Map each subject/noun in ``text`` to the adjectives that describe it.

    Parameters
    ----------
    text : str
        Raw review text to analyse.
    pipeline : callable, optional
        Callable that turns ``text`` into an iterable of tokens exposing
        ``dep_``, ``pos_``, ``children`` and ``head`` (e.g. a loaded spaCy
        model). Defaults to the module-level ``nlp`` pipeline.

    Returns
    -------
    dict
        ``{token: [descriptor_token, ...]}``. A key is created for every
        non-pronoun nominal subject (``dep_ == 'nsubj'``) and for every other
        token tagged ``NOUN``/``PROPN``; the value may be an empty list.
        For subjects, descriptors are de-duplicated and kept in the order
        they were found: adjectives attached to the subject first, then
        adjectives reached through the subject's head verb.
    '''
    verb_pos = ('AUX', 'VERB')

    def is_adjective(token):
        '''return whether or not a token is an adjective.'''
        return token.dep_ == 'amod' or token.pos_ == 'ADJ'

    def get_children(token):
        '''get a flat list of the first and second order children of a
        given token; verbs and auxiliaries among the first order children
        are not followed.'''
        first_ch = [child for child in token.children
                    if child.pos_ not in verb_pos]
        # flatten the children of every first-order child into one list
        second_ch = list(chain.from_iterable(ch.children for ch in first_ch))
        return first_ch + second_ch

    def get_adjectives(token):
        '''get the adjectives among the first/second order children.'''
        return [child for child in get_children(token) if is_adjective(child)]

    if pipeline is None:
        # fall back to the spaCy model loaded at module level
        pipeline = nlp

    subjects_descriptors = {}
    for token in pipeline(text):
        # adjectives for subjects
        if token.dep_ == 'nsubj' and token.pos_ != 'PRON':
            # descriptive adjectives
            descriptors = get_adjectives(token)
            # predicate adjectives (using a linking verb)
            if token.head.pos_ in verb_pos:
                # The subject is itself a child of its head, so an
                # adjective-tagged subject would otherwise describe itself.
                descriptors.extend(adj for adj in get_adjectives(token.head)
                                   if adj != token)
            # The subject's own children are reachable both directly and via
            # the head, so drop duplicates. dict.fromkeys keeps the first
            # occurrence order, unlike a set whose order is arbitrary.
            subjects_descriptors[token] = list(dict.fromkeys(descriptors))
        # adjectives for non-subject nouns
        elif token.pos_ in ('NOUN', 'PROPN'):
            subjects_descriptors[token] = get_adjectives(token)
    return subjects_descriptors