## Sentiment Analyzer Testing

In [15]:
import spacy
from spacy import displacy
from typing import List, Dict, Union
from attr import dataclass
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

In [16]:
# Shared analyzers, built once here and reused by the functions defined below.
sentiment_analyzer = SentimentIntensityAnalyzer()
# NOTE(review): the 'en' shortcut name is only accepted by spaCy 2.x; spaCy 3
# requires a full package name such as 'en_core_web_sm' -- confirm the target
# spaCy version before changing this.
nlp = spacy.load('en')

class Politician:
    """A politician whose mentions are searched for in analysed text.

    Instances are used as dictionary keys by ``get_entity_sentiments``, so the
    class deliberately keeps the default identity-based equality and hashing.

    Attributes:
        num: Identifier of the politician; used as the key of the sentiment
            mapping returned by ``get_entity_sentiments``.
        name: Full name; its whitespace-separated words are compared against
            individual tokens.
    """

    def __init__(self, num: int, name: str) -> None:
        self.num = num
        self.name = name

    def __repr__(self) -> str:
        # Readable representation so dicts keyed by politicians can be inspected.
        return f"{type(self).__name__}(num={self.num!r}, name={self.name!r})"
        
@dataclass
class SentenceSubjectResult:
    """Sentiment recorded for one politician mentioned in one sentence."""
    # Score of the whole sentence that contains the mention.
    sentiment: float
    # The politician whose name matched a token of that sentence.
    subject: Politician
    # NOTE(review): despite its name this is filled from token.dep_ (a dependency
    # label such as 'nsubj'), not from a part-of-speech tag.
    pos: str
        
@dataclass
class AnalysisResult:
    """Pairs a politician with a sentiment score.

    NOTE(review): not referenced by any of the code visible in this notebook.
    """
    # presumably a Politician.num identifier -- TODO confirm
    politician: int
    sentiment: float

def get_entity_sentiments(statement: str, subjects: Union[List[Politician], None] = None) -> Dict[int, float]:
    """Attribute sentence-level sentiment to the politicians mentioned in a text.

    Every sentence that mentions a politician is scored once with the VADER
    ``compound`` score. If the sentence mentions exactly one politician, that
    politician receives the sentence score. If it mentions several, politicians
    whose matching token carries the dependency label ``nsubj`` or ``compound``
    are skipped and the remaining ones receive the score.

    Args:
        statement: Text to analyse; parsed with the module-level ``nlp`` pipeline.
        subjects: Politicians to look for. ``None`` or an empty list yields an
            empty result without parsing the text.

    Returns:
        Mapping of ``Politician.num`` to a compound sentiment score. When a
        politician qualifies in several sentences, the last such sentence wins.
    """
    if not subjects:
        # Nothing can match, so skip the (expensive) parse altogether.
        return {}

    # Dependency labels that exclude a politician when a sentence mentions several.
    skipped_deps = ('nsubj', 'compound')

    # sentence start index -> politician -> mention found in that sentence
    mentions_by_sentence: Dict[int, Dict[Politician, SentenceSubjectResult]] = {}
    # sentence start index -> compound score, so each sentence is scored only once
    sentence_scores: Dict[int, float] = {}

    for token in nlp(statement):
        politician = _lookup_subject(subjects, token.text)
        if politician is None:
            continue

        sentence = token.sent
        start = sentence.start
        if start not in sentence_scores:
            sentence_scores[start] = sentiment_analyzer.polarity_scores(sentence.text)['compound']

        # A multi-word name matches on several tokens; the last matching token
        # overwrites the earlier ones and therefore decides the stored label.
        mentions_by_sentence.setdefault(start, {})[politician] = SentenceSubjectResult(
            sentiment=sentence_scores[start], subject=politician, pos=token.dep_)

    subject_results: Dict[int, float] = {}
    for mentions in mentions_by_sentence.values():
        only_one_politician = len(mentions) == 1
        for mention in mentions.values():
            if not only_one_politician and mention.pos in skipped_deps:
                continue
            subject_results[mention.subject.num] = mention.sentiment

    return subject_results

def _lookup_subject(subjects: List[Politician], sentence_subject: str) -> Union[Politician, None]:
    """Find the politician whose name contains the given word.

    The comparison is case-insensitive and is made against each
    whitespace-separated word of ``Politician.name``.

    Args:
        subjects: Candidate politicians, or ``None``.
        sentence_subject: A single word (token text) to look up.

    Returns:
        The first politician in ``subjects`` with a matching name word, or
        ``None`` when ``subjects`` is ``None`` or nothing matches.
    """
    if subjects is None:
        return None

    for candidate in subjects:
        name_parts = candidate.name.split()
        if any(sentence_subject.lower() == part.lower() for part in name_parts):
            return candidate
    return None


In [422]:
# Politicians to look for; the first argument (num) becomes the key of the
# mapping returned by get_entity_sentiments.
politicians = [
    Politician(1, 'Donald Trump'),
    Politician(2, 'Bernie Sanders')
]

In [410]:
# Alternative sample input (currently disabled):
# tweet = 'When Trump accuses Bernie Sanders of Murder Trump is actually admitting he’s a murderer'
tweet = 'Donald Trump is shit compared to Bernie Sanders'
# Maps Politician.num to the compound score of the sentence mentioning them.
result = get_entity_sentiments(tweet, politicians)

In [411]:
result

{2: -0.5574}

In [412]:
# Parse the sample tweet and draw its dependency tree, to inspect the labels
# ('nsubj', 'compound', ...) that get_entity_sentiments filters on.
doc = nlp(tweet)
displacy.render(doc, style='dep')

In [413]:
def get_pos_subjects(doc, pos_list, politicians):
    """Print, per politician name, the tokens that have it as grammatical subject.

    Every token of ``doc`` whose ``pos_`` is listed in ``pos_list`` is examined.
    If one of its direct children is an ``nsubj`` matching a politician name, the
    token is recorded for that politician and for politicians conjoined with that
    subject. Only when no direct child qualifies is the search continued through
    the token's ancestors with ``traverse_up``.

    Args:
        doc: Iterable of parsed tokens (a spaCy ``Doc`` in this notebook).
        pos_list: ``pos_`` values to consider, e.g. ``['VERB', 'ADJ', 'NOUN']``.
        politicians: Politician names as plain strings.

    Returns:
        None. The resulting mapping is printed, not returned.
    """
    tokens_by_politician = {}
    for name in politicians:
        tokens_by_politician[name] = []

    for token in doc:
        if token.pos_ not in pos_list:
            continue

        has_direct_subject = False
        for dependant in token.children:
            hit = match_politician(dependant.text, politicians)
            if hit is None or dependant.dep_ != 'nsubj':
                continue
            tokens_by_politician[hit].append(token)
            traverse_subject_conjs(dependant, token, tokens_by_politician, politicians)
            has_direct_subject = True

        if not has_direct_subject:
            # No politician is a direct subject: look further up the tree.
            traverse_up(token, token, tokens_by_politician, politicians)

    print(tokens_by_politician)
    
def match_politician(text, politicians):
    """Return the first politician name that contains ``text`` as a whole word.

    The comparison is case-sensitive and is made against the
    whitespace-separated words of each name.

    Args:
        text: A single word to look up.
        politicians: Politician names as plain strings.

    Returns:
        The matching name, or ``None`` if no name contains the word.
    """
    candidates = (name for name in politicians if text in name.split())
    return next(candidates, None)

def traverse_up(possible_verb, current, verbs, politicians):
    """Credit ``possible_verb`` to politician subjects found among its ancestors.

    Starting at ``current``, the dependency tree is climbed one head at a time.
    At each ancestor, every child that is an ``nsubj`` matching a politician name
    gets ``possible_verb`` appended to its list in ``verbs``, as do politicians
    conjoined with that subject. Climbing does not stop at the first match; it
    ends at the token that is its own head.

    Args:
        possible_verb: Token to record for the subjects that are found.
        current: Token from which to start climbing.
        verbs: Mapping of politician name to list of tokens; updated in place.
        politicians: Politician names as plain strings.

    Returns:
        None.
    """
    node = current
    while True:
        parent = node.head
        if node == parent:
            # Reached the token that is its own head: nothing left to climb.
            return
        for dependant in parent.children:
            hit = match_politician(dependant.text, politicians)
            if hit is None or dependant.dep_ != 'nsubj':
                continue
            verbs[hit].append(possible_verb)
            traverse_subject_conjs(dependant, possible_verb, verbs, politicians)
        node = parent
    
    
def traverse_subject_conjs(subj, possible_verb, verbs, politicians):
    """Credit ``possible_verb`` to politicians conjoined with a subject.

    Follows chains of ``conj`` children below ``subj``. Each conjunct that
    matches a politician name gets ``possible_verb`` appended to its list in
    ``verbs``, and its own ``conj`` children are then examined the same way.
    A conjunct that matches no politician ends that chain.

    Args:
        subj: Subject token whose conjuncts are inspected.
        possible_verb: Token to record for every matching conjunct.
        verbs: Mapping of politician name to list of tokens; updated in place.
        politicians: Politician names as plain strings.

    Returns:
        None.
    """
    pending = [subj]
    while pending:
        node = pending.pop()
        for dependant in node.children:
            if dependant.dep_ != 'conj':
                continue
            hit = match_politician(dependant.text, politicians)
            if hit is None:
                continue
            verbs[hit].append(possible_verb)
            pending.append(dependant)

In [423]:
# get_pos_subjects matches against plain name strings, so hand it the names
# rather than the Politician objects.
politician_names = [politician.name for politician in politicians]
get_pos_subjects(doc, ['VERB', 'ADJ', 'NOUN'], politician_names)

{'Donald Trump': [shit, compared], 'Bernie Sanders': []}
