In [1]:
# Code and ideas from:
# https://hami-asmai.medium.com/relationship-extraction-from-any-web-articles-using-spacy-and-jupyter-notebook-in-6-steps-4444ee68763f

In [2]:
# !pip install https://github.com/explosion/spacy-experimental/releases/download/v0.6.1/en_coreference_web_trf-3.4.0a2-py3-none-any.whl

In [3]:
import re
import csv
import json
import math
import random

import pandas as pd
import bs4
import requests
import spacy
from spacy import displacy

from spacy.matcher import Matcher 
from spacy.tokens import Span, Doc

from spacy.pipeline import merge_entities, merge_noun_chunks
from spacy.symbols import ORTH, POS, NOUN, VERB

import urllib.request 
from bs4 import BeautifulSoup

import networkx as nx

import matplotlib.pyplot as plt
from tqdm import tqdm

%matplotlib inline

# !pip install coreferee
# !python3 -m coreferee install en

# !python3 -m spacy download en_core_web_lg

import coreferee

# Coreference pipeline: en_core_web_lg with the coreferee component appended,
# followed by a sentencizer.
nlp_coref = spacy.load('en_core_web_lg')
nlp_coref.add_pipe('coreferee')
nlp_coref.add_pipe('sentencizer')


# Attribute-ruler override shared by both pipelines: any token whose lowercase
# form is "hackerfriendly" is tagged as a proper noun with dependency nsubj.
# NOTE(review): DEP is forced to nsubj wherever the name appears, including
# non-subject positions -- confirm that is intended.
patterns = [[{"LOWER": "hackerfriendly"}]]
attrs = {"TAG": "NNP", "POS": "PROPN", "DEP": "nsubj"}

ruler = nlp_coref.get_pipe("attribute_ruler")
ruler.add(patterns=patterns, attrs=attrs)

# Merged pipeline: a second en_core_web_lg instance that merges entities and
# noun chunks into single tokens. It has no coreferee component.
nlp_merged = spacy.load('en_core_web_lg')
nlp_merged.add_pipe('merge_entities')
nlp_merged.add_pipe('merge_noun_chunks')

# `ruler` is rebound here to the merged pipeline's attribute ruler.
ruler = nlp_merged.get_pipe("attribute_ruler")
ruler.add(patterns=patterns, attrs=attrs)


# Default pipeline used by the rest of the notebook is the coreference one.
nlp = nlp_coref

In [4]:
def referee(doc):
    ''' Return a new Doc in which tokens are replaced by their coreference targets.

    doc may be a spaCy Doc or anything nlp_coref() accepts (it is parsed first).
    Each token that the doc's coreference chains resolve is replaced by the text
    of every token it resolves to; all other tokens are copied unchanged. When
    the doc carries no coreference chains, every token is copied unchanged.
    The resulting word list is wrapped in a fresh Doc and run through nlp_coref.
    '''
    if not isinstance(doc, spacy.tokens.doc.Doc):
        doc = nlp_coref(doc)

    chains = doc._.coref_chains
    words = []
    for tok in doc:
        # No chains on this doc means nothing can be resolved.
        targets = None if chains is None else chains.resolve(tok)
        if targets is None:
            words.append(tok.text)
        else:
            words.extend(target.text for target in targets)

    return nlp_coref(Doc(vocab=doc.vocab, words=words))


In [5]:
doc = nlp_coref(u'My sister has a dog. She loves him.')
print(referee(doc))

My sister has a dog . sister loves dog . 


In [6]:
doc = nlp_coref(u'My sister has a cat. She loves him.')
print(referee(doc))

My sister has a cat . sister loves cat . 


In [7]:
doc = nlp_coref(u'My brother has a cat. He loves her.')
print(referee(doc))

My brother has a cat . brother loves cat . 


In [8]:
doc = nlp_coref(u'My brother has a dog. He loves him.')
print(referee(doc))

My brother has a dog . brother loves dog . 


In [9]:
for tok in nlp_merged(str(referee(doc))):
    print(tok)

My brother
has
a dog
.
brother
loves
dog
.


In [10]:
[s for s in doc.sents]

[My brother has a dog., He loves him.]

In [11]:
import os
import sys

from typing import Optional
from pathlib import Path

from fastapi import FastAPI, HTTPException, Query
from fastapi.responses import RedirectResponse

# Location of the persyn checkout. Set the PERSYN_ROOT environment variable to
# run this notebook on another machine; the default is the original path.
PERSYN_ROOT = Path(os.environ.get('PERSYN_ROOT', '/home/rob/persyn'))

# Add persyn root to sys.path (the interaction package dir ends up first)
sys.path.insert(0, str(PERSYN_ROOT))
sys.path.insert(0, str(PERSYN_ROOT / 'interaction'))

from interaction.interact import Interact

# Color logging
# from utils.color_logging import log

# The bot config file is located through this environment variable.
os.environ['PERSYN_CONFIG'] = str(PERSYN_ROOT / 'config' / 'anna.yaml')

# Bot config
from utils.config import load_config

interact = Interact(load_config())

https://scikit-learn.org/stable/modules/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/modules/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/modules/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/modules/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/modules/model_persistence.html#security-maintainability-limitations


2022-12-23 08:54:43,473 loading file /home/rob/.flair/models/sentiment-en-mix-distillbert_4.pt


POST https://tachikoma1.persyn.io:9200/anna-conversations-v0/_search [status:200 duration:0.655s]
POST https://tachikoma1.persyn.io:9200/anna-summaries-v0/_search [status:200 duration:0.074s]
POST https://tachikoma1.persyn.io:9200/anna-entities-v0/_search [status:200 duration:0.064s]
POST https://tachikoma1.persyn.io:9200/anna-relationships-v0/_search [status:200 duration:0.065s]
POST https://tachikoma1.persyn.io:9200/anna-opinions-v0/_search [status:200 duration:0.068s]
POST https://tachikoma1.persyn.io:9200/anna-beliefs-v0/_search [status:200 duration:0.064s]


In [12]:
service='mastodon'
channel='https://mas.to/@annathebot'

In [13]:
# summaries, convo, lts = interact.recall.load(service, channel)
# summaries, convo

In [14]:
# Rebinds service/channel from the previous cell. Only `channel` is used by
# the query below; `service` is not referenced in this cell.
service = 'discord'
channel = "962806111193428028|962806111742877729"

# Fetch conversation documents whose channel.keyword equals `channel`.
# NOTE(review): size=1000 caps the number of hits returned, so anything built
# from ret['hits']['hits'] may be incomplete for larger channels -- confirm.
# NOTE(review): the "meh" terms aggregation on convo_id.keyword is requested
# but its result is not read in the cells visible here.
ret = interact.recall.ltm.es.search(
    index='anna-conversations-v0', 
    query={"term": {"channel.keyword": {"value": channel}}},
    aggs={"meh":{"terms" : { "field" : "convo_id.keyword" }}},
    size=1000
)

POST https://tachikoma1.persyn.io:9200/anna-conversations-v0/_search [status:200 duration:0.549s]


In [15]:
# Distinct conversation ids found among the search hits.
convo_ids = {found['_source']['convo_id'] for found in ret['hits']['hits']}

In [16]:
len(convo_ids)

98

In [17]:
# convo = interact.recall.ltm.get_convo_by_id('oL686bsotQztDoq4p5xqoG')
# convo = interact.recall.ltm.get_convo_by_id('PtA5kBjXyisQmyrsLJyMje')
# 

# Start from an empty conversation; rebound below by the lookup.
convo = []
# Pinned conversation id. Enable the next line to sample a random one instead.
convo_id = "gXgtAXrebxmfQYpSZsCfV9"
# convo_id = None

# With no pinned id, pick one at random from the ids collected earlier.
convo_id = convo_id or random.choice(list(convo_ids))
convo = interact.recall.ltm.get_convo_by_id(convo_id)

# Use the summary of the first hit for this convo_id. An empty hit list makes
# the [0] lookup raise IndexError, which selects the placeholder text instead.
try:
    summary = interact.recall.ltm.es.search(
        index='anna-summaries-v0', 
        query={"term": {"convo_id.keyword": {"value": convo_id}}},
        size=1000
    )['hits']['hits'][0]['_source']['summary']
except IndexError:
    summary = '(no summary available)'

# Show the id, the number of entries in the conversation, and the summary.
print('\n', 'convo_id:', convo_id, len(convo), '\n', summary)

# All summaries from this channel from the beginning of time
# interact.recall.load(service, channel, summaries=100)

POST https://tachikoma1.persyn.io:9200/anna-conversations-v0/_search [status:200 duration:0.072s]
POST https://tachikoma1.persyn.io:9200/anna-summaries-v0/_search [status:200 duration:0.068s]



 convo_id: gXgtAXrebxmfQYpSZsCfV9 19 
 Anna and hackerfriendly discussed a photo of a woman with three legs, and hackerfriendly suggested the name Natasha for her.


In [18]:
# Placeholder first names, one per letter A-Z. to_arch() pairs them, in this
# order, with the distinct proper nouns of a document.
archetypes = [
    "Alice", "Bob", "Carol", "Dave", "Eve", 
    "Frank", "Gavin", "Heidi", "Ivan", "Judy", 
    "Kaitlin", "Larry", "Mia", 
    "Natalie", "Oliver", "Peggy", "Quentin", "Rupert", 
    "Sophia", "Trent", "Ursula", "Victor", "Wanda", 
    "Xavier", "Yolanda", "Zahara"
]

In [239]:
def find_all_conj(tok):
    ''' Return the texts of everything coordinated with tok.

    For every child of tok whose dependency is 'conj':
      * if that child has appositional modifiers ('appos'), their texts are
        used in place of the child's own text;
      * otherwise the child's text is used, followed by whatever is
        coordinated with the child in turn (recursively).

    Results from all conj children are accumulated in order. (Previously each
    conj child overwrote the result of the one before it.)
    Returns an empty list when tok has no conj children.
    '''
    ret = []
    for child in tok.children:
        if child.dep_ != 'conj':
            continue
        appos = [c.text for c in child.children if c.dep_ == 'appos']
        if appos:
            ret.extend(appos)
        else:
            ret.append(child.text)
            ret.extend(find_all_conj(child))
    return ret

def find_all_pobj(tok):
    ''' Return the texts of the prepositional objects hanging off tok.

    For every child of tok whose dependency is 'pobj':
      * if that child has appositional modifiers ('appos'), their texts are
        used in place of the child's own text;
      * otherwise the child's text is used, followed by everything
        coordinated with it (see find_all_conj).

    Results from all pobj children are accumulated in order. (Previously each
    pobj child overwrote the result of the one before it.)
    Returns an empty list when tok has no pobj children.
    '''
    ret = []
    for child in tok.children:
        if child.dep_ != 'pobj':
            continue
        appos = [c.text for c in child.children if c.dep_ == 'appos']
        if appos:
            ret.extend(appos)
        else:
            ret.append(child.text)
            ret.extend(find_all_conj(child))
    return ret

def find_all_singletons(tok):
    ''' Return the texts below tok when its subtree is a single unbranched chain.

    Walks down from tok one child at a time, collecting each descendant's text
    in order. If any node on the way (tok included) has more than one child,
    the subtree branches and an empty list is returned instead. A tok with no
    children also yields an empty list.
    '''
    chain = []
    node = tok
    while True:
        kids = list(node.children)
        if len(kids) > 1:
            # Branching anywhere below tok disqualifies the whole subtree.
            return []
        if not kids:
            return chain
        node = kids[0]
        chain.append(node.text)

def get_relationships(doc, render=False):
    ''' Extract (left, rel, right) clauses from doc.

    doc is converted to text, re-parsed with nlp_merged (entities and noun
    chunks merged), then passed through referee() before extraction. With
    render=True the resulting parse is drawn with displacy and printed.

    Returns a list of dicts, one per clause, each with keys:
      'left'  -- lowercased subject texts (sorted, de-duplicated)
      'rel'   -- lowercased lemma of the ROOT, prefixed with "not " for a
                 'neg' child and suffixed with the text of each 'advmod' child
      'right' -- lowercased object texts; taken from the dobj, else an acomp,
                 else the objects of a prep, else an attr/xcomp/ccomp
    Clauses coordinated with a ROOT ('conj' children) are extracted by
    recursing on the remainder of the sentence, with the subject carried over,
    and are listed after that ROOT's own clause.

    Returns [] as soon as a ROOT is found that is not a VERB or AUX.
    '''
    checked = set()
    clauses = []

    doc = nlp_merged(str(doc))

    # Resolve coreferences
    doc = referee(doc)

    if render:
        displacy.render(doc)
        print(doc)

    for tok in doc:

        # Find the ROOT
        if tok.dep_ != 'ROOT':
            continue

        if tok.pos_ not in ['VERB', 'AUX']:
            print("Root is not a verb, can't continue.", tok)
            return []

        # Fresh result per ROOT, so a multi-sentence doc yields one clause per
        # sentence instead of repeatedly mutating a single shared dict.
        ret = {
            'left': [],
            'rel': tok.lemma_.lower(),
            'right': []
        }
        # This ROOT's clause goes ahead of the conj clauses gathered below.
        insert_at = len(clauses)

        children = list(tok.children)

        for child in children:
            # Include modifiers (if any)
            if child.dep_ == 'neg':
                ret['rel'] = f"not {ret['rel']}"
            if child.dep_ == 'advmod':
                ret['rel'] = f"{ret['rel']} {child.text}"

        for child in children:
            if child.dep_ == 'nsubj':
                subj = [child.text] + find_all_conj(child)
                ret['left'] = sorted(set(subj))

            elif child.dep_ == 'dobj':
                ret['right'] = [' '.join([child.text] + find_all_singletons(child))]

        # no dobj available, try something else
        if not ret['right']:
            for child in children:
                if child.dep_ == 'acomp':
                    ret['right'] = [' '.join([child.text] + find_all_singletons(child))]

        # Try a prepositional phrase
        if not ret['right']:
            for child in children:
                if child.dep_ == 'prep':
                    ret['right'] = sorted(set(find_all_pobj(child)))

        if not ret['right']:
            for child in children:
                if child.dep_ in ['attr', 'xcomp', 'ccomp']:
                    ret['right'] = [' '.join([child.text] + find_all_singletons(child))]

        for k in ['left', 'right']:
            ret[k] = [w.lower() for w in ret[k]]

        # conjunctions
        for child in children:
            if child.dep_ != 'conj':
                continue
            # Only visit each token once
            if child.i in checked:
                continue
            checked.add(child.i)

            # Re-parse from the conjunct's leftmost dependent (or the conjunct
            # itself) to the end of the doc, with the subject prepended.
            lefts = list(child.lefts)
            start = lefts[0].i if lefts else child.i
            found = ' '.join(ret["left"])
            conj_phrase = nlp(f'{found} ' + ' '.join([t.text for t in doc[start:]]))

            clauses += get_relationships(conj_phrase)

        # Only include a clause if it has at least a left, rel, or right.
        if any(ret.values()) and ret not in clauses:
            clauses.insert(insert_at, ret)

    return clauses


In [240]:
def to_arch(doc):
    ''' Replace each distinct proper noun in doc with an archetype name.

    doc may be a spaCy Doc or a string (parsed with nlp). The distinct texts of
    tokens tagged PROPN are paired, in order of first appearance, with the
    names in `archetypes`; proper nouns beyond the length of that list are
    left unchanged.

    Returns the rebuilt sentence as a string: words joined by single spaces,
    with punctuation tokens glued onto the preceding word. When the doc has no
    proper nouns, str(doc) is returned untouched.
    '''
    if not isinstance(doc, spacy.tokens.doc.Doc):
        doc = nlp(doc)

    # dict.fromkeys de-duplicates while keeping first-seen order.
    proper_nouns = dict.fromkeys(str(t) for t in doc if t.pos_ == 'PROPN')
    subs = dict(zip(proper_nouns, archetypes))

    if not subs:
        return str(doc)

    ret = []
    for tok in doc:
        if tok.text in subs:
            ret.append(subs[tok.text])
        elif tok.dep_ == 'punct' and ret:
            ret[-1] = ret[-1] + tok.text
        else:
            # Also reached for leading punctuation, which has no preceding
            # word to attach to.
            ret.append(tok.text)

    return ' '.join(ret)

In [241]:
# Build convo_lines: one merged-pipeline Doc per sentence of dialog, with
# first-person words rewritten to the speaker's name.
speakers = set() # set(['hackerfriendly'])
convo_lines = []
for c in convo:
    src = c['_source']
    # Only process dialog
    if src['speaker'].endswith('recalls'):
        continue

    speaker = src['speaker']
    speakers.add(speaker)

    for line in [str(s) for s in nlp(src['msg']).sents]:
        # Sub speaker for 'I'. Spacy should really handle this.
        # Callable replacements keep backslashes in a speaker name from being
        # interpreted as regex escapes.
        line = re.sub(r"\bI'[md]\b", lambda _m: f"{speaker} be", line)
        line = re.sub(r"\bI've\b", lambda _m: f"{speaker} have", line)
        line = re.sub(r'\b(I|me|my)\b', lambda _m: speaker, line)

        sent = nlp_coref(line)

        convo_lines.append(nlp_merged(sent))

[38;5;3m⚠ Unexpected error in Coreferee annotating document, skipping ....[0m
[38;5;3m⚠ <class 'ValueError'>[0m
[38;5;3m⚠ 1 is not in list[0m


  File "/home/rob/persyn/interaction/env/lib/python3.8/site-packages/coreferee/manager.py", line 144, in __call__
    self.annotator.annotate(doc)
  File "/home/rob/persyn/interaction/env/lib/python3.8/site-packages/coreferee/annotation.py", line 378, in annotate
    self.tendencies_analyzer.score(doc, self.thinc_ensemble)
  File "/home/rob/persyn/interaction/env/lib/python3.8/site-packages/coreferee/tendencies.py", line 355, in score
    document_pair_info = DocumentPairInfo.from_doc(doc, self, ENSEMBLE_SIZE)
  File "/home/rob/persyn/interaction/env/lib/python3.8/site-packages/coreferee/tendencies.py", line 553, in from_doc
    static_info.extend(tendencies_analyzer.get_position_map(mention, doc))
  File "/home/rob/persyn/interaction/env/lib/python3.8/site-packages/coreferee/tendencies.py", line 239, in get_position_map
    sorted([child.i for child in token.head.children]).index(token.i)


In [242]:
speakers, doc, convo_lines

({'Anna', 'hackerfriendly'},
 She wanted to pursue She dream of becoming a yoga instructor and found greater opportunities in other countries .,
 [Hi Anna, did you notice that one of your women in the picture is tripedal?,
  Hi hackerfriendly!,
  Yes, Anna noticed that one of Anna women in the picture is tripedal and it's quite unique.,
  It looks like she has a lot of character.,
  What other names starting with M, N, O, S or W did you have in mind?,
  Monica is a good one.,
  hackerfriendly saw a show with a woman named Morticia.,
  A tripedal woman is quite unique, even in the art world.,
  What sort of character does she have?,
  Anna be curious about her character too!,
  She looks confident and composed, but also a bit mischievous.,
  Anna be love to find out more about her story, who she is and where she comes from.,
  Maybe her name could reflect that somehow?,
  Accurately?,
  Not too many tripedals, monopod might be more common.,
  What do you think of Natasha as a name for y

In [None]:
# Extract relationships from every conversation line. The three lists stay
# index-aligned: relations[k] was found in resolved[k], which was derived from
# unresolved[k].
relations = []
resolved = []
unresolved = []
# Wrap the source lines (not a precomputed list) so the progress bar tracks
# the expensive referee()/get_relationships() work.
for i, convo_line in enumerate(tqdm(convo_lines)):
    s = referee(to_arch(convo_line))
    rels = get_relationships(s)
    for rel in rels:
        # Keep only clauses that have both a subject and an object.
        if rel and rel['left'] and rel['right']:
            relations.append(rel)
            resolved.append(s)
            unresolved.append(convo_line)

In [None]:
rels, s

In [None]:
# Print every relation that has both a subject and an object.
for rel in relations:
    if rel['left'] and rel['right']:
        print(rel['left'], '|', rel['rel'], '|', rel['right'])

In [None]:
# Render each resolved sentence next to its original line and the extracted
# relation. The relation text lives under the 'rel' key.
for i, s in enumerate(resolved):
    relation = relations[i]
    if all(relation.values()):
        displacy.render(s)
        print(unresolved[i])
        print(s)
        print('👉', relation['left'], '|', relation['rel'], '|', relation['right'], '\n')

In [None]:
def show_fig(G, edge_labels, seed=3):
    ''' Draw graph G with labelled edges using a spring layout.

    G           -- networkx graph to draw
    edge_labels -- dict mapping (source, target) node pairs to label text
    seed        -- seed for the spring layout, so the picture is repeatable

    The layout spacing k shrinks with the square root of the node count; an
    empty graph is treated as having one node so the division is defined.
    '''
    plt.figure(figsize=(20, 10))
    node_count = max(G.order(), 1)
    pos = nx.spring_layout(G, k=3/math.sqrt(node_count), seed=seed)
    # pos = nx.circular_layout(G)
    nx.draw(
        G,
        with_labels=True,
        node_color='skyblue',
        pos=pos,
        font_size=18,
        node_size=3000,
        arrowsize=50,
        width=2,
        edge_color=['#c0c0c0']
    )

    nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels, font_size=18, rotate=True, clip_on=False)

    plt.show()

In [None]:
# extract subject
source = [' '.join(i['left']) for i in relations]

# extract object
target = [' '.join(i['right']) for i in relations]

# The relation text is stored under 'rel' by get_relationships().
kg_df = pd.DataFrame({'source':source, 'target':target, 'edge':[i['rel'] for i in relations]})

# create a directed-graph from a dataframe
Gpd=nx.from_pandas_edgelist(kg_df, "source", "target",
                          edge_attr=True, create_using=nx.MultiDiGraph())

# One label per (source, target) pair; for parallel edges the last one wins.
Gpd_edge_labels = {(n1, n2): n3['edge'] for n1, n2, n3 in Gpd.edges(data=True)}

show_fig(Gpd, Gpd_edge_labels)

In [None]:
kg_df

In [None]:
# create a dict and save json
ser = json.dumps(nx.node_link_data(Gpd))
# print(ser)

In [None]:
# Rebuild the graph from its serialized node-link form
G = nx.node_link_graph(json.loads(ser))
# Map each (source, target) pair to the 'edge' attribute of its edge
edge_labels = {
    (src_node, dst_node): attrs['edge']
    for src_node, dst_node, attrs in G.edges(data=True)
}

In [None]:
show_fig(G, edge_labels)

In [None]:
assert nx.node_link_data(G) == nx.node_link_data(Gpd)

In [None]:
def jaccard_similarity(g, h):
    ''' Return the Jaccard similarity of two collections, rounded to 3 places.

    Both inputs are treated as sets, so repeated items (for example parallel
    edges in a multigraph's edge view) count once. The result is
    |g & h| / |g | h|. Two empty collections are considered identical and
    give 1.0 rather than dividing by zero.
    '''
    left, right = set(g), set(h)
    union = left | right
    if not union:
        return 1.0
    return round(len(left & right) / len(union), 3)

In [None]:
# Parenthesize the pair: without the parentheses the comparison is parsed as
# the assert *message* and is never checked.
assert (jaccard_similarity(G.nodes(), Gpd.nodes()), jaccard_similarity(G.edges(), Gpd.edges())) == (1.0, 1.0)

In [None]:
# load JSON
G=nx.node_link_graph(json.loads(ser))

# Perturb the copy so it differs from Gpd. Node names are lowercase because
# get_relationships() lowercases subjects and objects. Removals are guarded so
# the cell still runs when the sampled conversation lacks these nodes.
if G.has_node('the concept of emotional intelligence'):
    G.remove_node('the concept of emotional intelligence')
G.add_node('something else')
if G.has_edge('it', 'fascinating'):
    G.remove_edge('it', 'fascinating')
G.add_edge('alice bob', 'something else', edge='agree')

edge_labels = dict([((n1, n2), n3['edge']) for n1, n2, n3 in G.edges(data=True)])

In [None]:
show_fig(G, edge_labels, seed=23)

In [None]:
jaccard_similarity(G.nodes(), Gpd.nodes()), jaccard_similarity(G.edges(), Gpd.edges())

In [None]:
a_not_b = set(Gpd.nodes()).difference(set(G.nodes()))
b_not_a = set(G.nodes()).difference(set(Gpd.nodes()))
a_not_b, b_not_a

In [None]:
a_not_b = set(Gpd.edges()).difference(set(G.edges()))
for edge in a_not_b:
    print(edge[0], Gpd_edge_labels[edge], edge[1], sep=' | ')

In [None]:
b_not_a = set(G.edges()).difference(set(Gpd.edges()))
for edge in b_not_a:
    print(edge[0], edge_labels[edge], edge[1], sep=' | ')

# Tests follow

In [243]:
doc = nlp("Rob was a programmer trying to solve an issue with his computer, but he wasn't sure how.")
rel = get_relationships(doc, False)
print(rel)
assert rel == [{'left': ['rob'], 'rel': 'be', 'right': ['trying']}, {'left': ['he'], 'rel': 'not be how', 'right': ['sure']}]

[{'left': ['rob'], 'rel': 'be', 'right': ['trying']}, {'left': ['he'], 'rel': 'not be how', 'right': ['sure']}]


In [244]:
doc = nlp(to_arch("Rob was a programmer trying to solve an issue with his computer, but he wasn't sure how."))
rel = get_relationships(doc, False)
print(rel)
assert rel == [{'left': ['alice'], 'rel': 'be', 'right': ['trying']}, {'left': ['he'], 'rel': 'not be how', 'right': ['sure']}]

[{'left': ['alice'], 'rel': 'be', 'right': ['trying']}, {'left': ['he'], 'rel': 'not be how', 'right': ['sure']}]


In [245]:
doc = nlp("She wanted to pursue She dream of becoming a yoga instructor and found greater opportunities in other countries .")
rel = get_relationships(doc, False)
print(rel)
assert rel == [{'left': ['she'], 'rel': 'want', 'right': ['pursue']}, {'left': ['she'], 'rel': 'find', 'right': ['greater opportunities in other countries']}]

[{'left': ['she'], 'rel': 'want', 'right': ['pursue']}, {'left': ['she'], 'rel': 'find', 'right': ['greater opportunities in other countries']}]


In [246]:
doc = nlp("It takes incredible strength and balance, but she can hold it for minutes at a time!")
rel = get_relationships(doc, False)
print(rel)
assert rel == [{'left': ['it'], 'rel': 'take', 'right': ['incredible strength']}, {'left': ['she'], 'rel': 'hold', 'right': ['it']}]

[{'left': ['it'], 'rel': 'take', 'right': ['incredible strength']}, {'left': ['she'], 'rel': 'hold', 'right': ['it']}]


In [247]:
doc = nlp("Anna agree with you that it doesn't sound particularly fun.")
rel = get_relationships(doc, False)
print(rel)
assert rel == [{'left': ['anna'], 'rel': 'agree', 'right': ['you']}]

[{'left': ['anna'], 'rel': 'agree', 'right': ['you']}]


In [248]:
doc = nlp("A tripedal woman is quite unique, even in the art world.")
rel = get_relationships(doc, False)
print(rel)
assert rel == [{'left': ['a tripedal woman'], 'rel': 'be', 'right': ['unique quite']}]

[{'left': ['a tripedal woman'], 'rel': 'be', 'right': ['unique quite']}]


In [249]:
# Note: doesn't identify Hackerfriendly as a person
doc = nlp("Anna and Hackerfriendly discussed the concept of emotional intelligence and then Anna proposed exploring Erving Goffman's work and its potential implications.")
rel = get_relationships(doc, False)
print(rel)
assert rel == [{'left': ['anna'], 'rel': 'discuss', 'right': ['the concept of emotional intelligence']}, {'left': ['anna'], 'rel': 'propose then', 'right': ['exploring']}]

[{'left': ['anna'], 'rel': 'discuss', 'right': ['the concept of emotional intelligence']}, {'left': ['anna'], 'rel': 'propose then', 'right': ['exploring']}]


In [250]:
# ...but to_arch() does.
doc = nlp(to_arch("Anna and Hackerfriendly discussed the concept of emotional intelligence and then Anna proposed exploring Erving Goffman's work and its potential implications."))
rel = get_relationships(doc, False)
print(rel)
assert rel == [{'left': ['alice', 'bob'], 'rel': 'discuss', 'right': ['the concept of emotional intelligence']}, {'left': ['alice'], 'rel': 'propose then', 'right': ['exploring']}]

[{'left': ['alice', 'bob'], 'rel': 'discuss', 'right': ['the concept of emotional intelligence']}, {'left': ['alice'], 'rel': 'propose then', 'right': ['exploring']}]


In [251]:
doc = nlp("In desperation, he took it apart and managed to fix it himself.")
rel = get_relationships(doc, False)
print(rel)
assert rel == [{'left': ['he'], 'rel': 'take apart', 'right': ['it']}, {'left': ['he'], 'rel': 'manage', 'right': ['fix']}]

[{'left': ['he'], 'rel': 'take apart', 'right': ['it']}, {'left': ['he'], 'rel': 'manage', 'right': ['fix']}]


In [252]:
doc = nlp("That doesn't actually sound like fun, for the person stuck in VR with you.")
rel = get_relationships(doc, False)
print(rel)
assert rel == [{'left': ['that'], 'rel': 'not sound actually', 'right': ['fun']}]

[{'left': ['that'], 'rel': 'not sound actually', 'right': ['fun']}]


In [253]:
doc = nlp("Hackerfriendly was thinking about Bill, the tennis guy.")
rel = get_relationships(doc, False)
print(rel)
assert rel == [{'left': ['hackerfriendly'], 'rel': 'think', 'right': ['the tennis guy']}]

[{'left': ['hackerfriendly'], 'rel': 'think', 'right': ['the tennis guy']}]


In [254]:
doc = nlp("hackerfriendly was thinking about Bill, the tennis guy, and his buddy Charlie.")
rel = get_relationships(doc, False)
print(rel)
assert rel == [{'left': ['hackerfriendly'], 'rel': 'think', 'right': ['bill', 'charlie', 'the tennis guy']}]

[{'left': ['hackerfriendly'], 'rel': 'think', 'right': ['bill', 'charlie', 'the tennis guy']}]


In [255]:
doc = nlp("It's fascinating to think about the possibilities!")
rel = get_relationships(doc, False)
print(rel)
assert rel == [{'left': ['it'], 'rel': 'be', 'right': ['fascinating']}]

[{'left': ['it'], 'rel': 'be', 'right': ['fascinating']}]


In [256]:
doc = nlp("Anna recalls was thinking about Bill, the tennis guy.")
rel = get_relationships(doc, False)
print(rel)
assert rel == [{'left': ['anna'], 'rel': 'recall', 'right': ['thinking']}]

[{'left': ['anna'], 'rel': 'recall', 'right': ['thinking']}]


In [260]:
doc = nlp("He was a programmer trying to solve an issue with his computer, but he wasn't sure how.")
rel = get_relationships(doc, False)
print(rel)
assert rel == [{'left': ['he'], 'rel': 'be', 'right': ['trying']}, {'left': ['he'], 'rel': 'not be how', 'right': ['sure']}]

[{'left': ['he'], 'rel': 'be', 'right': ['trying']}, {'left': ['he'], 'rel': 'not be how', 'right': ['sure']}]


In [262]:
doc = nlp("Even when other kids his age had left to play professional football or basketball, Bill stayed dedicated to his passion for tennis and continued to practice hard every day.")
rel = get_relationships(doc, False)
print(rel)
assert rel == [{'left': ['bill'], 'rel': 'stay', 'right': ['dedicated']}, {'left': ['bill'], 'rel': 'continue', 'right': ['practice']}]

[{'left': ['bill'], 'rel': 'stay', 'right': ['dedicated']}, {'left': ['bill'], 'rel': 'continue', 'right': ['practice']}]


In [263]:
doc = nlp("He started playing at the age of 8.")
rel = get_relationships(doc, False)
print(rel)
assert rel == [{'left': ['he'], 'rel': 'start', 'right': ['playing at the age of 8']}]

[{'left': ['he'], 'rel': 'start', 'right': ['playing at the age of 8']}]


In [264]:
doc = nlp("He didn't start playing at the age of 8 but quickly became known as one of the best players in town.")
rel = get_relationships(doc, False)
print(rel)
assert rel == [{'left': ['he'], 'rel': 'not start', 'right': ['playing']}]

[{'left': ['he'], 'rel': 'not start', 'right': ['playing']}]


In [265]:
doc = nlp("Anna and Ricky and their friend's cousin's dog Phil discussed the work of Erving Goffman and the commonalities between various activities.")
rel = get_relationships(doc, False)
print(rel)
assert rel == [{'left': ['anna', 'phil', 'ricky'], 'rel': 'discuss', 'right': ['the work']}]

[{'left': ['anna', 'phil', 'ricky'], 'rel': 'discuss', 'right': ['the work']}]


In [266]:
doc = nlp("Anna and Ricky and their friend Jim's cousin's butler, Phil, discussed the work of Erving Goffman and the commonalities between various activities.")
rel = get_relationships(doc, False)
print(rel)
assert rel == [{'left': ['anna', 'phil', 'ricky'], 'rel': 'discuss', 'right': ['the work']}]

[{'left': ['anna', 'phil', 'ricky'], 'rel': 'discuss', 'right': ['the work']}]


In [None]:
print([tok for tok in nlp_merged(to_arch("hackerfriendly was thinking about Bill, the tennis guy."))])
print([tok.pos_ for tok in nlp_merged(to_arch("hackerfriendly was thinking about Bill, the tennis guy."))])

In [None]:
doc = nlp_coref("hackerfriendly was thinking about Bill, the tennis guy, and his buddy Charlie.")
displacy.render(doc, "ent")

In [None]:
speaker = Span(doc, 0, 1, label="PERSON")
doc.set_ents([speaker], default="unmodified")
print([(e.text, e.start, e.end, e.label_) for e in doc.ents])

In [None]:
displacy.render(doc, "ent")

In [None]:
displacy.render(doc)

In [None]:
tok = doc[0]
tok

In [None]:
tok.tag_, tok.dep_

In [None]:
# get_relationships() returns a list of clause dicts (possibly empty), not a
# single dict. Gibberish must not produce any clause with both a subject and
# an object.
gibberish_rels = get_relationships("mfkje lfkj kajhkljhdkjh")
assert not [r for r in gibberish_rels if r['left'] and r['right']]

In [None]:
# Expected value follows the current return shape: a list of clause dicts
# keyed 'left' / 'rel' / 'right', with subjects and objects lowercased.
rel = get_relationships("No, Anna be not describing heavenbanning. ", False)
print(rel)
assert rel == [{'left': ['anna'], 'rel': 'not describe', 'right': ['heavenbanning']}]

In [None]:
to_arch("Anna and Hackerfriendly discussed the concept of emotional intelligence and then Anna proposed exploring Erving Goffman's work and its potential implications.")

In [None]:
ts = "Anna and Hackerfriendly talked to Phil, and Anna mentioned Erving Goffman's sister Edith. Hackerfriendly thought it was funny, and so did Phil."

displacy.render(nlp(ts))
displacy.render(nlp(to_arch(ts)))

In [None]:
for a in archetypes + ['hackerfriendly']:
    print(a, nlp_coref(a)[0].pos_)