In [1]:
import spacy
import pytextrank
import pandas as pd

from math import sqrt

nlp = spacy.load('en_core_web_md') 

from spacytextblob.spacytextblob import SpacyTextBlob 
spacy_text_blob = SpacyTextBlob() 
nlp.add_pipe(spacy_text_blob) 

tr = pytextrank.TextRank()
nlp.add_pipe(tr.PipelineComponent, name="textrank", last=True)

# from sense2vec import Sense2VecComponent
# s2v = Sense2VecComponent(nlp.vocab).from_disk("/home/maze/Hacking/_datasets/s2v_old")
# nlp.add_pipe(s2v)

df = pd.read_csv('emoji-faces.csv')
# emoji_list = list(df["Char"])

In [None]:
from pathlib import Path
from datetime import date, datetime

In [2]:
def make_spacy_doc(text):
    doc = nlp(text)
    return(doc)

In [3]:
# For generating emoji through a simple sentiment-analysis grid
def sympathize(doc): 
    neutrality = 1 - doc._.sentiment.subjectivity
    return(doc._.sentiment.polarity, neutrality)

def dist(coord, loc):
    a = abs(coord[0]-loc[0])
    b = abs(coord[1]-loc[1])
    c = sqrt(a**2 + b**2)
    return c

def emote(doc, n=5):
    coord = sympathize(doc)
    #     print(coord)
    distances = [(i, dist(coord, (row["Sentiment score"], row["Neut"]))) for i, row in df.iterrows()]
    nearest = sorted(distances, key=lambda tup: tup[1])
    emojis = pd.DataFrame([(df["Char"][i], distance) for i, distance in nearest])
    return("".join([emoji for emoji in emojis[:n][0]]))

In [None]:
# # Get emoji using sense2vec (huge download though)
# def sim(char, doc):
#     char_doc = make_spacy_doc(char)
#     phrase_list = doc.s2v_phrases
#     return(doc[0:-1]._.s2v_similarity(char_doc))

# def emote(doc, n=5):
#     distances = [(char, sim(char, doc)) for char in emoji_list]
#     nearest = sorted(distances)
#     return(nearest[:n])

In [164]:
def translate(tok, from_word="simple", to_word="uncomplicated", n=10):
    tok_doc = make_spacy_doc(tok)
    if tok_doc.vector is None:
        return("something I don't know about")
    vec = tok_doc.vector - make_spacy_doc(from_word).vector #+ make_spacy_doc(to_word).vector
#     vec = tok_doc.vector
    vec_ids = nlp.vocab.vectors.most_similar(vec.reshape(1,vec.shape[0]), n=n)
    new_toks = [nlp.vocab.strings[vec] for i, vec in enumerate(vec_ids[0][0])]
    return(new_toks)

In [189]:

def crappy_sort(doc, memory, n=5):
    if not doc._.phrases:
        return "..."
    thoughts = []
    for p in doc._.phrases: 
        if p.text not in str(memory).lower():
            for ent in doc.ents:
                if ent.text.lower() in p.text:
                    if ent.label_ == "PERSON" or ent.label_ == "NORP" or ent.label_ == "ORG":
                        thoughts.append(f"Who is {ent.text}?")
                    elif ent.label_ == "GPE" or ent.label_ == "FAC" or ent.label_ == "LOC": 
                        thoughts.append(f"Where is {ent.text}?")
                    elif ent.label_ == "DATE" or ent.label_ == "TIME": 
                        thoughts.append(f"When is {ent.text}?")
                    elif ent.label_ == "PERCENT" or ent.label_ == "MONEY"or ent.label_ == "QUANTITY": 
                        thoughts.append(f"How much is {ent.text}?")                
                    else:
                        thoughts.append(f"What is {ent.text}? {ent.label_.capitalize()}?")
            new_toks = translate(p.text)
            if new_toks != []:
                thoughts.append(f"{p.text}...")
                for tok in new_toks:
                    if tok.lower not in str(thoughts).lower():
                        thoughts.append(f"{tok}?...")
    return(" ".join(thoughts[:n]))

In [None]:
def respond(stimulus, memory):
    doc = make_spacy_doc(stimulus)
    emoji = emote(doc, 1)
    query = crappy_sort(doc, memory, 3)
    return(f"\n----  {emoji} {query}\n\n")

In [None]:
def record(memory, outdir='output'):
    now = str(date.today()) + ".md"
    outpath = Path(outdir)/now
    with open(outpath, 'a') as f:
        print(f'## {datetime.now()}', file=f)
        print(' '.join(memory), file=f)

In [190]:
print("----  ðŸ˜ƒ What's happening?")
stimulus = "Hi"
memory = []
while stimulus != "":
    stimulus = input()
    response = respond(stimulus, memory)
    memory.append(f'{stimulus}{response}')
    print(response)
record(memory)

----  ðŸ˜ƒ What's happening?


  def crappy_sort(doc, memory, n=5):     if not doc._.phrases:         return "..."     thoughts = []     for p in doc._.phrases:          if p.text not in str(memory).lower():             for ent in doc.ents:                 if ent.text.lower() in p.text:                     if ent.label_ == "PERSON" or ent.label_ == "NORP" or ent.label_ == "ORG":                         thoughts.append(f"Who is {ent.text}?")                     elif ent.label_ == "GPE" or ent.label_ == "FAC" or ent.label_ == "LOC":                          thoughts.append(f"Where is {ent.text}?")                     elif ent.label_ == "DATE" or ent.label_ == "TIME":                          thoughts.append(f"When is {ent.text}?")                     elif ent.label_ == "PERCENT" or ent.label_ == "MONEY"or ent.label_ == "QUANTITY":                          thoughts.append(f"How much is {ent.text}?")                                     else:                         thoughts.append(f"What is {ent.text}? {ent.label_.capit

TypeError: 'in <string>' requires string as left operand, not builtin_function_or_method

In [191]:
doc

Trump campaign communications director Tim Murtaugh declined to comment on Giuliani's condition or whether Ellis has tested positive or plans to quarantine. He referred CNN to tweets by Trump and Ellis.

In [200]:
thoughts = []
for p in doc._.phrases: 
    if p.text not in str(memory).lower():
        for ent in doc.ents:
            if ent.text.lower() in p.text:
                if ent.label_ == "PERSON" or ent.label_ == "NORP" or ent.label_ == "ORG":
                    thoughts.append(f"Who is {ent.text}?")
                elif ent.label_ == "GPE" or ent.label_ == "FAC" or ent.label_ == "LOC": 
                    thoughts.append(f"Where is {ent.text}?")
                elif ent.label_ == "DATE" or ent.label_ == "TIME": 
                    thoughts.append(f"When is {ent.text}?")
                elif ent.label_ == "PERCENT" or ent.label_ == "MONEY"or ent.label_ == "QUANTITY": 
                    thoughts.append(f"How much is {ent.text}?")                
                else:
                    thoughts.append(f"What is {ent.text}? {ent.label_.capitalize()}?")
        new_toks = translate(p.text)
        if new_toks != []:
            thoughts.append(f"{p.text}...")
            for tok in new_toks:
                if tok.lower() not in str(memory).lower() and tok.lower() not in str(thoughts).lower():
                    thoughts.append(f"{tok}?...")


In [201]:
tok.lower()

'she'