In [1]:
# https://neo4j.com/docs/cypher-manual/current/introduction/
# show everything: MATCH (n) OPTIONAL MATCH (n)-[r]-() RETURN n, r;
# delete everything: MATCH (n) DETACH DELETE n

# https://workspace-preview.neo4j.io/workspace/query
import os

import numpy as np
import torch
from scipy.special import softmax
from transformers import AutoTokenizer, AutoConfig,\
      T5ForConditionalGeneration, AutoModelForSeq2SeqLM, AutoModelForSequenceClassification

import neo4j_helper

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Connection settings are read from the environment so that no credential is
# stored in the notebook. NEO4J_PASSWORD is required (a missing value raises
# KeyError); the URI and user fall back to the values this notebook used before.
# NOTE(review): the password that used to be hardcoded on this line has been
# exposed in the notebook history and should be rotated.
dao = neo4j_helper.Neo4jDAO(
    uri=os.environ.get("NEO4J_URI", "neo4j+s://467d365d.databases.neo4j.io:7687"),
    user=os.environ.get("NEO4J_USER", "neo4j"),
    pwd=os.environ["NEO4J_PASSWORD"],
)

In [13]:
# Create two sample Person nodes. Double-quoted Python strings let the Cypher
# single quotes appear without backslash escapes; the query text is unchanged.
dao.query("CREATE (n:Person {name: 'Andy', title: 'Developer'})")
dao.query("CREATE (n:Person {name: 'Paul', title: 'Developer'})")

[]

In [None]:
# Link the two sample Person nodes with a 'friends' relationship (two_way=True).
dao.createEdge2(
    src_name='Paul',
    src_type='Person',
    trg_name='Andy',
    trg_type='Person',
    rel_type='friends',
    two_way=True,
)

In [None]:
# Throwaway property map used to try out dao.createNode2 in the next cell.
temp = dict(hi='hello', imgood='are you?')

In [None]:
# Create a node from the sample dict defined above.
# NOTE(review): presumably the first argument is the node label/type and the
# second is its property map -- confirm against neo4j_helper.Neo4jDAO.createNode2.
dao.createNode2('Computer', temp)

In [2]:
# download the models

# Chain-of-topics generator: DIAL-BART0 architecture whose weights are then
# overwritten from a local checkpoint file.
cot_tokenizer = AutoTokenizer.from_pretrained("prakharz/DIAL-BART0")
cot_model = AutoModelForSeq2SeqLM.from_pretrained("prakharz/DIAL-BART0")
# map_location='cpu' lets a checkpoint that was saved from a GPU load on a
# machine without CUDA; load_state_dict then copies the tensors into the model.
# NOTE(review): torch.load unpickles the file, so only load checkpoints that
# come from a trusted source.
cot_model.load_state_dict(torch.load('../../model/topic_er.pt', map_location='cpu'))

# Sentiment classifier; `config` supplies the id2label mapping used downstream.
sent_tokenizer = AutoTokenizer.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment-latest")
sent_model = AutoModelForSequenceClassification.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment-latest")
config = AutoConfig.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment-latest")

# Instruction-following model used for topic extraction (stock DIAL-BART0 weights).
inst_tokenizer = AutoTokenizer.from_pretrained("prakharz/DIAL-BART0")
inst_model = AutoModelForSeq2SeqLM.from_pretrained("prakharz/DIAL-BART0")

Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [3]:
def extract_topic_sentiment(text_in):
    """Extract the topic of a text and its most likely sentiment label.

    The topic is generated by ``inst_model`` from an instruction prompt that
    embeds ``text_in``. The sentiment comes from ``sent_model``: its scores are
    soft-maxed, every label is printed in descending order of probability, and
    the top label is returned.

    Args:
        text_in: The text (e.g. a user utterance) to analyse.

    Returns:
        A ``(topic, sentiment_label)`` tuple, where ``topic`` is the decoded
        generation and ``sentiment_label`` is ``config.id2label`` of the
        highest-scoring class.
    """
    instruct_input = "Instruction:What is the topic of conversation?\n\nInput:[CONTEXT]{}[ENDOFDIALOGUE][QUESTION]The topic of conversation is".format(text_in)
    topic_tokens = inst_tokenizer(instruct_input, max_length=250, padding='max_length', truncation=True, return_tensors='pt')
    generated = inst_model.generate(**topic_tokens)
    topic = inst_tokenizer.decode(generated[0], skip_special_tokens=True)

    sent_tokens = sent_tokenizer(text_in, max_length=250, padding='max_length', truncation=True, return_tensors='pt')
    # Inference only: skip building the autograd graph for the classifier pass.
    with torch.no_grad():
        sent_out = sent_model(**sent_tokens)

    # First output element holds the scores; [0] selects the single input row.
    scores = softmax(sent_out[0][0].numpy())

    # Class indices ordered from most to least probable.
    ranking = np.argsort(scores)[::-1]
    for rank, label_id in enumerate(ranking, start=1):
        # Cast the numpy integer to a plain int before the dict lookup.
        label = config.id2label[int(label_id)]
        print(f"{rank}) {label} {np.round(float(scores[label_id]), 4)}")

    return topic, config.id2label[int(ranking[0])]

# chain of topics
def generate_cot(text_in):
    """Generate the chain-of-topics string for ``text_in`` with ``cot_model``.

    The text is tokenized with ``cot_tokenizer``, passed to
    ``cot_model.generate``, and the first generated sequence is decoded with
    special tokens stripped.
    """
    encoded = cot_tokenizer(text_in, return_tensors='pt')
    generated_ids = cot_model.generate(**encoded)
    return cot_tokenizer.decode(generated_ids[0], skip_special_tokens=True)

In [16]:
in_str = "I had fun watching the hokie football game."

topic, sent = extract_topic_sentiment(in_str)
dec_out = generate_cot(topic)

# Strip spaces, hyphens and square brackets from the generated chain in one
# pass, leaving a plain comma-separated list of names.
dec_out = dec_out.translate(str.maketrans('', '', ' -[]'))
topic = topic.replace(' ', '')

# Drop empty entries (from an empty generation or stray commas) so that no
# blank name is later used to create a node.
cot = [name for name in dec_out.split(',') if name]
print("{} -> {}".format(cot, topic))
print(sent)

# The extracted topic itself is the last element of the chain.
cot.append(topic)

print(cot)



1) positive 0.9812
2) neutral 0.0167
3) negative 0.0021
['Sports', 'Football', 'NCAAAllTimeLeadingScorers'] -> Hokiefootball
positive
['Sports', 'Football', 'NCAAAllTimeLeadingScorers', 'Hokiefootball']


In [11]:
# Topic/interest detector: DIAL-BART0 tokenizer with the topic-detector model.
topic_tokenizer = AutoTokenizer.from_pretrained('prakharz/DIAL-BART0')
topic_detector = AutoModelForSeq2SeqLM.from_pretrained('TrevorAshby/topic-detector')

user_response = "I am really happy that I don't have to do any homework this weekend."

topic_in_str = "Instruction: Extract the topic of the last conversation turn, and determine whether the human is interested in it.\n Input: [CONTEXT] " + 'Human: ' + user_response + " [ENDOFDIALOGUE] [QUESTION] Given this conversation provided, the topic and intent is"
# truncation=True keeps the input within max_length, matching the other
# tokenizer calls in this notebook.
user_inputs = topic_tokenizer(topic_in_str, max_length=250, padding='max_length', truncation=True, return_tensors='pt')
user_input_ids = user_inputs.input_ids
# Pass the attention mask explicitly so the padding positions are masked out.
topic_pref_example = topic_detector.generate(user_input_ids, attention_mask=user_inputs.attention_mask, max_new_tokens=128)
topic_pref = topic_tokenizer.decode(topic_pref_example[0], skip_special_tokens=True)
print(topic_pref)

{"high-level": {"topic": "weekend", "if_interest": "yes"}}


In [17]:
# Create one node per topic in the chain.
# - positive sentiment: every node gets the 'positive' preference
# - negative/neutral: only the current topic (last in the chain) gets the
#   sentiment; every other node is marked 'unknown'
last_idx = len(cot) - 1
for idx, top in enumerate(cot):
    if sent == 'positive' or idx == last_idx:
        pref = sent
    else:
        pref = 'unknown'
    dao.createNode2(top, {'name': top, 'preference': pref})

name
preference
name
preference
name
preference
name
preference


In [18]:
# Connect every ordered pair of distinct topics with a one-way 'related' edge.
# Each pair is visited in both orders, so both directions are created.
for src in cot:
    for dst in cot:
        print(src, dst)
        if src == dst:
            # no self-loops
            continue
        print('here')
        dao.createEdge2(
            src_name=src,
            src_type=src,
            trg_name=dst,
            trg_type=dst,
            rel_type='related',
            two_way=False,
        )

Sports Sports
Sports Football
here
Sports NCAAAllTimeLeadingScorers
here
Sports Hokiefootball
here
Football Sports
here
Football Football
Football NCAAAllTimeLeadingScorers
here
Football Hokiefootball
here
NCAAAllTimeLeadingScorers Sports
here
NCAAAllTimeLeadingScorers Football
here
NCAAAllTimeLeadingScorers NCAAAllTimeLeadingScorers
NCAAAllTimeLeadingScorers Hokiefootball
here
Hokiefootball Sports
here
Hokiefootball Football
here
Hokiefootball NCAAAllTimeLeadingScorers
here
Hokiefootball Hokiefootball
