In [1]:
import time
import re
import json
import nltk
import torch
import numpy
import pickle
import codecs
import evaluate
import argparse
import networkx as nx
from tqdm import tqdm
from openai import OpenAI
from datetime import datetime
from UniEval.UniEval.utils import convert_to_json
from UniEval.UniEval.metric.evaluator import get_evaluator
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForCausalLM

  from .autonotebook import tqdm as notebook_tqdm


In [10]:
def generate_cot(text_in, tok_in, mod_in):
    instruction = "Instruction: Generate a list of topics increasing in specificity to define the subject of conversation.\n"
    instruction += f"Input:{text_in}"
    formatted_prompt = (f"<|im_start|>user\n{instruction}<|im_end|>\n<|im_start|>assistant\nThe topics defining the input are:")
    tok_text = tok_in(formatted_prompt, return_tensors='pt').to('cuda:0')
    gen_text = mod_in.generate(**tok_text, max_new_tokens=60)
    dec_text = tok_in.decode(gen_text[0], skip_special_tokens=True)
    #print(dec_text)
    dec_text = re.search('```.*\n```', dec_text).group()[3:-4]

    return dec_text

def CoT_to_Preference(cot):
    # (sports,yes)|(football team,yes)
    # "{\"sports\":\"positive\", \"football\":\"positive\"}"
    topics = cot.split('|')
    top_dict = {}
    for top in topics:
        top = top.replace('(', '')
        top = top.replace(')', '')
        the_top, pref = top.split(',')
        # print(pref)
        if pref == 'yes':
            pref = 'positive'
        elif pref == 'no':
            pref = 'negative'
        else:
            pref = 'unknown'
        top_dict[the_top] = pref
    return top_dict

def update_graph(top_pref_prof, g):
    prev_tpxt = []
    for tpxt in top_pref_prof:
        # add node if not in graph, else update it
        if tpxt not in g.nodes:
            g.add_node(tpxt, pref=top_pref_prof[tpxt])
        else:
            #! FIXME UPDATE THIS TO THE NEW METHOD: Pos->Neut->Neg
            if g.nodes[tpxt]['pref'] == 'positive':
                if top_pref_prof[tpxt] == 'negative':
                    g.nodes[tpxt]['pref'] = 'unknown'
            if g.nodes[tpxt]['pref'] == 'unknown':
                if top_pref_prof[tpxt] == 'positive':
                    g.nodes[tpxt]['pref'] = 'positive'
                if top_pref_prof[tpxt] == 'negative':
                    g.nodes[tpxt]['pref'] = 'negative'
            if g.nodes[tpxt]['pref'] == 'negative':
                if top_pref_prof[tpxt] == 'positive':
                    g.nodes[tpxt]['pref'] = 'positive'
            # g.nodes[tpxt]['pref'] = top_pref_prof[tpxt]
            
        # add all links between nodes in chain if not already existing only if more than 1 node
        if len(top_pref_prof) > 1 and len(prev_tpxt) >= 1:
            for pt in prev_tpxt:
                if (pt.split(',')[0], tpxt.split(',')[0]) not in g.edges:
                    g.add_edge(pt.split(',')[0], tpxt.split(',')[0])
        # prev_tpxt = tpxt
        prev_tpxt.append(tpxt)  

def generate_recommendation(text_in, tok_in, mod_in):
    tok_text = tok_in(text_in, return_tensors='pt').to('cuda:0')
    gen_text = mod_in.generate(**tok_text, max_new_tokens=1024)
    dec_text = tok_in.decode(gen_text[0], skip_special_tokens=True)
    return dec_text



In [3]:
# topic extraction
cot_tokenizer = AutoTokenizer.from_pretrained("../CoT/topic_extraction/hf_model/")
cot_model = AutoModelForCausalLM.from_pretrained("../CoT/topic_extraction/hf_model/")
cot_model.to('cuda:0')

# memory module
graph = nx.Graph()

# topic recommendation model
recc_tokenizer = AutoTokenizer.from_pretrained("../CoT/recommender/hf_model/")
recc_model = AutoModelForSeq2SeqLM.from_pretrained("../CoT/recommender/hf_model/", torch_dtype=torch.float32)
recc_model.to('cuda:0')
# response generation model
resp_tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf")
resp_model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-chat-hf", token='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',torch_dtype=torch.float32)
resp_model.to('cuda:0')

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Loading checkpoint shards: 100%|██████████| 2/2 [00:03<00:00,  1.71s/it]


LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(32000, 4096)
    (layers): ModuleList(
      (0): LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (v_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (o_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear(in_features=4096, out_features=11008, bias=False)
          (up_proj): Linear(in_features=4096, out_features=11008, bias=False)
          (down_proj): Linear(in_features=11008, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm()
        (post_attention_layernorm): LlamaRMSNorm()
      )
      (1): LlamaDecoderLayer(
        (self_attn): LlamaAtten

In [349]:
conversation = []

In [412]:
print(conversation)

['person 2:I love playing volleyball!', "person 1:  Great! Person 1: Oh, really? I've always been impressed by the skill and athleticism of volleyball players. They have to be incredibly agile and coordinated to make those quick moves and spikes. Do you have a favorite player or team?", 'person 2:I do not know very much about professional volleyball. I know about professional basketball though!', 'person 1:  Understood! Here is the next conversation turn for person 1:\n\n"Ah, that\'s cool! I\'ve always been fascinated by the techniques used in basketball. Have you ever tried playing basketball? It\'s a great workout and can be really challenging to master the different shots and moves."', 'person 2:Yeah, I think playing basketball is fun!', 'person 1:  "Ah, that\'s great! I\'ve always been fascinated by the techniques used in basketball. Have you ever tried playing volleyball? It\'s a great workout and can be really challenging to master the different spikes and moves."', 'person 2:Yea

In [407]:
t = time.time()
utterance = "I have seen almost all of the marvel movies. I did not know that there is a new Avengers movie coming out. What is it about?"
topic_xtract = generate_cot(utterance, cot_tokenizer, cot_model)
topic_pref_profile = CoT_to_Preference(topic_xtract.strip())
print(topic_pref_profile)
a = time.time() - t
print(a)

{'movie': 'positive', 'Avengers': 'positive'}
0.9848790168762207


In [352]:
graph = nx.Graph()

In [408]:
t = time.time()
update_graph(topic_pref_profile, graph)
b = time.time() - t
print(b)

0.00023293495178222656


In [384]:
print(graph)
print(graph.nodes)
print(graph.edges)

Graph with 7 nodes and 6 edges
['sports', 'volleyball', 'basketball', 'other sports', 'movie', 'favorite movie', 'Dune 2.0']
[('sports', 'volleyball'), ('sports', 'basketball'), ('sports', 'other sports'), ('movie', 'favorite movie'), ('movie', 'Dune 2.0'), ('favorite movie', 'Dune 2.0')]


In [409]:
t = time.time()
focus_topic = list(topic_pref_profile.keys())[0]
xtract_prof = {}
xtract_prof[focus_topic] = graph.nodes[focus_topic]['pref']
for x_nodes in graph.edges([focus_topic]):
    xn = x_nodes[1]
    xtract_prof[xn] = graph.nodes[xn]['pref']
print(xtract_prof)
c = time.time() - t
print(c)

{'movie': 'positive', 'favorite movie': 'positive', 'Dune 2.0': 'positive', 'movie series': 'positive', 'Star Wars': 'positive', 'Avengers': 'positive'}
0.0005507469177246094


In [410]:
t = time.time()
num_sugg = 3
prompt = f"Instruction: Generate only {num_sugg} similar topics that could be suggested for new conversation that takes influence from but are not present in the following user profile: {xtract_prof} In the generated answer, generate each of the suggested topics separated by a comma like so: TOPIC1,TOPIC2,TOPIC3,TOPIC4,etc.\nSuggested Topics:"
topic_recs = generate_recommendation(prompt, recc_tokenizer, recc_model).split(',')
print(topic_recs)
d = time.time() - t
print(d)

['movie quotes', 'movie reviews', 'film adaptations']
0.28684425354003906


In [411]:
t = time.time()
conversation.append(f'person 2:{utterance}')
user_in = ''.join(conversation)
p_in = 'person 1'
not_p_in = 'person 2'
if xtract_prof[focus_topic] == 'positive':
    tpref = 'person 2 likes'
elif xtract_prof[focus_topic] == 'negative':
    tpref = 'person 2 dislikes'
else:
    tpref = 'It is unclear if the person 2 likes or dislikes'

guideline = f'{tpref} {focus_topic}. {p_in}\'s response should fall into one of the following 3 topics: {topic_recs}.'

# generate a response from our pipeline
#llama_in = f'<s>[INST] <<SYS>>\nYou are a person participating in a conversation. You are specifically {p_in}. <</SYS>>\nGenerate the next conversation turn for {p_in} responding to {not_p_in} in this conversation: {user_in} Limit the generated response to 1-2 sentences and compliant with this guideline: {guideline} [/INST] {p_in}:'


messages =  [{
                "role":"system",
                "content":f"""You are a person participating in a conversation. You are specifically {p_in}.
Limit the generated response to 1-2 sentences and compliant with this guideline: {guideline}"""
            },
            {
                "role":"user",
                "content": f"Generate only the next conversation turn for {p_in} responding to {not_p_in} in this conversation: {user_in}."
            }
        ]

llama_in = resp_tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
blend_in_ids = resp_tokenizer(llama_in, max_length=1024, return_tensors='pt', truncation=True).to('cuda:0')
print(len(blend_in_ids['input_ids'][0]))
blend_example = resp_model.generate(**blend_in_ids, temperature=0.8, top_k=50, top_p=0.85)
our_response = resp_tokenizer.batch_decode(blend_example, skip_special_tokens=True)[0].split('[/INST]')[-1]
conversation.append(f'person 1:{our_response}')
print(our_response)
e = time.time() - t
print(e)

1024


KeyboardInterrupt: 

In [406]:
print(a + b + c + d + e)

7.273718357086182


In [102]:
llama_in = f'<s>[INST] <<SYS>>\nYou are a person participating in a conversation. You are specifically {p_in}. <</SYS>>\nGenerate the next conversation turn for {p_in} responding to {not_p_in} in this conversation: {user_in} Limit the generated response to 1-2 sentences and compliant with this guideline: {guideline} [/INST] {p_in}:'
blend_in_ids = resp_tokenizer(llama_in, return_tensors='pt').to('cuda:0')
#print(blend_in_ids['input_ids'][0])
print(len(blend_in_ids['input_ids'][0]))

558


In [None]:
t = time.time()
print(time.time() - t)