In [1]:
import numpy as np
import torch
import json
from transformers import AutoTokenizer, AutoConfig,\
      T5ForConditionalGeneration, AutoModelForSeq2SeqLM, AutoModelForSequenceClassification, AutoModelForCausalLM
from scipy.special import softmax

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load every tokenizer/model used by the pipeline. The Hugging Face
# checkpoints are resolved by `from_pretrained`; the two `.pt` state dicts are
# read from paths relative to the notebook's working directory.
#
# NOTE(review): torch.load unpickles the file, so only load checkpoints from a
# trusted source.

# Topic extraction model: weights are overwritten from a local state dict.
cot_tokenizer = AutoTokenizer.from_pretrained("prakharz/DIAL-BART0")
cot_model = AutoModelForSeq2SeqLM.from_pretrained("prakharz/DIAL-BART0")
# map_location="cpu" lets a checkpoint that was saved from a GPU be read on a
# CPU-only machine; load_state_dict then copies the values into the model.
cot_model.load_state_dict(
    torch.load('./CoT/topic_extraction/model/topic_er2.pt', map_location="cpu")
)

# Topic recommender: weights are overwritten from a local state dict.
recommender_tokenizer = AutoTokenizer.from_pretrained("t5-large")
recommender_model = AutoModelForSeq2SeqLM.from_pretrained("t5-large")
recommender_model.load_state_dict(
    torch.load('./CoT/recommender/model/rec_er.pt', map_location="cpu")
)

# Guideline generator: tokenizer and weights come from different repositories.
guideliner_tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-large")
guideliner = T5ForConditionalGeneration.from_pretrained("TrevorAshby/guideliner")

# Response generator: tokenizer and weights come from different repositories.
blen_tokenizer = AutoTokenizer.from_pretrained("facebook/blenderbot-400M-distill")
blen_model = AutoModelForSeq2SeqLM.from_pretrained("TrevorAshby/blenderbot-400M-distill")

FileNotFoundError: [Errno 2] No such file or directory: './topic_extraction/model/topic_er2.pt'

In [None]:
def generate_cot(text_in):
    """Run the topic-extraction model on ``text_in`` and return its decoded output.

    The text is tokenized with ``cot_tokenizer`` (PyTorch tensors), passed to
    ``cot_model.generate`` with that model's default generation settings, and
    the first generated sequence is decoded with special tokens removed.
    """
    encoded = cot_tokenizer(text_in, return_tensors='pt')
    generated = cot_model.generate(**encoded)
    first_sequence = generated[0]
    return cot_tokenizer.decode(first_sequence, skip_special_tokens=True)

def generate_recommendation(text_in):
    """Run the recommender model on ``text_in`` and return its decoded output.

    The prompt is tokenized with ``recommender_tokenizer`` (PyTorch tensors),
    generation is capped at 32 new tokens, and the first generated sequence is
    decoded with special tokens removed.
    """
    encoded = recommender_tokenizer(text_in, return_tensors='pt')
    generated = recommender_model.generate(max_new_tokens=32, **encoded)
    first_sequence = generated[0]
    return recommender_tokenizer.decode(first_sequence, skip_special_tokens=True)

def generate_guideline(text_in, suggested_topics):
    """Generate a guideline string for a user utterance and suggested topics.

    Args:
        text_in: The user's utterance; inserted after the ``A: `` prefix.
        suggested_topics: Topics to condition on; formatted into the prompt
            with ``str()`` semantics after the ``| `` separator.

    Returns:
        The first sequence generated by ``guideliner`` (at most 50 new
        tokens), decoded with special tokens removed.
    """
    guide_in_str = f'A: {text_in}| {suggested_topics}'
    in_ids = guideliner_tokenizer(
        guide_in_str,
        max_length=512,
        padding='max_length',
        # Without truncation, max_length only pads: longer prompts would be
        # passed through at full length. This also matches generate_response.
        truncation=True,
        return_tensors='pt',
    ).input_ids
    guideline_example = guideliner.generate(in_ids, max_new_tokens=50)
    guideline = guideliner_tokenizer.decode(guideline_example[0], skip_special_tokens=True)
    return guideline

def generate_response(text_in, guideline):
    """Generate a reply to ``text_in`` conditioned on ``guideline``.

    The two strings are joined with the literal `` [GUIDELINE] `` marker,
    tokenized as a one-element batch (truncated to 512 tokens), passed to
    ``blen_model.generate`` with ``max_length=60``, and the first decoded
    sequence is returned with special tokens removed.
    """
    prompt = ' [GUIDELINE] '.join((text_in, guideline))
    encoded = blen_tokenizer(
        [prompt],
        max_length=512,
        return_tensors='pt',
        truncation=True,
    )
    generated = blen_model.generate(max_length=60, **encoded)
    decoded_batch = blen_tokenizer.batch_decode(generated, skip_special_tokens=True)
    return decoded_batch[0]

def CoT_to_Preference(cot):
    """Convert topic-extractor output into a ``{topic: preference}`` dict.

    The expected input is a ``|``-separated list of ``(topic,flag)`` pairs,
    for example ``"(sports,yes)|(football team,yes)"``, which becomes
    ``{"sports": "positive", "football team": "positive"}``.

    Parsing is tolerant of imperfect generated text:
      * whitespace around topics and flags is ignored, and flags are matched
        case-insensitively;
      * only the last comma in a pair separates topic from flag, so a topic
        may itself contain commas;
      * empty fragments (e.g. from a trailing ``|``) are skipped;
      * a pair with no flag, or a flag other than yes/no, maps to ``"unknown"``.

    Args:
        cot: Raw string produced by the topic-extraction step. ``None`` or an
            empty string yields an empty dict.

    Returns:
        dict mapping each topic name to ``"positive"``, ``"negative"`` or
        ``"unknown"``. If a topic appears more than once, the last one wins.
    """
    labels = {'yes': 'positive', 'no': 'negative'}
    top_dict = {}
    if not cot:
        return top_dict

    for entry in cot.split('|'):
        # Parentheses are removed wherever they occur, as before.
        entry = entry.replace('(', '').replace(')', '').strip()
        if not entry:
            continue

        # Split on the LAST comma so topics containing commas stay intact.
        the_top, sep, pref = entry.rpartition(',')
        if not sep:
            # No comma at all: the whole fragment is a topic with no flag.
            the_top, pref = entry, ''

        the_top = the_top.strip()
        if not the_top:
            continue

        top_dict[the_top] = labels.get(pref.strip().lower(), 'unknown')
    return top_dict

In [None]:
# End-to-end walk-through of the pipeline on a single example utterance.

# human response
user_in = "I watched a really good movie today."

# topic extraction
topic_xtract = generate_cot(user_in)

# memory update
# memory retrieve
# The extracted "(topic,flag)|..." string is turned into a {topic: preference} dict.
topic_pref_profile = CoT_to_Preference(topic_xtract)

# topic recommender
num_sugg = 3
prompt = f"Instruction: Generate only {num_sugg} similar topics that could be suggested for new conversation that takes influence from but are not present in the following user profile: {topic_pref_profile} In the generated answer, generate each of the suggested topics separated by a comma like so: TOPIC1,TOPIC2,TOPIC3,TOPIC4,etc.\nSuggested Topics:"
# The model is asked for a comma-separated list; strip the whitespace that
# tends to follow each comma and drop empty entries (e.g. from a trailing comma).
topic_recs = [
    rec.strip()
    for rec in generate_recommendation(prompt).split(',')
    if rec.strip()
]

# guideline generation
# TODO: not implemented yet -- generate_guideline is defined but not called here.

# response generate
# TODO: not implemented yet -- generate_response is defined but not called here.
