In [None]:
import numpy as np
import torch
import transformers
from scipy.special import softmax
from transformers import (
    AutoConfig,
    AutoModel,
    AutoModelForSeq2SeqLM,
    AutoModelForSequenceClassification,
    AutoTokenizer,
    T5ForConditionalGeneration,
)

# from peft import PeftModel


## User Input / Memory Extraction 1

Models

In [None]:
# Hugging Face Hub checkpoint ids, named once so the tokenizer, model and config
# loaded for the same checkpoint cannot drift apart.
INSTRUCT_CHECKPOINT = "prakharz/DIAL-BART0"
SENTIMENT_CHECKPOINT = "cardiffnlp/twitter-roberta-base-sentiment-latest"

# Seq2seq model that is prompted for the conversation topic in the cells below.
inst_tokenizer = AutoTokenizer.from_pretrained(INSTRUCT_CHECKPOINT)
inst_model = AutoModelForSeq2SeqLM.from_pretrained(INSTRUCT_CHECKPOINT)

# Sequence classifier used for sentiment; `config` is loaded for its id2label
# mapping, which turns class indices into label strings.
sent_tokenizer = AutoTokenizer.from_pretrained(SENTIMENT_CHECKPOINT)
sent_model = AutoModelForSequenceClassification.from_pretrained(SENTIMENT_CHECKPOINT)
config = AutoConfig.from_pretrained(SENTIMENT_CHECKPOINT)

In [None]:
# Example utterance fed to both the topic prompt and the sentiment classifier
# in the two exploratory cells that follow.
user_input = "I like country music."

In [None]:
# Prompt inst_model for the conversation topic of `user_input` and print the
# decoded generation.
topic_prompt_template = "Instruction:What is the topic of conversation?\n\nInput:[CONTEXT]{}[ENDOFDIALOGUE][QUESTION]The topic of conversation is"
instruct_input = topic_prompt_template.format(user_input)
tokens_input = inst_tokenizer(
    instruct_input,
    max_length=250,
    padding='max_length',
    truncation=True,
    return_tensors='pt',
)
input_out = inst_model.generate(**tokens_input)
topic = inst_tokenizer.decode(input_out[0], skip_special_tokens=True)
print(topic)

In [None]:
# Classify `user_input` with sent_model, print the softmax scores, then list
# the labels from most to least probable.
tokens_input = sent_tokenizer(
    user_input,
    max_length=250,
    padding='max_length',
    truncation=True,
    return_tensors='pt',
)
input_out = sent_model(**tokens_input)

# First output field, first (only) row of the batch.
logits = input_out[0][0]
scores = softmax(logits.detach().numpy())
print(scores)

# argsort is ascending, so reverse it to get best-first order.
ranking = np.argsort(scores)[::-1]
for position, label_id in enumerate(ranking, start=1):
    label = config.id2label[label_id]
    score = scores[label_id]
    print(f"{position}) {label} {np.round(float(score), 4)}")

In [None]:
def extract_topic_sentiment(text_in, verbose=True):
    """Return the generated topic and the top sentiment label for ``text_in``.

    The topic comes from prompting the module-level ``inst_model`` /
    ``inst_tokenizer`` pair with a fixed "What is the topic of conversation?"
    instruction. The sentiment comes from ``sent_model`` / ``sent_tokenizer``,
    with class indices mapped to label strings through ``config.id2label``.

    Args:
        text_in: The utterance to analyse.
        verbose: When True (the default, matching the previous behaviour),
            print every sentiment label with its softmax score, best first.

    Returns:
        A ``(topic, sentiment_label)`` tuple: the decoded generation of the
        instruction model and the label with the highest score.
    """
    instruct_input = "Instruction:What is the topic of conversation?\n\nInput:[CONTEXT]{}[ENDOFDIALOGUE][QUESTION]The topic of conversation is".format(text_in)
    topic_tokens = inst_tokenizer(instruct_input, max_length=250, padding='max_length', truncation=True, return_tensors='pt')
    generated_ids = inst_model.generate(**topic_tokens)
    topic = inst_tokenizer.decode(generated_ids[0], skip_special_tokens=True)

    sentiment_tokens = sent_tokenizer(text_in, max_length=250, padding='max_length', truncation=True, return_tensors='pt')
    # Inference only: no_grad avoids building an autograd graph for the forward
    # pass, so the logits can be converted to numpy without detach().
    with torch.no_grad():
        sentiment_out = sent_model(**sentiment_tokens)

    # First output field, first (only) row of the batch.
    scores = softmax(sentiment_out[0][0].numpy())

    # argsort is ascending; reverse it so index 0 is the most probable class.
    ranking = np.argsort(scores)[::-1]
    if verbose:
        for position, label_id in enumerate(ranking, start=1):
            label = config.id2label[int(label_id)]
            print(f"{position}) {label} {np.round(float(scores[label_id]), 4)}")

    return topic, config.id2label[int(ranking[0])]

In [None]:
# THIS CELL IS JUST USED FOR DATASET MANIPULATION
# file = open('./CoT/generated_ds.csv', 'r')
# f2 = open('./CoT/ds.txt', 'w')
# lines = file.readlines()

# for line in lines:
#     idx = line.find(':')
#     l2 = '[' + line[:idx]  + ']|' + line[idx+1:]
#     f2.write(l2)

AI TOPIC GENERATOR

In [None]:
# Second DIAL-BART0 instance whose weights are overwritten with the state dict
# stored in ./model/topic_er.pt (presumably the fine-tuned topic generator —
# TODO confirm what the checkpoint was trained on).
inst_tokenizer2 = AutoTokenizer.from_pretrained("prakharz/DIAL-BART0")
inst_model2 = AutoModelForSeq2SeqLM.from_pretrained("prakharz/DIAL-BART0")

# map_location="cpu" lets a checkpoint that was saved from a GPU load on a
# CPU-only machine; load_state_dict then copies the tensors into the model.
# NOTE(review): torch.load unpickles the file, so only load checkpoints from a
# trusted source (consider weights_only=True if the installed torch supports it).
inst_model2.load_state_dict(torch.load('./model/topic_er.pt', map_location="cpu"))

In [None]:
# chain of topics
def generate_cot(text_in):
    """Generate a "chain of topics" string for ``text_in``.

    Tokenizes the text with ``inst_tokenizer2``, runs ``inst_model2.generate``
    and returns the first generated sequence decoded without special tokens.
    """
    encoded = inst_tokenizer2(text_in, return_tensors='pt')
    output_ids = inst_model2.generate(**encoded)
    return inst_tokenizer2.decode(output_ids[0], skip_special_tokens=True)

In [None]:
# Smoke test of both helpers: extract_topic_sentiment returns a
# (topic, sentiment label) tuple for a sentence, and generate_cot returns the
# second model's decoded generation for a bare topic string.
print(extract_topic_sentiment("I like Abraham Lincoln"))
print(generate_cot("Abraham Lincoln"))

## Memory Update

In [None]:
# Topic memory: a nested dict filled by add_to_memory, one nesting level per
# path element. The notes below sketch the intended levels, broadest first
# (example values only):
# META-TOPIC : {Music, Sports, Games}
# SUB-TOPIC : {Rock n Roll, Country, etc.}
# MICRO-TOPIC : {Journey, Boston, ACDC, Aerosmith}
memory = {}

def add_to_memory(mem, memory, idx, key=None, value=None):
    """Ensure the topic path ``mem`` exists in ``memory`` and optionally tag its leaf.

    Walks the nested dict ``memory`` along ``mem``, creating an empty dict for
    every level that is missing. If ``key`` is given, ``value`` is stored under
    ``key`` on the last node of the path only (overwriting any previous value).
    Earlier versions also copied the attribute onto every newly created
    ancestor, which polluted intermediate topics.

    Args:
        mem: Sequence of topic names, outermost first. An empty sequence
            addresses ``memory`` itself.
        memory: The nested dict to update; it is modified in place.
        idx: Unused; kept so existing callers that pass a depth keep working.
        key: Optional attribute name to set on the leaf node.
        value: Value stored under ``key`` when ``key`` is not None.

    Returns:
        The same ``memory`` object that was passed in.
    """
    node = memory
    for topic in mem:
        # setdefault creates the level only when it is missing, so existing
        # sub-topics and attributes are left untouched.
        node = node.setdefault(topic, {})

    if key is not None:
        node[key] = value

    return memory

# Demo: build one branch without attributes, then attach, overwrite and extend
# attributes on a sibling leaf. Each call prints the whole memory dict.
sand_path = ['Animals', 'Marine Life', 'Sharks', 'Sand']

print(add_to_memory(['Animals', 'Marine Life', 'Sharks', 'Beach'], memory, 0))
print(add_to_memory(sand_path, memory, 0, 'relationship', 'dislike'))
print()
# 'relationship' is overwritten with 'like'; 'strength' is added next to it.
for attr_name, attr_value in (('relationship', 'like'), ('strength', 5)):
    print(add_to_memory(sand_path, memory, 0, attr_name, attr_value))

# print(memory['Animals']['Marine Life']['Sharks']['Sand'].keys())

## Memory Retrieve

In [None]:
def read_memory(mem, memory, idx):
    """Return the node of the nested dict ``memory`` reached by following ``mem``.

    Args:
        mem: Sequence of keys, outermost first. An empty sequence returns
            ``memory`` itself.
        memory: The nested dict to read from.
        idx: Unused by the lookup; kept for the existing call signature.

    Raises:
        KeyError: If a key along the path is missing.
    """
    node = memory
    for step in mem:
        node = node[step]
    return node

# Print the subtree stored under Animals > Marine Life > Sharks; relies on
# `memory` having been populated by the add_to_memory demo calls earlier on.
print(read_memory(['Animals', 'Marine Life', 'Sharks'], memory, 0))

## Node2Profile

In [None]:
# str() turns the retrieved subtree into plain text; the appended suffix only
# labels the printed value as a string.
print(str(read_memory(['Animals', 'Marine Life', 'Sharks'], memory, 0))+" <- IS A STRING")

## Guideline Generate

In [None]:
# THE SAME

## Response Generate

In [None]:
# THE SAME

## Memory Extraction 2

In [None]:
# USE METHOD FROM MEMORY EXTRACTION 1
ext = extract_topic_sentiment("I do not like reading non-fiction books.")
# WILL NEED TO RUN THIS SENTENCE BY SENTENCE?
print(ext)

cot = generate_cot(ext[0])
print(cot)

## Self-checking