In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch

In [2]:
model_id = "mistralai/Mistral-7B-Instruct-v0.2"
device = "cuda:0"

# 4-bit NF4 weights with nested (double) quantization; bfloat16 is the
# compute dtype requested from bitsandbytes.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

tokenizer = AutoTokenizer.from_pretrained(model_id)

# NOTE(review): torch_dtype is float16 while the quantization compute dtype
# above is bfloat16 -- confirm the mismatch is intentional.
# NOTE(review): trust_remote_code=True allows code shipped in the model repo
# to run locally -- confirm it is actually required for this checkpoint.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    torch_dtype=torch.float16,
    device_map=device,
    trust_remote_code=True,
)

[2024-01-29 16:43:47,293] [INFO] [real_accelerator.py:158:get_accelerator] Setting ds_accelerator to cuda (auto detect)


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [3]:
def remove_inst_tags(input_string):
    """Strip ``[INST] ... [/INST]`` regions from a string.

    The match is deliberately greedy: each pass removes everything from the
    FIRST ``[INST]`` up to and including the LAST ``[/INST]``, so for a
    multi-turn transcript only the text after the final closing tag (plus any
    text before the first opening tag) survives.

    Args:
        input_string: Text that may contain instruction tags.

    Returns:
        The text with the tagged region removed. An ``[INST]`` that has no
        ``[/INST]`` after it is left untouched.
    """
    open_tag = '[INST]'
    close_tag = '[/INST]'

    while True:
        start_index = input_string.find(open_tag)
        if start_index == -1:
            break

        close_index = input_string.rfind(close_tag)
        # No closing tag after the opening one (rfind gave -1 or an earlier
        # position). Slicing with such an index would duplicate text instead of
        # removing it and the loop would never finish, so stop here.
        if close_index < start_index:
            break

        end_index = close_index + len(close_tag)
        input_string = input_string[:start_index] + input_string[end_index:]

    return input_string

# Example usage: the removal spans from the first [INST] to the last [/INST],
# so the " with " between the two tagged regions is dropped as well.
input_string = "This is a [INST]sample text[/INST] with [INST]tags[/INST] to remove."
output_string = remove_inst_tags(input_string)
print(output_string)


This is a  to remove.


In [11]:
import pymongo
import datetime
import uuid
import time

class MongoConnector:
    """Small wrapper around the MongoDB collection that stores chat history.

    Every stored document has the keys ``session_id``, ``timestamp``,
    ``question`` and ``answer``.
    """

    def __init__(self,
                 uri: str = "mongodb://localhost:27017/",
                 db_name: str = "llm",
                 collection_name: str = "history") -> None:
        """Open a client and select the history collection.

        Args:
            uri: MongoDB connection string.
            db_name: Name of the database to use.
            collection_name: Name of the collection holding the history.
        """
        # Keep the client on the instance so it can be closed explicitly
        # (self.client.close()) instead of being dropped after construction.
        self.client = pymongo.MongoClient(uri)
        self.collection = self.client[db_name][collection_name]

    def insert_record(self, session_id: str, question: str, answer: str) -> None:
        """Store one question/answer pair for ``session_id``.

        Args:
            session_id: Identifier of the conversation the pair belongs to.
            question: The user message as sent to the model.
            answer: The model's reply.
        """
        record = {"session_id": session_id,
                  # Timezone-aware UTC timestamp; datetime.utcnow() is
                  # deprecated and returns a naive value.
                  "timestamp": datetime.datetime.now(datetime.timezone.utc),
                  "question": question,
                  "answer": answer}

        res = self.collection.insert_one(record)

        print(res)

    def retrieve_history(self, session_id: str) -> list:
        """Return the conversation of ``session_id`` in chronological order.

        Args:
            session_id: Identifier of the conversation to load.

        Returns:
            A list of ``(question, answer)`` tuples, oldest first. Empty when
            the session has no stored records.
        """
        # 1 = ascending. Stored datetimes only have millisecond resolution, so
        # _id is used as a tie-breaker to keep the order deterministic when two
        # records share a timestamp.
        cursor = self.collection.find({'session_id': session_id}).sort(
            [('timestamp', 1), ('_id', 1)]
        )

        return [(doc['question'], doc['answer']) for doc in cursor]


In [5]:
def get_answer_from_messages(messages, max_new_tokens=64):
    """Generate the assistant reply for a chat conversation.

    Args:
        messages: List of ``{"role": ..., "content": ...}`` dicts, passed to
            ``tokenizer.apply_chat_template``.
        max_new_tokens: Upper bound on the number of generated tokens.

    Returns:
        The decoded text of the newly generated tokens only (special tokens
        skipped).
    """
    inputs = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    ).to(device)

    outputs = model.generate(
        inputs,
        max_new_tokens=max_new_tokens,
        pad_token_id=tokenizer.eos_token_id,
    )

    # The generated sequence starts with the prompt tokens. Drop them by
    # position rather than by matching "[INST]"/"[/INST]" in the decoded text,
    # which breaks when the user or the model writes those tags literally.
    prompt_length = inputs.shape[-1]
    completion_ids = outputs[0][prompt_length:]

    return tokenizer.decode(completion_ids, skip_special_tokens=True)

In [6]:
# Shared connector used by get_answer_from_session; requires the cell that
# defines MongoConnector to have been run first.
mongo_connector = MongoConnector()

In [7]:
def get_answer_from_session(session_id, question=None):
    """Answer ``question`` in the context of a stored conversation.

    The history of ``session_id`` is loaded from MongoDB and replayed as
    alternating user/assistant messages. On the first turn of a session the
    question is wrapped in the instruction prompt below, and that wrapped text
    is what gets stored as the question. The new question/answer pair is
    persisted before returning.

    Args:
        session_id: Identifier of the conversation.
        question: The user's new message; must be non-empty.

    Returns:
        The model's answer.

    Raises:
        ValueError: If ``question`` is missing or empty.
    """
    if not question:
        raise ValueError("Question is not provided")

    history = mongo_connector.retrieve_history(session_id)

    if not history:
        # First turn: the instructions travel inside the user message.
        text = """You are an assistant that helps people in reminding what they are here for. 
                Please follow the below rules(1, 2, 3 and 4) for answering the questions:
                1. The people will given you information and if asked you will provide the information back to them.
                2. Please dont remind the user that you are here to remember information in the conversations.
                3. Only provide answer to the required question.
                4. Answer the question in less than 10 words.
                The input from the user is following:
                {query}""".format(query=question)
        user_content = text
    else:
        user_content = question

    # Replay earlier turns (none on the first turn), then add the new message.
    messages = []
    for previous_question, previous_answer in history:
        messages.append({"role": "user", "content": previous_question})
        messages.append({"role": "assistant", "content": previous_answer})
    messages.append({"role": "user", "content": user_content})

    answer = get_answer_from_messages(messages)
    mongo_connector.insert_record(session_id=session_id, question=user_content, answer=answer)

    return answer
          
                

In [12]:
# Start a fresh conversation under a random session id.
session_id = str(uuid.uuid4())
print(session_id)

# Three statements for the assistant to remember, then three recall questions.
questions = [
    "Hello I am Jaswant",
    "I like reading books",
    "My favorite book is Dogs of War",
    "What is my name?",
    "What is my favorite book?",
    "What do I like ?",
]

for question in questions:
    answer = get_answer_from_session(session_id=session_id, question=question)
    print(f'Question: {question}, Answer: {answer}')
    # NOTE(review): presumably spaces out the stored timestamps -- confirm.
    time.sleep(3)


01565f06-3870-4a52-9713-dfcf63dd5a41
InsertOneResult(ObjectId('65b7899d475493e16a4e7323'), acknowledged=True)
Question: Hello I am Jaswant, Answer:  Hello Jaswant. Nice to meet you. Is there specific information you would like to know or remember? I'm here to help.
InsertOneResult(ObjectId('65b789a1475493e16a4e7324'), acknowledged=True)
Question: I like reading books, Answer:  Understood. You enjoy reading books. Let me know if you need help remembering any book titles or authors.
InsertOneResult(ObjectId('65b789a6475493e16a4e7325'), acknowledged=True)
Question: My favorite book is Dogs of War, Answer:  I see. "Dogs of War" is your favorite book. Let me know if you need help remembering any other details about it.
InsertOneResult(ObjectId('65b789aa475493e16a4e7326'), acknowledged=True)
Question: What is my name?, Answer:  Jaswant. Your name is Jaswant. Let me know if you need help with anything else.
InsertOneResult(ObjectId('65b789ae475493e16a4e7327'), acknowledged=True)
Question: Wha

In [10]:
# Dump the stored (question, answer) pairs for the session created by the demo
# loop; the first stored question includes the full instruction prompt.
history = mongo_connector.retrieve_history(session_id)

print(history)

[('You are an assistant that helps people in reminding what they are here for. \n                Please follow the below rules(1, 2, 3 and 4) for answering the questions:\n                1. The people will given you information and if asked you will provide the information back to them.\n                2. Please dont remind the user that you are here to remember information in the conversations.\n                3. Only provide answer to the required question.\n                4. Answer the question in less than 10 words.\n                The input from the user is following:\n                Hello I am Jaswant', " Hello Jaswant. Nice to meet you. Is there specific information you would like to know or remember? I'm here to help."), ('I like reading books', ' Understood. You enjoy reading books. Let me know if you need help remembering any book titles or authors.'), ('My favorite book is Dogs of War', ' I see. "Dogs of War" is your favorite book. Let me know if you need help remember