## Inference with Llama-2 (7B)

In [None]:
#!pip install -q transformers langchain accelerate

In [None]:
#==========#
# Set seed #
#==========#
import torch
import random
import numpy as np
from transformers import set_seed

seed = 42

# Pass the same seed to every seeding entry point used by this notebook:
# transformers, the stdlib `random` module, NumPy, and PyTorch (including CUDA).
for _seed_fn in (
    set_seed,
    random.seed,
    np.random.seed,
    torch.manual_seed,
    torch.cuda.manual_seed_all,
):
    _seed_fn(seed)

## Using Hugging Face and LangChain for inference

In [None]:
# Read the Hugging Face access token from disk.
# A context manager closes the file handle promptly, and `.strip()` removes
# surrounding whitespace (e.g. a trailing newline left by an editor) so it does
# not become part of the token.
with open("./auth_keys/huggingface_auth_key.txt", "r") as key_file:
    huggingface_auth_key = key_file.read().strip()

In [None]:
import textwrap
from typing import Optional

import transformers
from langchain import PromptTemplate, LLMChain
from langchain.llms import HuggingFacePipeline
from langchain.memory import ConversationBufferMemory
from termcolor import colored
from transformers import (AutoTokenizer, AutoModelForCausalLM)

class LLaMA2ChatAgent:
    """Chat wrapper that runs a Llama-2 model through a LangChain ``LLMChain``.

    The given model and tokenizer are wrapped in a Hugging Face
    ``text-generation`` pipeline, which is exposed to LangChain through
    ``HuggingFacePipeline``. Each call to :meth:`chat` builds a prompt from the
    system prompt, the stored chat history and the new user message, and
    returns the chain's prediction. The chain is given ``chat_history`` as its
    memory.
    """

    # Markers used by `get_prompt` to delimit the instruction and system sections.
    B_INST, E_INST = "[INST]", "[/INST]"
    B_SYS, E_SYS = "<<SYS>>\n", "\n<</SYS>>\n\n"

    # System prompt used by `chat` when no `new_system_prompt` was supplied.
    DEFAULT_SYSTEM_PROMPT = """You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.

If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information."""

    def __init__(
        self,
        tokenizer: AutoTokenizer,
        model: AutoModelForCausalLM,
        chat_history: Optional[ConversationBufferMemory] = None,
        new_system_prompt: Optional[str] = None,
        max_tokens: int = 128,
        temperature: float = 0.7,
    ) -> None:
        """Build the generation pipeline and store the conversation settings.

        Args:
            tokenizer: Tokenizer handed to the ``text-generation`` pipeline.
            model: Causal language model handed to the pipeline.
            chat_history: Memory object used by the chain. When ``None``, a
                new ``ConversationBufferMemory(memory_key="chat_history")`` is
                created for this agent.
            new_system_prompt: System prompt to use instead of
                ``DEFAULT_SYSTEM_PROMPT``; ``None`` selects the default.
            max_tokens: Passed to the pipeline as ``max_new_tokens``.
            temperature: Passed to ``HuggingFacePipeline`` via ``model_kwargs``.
        """
        self.tokenizer = tokenizer
        self.model = model
        self.max_tokens = max_tokens
        self.temperature = temperature
        self.new_system_prompt = new_system_prompt

        # Create the memory per instance. A default built in the signature is
        # evaluated once and would be shared by every agent, leaking one
        # conversation into another.
        if chat_history is None:
            chat_history = ConversationBufferMemory(memory_key="chat_history")
        self.chat_history = chat_history

        # NOTE(review): temperature is only handed to the LangChain wrapper via
        # `model_kwargs`; confirm the installed LangChain version actually
        # forwards it to generation.
        self.llm = HuggingFacePipeline(
            pipeline=transformers.pipeline(
                task="text-generation",
                model=self.model,
                tokenizer=self.tokenizer,
                torch_dtype=torch.bfloat16,
                device_map="auto",
                eos_token_id=self.tokenizer.eos_token_id,
                max_new_tokens=self.max_tokens,
            ),
            model_kwargs={'temperature': self.temperature},
        )

    def reset(self) -> None:
        """Clear the stored chat history."""
        self.chat_history.clear()

    def get_prompt(self, instruction: str, new_system_prompt: str) -> str:
        """Return the prompt template text for an instruction and system prompt.

        The system prompt is wrapped in ``B_SYS``/``E_SYS`` and, together with
        the instruction, enclosed in ``B_INST``/``E_INST``.
        """
        system_section = self.B_SYS + new_system_prompt + self.E_SYS
        return self.B_INST + system_section + instruction + self.E_INST

    @staticmethod
    def parse_text(text: str) -> None:
        """Print ``text`` wrapped to 100 columns and coloured green.

        Declared as a static method so it can be called on the class or on an
        instance; it does not use any instance state.
        """
        wrapped_text = textwrap.fill(text, width=100)
        print(colored(wrapped_text, "green") + '\n\n')

    def chat(self, message: str) -> str:
        """Send ``message`` through the chain and return the model's reply.

        The template is rebuilt on every call, so a change to
        ``new_system_prompt`` on the instance takes effect on the next message.
        """
        if self.new_system_prompt is None:
            system_prompt = self.DEFAULT_SYSTEM_PROMPT
        else:
            system_prompt = self.new_system_prompt

        template = self.get_prompt(
            instruction="Chat History:\n\n{chat_history} \n\nUser: {user_input}",
            new_system_prompt=system_prompt,
        )
        prompt = PromptTemplate(
            input_variables=["chat_history", "user_input"],
            template=template,
        )
        llm_chain = LLMChain(
            llm=self.llm,
            prompt=prompt,
            verbose=True,
            memory=self.chat_history,
        )
        return llm_chain.predict(user_input=message)

In [None]:
# One checkpoint id shared by the tokenizer and the model so the two always match.
llama2_checkpoint = "meta-llama/Llama-2-7b-chat-hf"

# Build the chat agent with a custom system prompt; the tokenizer and model are
# loaded inline so that the agent holds the only references to them.
llama2_ChatAgent = LLaMA2ChatAgent(
    tokenizer=AutoTokenizer.from_pretrained(
        llama2_checkpoint,
        use_auth_token=huggingface_auth_key,
    ),
    model=AutoModelForCausalLM.from_pretrained(
        llama2_checkpoint,
        device_map='auto',
        torch_dtype=torch.float16,
        use_auth_token=huggingface_auth_key,
    ),
    new_system_prompt="""Respond with a response in the format requested by the user. Do not acknowledge my request with "sure" or in any other way besides going straight to the answer.""",
)

## OpenAI models

In [None]:
import os
# Read the OpenAI API key from disk and export it through the environment.
# A context manager closes the file handle promptly, and `.strip()` removes
# surrounding whitespace (e.g. a trailing newline left by an editor) so it does
# not end up inside the OPENAI_API_KEY value.
with open("./auth_keys/openai_key.txt", "r") as key_file:
    openai_api_key = key_file.read().strip()
os.environ["OPENAI_API_KEY"] = openai_api_key