## Running inference with LLMs

In [11]:
#!pip install -q transformers accelerate
#!pip install langchain==0.0.293

In [12]:
#==========#
# Set seed #
#==========#
import torch
import random
import numpy as np
from transformers import set_seed

seed = 42

# Apply the same seed to every random number source used in this notebook:
# the transformers helper, Python's `random`, NumPy, and torch (CPU and all CUDA devices).
for seed_fn in (
    set_seed,
    random.seed,
    np.random.seed,
    torch.manual_seed,
    torch.cuda.manual_seed_all,
):
    seed_fn(seed)

## Using Hugging Face and LangChain for Mistral inference

In [13]:
# Read the access token through a context manager so the file handle is closed
# deterministically, and strip surrounding whitespace so that a trailing newline
# in the key file does not become part of the token.
with open("./auth_keys/huggingface_auth_key.txt", "r") as key_file:
    huggingface_auth_key = key_file.read().strip()

In [14]:
import transformers
from transformers import (AutoTokenizer, AutoModelForCausalLM)
from langchain.llms import HuggingFacePipeline
from langchain.memory import ConversationBufferMemory
from langchain import PromptTemplate, LLMChain
import textwrap
from termcolor import colored

class MistralChatAgent:
    """Chat wrapper that runs a Mistral instruct model through LangChain.

    A `transformers` text-generation pipeline is wrapped in a LangChain
    `HuggingFacePipeline`. Each call to `chat` renders the system prompt, the
    stored conversation and the new user message into one ``[INST]...[/INST]``
    prompt, runs it through an `LLMChain`, and prints the reply.
    """

    def __init__(
        self,
        tokenizer: AutoTokenizer,
        model: AutoModelForCausalLM,
        chat_history: ConversationBufferMemory = None,
        new_system_prompt: str = None,
        max_tokens: int = 128,
        temperature: float = 0.7,
    ) -> None:
        """Build the generation pipeline and initialise the conversation state.

        Args:
            tokenizer: Tokenizer matching `model`; its EOS id is used as both the
                end-of-sequence and the padding token id.
            model: Already-loaded causal language model.
            chat_history: Conversation memory to use. When omitted, a fresh
                `ConversationBufferMemory(memory_key="chat_history")` is created
                for this instance.
            new_system_prompt: System prompt to use instead of
                `DEFAULT_SYSTEM_PROMPT`; `None` selects the default.
            max_tokens: Passed to the pipeline as `max_new_tokens`.
            temperature: Stored in the `HuggingFacePipeline` `model_kwargs`.
        """
        self.tokenizer = tokenizer
        self.model = model
        self.llm = HuggingFacePipeline(
            pipeline = transformers.pipeline(
                task="text-generation",
                model=self.model, tokenizer=self.tokenizer,
                torch_dtype=torch.bfloat16,
                device_map="auto",
                eos_token_id=self.tokenizer.eos_token_id,
                pad_token_id=self.tokenizer.eos_token_id,
                max_new_tokens=max_tokens,
            ),
            # NOTE(review): when a ready-made pipeline is supplied, LangChain may not
            # forward `model_kwargs` to generation, in which case `temperature` has
            # no effect -- confirm against the installed LangChain version.
            model_kwargs = {'temperature': temperature}
        )
        # A default argument is evaluated once at definition time, so a memory object
        # used as the default would be shared by every agent created without an
        # explicit `chat_history`. Create the default per instance instead.
        if chat_history is None:
            chat_history = ConversationBufferMemory(memory_key="chat_history")
        self.chat_history = chat_history
        self.B_INST, self.E_INST = "[INST]", "[/INST]"
        self.new_system_prompt = new_system_prompt
        self.DEFAULT_SYSTEM_PROMPT = """You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.

If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information."""

    def reset(self) -> None:
        """Clear the stored conversation memory."""
        self.chat_history.clear()

    def get_prompt(self, instruction: str, new_system_prompt: str):
        """Return the prompt template: B_INST + system prompt + blank line + instruction + E_INST."""
        SYSTEM_PROMPT = new_system_prompt + "\n\n"
        prompt_template = self.B_INST + SYSTEM_PROMPT + instruction + self.E_INST
        return prompt_template

    def chat(self, message: str) -> None:
        """Send `message` to the model and print the reply.

        The reply is stripped, wrapped to 100 columns and printed in red. Nothing
        is returned. The chain is created with `self.chat_history` as its memory,
        which is how earlier turns reach the `{chat_history}` slot of the prompt.
        """
        template = self.get_prompt(
            instruction="Chat History:\n\n{chat_history} \n\nUser: {user_input}",
            new_system_prompt=self.DEFAULT_SYSTEM_PROMPT if self.new_system_prompt is None else self.new_system_prompt
        )
        prompt = PromptTemplate(input_variables=["chat_history", "user_input"], template=template)
        llm_chain = LLMChain(
            llm=self.llm,
            prompt=prompt,
            verbose=True,
            memory=self.chat_history,
        )
        print (colored(textwrap.fill(llm_chain.predict(user_input=message).strip(), width=100), "red"))

In [15]:
# Load the tokenizer and the half-precision weights of the Mistral instruct
# checkpoint, then wrap them in a chat agent with a custom system prompt.
mistral_ChatAgent = MistralChatAgent(
    tokenizer=AutoTokenizer.from_pretrained(
        "mistralai/Mistral-7B-Instruct-v0.2",
    ),
    model=AutoModelForCausalLM.from_pretrained(
        "mistralai/Mistral-7B-Instruct-v0.2",
        torch_dtype=torch.float16,
        device_map="auto",
    ),
    new_system_prompt="""Respond with a response in the format requested by the user. Do not acknowledge my request with "sure" or in any other way besides going straight to the answer.""",
)

Downloading shards: 100%|██████████| 3/3 [00:00<00:00, 6108.21it/s]


Loading checkpoint shards: 100%|██████████| 3/3 [00:04<00:00,  1.43s/it]


In [16]:
# First turn: state a name so the follow-up question can check the chat memory.
mistral_ChatAgent.chat("My name is Hongli, who are you?")



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m[INST]Respond with a response in the format requested by the user. Do not acknowledge my request with "sure" or in any other way besides going straight to the answer.

Chat History:

 

User: My name is Hongli, who are you?[/INST][0m

[1m> Finished chain.[0m
[31mI am a helpful AI designed to assist with information and answer questions to the best of my
ability. How can I help you today, Hongli?[0m


In [17]:
# Second turn: can only be answered from the conversation stored in the agent's memory.
mistral_ChatAgent.chat("What's my name?")



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m[INST]Respond with a response in the format requested by the user. Do not acknowledge my request with "sure" or in any other way besides going straight to the answer.

Chat History:

Human: My name is Hongli, who are you?
AI:  I am a helpful AI designed to assist with information and answer questions to the best of my ability. How can I help you today, Hongli? 

User: What's my name?[/INST][0m



[1m> Finished chain.[0m
[31mHongli, I was just introduced to you as your name in our previous interaction. If you have a
different name or question, please let me know.[0m


## Llama-2 (7B)

In [18]:
class LLaMA2ChatAgent:
    """Chat wrapper that runs a Llama-2 chat model through LangChain.

    A `transformers` text-generation pipeline is wrapped in a LangChain
    `HuggingFacePipeline`. Each call to `chat` renders the system prompt (inside
    ``<<SYS>>`` markers), the stored conversation and the new user message into
    one ``[INST]...[/INST]`` prompt, runs it through an `LLMChain`, and prints
    the reply.
    """

    def __init__(
        self,
        tokenizer: AutoTokenizer,
        model: AutoModelForCausalLM,
        chat_history: ConversationBufferMemory = None,
        new_system_prompt: str = None,
        max_tokens: int = 128,
        temperature: float = 0.7,
    ) -> None:
        """Build the generation pipeline and initialise the conversation state.

        Args:
            tokenizer: Tokenizer matching `model`; its EOS id is used as both the
                end-of-sequence and the padding token id.
            model: Already-loaded causal language model.
            chat_history: Conversation memory to use. When omitted, a fresh
                `ConversationBufferMemory(memory_key="chat_history")` is created
                for this instance.
            new_system_prompt: System prompt to use instead of
                `DEFAULT_SYSTEM_PROMPT`; `None` selects the default.
            max_tokens: Passed to the pipeline as `max_new_tokens`.
            temperature: Stored in the `HuggingFacePipeline` `model_kwargs`.
        """
        self.tokenizer = tokenizer
        self.model = model
        self.llm = HuggingFacePipeline(
            pipeline = transformers.pipeline(
                task="text-generation",
                model=self.model, tokenizer=self.tokenizer,
                torch_dtype=torch.bfloat16,
                device_map="auto",
                eos_token_id=self.tokenizer.eos_token_id,
                # Set the padding id explicitly, as MistralChatAgent does.
                # NOTE(review): presumably generation would otherwise fall back to
                # the EOS id on its own and warn on every call -- confirm.
                pad_token_id=self.tokenizer.eos_token_id,
                max_new_tokens=max_tokens,
            ),
            # NOTE(review): when a ready-made pipeline is supplied, LangChain may not
            # forward `model_kwargs` to generation, in which case `temperature` has
            # no effect -- confirm against the installed LangChain version.
            model_kwargs = {'temperature': temperature}
        )
        # A default argument is evaluated once at definition time, so a memory object
        # used as the default would be shared by every agent created without an
        # explicit `chat_history`. Create the default per instance instead.
        if chat_history is None:
            chat_history = ConversationBufferMemory(memory_key="chat_history")
        self.chat_history = chat_history
        self.B_INST, self.E_INST = "[INST]", "[/INST]"
        self.B_SYS, self.E_SYS = "<<SYS>>\n", "\n<</SYS>>\n\n"
        self.new_system_prompt = new_system_prompt
        self.DEFAULT_SYSTEM_PROMPT = """You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.

If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information."""

    def reset(self) -> None:
        """Clear the stored conversation memory."""
        self.chat_history.clear()

    def get_prompt(self, instruction: str, new_system_prompt: str):
        """Return the prompt template: B_INST + (B_SYS + system prompt + E_SYS) + instruction + E_INST."""
        SYSTEM_PROMPT = self.B_SYS + new_system_prompt + self.E_SYS
        prompt_template =  self.B_INST + SYSTEM_PROMPT + instruction + self.E_INST
        return prompt_template

    def chat(self, message: str) -> None:
        """Send `message` to the model and print the reply.

        The reply is stripped, wrapped to 100 columns and printed in red. Nothing
        is returned. The chain is created with `self.chat_history` as its memory,
        which is how earlier turns reach the `{chat_history}` slot of the prompt.
        """
        template = self.get_prompt(
            instruction="Chat History:\n\n{chat_history} \n\nUser: {user_input}",
            new_system_prompt=self.DEFAULT_SYSTEM_PROMPT if self.new_system_prompt is None else self.new_system_prompt
        )
        prompt = PromptTemplate(input_variables=["chat_history", "user_input"], template=template)
        llm_chain = LLMChain(
            llm=self.llm,
            prompt=prompt,
            verbose=True,
            memory=self.chat_history,
        )
        print (colored(textwrap.fill(llm_chain.predict(user_input=message).strip(), width=100), "red"))

In [19]:
# Load the tokenizer and the half-precision weights of the Llama-2 chat checkpoint
# (both calls pass the Hugging Face token), then wrap them in a chat agent with a
# custom system prompt.
llama_ChatAgent = LLaMA2ChatAgent(
    tokenizer=AutoTokenizer.from_pretrained(
        "meta-llama/Llama-2-7b-chat-hf",
        token=huggingface_auth_key,
    ),
    model=AutoModelForCausalLM.from_pretrained(
        "meta-llama/Llama-2-7b-chat-hf",
        token=huggingface_auth_key,
        torch_dtype=torch.float16,
        device_map="auto",
    ),
    new_system_prompt="""Respond with a response in the format requested by the user. Do not acknowledge my request with "sure" or in any other way besides going straight to the answer.""",
)

Downloading shards: 100%|██████████| 2/2 [00:00<00:00, 5603.61it/s]
Loading checkpoint shards: 100%|██████████| 2/2 [00:04<00:00,  2.39s/it]


In [20]:
# First turn: state a name so the follow-up question can check the chat memory.
llama_ChatAgent.chat("My name is Hongli, who are you?")



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m[INST]<<SYS>>
Respond with a response in the format requested by the user. Do not acknowledge my request with "sure" or in any other way besides going straight to the answer.
<</SYS>>

Chat History:

 

User: My name is Hongli, who are you?[/INST][0m

[1m> Finished chain.[0m
[31mHello Hongli! I am LLaMA, an AI assistant developed by Meta AI that can understand and respond to
human input in a conversational manner. How can I assist you today?[0m


In [21]:
# Second turn: can only be answered from the conversation stored in the agent's memory.
llama_ChatAgent.chat("What's my name?")



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m[INST]<<SYS>>
Respond with a response in the format requested by the user. Do not acknowledge my request with "sure" or in any other way besides going straight to the answer.
<</SYS>>

Chat History:

Human: My name is Hongli, who are you?
AI:   Hello Hongli! I am LLaMA, an AI assistant developed by Meta AI that can understand and respond to human input in a conversational manner. How can I assist you today? 

User: What's my name?[/INST][0m

[1m> Finished chain.[0m
[31mYour name is Hongli.[0m


## Trying a summarization task

In [22]:
# Sample news article (a Reuters piece, per its dateline and 2007 copyright line)
# that the cells below pass to each agent as the text to summarize.
text = """LONDON, England (Reuters) -- Harry Potter star Daniel Radcliffe gains access to a reported £20 million ($41.1 million) fortune as he turns 18 on Monday, but he insists the money won't cast a spell on him. Daniel Radcliffe as Harry Potter in "Harry Potter and the Order of the Phoenix" To the disappointment of gossip columnists around the world, the young actor says he has no plans to fritter his cash away on fast cars, drink and celebrity parties. "I don't plan to be one of those people who, as soon as they turn 18, suddenly buy themselves a massive sports car collection or something similar," he told an Australian interviewer earlier this month. "I don't think I'll be particularly extravagant. "The things I like buying are things that cost about 10 pounds -- books and CDs and DVDs." At 18, Radcliffe will be able to gamble in a casino, buy a drink in a pub or see the horror film "Hostel: Part II," currently six places below his number one movie on the UK box office chart. Details of how he'll mark his landmark birthday are under wraps. His agent and publicist had no comment on his plans. "I'll definitely have some sort of party," he said in an interview. "Hopefully none of you will be reading about it." Radcliffe's earnings from the first five Potter films have been held in a trust fund which he has not been able to touch. Despite his growing fame and riches, the actor says he is keeping his feet firmly on the ground. "People are always looking to say 'kid star goes off the rails,'" he told reporters last month. "But I try very hard not to go that way because it would be too easy for them." His latest outing as the boy wizard in "Harry Potter and the Order of the Phoenix" is breaking records on both sides of the Atlantic and he will reprise the role in the last two films. Watch I-Reporter give her review of Potter's latest » . There is life beyond Potter, however. 
The Londoner has filmed a TV movie called "My Boy Jack," about author Rudyard Kipling and his son, due for release later this year. He will also appear in "December Boys," an Australian film about four boys who escape an orphanage. Earlier this year, he made his stage debut playing a tortured teenager in Peter Shaffer's "Equus." Meanwhile, he is braced for even closer media scrutiny now that he's legally an adult: "I just think I'm going to be more sort of fair game," he told Reuters. E-mail to a friend . Copyright 2007 Reuters. All rights reserved.This material may not be published, broadcast, rewritten, or redistributed."""

In [23]:
# Clear the stored conversation first so the earlier turns are not part of the summarization prompt.
mistral_ChatAgent.reset()
mistral_ChatAgent.chat(f"Please summarize the following article: \n{text}")



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m[INST]Respond with a response in the format requested by the user. Do not acknowledge my request with "sure" or in any other way besides going straight to the answer.

Chat History:

 

User: Please summarize the following article: 
LONDON, England (Reuters) -- Harry Potter star Daniel Radcliffe gains access to a reported £20 million ($41.1 million) fortune as he turns 18 on Monday, but he insists the money won't cast a spell on him. Daniel Radcliffe as Harry Potter in "Harry Potter and the Order of the Phoenix" To the disappointment of gossip columnists around the world, the young actor says he has no plans to fritter his cash away on fast cars, drink and celebrity parties. "I don't plan to be one of those people who, as soon as they turn 18, suddenly buy themselves a massive sports car collection or something similar," he told an Australian interviewer earlier this month. "I don't think I'll be particula

In [24]:
# Clear the stored conversation first so the earlier turns are not part of the summarization prompt.
llama_ChatAgent.reset()
llama_ChatAgent.chat(f"Please summarize the following article: \n{text}")



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m[INST]<<SYS>>
Respond with a response in the format requested by the user. Do not acknowledge my request with "sure" or in any other way besides going straight to the answer.
<</SYS>>

Chat History:

 

User: Please summarize the following article: 
LONDON, England (Reuters) -- Harry Potter star Daniel Radcliffe gains access to a reported £20 million ($41.1 million) fortune as he turns 18 on Monday, but he insists the money won't cast a spell on him. Daniel Radcliffe as Harry Potter in "Harry Potter and the Order of the Phoenix" To the disappointment of gossip columnists around the world, the young actor says he has no plans to fritter his cash away on fast cars, drink and celebrity parties. "I don't plan to be one of those people who, as soon as they turn 18, suddenly buy themselves a massive sports car collection or something similar," he told an Australian interviewer earlier this month. "I don't think 

## OpenAI models

In [25]:
import os
# Read the key through a context manager so the file handle is closed
# deterministically, and strip surrounding whitespace so that a trailing newline
# in the key file does not become part of the key.
with open("./auth_keys/openai_key.txt", "r") as key_file:
    openai_api_key = key_file.read().strip()
# Expose the key through the environment; the client is later created as `OpenAI()`
# with no explicit key argument.
os.environ["OPENAI_API_KEY"] = openai_api_key

In [26]:
from openai import OpenAI

class OpenAIAgent:
    """Chat wrapper around the OpenAI chat-completions API.

    The conversation is kept as a list of ``{"role", "content"}`` dicts that
    starts with the system prompt and is sent in full on every request.
    """

    def __init__(
        self,
        model_name: str = "gpt-3.5-turbo",
        new_system_prompt: str = None,
        max_tokens: int = 128,
        temperature: float = 0.7,
        seed: int = 42,
    ) -> None:
        """Create the API client and initialise the conversation.

        Args:
            model_name: Model identifier sent with every request.
            new_system_prompt: System prompt to use instead of
                `DEFAULT_SYSTEM_PROMPT`; `None` selects the default.
            max_tokens: `max_tokens` value sent with every request.
            temperature: `temperature` value sent with every request.
            seed: `seed` value sent with every request.
        """
        self.client = OpenAI()
        self.model_name = model_name
        self.max_tokens = max_tokens
        self.temperature = temperature
        self.seed = seed
        self.DEFAULT_SYSTEM_PROMPT = """You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.

If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information."""
        self.system_prompt = new_system_prompt if new_system_prompt is not None else self.DEFAULT_SYSTEM_PROMPT
        self.chat_history = [{"role": "system", "content": self.system_prompt}]

    def reset(self) -> None:
        """Discard the conversation, keeping only the system prompt."""
        self.chat_history = [{"role": "system", "content": self.system_prompt}]

    def chat(self, message: str) -> None:
        """Send `message` with the conversation so far and print the reply.

        Every message of the outgoing conversation is echoed first (keys in
        yellow, values in green); the reply is wrapped to 100 columns and printed
        in red. Nothing is returned. If the request raises, the exception
        propagates and `self.chat_history` is left exactly as it was, so the call
        can be retried without duplicating the user message.
        """
        # Work on a copy: the stored history is replaced only after the request succeeds.
        pending_history = self.chat_history + [{"role": "user", "content": message}]
        for entry in pending_history:
            for key, value in entry.items():
                print (colored(f'{key}:', "yellow"), colored(f'{value}', "green"))
        completion = self.client.chat.completions.create(
            model = self.model_name,
            messages = pending_history,
            temperature = self.temperature,
            max_tokens = self.max_tokens,
            seed = self.seed,
        )
        # Fall back to an empty string so a missing reply body cannot break the
        # wrapping below or store a None content in the history.
        ai_message = completion.choices[0].message.content or ""

        pending_history.append({"role": "assistant", "content": ai_message})
        self.chat_history = pending_history
        # Wrap the plain text first and colour afterwards, so the colour escape
        # codes do not count towards the 100-column width.
        print (colored(textwrap.fill(ai_message, width=100), "red"))

In [27]:
# Create the OpenAI-backed agent with a custom system prompt; the remaining
# settings keep the defaults declared by OpenAIAgent.
gpt_ChatAgent = OpenAIAgent(
    new_system_prompt="""Respond with a response in the format requested by the user. Do not acknowledge my request with "sure" or in any other way besides going straight to the answer.""",
    model_name="gpt-3.5-turbo",
)

In [28]:
# First turn: state a name so the follow-up question can check the stored history.
gpt_ChatAgent.chat("My name is Hongli, who are you?")

[33mrole:[0m [32msystem[0m
[33mcontent:[0m [32mRespond with a response in the format requested by the user. Do not acknowledge my request with "sure" or in any other way besides going straight to the answer.[0m
[33mrole:[0m [32muser[0m
[33mcontent:[0m [32mMy name is Hongli, who are you?[0m
[31mYou can call me Assistant.[0m


In [29]:
# Second turn: can only be answered from the history sent along with the request.
gpt_ChatAgent.chat("Who am I?")

[33mrole:[0m [32msystem[0m
[33mcontent:[0m [32mRespond with a response in the format requested by the user. Do not acknowledge my request with "sure" or in any other way besides going straight to the answer.[0m
[33mrole:[0m [32muser[0m
[33mcontent:[0m [32mMy name is Hongli, who are you?[0m
[33mrole:[0m [32massistant[0m
[33mcontent:[0m [32mYou can call me Assistant.[0m
[33mrole:[0m [32muser[0m
[33mcontent:[0m [32mWho am I?[0m
[31mYou are Hongli.[0m


In [30]:
# Reset to the system prompt only, so the earlier turns are not sent with the summarization request.
gpt_ChatAgent.reset()
gpt_ChatAgent.chat(f"Please summarize the following article: \n{text}")

[33mrole:[0m [32msystem[0m
[33mcontent:[0m [32mRespond with a response in the format requested by the user. Do not acknowledge my request with "sure" or in any other way besides going straight to the answer.[0m
[33mrole:[0m [32muser[0m
[33mcontent:[0m [32mPlease summarize the following article: 
LONDON, England (Reuters) -- Harry Potter star Daniel Radcliffe gains access to a reported £20 million ($41.1 million) fortune as he turns 18 on Monday, but he insists the money won't cast a spell on him. Daniel Radcliffe as Harry Potter in "Harry Potter and the Order of the Phoenix" To the disappointment of gossip columnists around the world, the young actor says he has no plans to fritter his cash away on fast cars, drink and celebrity parties. "I don't plan to be one of those people who, as soon as they turn 18, suddenly buy themselves a massive sports car collection or something similar," he told an Australian interviewer earlier this month. "I don't think I'll be particularl