# Chatbot

In [None]:
%pip install -q accelerate
%pip install -q bitsandbytes
%pip install -q gradio
%pip install -q huggingface_hub
%pip install -q jinja2
%pip install -q langchain
%pip install -q python-dotenv
%pip install -q scipy
%pip install -q torch
%pip install -q transformers

In [None]:
# Provide Hugging Face access token
import dotenv
import os

# Read the variables defined in '/.env' into the process environment, then
# look up the Hugging Face token (None when the variable is not defined).
dotenv.load_dotenv(dotenv_path='/.env')
HF_ACCESS_TOKEN = os.environ.get('HF_ACCESS_TOKEN')

## Model

In [None]:
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline

In [None]:
# Hugging Face Hub identifier of the checkpoint; the config, model and
# tokenizer loaders in the following cells all receive this value.
model_id = 'meta-llama/Llama-2-7b-chat-hf'

In [None]:
# 4-bit quantization settings for loading the model:
# NF4 quantization type, with float16 as the compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_compute_dtype='float16',
)

# Load the model configuration from the Hub.
# The access token is passed as `token`: the `use_auth_token` keyword is
# deprecated in Transformers and scheduled for removal.
model_config = AutoConfig.from_pretrained(
    model_id,
    token=HF_ACCESS_TOKEN
)

In [None]:
# Load the causal-LM weights, quantized on load according to `bnb_config`;
# `device_map='auto'` leaves device placement to the library.
# The access token is passed as `token`: the `use_auth_token` keyword is
# deprecated in Transformers and scheduled for removal.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    config=model_config,
    device_map='auto',
    quantization_config=bnb_config,
    token=HF_ACCESS_TOKEN
)

# Load the tokenizer that belongs to the same checkpoint.
# The access token is passed as `token`: the `use_auth_token` keyword is
# deprecated in Transformers and scheduled for removal.
tokenizer = AutoTokenizer.from_pretrained(
    model_id,
    token=HF_ACCESS_TOKEN
)

# Put the model in evaluation mode (turns off training-only behaviour such as dropout).
# As the last expression of the cell, the returned module is also echoed as the cell output.
model.eval()

In [None]:
# Build a Hugging Face text-generation pipeline around the loaded model and tokenizer
pipe = pipeline(
    'text-generation',
    model=model,
    tokenizer=tokenizer,
)

## App

In [None]:
from langchain.llms import HuggingFacePipeline

# Wrap the transformers pipeline in LangChain's HuggingFacePipeline so it can
# be handed to the conversation chain as its `llm`.
llm = HuggingFacePipeline(pipeline=pipe)

In [None]:
from langchain.prompts.prompt import PromptTemplate

# Prompt in the Llama-2 chat layout, written with jinja2 placeholders.
#
# The system text, the running transcript and the new user message all live in
# ONE [INST] ... [/INST] block:
#   * Deliberately no `</s>` at the end. `</s>` is the end-of-sequence marker
#     that closes a finished assistant turn; placing it before the model has
#     answered tells the model the sequence is already over.
#   * Deliberately no second `<s>[INST]` in front of the new message. The
#     transcript from the buffer memory is plain "Human:/AI:" text inside the
#     first [INST], so opening another one would nest instruction blocks.
#   * The string starts directly with `<s>` and stops right after the "AI:"
#     cue, without surrounding blank lines.
template = """\
<s>[INST] <<SYS>>
The following is a friendly conversation between a human and an AI. 
The AI is talkative and provides lots of specific details from its context. 
If the AI does not know the answer to a question, it truthfully says it does not know.
Please be concise.
<</SYS>>

Current conversation:
{{ history }}
Human: {{ input }} [/INST] AI:"""

# `history` is filled in by the chain's memory, `input` by the caller;
# template_format="jinja2" is required because of the `{{ ... }}` placeholders.
prompt = PromptTemplate(
    input_variables=["history", "input"],
    template=template,
    template_format="jinja2"
)

In [None]:
from langchain.chains import ConversationChain
from langchain.memory import ConversationBufferMemory

# Conversation chain tying together the prompt template, the LLM wrapper and
# a buffer memory that keeps the running transcript.
conversation = ConversationChain(
    prompt=prompt,
    llm=llm,
    memory=ConversationBufferMemory(),
    verbose=False,
)

In [None]:
def _history_turns(history):
    """Yield ``(human_text, ai_text)`` pairs from a Gradio chat history.

    Accepts the pair format (a sequence of ``[user, assistant]`` items) as
    well as the messages format (a sequence of ``{'role', 'content'}`` dicts).
    Entries that are incomplete or not plain text are skipped.
    """
    pending_human = None
    for entry in history or []:
        if isinstance(entry, dict):
            role = entry.get('role')
            text = entry.get('content')
            if role == 'user':
                pending_human = text
            elif role == 'assistant':
                if isinstance(pending_human, str) and isinstance(text, str):
                    yield pending_human, text
                pending_human = None
        elif isinstance(entry, (list, tuple)) and len(entry) == 2:
            human_text, ai_text = entry
            if isinstance(human_text, str) and isinstance(ai_text, str):
                yield human_text, ai_text


# Answer one chat message
def predict(message: str, history: list) -> str:
    """Return the model's reply to ``message`` for one chat session.

    Gradio calls this with the new user message and the transcript of the
    session that sent it. The conversation memory is rebuilt from that
    transcript on every call instead of being kept in one process-wide buffer,
    so that:

    * concurrent visitors (the app is launched with a share link) do not see
      or influence each other's conversations, and
    * the model's context always matches what the chat window shows, e.g.
      after a page reload.

    Args:
        message: The text the user just submitted.
        history: Earlier turns of this session as supplied by Gradio.

    Returns:
        The text generated by the conversation chain.
    """
    memory = ConversationBufferMemory()
    for human_text, ai_text in _history_turns(history):
        memory.chat_memory.add_user_message(human_text)
        memory.chat_memory.add_ai_message(ai_text)

    # Reuse the LLM, prompt and verbosity configured on the shared chain, but
    # pair them with this request's own memory; the shared chain is not mutated.
    chain = ConversationChain(
        llm=conversation.llm,
        memory=memory,
        prompt=conversation.prompt,
        verbose=conversation.verbose,
    )

    return chain.predict(input=message)

In [None]:
import gradio as gr

# Chat UI backed by `predict`; the clear, retry and undo buttons are switched off
interface = gr.ChatInterface(
    fn=predict,
    clear_btn=None,
    retry_btn=None,
    undo_btn=None,
)

# Show the UI inline in the notebook (800 x 600) and also request a public share link
interface.launch(
    inline=True,
    width=800,
    height=600,
    share=True,
)