<a href="https://colab.research.google.com/github/ak2742/mlplay/blob/main/02)_mistral_7b_with_prompt_template.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#@title Install all required libraries

# Hugging Face libraries used to run the LLM. `transformers` supplies the model,
# tokenizer and pipeline APIs imported in later cells; `accelerate` and
# `bitsandbytes` are presumably what back `device_map="auto"` and the 4-bit
# BitsAndBytesConfig used when the model is created — TODO confirm.
# Flags: -q keeps pip output quiet, -U upgrades to the newest available release.
!pip install -q -U transformers
!pip install -q -U accelerate
!pip install -q -U bitsandbytes

# LangChain, pinned to 0.1.20. Later cells import from `langchain.llms`,
# `langchain.prompts` and `langchain.chains`.
!pip install -q -U langchain==0.1.20

In [None]:
#@title Check for GPU

import torch

# Select CUDA when PyTorch can see a GPU, otherwise stay on the CPU, and report
# the choice (followed by the name of GPU 0 when running on CUDA).
if torch.cuda.is_available():
    device = 'cuda'
    print("Device:", device)
    print(torch.cuda.get_device_name(0))
else:
    device = 'cpu'
    print("Device:", device)

In [None]:
#@title Create Model

import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig, BitsAndBytesConfig
from langchain.llms import HuggingFacePipeline

# Repository the weights are loaded from (a sharded re-upload, going by its name).
model_path = "filipealmeida/Mistral-7B-Instruct-v0.1-sharded"
# Upstream repository; the tokenizer is loaded from this one in a later cell.
origin_model_path = "mistralai/Mistral-7B-Instruct-v0.1"

# Quantization settings: load weights in 4-bit NF4 with double quantization,
# and use bfloat16 as the compute dtype. `torch` is imported by the GPU-check cell.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# `trust_remote_code` is intentionally left at its default (False). Enabling it
# lets a Hub repository run its own Python code during loading, which is not
# something to grant to a third-party re-upload. Mistral is a built-in
# Transformers architecture, so no repository-supplied code is required.
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    quantization_config=bnb_config,
    device_map="auto",
)

In [None]:
# The tokenizer is fetched from the upstream repo (`origin_model_path`), not
# from the sharded copy (`model_path`) that the model weights were loaded from.
# NOTE(review): presumably both repos share the same vocabulary — confirm.
tokenizer = AutoTokenizer.from_pretrained(origin_model_path)

In [None]:
#@title Creating pipelines to run LLM at Colab notebook

# Hugging Face text-generation pipeline built around the already-loaded
# quantized model and its tokenizer.
text_generation_pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    # Output shaping: return only the newly generated text, capped at 100 tokens.
    return_full_text=False,
    max_new_tokens=100,
    # Decoding: sample at temperature 0.5 with a mild repetition penalty.
    do_sample=True,
    temperature=0.5,
    repetition_penalty=1.1,
    # The tokenizer's EOS id is used both to stop generation and as the pad id.
    eos_token_id=tokenizer.eos_token_id,
    pad_token_id=tokenizer.eos_token_id,
)

# Wrap the pipeline so it can be driven through LangChain (`invoke`, `|` chains).
mistral_llm = HuggingFacePipeline(pipeline=text_generation_pipeline)

In [None]:
#@title Run LLM

# Smoke test: pass a bare question (no prompt template, no context) to the
# LangChain-wrapped pipeline and print whatever `invoke` returns.
text = "What is mitochondria?"
response = mistral_llm.invoke(text)
print(response)

In [None]:
#@title Add Prompt

from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

#### Prompt Template
# Mistral-instruct layout: the instruction sits between [INST] and [/INST], and
# the model writes its answer after [/INST]. The end-of-sequence marker </s>
# closes a *finished* answer, so it must not be part of the prompt; placing it
# right after [/INST] tells the model the sequence has already ended.
# NOTE(review): the literal <s> may duplicate the BOS token if the pipeline's
# tokenizer call also adds special tokens — verify for the installed version.
prompt_template = """<s>[INST]
You are a reliable and trustworthy assistant, providing helpful and
respectful responses that precisely address the context.
Answer the question below from context below :
{context}
{question}
[/INST]
"""

# Example inputs substituted into the {question} and {context} placeholders.
question = """What is the primary cause of global warming?"""
context = """Global warming refers to the long-term increase in Earth's average
surface temperature, primarily caused by human activities, such as the burning of fossil
fuels and deforestation. This phenomenon has far-reaching consequences for the planet and its
inhabitants. With the release of greenhouse gases into the atmosphere, including carbon dioxide and
methane, a thickening blanket is formed, trapping heat and leading to the enhanced greenhouse effect. The
consequences of global warming are multifaceted and include rising sea levels, more frequent and severe weather
events, shifts in ecosystems, and threats to human health and well-being. Urgent action is required to mitigate global
warming by reducing greenhouse gas emissions, transitioning to renewable energy sources, and adopting sustainable practices
to safeguard the planet for future generations."""

prompt = PromptTemplate(template=prompt_template, input_variables=["question","context"])

# Compose the chain: the rendered prompt is piped into the LLM wrapper.
llm_chain = prompt | mistral_llm

# The dict keys must match the template's placeholder names.
response = llm_chain.invoke({"question":question,"context":context})

print(response)