# Mistral Quantization and Finetuning

In [27]:
!pip install -q accelerate
!pip install -qi https://pypi.org/simple/ bitsandbytes

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [28]:
import torch
from transformers import AutoTokenizer, BitsAndBytesConfig, AutoModelForCausalLM

In [29]:
# Hub identifier of the checkpoint; reused for both the tokenizer and the model.
model_name = "mistralai/Mistral-7B-Instruct-v0.2"

In [30]:
# Tokenizer matching the checkpoint named by `model_name`.
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Reuse the end-of-sequence token for padding, and pad on the right-hand side.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = 'right'

## Quantization
Quantization is done to reduce the memory footprint and speed up inference while still retaining acceptable model performance. For this quantization, we will use bitsandbytes.

In [32]:
# --- bitsandbytes settings (consumed when the BitsAndBytesConfig is built) ---

# Forwarded as `load_in_4bit`.
use_4bit = True

# Name of the torch dtype used as the 4-bit compute dtype (looked up on `torch`).
compute_dtype = "float16"

# Forwarded as `bnb_4bit_quant_type`.
quantization_type = "nf4"

# Forwarded as `bnb_4bit_use_double_quant` (nested / double quantization).
use_nested_quant = False

In [33]:
# Turn the dtype name (a string such as "float16") into the torch dtype object.
bnb_compute_dtype = getattr(torch, compute_dtype)

# Collect the bitsandbytes options in one mapping, then build the config from it.
_bnb_options = {
    "load_in_4bit": use_4bit,
    "bnb_4bit_quant_type": quantization_type,
    "bnb_4bit_compute_dtype": bnb_compute_dtype,
    "bnb_4bit_use_double_quant": use_nested_quant,
}
quantization_config = BitsAndBytesConfig(**_bnb_options)

In [None]:
# Load the causal-LM checkpoint, quantized as described by `quantization_config`.
model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=quantization_config)

In [None]:
# Ask the model directly (no supporting context), wrapped in [INST] ... [/INST] markers.
chat_prompt = "[INST] Was Vivek Ramaswamy running for president ? [/INST]"

# Tokenize through the tokenizer's __call__ API (encode_plus is the legacy entry
# point) and move the tensors to the device the model actually lives on instead
# of assuming a device literally named 'cuda'.
encoded_prompt = tokenizer(chat_prompt, return_tensors="pt").to(model.device)
chat_input = encoded_prompt["input_ids"]

generated_ids = model.generate(
    chat_input,
    # pad_token == eos_token here, so pass the mask explicitly rather than
    # letting generate() try to infer it from the pad token.
    attention_mask=encoded_prompt["attention_mask"],
    max_new_tokens=1000,
    do_sample=True,
    pad_token_id=tokenizer.eos_token_id,
)
decoded = tokenizer.batch_decode(generated_ids)

# Show the generated text as the cell output (it was previously computed but never displayed).
decoded

## Langchain And Prompt Engineering

In [None]:
!pip install -q langchain

In [35]:
from langchain import LLMChain
from langchain.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from transformers import pipeline
from langchain.embeddings.huggingface import HuggingFaceEmbeddings

In [None]:
# Text-generation pipeline around the quantized model, later handed to LangChain.
text_generation_pipeline = pipeline(
    task='text-generation',
    model=model,
    tokenizer=tokenizer,
    # `temperature` is only honoured in sampling mode; without do_sample=True
    # generation is greedy and the temperature setting is ignored.
    do_sample=True,
    temperature=0.2,
    repetition_penalty=1.1,
    return_full_text=True,
    max_new_tokens=1000,
)

In [None]:
# Prompt text for the context-grounded Q&A chain. `{context}` and `{question}`
# are placeholders to be substituted when the prompt is formatted.
# The "When unsure ..." sentence was previously cut off mid-phrase; it is now complete.
prompt_template = """
### [INST]
Instruction: You are an expert political analyst with vast knowledge of the United States electoral process. You answer questions with
certainty and you do not hallucinate. When unsure, you politely reply that you do not have enough information to answer. Using this knowledge, answer the following questions.
Here is a context to help:

{context}

### QUESTION:
{question}

[/INST]
"""

In [None]:
# Wrap the transformers pipeline in LangChain's HuggingFacePipeline LLM class.
llm_pipeline = HuggingFacePipeline(
    pipeline=text_generation_pipeline,
)

In [None]:
# Bind the template text to its two placeholders.
# (The comma after `input_variables` was missing, which made this cell a SyntaxError.)
prompt = PromptTemplate(
    input_variables=['context', 'question'],
    template=prompt_template,
)

In [None]:
# Chain that formats `prompt` and sends the result to `llm_pipeline`.
llm_chain = LLMChain(
    prompt=prompt,
    llm=llm_pipeline,
)

In [None]:
# Reference passage (news text about Vivek Ramaswamy's campaign announcement)
# passed as the "context" input when the chain is invoked.
context = """
Vivek Ramaswamy, the multi-millionaire biotech entrepreneur and self-described intellectual godfather of the anti-woke movement, announced on Tuesday that he is running for president.
“We are in the middle of a national identity crisis,” he declared in an online video launching his campaign, offering that the current political climate constituted a form of “psychological slavery.”
Speaking straight to the camera, with an American flag draped in the background and a flag pin on his lapel, Ramaswamy framed his campaign as a broad counteroffensive to what he called the “woke left” — describing it as a threat to open speech, the free exchanging of ideas and American exceptionalism itself.
Ramaswamy is the third high-profile candidate to declare for the presidency in 2024. Though he filed forms with the FEC declaring he would be running on the Republican side of the aisle, his announcement video made no mention of the party itself — an indication that he hopes to frame his candidacy as outside the conventional political framework.
He has already done barnstorming in early nominating states, including Iowa, where he was well received even as some of the state’s political bigwigs professed to not having familiarity with the planks on which he was running.
Ramaswamy made his fortune in biotech investing, but he is best known for his appearances on Fox News and for the New York Times bestselling book he has written.
While his chances of securing the nomination are certainly long, Ramaswamy’s entry into the contest was greeted with a traditional flare from opposition Democrats. Shortly after he appeared on Fox News to elaborate on his decision to run, the Democratic National Committee sent out a statement.
“As Vivek Ramaswamy uses Tucker Carlson’s show to announce his campaign for president, one thing is clear: The race for the MAGA base is getting messier and more crowded by the day,” it read. “Over the next few months, Republicans are guaranteed to take exceedingly extreme positions on everything from banning abortion to cutting Social Security and Medicare and we look forward to continuing to ensure every American knows just how extreme the MAGA agenda is.”
"""

In [None]:
# Inputs for the chain: the reference passage plus the question to answer from it.
chain_inputs = {
    "context": context,
    "question": "Was Vivek Ramaswamy running for president in the 2024 general election ?",
}

# Last expression of the cell, so the chain's result is what gets displayed.
llm_chain.invoke(chain_inputs)

## Langchain and Chroma DB

In [37]:
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings

In [43]:
from chromadb import HttpClient
# HTTP client built with no arguments, i.e. using chromadb's default connection settings.
# NOTE(review): presumably requires a Chroma server to already be running and reachable — confirm.
chroma_client = HttpClient()

In [47]:
# Sentence-embedding model used by the vector store.
minilm_embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# LangChain view over the 'us-election-gpt' collection, accessed through `chroma_client`.
vector_store = Chroma(
    collection_name='us-election-gpt',
    embedding_function=minilm_embeddings,
    client=chroma_client,
)

In [48]:
# Keep a handle on the retriever instead of building it and throwing it away;
# the bare name on the last line still displays it as the cell output.
retriever = vector_store.as_retriever()
retriever

VectorStoreRetriever(tags=['Chroma', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.chroma.Chroma object at 0x1364f8220>)

In [49]:
query = 'What is Donald Trump up to lately?'

# Embed the query with the vector store's own embedding model. The LLM
# `tokenizer` only produces token ids, which are not an embedding and cannot be
# compared against the vectors stored in the collection.
query_embedding = vector_store.embeddings.embed_query(query)

In [50]:
# Fetch the 5 stored documents most similar to the question text.
vector_store.similarity_search(
    query='What is Donald Trump up to lately',
    k=5,
)

[Document(page_content='Former President Donald Trump continues to make false claims about the New York civil fraud case he lost – including a wildly inaccurate declaration on Tuesday that an appeals court previously said that he had “won the case.”Trump has made similar claims for. He delivered an especially unequivocal version at a Tuesday campaign rally in Wisconsin, the day after heto prevent New York Attorney General Letitia James from beginning to collect on Judge Arthur Engoron’s $454 million judgment against him.Trump said: “The Appellate Division actually gave me the case. I won the case, because I won it in the Appellate Division. …I won the case because it’s called statute of limitations. The Appellate Division ruled in my favor. That means most of the case is gone; the judge refused to honor it. Now, he – nobody ever heard of that before. So the Appellate Division said, ‘You won the case, that’s it,’ and the judge said, ‘I don’t accept it.’ He’s called a rogue judge. He’s a

In [None]:
from langchain.llms import