### Necessary imports

In [1]:
#!pip install -q -U torch datasets transformers tensorflow langchain playwright html2text sentence_transformers faiss-cpu pypdf "unstructured[all-docs]"
#!pip install -q accelerate==0.21.0 peft==0.4.0 bitsandbytes==0.40.2 trl==0.4.7

In [2]:
# Rich console used for the colour-tagged status messages printed by later cells.
from rich.console import Console
import argparse
console = Console()

# Add '../wizardlib/' (resolved against the current working directory) to the
# import path so that the `wizardlib` module can be imported.
import sys
sys.path.append('../wizardlib/')
import wizardlib as wizard

import os

In [3]:
# Set the MPS allocator's high-watermark ratio for this kernel process.
# NOTE(review): per the PyTorch MPS environment-variable notes, 0.0 presumably
# disables the upper memory limit (risking system-wide OOM) — TODO confirm.
%env PYTORCH_MPS_HIGH_WATERMARK_RATIO=0.0

env: PYTORCH_MPS_HIGH_WATERMARK_RATIO=0.0


In [4]:
# Rich-markup prefixes used to tag success / failure messages.
ok_prefix = "[bold green]->[/bold green]"
nok_prefix = "[bold red]->[/bold red]"

# Ask the project-local wizardlib helpers which platform this is and whether
# it reports a usable GPU.
platform = wizard.getPlatform()
hasGPU = wizard.hasGPU(platform)

#--------------------------
# Test GPU availability
#--------------------------
if not hasGPU:
    console.print(nok_prefix + "[bold red]GPU is not available. Please make sure your system has a compatible GPU.[/bold red]")
    console.print("You either need a PC with an NVdia GPU or a Mac with Apple M1/M2/M3 GPU.")
    # Use sys.exit rather than the interactive `exit` helper: inside
    # IPython/Jupyter `exit` is an autocall object that asks the kernel to shut
    # down instead of raising SystemExit, so the rest of this cell would still
    # run. sys.exit(1) raises SystemExit(1), which halts the cell (and a
    # "Run All") while keeping the same exit status when run as a script.
    sys.exit(1)

# Only reached when a GPU was reported: resolve the device to run on.
gpuDevice = wizard.getDevice(platform)

console.print(f"Platform Detected: [bold green]{platform}[/bold green] with device [bold green]{gpuDevice}[/bold green]")

### Dependencies

In [5]:
from pathlib import Path

import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    #BitsAndBytesConfig,
    pipeline
)
from datasets import load_dataset
from peft import LoraConfig, PeftModel

from langchain.text_splitter import CharacterTextSplitter
from langchain_community.document_loaders import UnstructuredPDFLoader
from langchain_community.document_loaders import PyPDFLoader

from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

from langchain.prompts import PromptTemplate
from langchain.schema.runnable import RunnablePassthrough
from langchain.llms import HuggingFacePipeline
from langchain.chains import LLMChain

  from .autonotebook import tqdm as notebook_tqdm
  warn("The installed version of bitsandbytes was compiled without GPU support. "


'NoneType' object has no attribute 'cadam32bit_grad_fp32'


In [6]:
# Resolve the MPS device when it is usable; otherwise report why it is not.
if torch.backends.mps.is_available():
    mps_device = torch.device("mps")
elif torch.backends.mps.is_built():
    # PyTorch was built with MPS support, so the OS / hardware is the blocker.
    print("MPS not available because the current MacOS version is not 12.3+ "
          "and/or you do not have an MPS-enabled device on this machine.")
else:
    # This PyTorch build has no MPS backend at all.
    print("MPS not available because the current PyTorch install was not "
          "built with MPS enabled.")

### Load Mistral 7B (quantization currently disabled)

In [7]:
#################################################################
# Tokenizer
#################################################################

# Hugging Face Hub identifier shared by the tokenizer and the model below.
model_name='mistralai/Mistral-7B-Instruct-v0.1'

# set use_fast=False/True?
# NOTE(review): add_eos_token=True presumably appends an EOS token to every
# encoded prompt, which is usually a fine-tuning setting — confirm it is
# intended for inference.
# NOTE(review): trust_remote_code=True permits running code shipped with the
# model repository — confirm it is actually required for this checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_name, add_eos_token=True, trust_remote_code=True, use_fast=False)
tokenizer.padding_side = "right"

# only for Cuda GPUs
# bnb_config = BitsAndBytesConfig(
#     load_in_4bit=True,
#     bnb_4bit_quant_type="nf4",
#     bnb_4bit_compute_dtype="float16",
#     bnb_4bit_use_double_quant=True,
# )

#################################################################
# Load pre-trained config
#################################################################
# Every dtype / quantization / device option below is commented out, so the
# checkpoint is loaded with the library defaults.
# NOTE(review): that presumably means full-precision weights on the CPU —
# TODO confirm the memory footprint is acceptable.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    #load_in_8bit=False,
    #torch_dtype=torch.float16,
    #device_map=mps_device,
    trust_remote_code=True,
    #quantization_config=bnb_config,
)

Loading checkpoint shards: 100%|██████████| 2/2 [00:28<00:00, 14.37s/it]


In [8]:
# When running on M-series MacBooks: move the model onto the GPU.
# NOTE(review): the device is hard-coded to "mps" even though `gpuDevice` was
# resolved earlier via wizard.getDevice — confirm which one should be used.
model = model.to("mps")

### Count number of trainable parameters

In [None]:
def print_number_of_trainable_model_parameters(model):
    """Summarise how many of a model's parameters are trainable.

    Despite its name, this function does not print anything; it returns the
    report so the caller decides how to display it.

    Args:
        model: Any object exposing ``named_parameters()`` that yields
            ``(name, parameter)`` pairs, where each parameter provides
            ``numel()`` and a ``requires_grad`` flag.

    Returns:
        A three-line string holding the trainable parameter count, the total
        parameter count and the trainable percentage (two decimals). A model
        without any parameters reports ``0.00%`` instead of raising
        ``ZeroDivisionError``.
    """
    trainable_model_params = 0
    all_model_params = 0
    for _, param in model.named_parameters():
        param_count = param.numel()
        all_model_params += param_count
        if param.requires_grad:
            trainable_model_params += param_count

    # Guard the ratio: a model with no parameters would otherwise divide by zero.
    if all_model_params:
        trainable_percentage = 100 * trainable_model_params / all_model_params
    else:
        trainable_percentage = 0.0

    return (
        f"trainable model parameters: {trainable_model_params}\n"
        f"all model parameters: {all_model_params}\n"
        f"percentage of trainable model parameters: {trainable_percentage:.2f}%"
    )

# The helper returns the report as a string, so print it explicitly.
print(print_number_of_trainable_model_parameters(model))

trainable model parameters: 7241732096
all model parameters: 7241732096
percentage of trainable model parameters: 100.00%


### Build Mistral text generation pipeline

In [None]:
# Wrap the already-loaded model and tokenizer in a text-generation pipeline.
text_generation_pipeline = pipeline(
    model=model,
    tokenizer=tokenizer,
    task="text-generation",
    # Sampling has to be enabled explicitly: without do_sample=True decoding is
    # greedy and the temperature below is ignored (transformers only warns).
    do_sample=True,
    temperature=0.2,
    repetition_penalty=1.1,
    # Return the prompt together with the generated completion.
    return_full_text=True,
    # Upper bound on the number of newly generated tokens per call.
    max_new_tokens=1000,
    device= "mps"
)

In [None]:
# Expose the Hugging Face pipeline through LangChain's LLM wrapper so it can be
# used as the `llm` of the chain built later.
mistral_llm = HuggingFacePipeline(pipeline=text_generation_pipeline)

### Load and chunk documents. Load chunked documents into FAISS index 

In [17]:
# Load the source PDF; the path is relative to the notebook's working directory.
loader = PyPDFLoader("../data/test_parsing/COVID19THEM21@CON2021.pdf")
# `pages` holds the loaded documents that are chunked in the next cell.
pages = loader.load()

In [18]:
# Split the loaded pages into chunks (target size 100, no overlap).
text_splitter = CharacterTextSplitter(chunk_size=100, chunk_overlap=0)
chunked_documents = text_splitter.split_documents(pages)

# Embed the chunks with a sentence-transformers model and store them in FAISS.
embedding_model = HuggingFaceEmbeddings(
    model_name='sentence-transformers/all-mpnet-base-v2'
)
db = FAISS.from_documents(chunked_documents, embedding_model)

# Retriever view over the index, consumed by the RAG chain further down.
retriever = db.as_retriever()

### Create PromptTemplate and LLMChain

In [None]:
# Prompt text with two placeholders: {context} (retrieved material) and
# {question} (the user's query). The [INST] ... [/INST] markers presumably
# follow the Mistral-Instruct prompt format — TODO confirm.
prompt_template = """
### [INST] Instruction: Answer the question based on your knowledge of the financial expenditures in Uganda during the Covid pandemic:

{context}

### QUESTION:
{question} [/INST]
 """

# Create prompt from prompt template; input_variables must match the
# placeholders used in the template string above.
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

# Create llm chain: formats the prompt and sends it to the wrapped Mistral pipeline.
llm_chain = LLMChain(llm=mistral_llm, prompt=prompt)

In [None]:
def _format_docs(docs):
    """Join the text of the retrieved documents into one plain-text context."""
    return "\n\n".join(doc.page_content for doc in docs)


# The incoming question is fanned out to both prompt variables:
#   - "context": the retriever looks up matching chunks, which are then joined
#     as plain text. Passing the raw list of Document objects would put their
#     Python repr (including metadata) into the prompt.
#   - "question": forwarded unchanged.
# The resulting mapping feeds the prompt + LLM chain.
rag_chain = (
    {"context": retriever | _format_docs,
     "question": RunnablePassthrough()}
    | llm_chain
)

In [16]:
%%time
# Run the whole RAG chain on one question; %%time reports how long the cell takes.
result = rag_chain.invoke("Did educational expenditures decrease during Covid?")

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


KeyboardInterrupt: 

In [None]:
# Display only the generated answer stored under the "text" key of the result.
result["text"]

'\nThere is no information provided in the documents about changes in educational expenditures specifically related to the Covid-19 pandemic.'