### Installing Libraries

In [None]:
%%time

from IPython.display import clear_output
! pip install sentence_transformers==2.2.2
! pip install -qq -U langchain
! pip install -qq -U tiktoken
! pip install -qq -U pypdf
! pip install -qq -U faiss-gpu
! pip install -qq -U InstructorEmbedding
! pip install -qq -U transformers
! pip install -qq -U accelerate
! pip install -qq -U bitsandbytes
clear_output()

CPU times: user 1.35 s, sys: 226 ms, total: 1.57 s
Wall time: 2min 59s


In [None]:
# Re-install bitsandbytes explicitly from the main PyPI index. This is the same
# command that the transformers ImportError message in this notebook recommends.
!pip install -i https://pypi.org/simple/ bitsandbytes

Looking in indexes: https://pypi.org/simple/


In [None]:
%%time

import warnings
warnings.filterwarnings("ignore")

import os
import glob
import textwrap
import time

import langchain

### loaders
from langchain.document_loaders import PyPDFLoader, DirectoryLoader

### splits
from langchain.text_splitter import RecursiveCharacterTextSplitter

### prompts
from langchain import PromptTemplate, LLMChain

### vector stores
from langchain.vectorstores import FAISS

### models
from langchain.llms import HuggingFacePipeline
from langchain.embeddings import HuggingFaceInstructEmbeddings

### retrievers
from langchain.chains import RetrievalQA

import torch
import transformers
from transformers import (
    AutoTokenizer, AutoModelForCausalLM,
    BitsAndBytesConfig,
    pipeline
)

clear_output()

CPU times: user 10.4 s, sys: 1.62 s, total: 12.1 s
Wall time: 18.5 s


In [None]:
# Report the versions of the core libraries so a run can be reproduced later.
installed = {
    'langchain:': langchain,
    'torch:': torch,
    'transformers:': transformers,
}
for label, lib in installed.items():
    print(label, lib.__version__)

langchain: 0.1.20
torch: 2.2.1+cu121
transformers: 4.40.2


### Configuration: The code sets up a configuration class CFG with various parameters such as the model name, temperature, top-p, repetition penalty, chunk size for splitting text, embeddings model repository, number of similar passages to retrieve, and paths.

In [None]:
class CFG:
    """Notebook-wide settings, read as plain class attributes (never instantiated here)."""

    # --- language model / text generation ---
    model_name: str = "llama2-7b-chat"      # default model key passed to get_model()
    temperature: float = 0
    top_p: float = 0.95
    repetition_penalty: float = 1.15

    # --- document splitting ---
    split_chunk_size: int = 800
    split_overlap: int = 0

    # --- embeddings ---
    embeddings_model_repo: str = "sentence-transformers/all-MiniLM-L6-v2"

    # --- retrieval: number of similar passages to fetch ---
    k: int = 3

    # --- input data location ---
    PDFs_path: str = "/content/NLP_dataset.pdf"


### Model Initialization: The get_model function initializes a language model and tokenizer based on the specified model_name in the configuration. It downloads the specified model and tokenizer from the Hugging Face model hub. The BitsAndBytesConfig is used for efficient quantization of model parameters.



In [None]:
def get_model(model = CFG.model_name):
    """Load the tokenizer and 4-bit quantized causal LM for a supported model key.

    Args:
        model: Model key, either 'wizardlm' or 'llama2-7b-chat'.
            Defaults to CFG.model_name as it was when this function was defined.

    Returns:
        A tuple (tokenizer, model, max_len): the tokenizer and the quantized
        model loaded from the Hugging Face repo mapped to the key, and the
        max_len value configured for that model.

    Raises:
        ValueError: if `model` is not one of the supported keys. Previously this
            case printed a message and then failed with UnboundLocalError at
            the return statement.
    """
    # Per-model settings; only the repo, the extra keyword arguments and
    # max_len differ between the supported models.
    model_specs = {
        'wizardlm': {
            'repo': 'TheBloke/wizardLM-7B-HF',
            'tokenizer_kwargs': {},
            'model_kwargs': {},
            'max_len': 1024,
        },
        'llama2-7b-chat': {
            'repo': 'daryl149/llama-2-7b-chat-hf',
            'tokenizer_kwargs': {'use_fast': True},
            'model_kwargs': {'trust_remote_code': True},
            'max_len': 2048,
        },
    }

    # Fail fast, before any download starts, on an unsupported key.
    if model not in model_specs:
        raise ValueError(
            "Not implemented model (tokenizer and backbone): "
            f"{model!r}. Supported models: {sorted(model_specs)}"
        )
    spec = model_specs[model]

    print('\nDownloading model: ', model, '\n\n')

    tokenizer = AutoTokenizer.from_pretrained(
        spec['repo'],
        **spec['tokenizer_kwargs']
    )

    # 4-bit NF4 quantization with double quantization and float16 compute,
    # shared by every supported model.
    bnb_config = BitsAndBytesConfig(
        load_in_4bit = True,
        bnb_4bit_quant_type = "nf4",
        bnb_4bit_compute_dtype = torch.float16,
        bnb_4bit_use_double_quant = True,
    )

    # Separate local name so the `model` argument (a string key) is not
    # rebound to the loaded network.
    loaded_model = AutoModelForCausalLM.from_pretrained(
        spec['repo'],
        quantization_config = bnb_config,
        device_map = 'auto',
        low_cpu_mem_usage = True,
        **spec['model_kwargs']
    )

    return tokenizer, loaded_model, spec['max_len']

In [None]:
%%time

tokenizer, model, max_len = get_model(model = CFG.model_name)

clear_output()


Downloading model:  llama2-7b-chat 




ImportError: Using `bitsandbytes` 8-bit quantization requires Accelerate: `pip install accelerate` and the latest version of bitsandbytes: `pip install -i https://pypi.org/simple/ bitsandbytes`

In [None]:
# Put the loaded model into evaluation (inference) mode. As the last expression
# of the cell, the value returned by eval() is displayed as the output.
model.eval()

NameError: name 'model' is not defined

In [None]:
# Display the model's hf_device_map attribute; presumably the placement chosen
# by device_map = 'auto' at load time (TODO confirm).
model.hf_device_map

### Setting Up Hugging Face Pipeline: The code sets up a pipeline for text generation using the Hugging Face pipeline function. It utilizes the pre-trained model and tokenizer obtained earlier. Various parameters such as max_length, temperature, top_p, and repetition_penalty are configured for text generation.

In [None]:
### hugging face pipeline
# temperature / top_p only take effect when sampling is enabled. Previously
# do_sample was never set, so the CFG values were passed but ignored.
# Sampling is now switched on exactly when a positive temperature is
# configured; with temperature = 0 decoding stays greedy, as before.
use_sampling = CFG.temperature > 0
sampling_kwargs = (
    {'temperature': CFG.temperature, 'top_p': CFG.top_p} if use_sampling else {}
)

# NOTE(review): max_length caps prompt tokens plus generated tokens together;
# consider max_new_tokens if long prompts get truncated answers.
pipe = pipeline(
    task = "text-generation",
    model = model,
    tokenizer = tokenizer,
    pad_token_id = tokenizer.eos_token_id,
    do_sample = use_sampling,
    max_length = max_len,
    repetition_penalty = CFG.repetition_penalty,
    **sampling_kwargs
)

### langchain pipeline
llm = HuggingFacePipeline(pipeline = pipe)

In [None]:
# Display the LangChain wrapper object to confirm it was created.
llm

HuggingFacePipeline(pipeline=<transformers.pipelines.text_generation.TextGenerationPipeline object at 0x7cb20f38e6b0>)

In [None]:
%%time
# Smoke test: send a raw prompt straight to the LLM (no retrieval involved)
# and display the generated text; %%time reports how long generation takes.
query = "Chennai places"
llm.invoke(query)

CPU times: user 56.8 s, sys: 211 ms, total: 57 s
Wall time: 1min 4s


"Chennai places to visit, Chennai sightseeing, Chennai tourism, Chennai travel guide, Chennai trip planner, India holidays, Indian cities, South IndiaLeave a comment on Chennai: A City of Rich History and Culture\n\nChennai, the capital city of Tamil Nadu, is a vibrant metropolis that offers a unique blend of traditional culture and modern amenities. Located on the southeastern coast of India, Chennai has a rich history dating back to the 16th century when it was a major port city for the British East India Company. Today, it is one of the largest cities in India and an important cultural hub in South Asia.\nOne of the most popular attractions in Chennai is the Kapaleeswarar Temple, a beautiful Shiva temple built in the 7th century AD. The temple is famous for its intricate carvings and sculptures, as well as its towering gopuram (gateway) that rises over 40 feet high. Another must-visit attraction is the Fort St. George Museum, which houses a vast collection of artifacts and exhibits 

In [None]:
%%time
# Second raw-prompt check against the LLM alone (no retrieval involved);
# the generated text is displayed as the cell output.
query = "Plan a trip to goa for 1 day"
llm.invoke(query)

CPU times: user 41.4 s, sys: 0 ns, total: 41.4 s
Wall time: 41.6 s


"Plan a trip to goa for 1 day. Unterscheidung between the two is not always clear-cut, and different people may have different preferences when it comes to which type of vacation they want to take.\nThe best way to plan a trip to Goa depends on your personal preferences and interests. Here are some steps you can follow:\n1. Determine the purpose of your trip: Are you looking for a relaxing beach vacation or an adventure-filled getaway? Knowing the purpose of your trip will help you decide where to stay, what activities to do, and how long to stay in Goa.\n2. Choose your travel dates: Goa has a tropical climate, so the best time to visit is from October to May. Avoid planning your trip during the monsoon season (June to September) as the weather can be unpredictable and many tourist attractions may be closed.\n3. Decide on your accommodation: Goa offers a wide range of accommodations, from budget-friendly guesthouses to luxury resorts. Consider factors like location, amenities, and pric