In [1]:
import os, json, time
import gc
from IPython.display import display, Markdown 
from dotenv import load_dotenv

In [2]:
class CFG:
    OFFLINE = False #True # for Test offline environment
    USE_LLAMA3 = False # for GPU version
    USE_GAMMA = True # for GPU version
    USE_GEMMA2 = False # for GPU version only 
    TASK_GEN = True # for generative Text output task (suitable for RAG project)
    TEST_LLM = True
    USE_LMSTUIDO = False # for local LLM modle 
    model1 = "meta-llama/Meta-Llama-3-8B-Instruct"  # llama3 8B
    model2 =  "google/gemma-1.1-2b-it" #  gemma 2B
    model3 = "google/gemma-7b-it"# gemma 7B
    model4 =  "google/gemma-2-9b-it" # gemma 2 9B
    model5 =  'yentinglin/Llama-3-Taiwan-8B-Instruct' # Chinese version of llama3
    model6 = 'Qwen/Qwen-7B' # support Chinese version 
    model7 = "THUDM/chatglm-6b" # support Chinese version
    embedModel1 = 'intfloat/multilingual-e5-small' # for embedding model support chinese
    embedModel2 = "all-MiniLM-L6-v2"
    embedModel3 = "BAAI/bge-base-en-v1.5" # for embedding model support chinese
    embedModel4 = "BAAI/bge-m3" # for multilingual embedding model
    


    FEW_SHOT_TEST= False#True
    USE_RAG = True#False#False #True#True , in this project, prefer use fine tuning for p
    USE_WANDB = True#True # for  LLM evalution and debug , track fine tuning performance
    USE_TRULENS = False # for LLM evalution For RAG prefer 
    USE_DEEPEVAL = False # for LLM evalution   (require openAI API key)
    USE_TRAIN =  False #True #False#True Much be use GPU for Training 
    loggingSteps= 10#100 #100, #20, #5,#10,
    USE_FAISS = False#True # For RAG VectorDB
    USE_CHROMA = False#True #False # for RAG VectorDF
    USE_PINECONE = True#False #True # for RAG VectorDF
    maxTrainData = 200#3500#5000 #10000#5000 #10000
    maxEvalData = 20#100 # 20 
    maxToken=  512#768#512#768 # 512 for test only

In [3]:
import numpy as np
import pandas as pd
import transformers
import torch
from transformers import (AutoTokenizer, 
                          BitsAndBytesConfig,
                         AutoModelForCausalLM,
                         TrainingArguments)

from langchain_community.document_loaders import (TextLoader,
                                                  PyMuPDFLoader,
                                                  PyPDFDirectoryLoader,
                                                  PyPDFLoader)

# from langchain.document_loaders import PyPDFDirectoryLoader # old version of document loader

from langchain.prompts.prompt import  PromptTemplate

from langchain_community.vectorstores import FAISS #, Chroma,  Pinecone # old version of VectorStore



from langchain_text_splitters import (RecursiveCharacterTextSplitter,
                                      CharacterTextSplitter ,
                                       SentenceTransformersTokenTextSplitter)   


from langchain.embeddings import HuggingFaceEmbeddings # huggingfaceEmbedding deprecated , please use sentencetransformers 
from datasets import Dataset, DatasetDict, load_dataset


import evaluate
import trulens



In [4]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [5]:
def clearMemory():
    for _ in range(5):
        torch.cuda.empty_cache()
        gc.collect()
        time.sleep(0.3)

### Get HuggingFace Hub Access for download model

In [6]:
load_dotenv()
huggingfaceToken = os.getenv("HuggingFace") #get huggeface token from .env file

In [7]:
if CFG.USE_WANDB:
    # train report to  W&B tool
    import wandb
    reportTo= "wandb"
    my_secret = os.getenv("wandb_api_key") 
    wandb.login(key=my_secret) # login 
else: 
    reportTo = "none"# None

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mjohnsonhk88[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /home/johnsonhk88/.netrc


## LLM Model Initialization

In [8]:
# Quantized Config for GPU support only
bnb_config = BitsAndBytesConfig(
        load_in_4bit = True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
        bnb_4bit_use_double_quant=True # Activate nested quantization for 4-bit base models (double quantization)

)


In [9]:
# tokenizer = AutoTokenizer.from_pretrained(CFG.model2, token=huggingfaceToken)

In [10]:
if CFG.USE_LMSTUIDO:
    # Point to the local server
    from langchain.llms import OpenAI
    import openai
    llmModel = "LMStudio"
    model = OpenAI(base_url="http://localhost:1234/v1", api_key="lm-studio")

else: # load model from huggingface
    if device.type == "cuda": # use 7b/8b/9b model gain performance
        if CFG.USE_LLAMA3:
            modelSel = CFG.model1
            llmModel = "llama3_8b"
        
        elif CFG.USE_GEMMA2:
            modelSel = CFG.model4
            llmModel = "gemma2_9b"
    
        elif CFG.USE_GAMMA:
            modelSel = CFG.model3
            llmModel = "gemma_7b"
        else:
            modelSel = CFG.model2
            llmModel = 'gemma_2b'
        
        if CFG.TASK_GEN:
            model = AutoModelForCausalLM.from_pretrained(modelSel, device_map="auto",  
                                                 quantization_config= bnb_config ,
                                                 token=huggingfaceToken)

        else:
            model = AutoModelForCausalLM.from_pretrained(modelSel, device_map="auto",  
                                                 quantization_config= bnb_config, token=huggingfaceToken)
        tokenizer = AutoTokenizer.from_pretrained(modelSel, token=huggingfaceToken) # inital tokenizer
        tokenizer.padding_side = "right"
    

    else: # for cpu select smaller model
        modelSel = CFG.model2
        llmModel = 'gemma_2b'
        if CFG.TASK_GEN:
            model = AutoModelForCausalLM.from_pretrained(modelSel, device_map="auto", token=huggingfaceToken)

        else:
            model = AutoModelForCausalLM.from_pretrained(modelSel, device_map="auto", token=huggingfaceToken)

        tokenizer = AutoTokenizer.from_pretrained(modelSel, token=huggingfaceToken) # inital tokenizer
        tokenizer.padding_side = "right"
    

config.json:   0%|          | 0.00/694 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/20.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/2.11G [00:00<?, ?B/s]

`config.hidden_act` is ignored, you should use `config.hidden_activation` instead.
Gemma's activation function will be set to `gelu_pytorch_tanh`. Please, use
`config.hidden_activation` if you want to override this behaviour.
See https://github.com/huggingface/transformers/pull/29402 for more details.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/137 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/34.2k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/4.24M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.5M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/636 [00:00<?, ?B/s]

In [11]:
model

GemmaForCausalLM(
  (model): GemmaModel(
    (embed_tokens): Embedding(256000, 3072, padding_idx=0)
    (layers): ModuleList(
      (0-27): 28 x GemmaDecoderLayer(
        (self_attn): GemmaSdpaAttention(
          (q_proj): Linear4bit(in_features=3072, out_features=4096, bias=False)
          (k_proj): Linear4bit(in_features=3072, out_features=4096, bias=False)
          (v_proj): Linear4bit(in_features=3072, out_features=4096, bias=False)
          (o_proj): Linear4bit(in_features=4096, out_features=3072, bias=False)
          (rotary_emb): GemmaRotaryEmbedding()
        )
        (mlp): GemmaMLP(
          (gate_proj): Linear4bit(in_features=3072, out_features=24576, bias=False)
          (up_proj): Linear4bit(in_features=3072, out_features=24576, bias=False)
          (down_proj): Linear4bit(in_features=24576, out_features=3072, bias=False)
          (act_fn): PytorchGELUTanh()
        )
        (input_layernorm): GemmaRMSNorm()
        (post_attention_layernorm): GemmaRMSNorm()
  

In [12]:
llmModel


'gemma_7b'

In [13]:
def delModel():
    global model, tokenizer
    del model
    del tokenizer

In [14]:
# delModel()

# Prompt Engineering

In [15]:

templatePrompt1 = """Question: {question}.\nOnly require given final result in JSON format with key 'answer'
            """
templatePrompt2 = "Answer the user Question.\n###\n{format_instructions}\n###\nQuestion: {query}\n"

## Generate LLM response

In [16]:
def generateResponse(query, maxOutToken = 512):
    """
    Direct send message to LLM model, get response
    """
    inputIds = tokenizer(query, return_tensors="pt").to(device)
    response = model.generate(**inputIds,
                              do_sample=True,
                              top_p=0.95,
                              top_k = 3,
                              temperature=0.5,
                              max_new_tokens= maxOutToken,
                             )
    return tokenizer.decode(response[0][len(inputIds["input_ids"]):], skip_special_tokens = True)
    
    

## simple parser for extract data

In [17]:
import re
from  json.decoder import JSONDecodeError
if CFG.TASK_GEN:

    def isInteger(text):
        try:
            if int(text) >= 0:
                return True
            else:
                return False
        except ValueError:
            return False

    def llmJSONparser(txt, key="answer"):
        """
        try to get answer from LLM response , expect in JSON format, 
        """
        try:
            subText = txt.split("{") # split several {} in list 
            for txtSeg in subText: # loop in list to find answer
                end = txtSeg.find("}") # find end position in text segment
                sub = txtSeg[:end] #subsring with {} context
                print(sub)
                temp = sub.replace("*", "") # remove * symbol
                temp = temp.replace("\"", "") # reomve \" symbol
                temp = temp.lower() # convert to lower case
                answerloc = temp.find(key) # find key word "answer" position
                if answerloc != -1:
                    print(f"find answer location : {answerloc}")
                    newTxt = temp[answerloc:] # substring start answer
#                   print("Temp: ", temp)
                    subTxt = newTxt.split("\n")
                    #       print(subTxt)
                    rel =subTxt[0][len(key):].strip() # get answer value with remove space
                    rel= rel.replace(',', '') # remove , symbol
                    print(rel)
                    return rel
                
            return None # can't find answer
        except :
            print(f"""Error LLM JSON parser input txt {txt}""" )
            return None
        return None


    def getLLMAnswerParser(txt, key="answer:"):
        """
        when json parser failure, seem answer not JSON format, 
        use "answer" for key word search final answer 
        """
         # find answer  
        temp = txt.replace("*", "") # remove * symbol
        temp = temp.replace("\"", "") # reomve "" symbol
        temp = temp.lower() # convert to lower case
        # find answer key word
        start = temp.find(key)
        print(f"Start loc: {start}")
        subStr = temp[start:]
        if start != -1:
            subTxt = subStr.split("\n")
           #print(subTxt)
            rel =subTxt[0][len(key):].strip() # get answer value with remove space
            rel= rel.replace(',', '') # remove , symbol
            print(rel)
            return rel
    
        print(subStr)
        return None

## Add parser  to control extreact data from  LLM Structure Output

In [18]:
from langchain_core.output_parsers import (StrOutputParser, 
                                           JsonOutputParser,
                                           PydanticOutputParser,
                                          )
# for LLM structure output
from langchain_core.pydantic_v1 import BaseModel, Field, validator
# from pydantic import BaseModel, Field

### Test LLM Model

In [19]:
%%time
if CFG.TEST_LLM:
    ret =generateResponse("What is Machine Learning?" , maxOutToken=256)
    print(ret)

What is Machine Learning?

Machine learning (ML) is a field of computer science that enables computers to learn from data, identify patterns, and make predictions. It involves teaching computers to learn from experience, rather than being explicitly programmed.

**Key Concepts:**

* **Learning:** The process of a computer acquiring knowledge from data.
* **Data:** Raw facts and figures used to train and test ML models.
* **Model:** A mathematical representation of a learning process that makes predictions or decisions.
* **Algorithm:** A set of rules or steps that guide the learning process.
* **Supervised Learning:** Training a model on labeled data, where the desired outputs are provided.
* **Unsupervised Learning:** Training a model on unlabeled data, where the desired outputs are not provided.

**Types of Learning:**

* **Supervised Learning:** Involves learning from labeled data, where the model is provided with examples of input-output pairs.
* **Unsupervised Learning:** Involves

In [20]:
if CFG.TEST_LLM:
    display(Markdown(ret)) # display in

What is Machine Learning?

Machine learning (ML) is a field of computer science that enables computers to learn from data, identify patterns, and make predictions. It involves teaching computers to learn from experience, rather than being explicitly programmed.

**Key Concepts:**

* **Learning:** The process of a computer acquiring knowledge from data.
* **Data:** Raw facts and figures used to train and test ML models.
* **Model:** A mathematical representation of a learning process that makes predictions or decisions.
* **Algorithm:** A set of rules or steps that guide the learning process.
* **Supervised Learning:** Training a model on labeled data, where the desired outputs are provided.
* **Unsupervised Learning:** Training a model on unlabeled data, where the desired outputs are not provided.

**Types of Learning:**

* **Supervised Learning:** Involves learning from labeled data, where the model is provided with examples of input-output pairs.
* **Unsupervised Learning:** Involves learning from unlabeled data, where the model is not provided with examples of input-output pairs.
* **Reinforcement Learning:** Involves learning through trial and error, where the model learns by interacting with its environment.

**Applications:**

* Image recognition


In [21]:
%%time
query = "What is Machine Learning?"
newPrompt = PromptTemplate(template=templatePrompt1,
                           input_variables=["question"])
finalPrompt = newPrompt.format(
                question=query    
            )
rel =generateResponse(finalPrompt, maxOutToken=1024)
# jsonTxt = getLLMAnswerParser(rel, key="answer")
# print(f"Question : {query}\nResponse Answer: {jsonTxt}")

CPU times: user 882 ms, sys: 0 ns, total: 882 ms
Wall time: 884 ms


In [22]:
print(rel)

Question: What is Machine Learning?.
Only require given final result in JSON format with key 'answer'
            {
                "answer": "Machine Learning is a field of computer science that focuses on enabling computers to learn from data, without explicit programming."
            }


In [9]:
clearMemory()

# Load PDF File

In [10]:
pdfFilePath1 = "../test-document/Attention .pdf"
pdfFilePath2 = "../test-document/yolo.pdf"
pdfDir = "../test-document"

In [11]:
# !pip install pypdf #

## Use Simple pypdf extract simple pdf text data. But can't extract complex layout and extract position picture information from pdf, for test propose
### Future, use different AI model extract complex data

In [12]:
# from  PyPDF2 import PdfReader
from pypdf import PdfReader

In [13]:
reader1 =PdfReader(pdfFilePath1)
type(reader1)

pypdf._reader.PdfReader

In [14]:
numPage = len(reader1.pages)

In [15]:
reader1

<pypdf._reader.PdfReader at 0x7124fcf26710>

In [16]:
currentPage=reader1.pages[0]

In [17]:
print(currentPage.extract_text())

Provided proper attribution is provided, Google hereby grants permission to
reproduce the tables and figures in this paper solely for use in journalistic or
scholarly works.
Attention Is All You Need
Ashish Vaswani∗
Google Brain
avaswani@google.comNoam Shazeer∗
Google Brain
noam@google.comNiki Parmar∗
Google Research
nikip@google.comJakob Uszkoreit∗
Google Research
usz@google.com
Llion Jones∗
Google Research
llion@google.comAidan N. Gomez∗ †
University of Toronto
aidan@cs.toronto.eduŁukasz Kaiser∗
Google Brain
lukaszkaiser@google.com
Illia Polosukhin∗ ‡
illia.polosukhin@gmail.com
Abstract
The dominant sequence transduction models are based on complex recurrent or
convolutional neural networks that include an encoder and a decoder. The best
performing models also connect the encoder and decoder through an attention
mechanism. We propose a new simple network architecture, the Transformer,
based solely on attention mechanisms, dispensing with recurrence and convolutions
entirely. Experime

In [18]:
def getPDFText(pdfDoc):
    '''
    get pdf text from pdf docs
    '''
    text="" 
    pdf_reader= PdfReader(pdfDoc) #read pdf file
    for page in pdf_reader.pages: # loop through pdf pages 
            text+= page.extract_text() # extract text from page and add to text variable
    return  text # return text variable

#### Use PyPDFDirectoryLoader from Langchain load PDF files from DirectoryLoader
<https://python.langchain.com/v0.2/docs/how_to/document_loader_pdf/#using-pypdf>

In [19]:
def getPDFDocs(directory):
    '''
    use PyPDFDirectoryLoader to extract pdf document from directory
    '''
    loader = PyPDFDirectoryLoader(directory) 
    docs = loader.load()
    return docs #text # return text variable

In [20]:
# rawTxt1 =getPDFText(pdfFilePath1)

In [21]:
# rawTxt2 = getPDFText(pdfFilePath2)
# type(rawTxt2)

In [22]:
docs =getPDFDocs(pdfDir)
docs

[Document(page_content='Real-Time Flying Object Detection with YOLOv8\nDillon Reis*, Jordan Kupec, Jacqueline Hong, Ahmad Daoudi\nGeorgia Institute of Technology\ndreis7@gatech.edu *, jkupec3@gatech.edu, jhong356@gatech.edu, adaoudi3@gatech.edu\nAbstract\nThis paper presents a generalized model for real-time\ndetection of ﬂying objects that can be used for transfer\nlearning and further research, as well as a reﬁned model\nthat is ready for implementation. We achieve this by train-\ning our ﬁrst (generalized) model on a data set containing\n40 different classes of ﬂying objects, forcing the model to\nextract abstract feature representations. We then perform\ntransfer learning with these learned parameters on a data\nset more representative of “real world” environments (i.e.\nhigher frequency of occlusion, small spatial sizes, rotations,\netc.) to generate our reﬁned model. Object detection of ﬂy-\ning objects remains challenging due to large variance ob-\nject spatial sizes/aspect rati

In [23]:
len(docs)

25

In [24]:
docs[0].metadata

{'source': '../test-document/yolo.pdf', 'page': 0}

In [25]:
# Defin RAG alogrithm
USE_SIMPLE_RAG = True#True# True # simple similairy approach 
USE_RERANK = False #False # advance RAG with Re-Ranking 
USE_QUERY_EXPANSION = False   # advance RAG with 
USE_EMBEDDING_ADAPER = False
if CFG.USE_WANDB: # define wandb RAG project name
    if USE_SIMPLE_RAG:
        wandbRAGProject = "ai-bank-statement-simple-rag"
    elif USE_RERANK:
        wandbRAGProject = "ai-bank-statement-re-ranking"
    elif USE_QUERY_EXPANSION:
        wandbRAGProject = "ai-bank-statement-query-expansion"
    elif USE_EMBEDDING_ADAPER:
        wandbRAGProject = "ai-bank-statement-embedding-adapter"
    else:
        wandbRAGProject = "ai-bank-statement-simple-rag"

In [26]:
if CFG.USE_WANDB and CFG.USE_RAG:
     # Start a new wandb run
    runTask1 = wandb.init(project=wandbRAGProject, job_type="generation", anonymous="allow")
    # define W&B Table
    wandbCol1 =  ["model", "question",  "llm_generate", "llm_answer"]
    wandbRAGTable =wandb.Table(columns=wandbCol1)

## inital Embedding Model

In [27]:
if CFG.USE_RAG:
    if device.type == "cuda":
        model_kwargs = {"device": "cuda"}
        multiProcess=  False#True # 
    else:
        model_kwargs = {"device": "cpu"}
        multiProcess= False
    def embeddingModelInit(modelName):
        embed =  HuggingFaceEmbeddings(model_name=modelName, model_kwargs= model_kwargs, multi_process=multiProcess)#initial embedding model 
        return embed

In [28]:
if CFG.USE_RAG:
    embedding = embeddingModelInit(CFG.embedModel3)

In [29]:
vector= embedding.embed_query("Hello, how are you?")
len(vector) # checking vector length

768

## Prepare RAG 

# Text split into Chunking 

In [30]:
if CFG.USE_RAG:
    if USE_SIMPLE_RAG:
        textSplitter = RecursiveCharacterTextSplitter(
                                chunk_size= 800, #1500,
                                chunk_overlap=20, #100,
                                add_start_index=True,
                                separators=["\n\n", "\n", ".", " ", ""],
                                  length_function=len,
                                is_separator_regex=False)
    else: # advance RAG possabile use adv method
        textSplitter = CharacterTextSplitter(chunk_size=1500, 
                                             chunk_overlap=100,
                                             length_function=len,
                                             separator="\n\n",
                                             is_separator_regex=False)
        

In [31]:
if CFG.USE_RAG:
    def splitTextDataFromText(text):
        chunk = textSplitter.split_text(text)
        return textSplitter.create_documents(chunk) # create docs
    
    def chunkText(docs, chunkSize=800, chunkOverlap=50):
        # Split text into chunks
        splitter = RecursiveCharacterTextSplitter(chunk_size=chunkSize, chunk_overlap=chunkOverlap)
        chunks = splitter.split_documents(docs)
        return chunks
        

In [32]:
# len(rawTxt1)
# docList = splitTextDataFromText(rawTxt1)
# len(docList)

In [33]:
len(docs)

25

In [34]:
chunkedDocs = chunkText(docs) # chunk text into documents
chunkedDocs

[Document(page_content='Real-Time Flying Object Detection with YOLOv8\nDillon Reis*, Jordan Kupec, Jacqueline Hong, Ahmad Daoudi\nGeorgia Institute of Technology\ndreis7@gatech.edu *, jkupec3@gatech.edu, jhong356@gatech.edu, adaoudi3@gatech.edu\nAbstract\nThis paper presents a generalized model for real-time\ndetection of ﬂying objects that can be used for transfer\nlearning and further research, as well as a reﬁned model\nthat is ready for implementation. We achieve this by train-\ning our ﬁrst (generalized) model on a data set containing\n40 different classes of ﬂying objects, forcing the model to\nextract abstract feature representations. We then perform\ntransfer learning with these learned parameters on a data\nset more representative of “real world” environments (i.e.', metadata={'source': '../test-document/yolo.pdf', 'page': 0}),
 Document(page_content='higher frequency of occlusion, small spatial sizes, rotations,\netc.) to generate our reﬁned model. Object detection of ﬂy-\nin

In [35]:
len(chunkedDocs)

114

## Store Document in VectorDB
### langchain new version required different vector database install different vector database libraries and Call API
### for Chroma
<https://python.langchain.com/v0.2/docs/integrations/vectorstores/chroma/>

### for Pinecone
<https://python.langchain.com/v0.2/docs/integrations/vectorstores/pinecone/>

### for FAISS
<https://python.langchain.com/v0.2/docs/integrations/vectorstores/faiss/>

In [37]:
%%time
if CFG.USE_RAG:
    # VectorDB inital and store document into Vector DB
    if CFG.USE_CHROMA:
        from langchain_chroma import Chroma
        db = Chroma.from_documents(documents= chunkedDocs, embedding=embedding, persist_directory="./chroma_db")
    elif CFG.USE_FAISS:
        db = FAISS.from_documents(documents =chunkedDocs, embedding= embedding)
        print(db.index.ntotal) # number of total index size
    elif CFG.USE_PINECONE:
        from pinecone import Pinecone, ServerlessSpec
        from langchain_pinecone import PineconeVectorStore
        os.environ['PINECONE_API_KEY'] = os.getenv("PINECONE_API_KEY")# 
        pc = Pinecone(api_key=os.environ['PINECONE_API_KEY'])

        import time

        index_name = "langchainvector2"  # piecond  db index name , can change if desired

        existing_indexes = [index_info["name"] for index_info in pc.list_indexes()]

        if index_name not in existing_indexes:
            pc.create_index(
                name=index_name,
                dimension=768,
                metric="cosine",
                spec=ServerlessSpec(cloud="aws", region="us-east-1"),
            )
            while not pc.describe_index(index_name).status["ready"]:
                time.sleep(1)

        index = pc.Index(index_name)
        # Connect to Pinecone index and insert the chunked docs as contents
        db =PineconeVectorStore.from_documents(chunkedDocs, embedding, index_name=index_name)

CPU times: user 1.08 s, sys: 52.2 ms, total: 1.13 s
Wall time: 5.45 s


In [38]:
db

<langchain_pinecone.vectorstores.PineconeVectorStore at 0x7124fc233190>

# 


# Test Query for Vector DB

In [39]:
query = "What is Yolo?"
results = db.similarity_search(query)

In [40]:
results

[Document(page_content='feature maps. If our model shows similar feature activations\nfor F-14s and F-18s, we can say that may be the reason for\nclass confusion.\nMMYolo [21] by Yamaguchi et al. is an open-sourcetoolbox for YOLO series algorithms based on PYTorch.\nMMYolo can decompose the most popular YOLO algo-\nrithms, making them easily customizable and ready for\nanalysis. For our analysis, we employed MMYolo to ﬁrst\nconvert the weights from .pt (Pytorch model) to .pth (State\ndictionary ﬁle, i.e., weights, bias, etc.) and second visualize\nthe different activation maps of YOLOv8 during inference.\nMMYolo allows you to specify model type, weight ﬁle, tar-\nget layer, and channel reduction.\nYOLOv8 6 uses CSPDarknet53 [13] as its backbone, a\ndeep neural network that extracts features at multiple res-', metadata={'page': 3.0, 'source': '../test-document/yolo.pdf'}),
 Document(page_content='Figure 6: YOLOv8 Architecture [16]\nYOLOv5 incorporates anchor boxes, ﬁxed-sized bound-\nin

In [41]:
if CFG.USE_RAG:
    if USE_SIMPLE_RAG:
        num_docs= 2 # set max top k=  3 rank similarity  
    elif  USE_RERANK:
        num_docs =15 # re-ranking use highter order 
    else: 
        num_docs =3
    retriever  = db.as_retriever( 
        search_type="mmr",  # Also test "similarity"
        search_kwargs={"k": num_docs})

## Test for VectorDB with retriever

In [42]:
query = "What is Yolo?"
results = retriever.invoke(query)
print(len(results))
print(results)

2
[Document(page_content='feature maps. If our model shows similar feature activations\nfor F-14s and F-18s, we can say that may be the reason for\nclass confusion.\nMMYolo [21] by Yamaguchi et al. is an open-sourcetoolbox for YOLO series algorithms based on PYTorch.\nMMYolo can decompose the most popular YOLO algo-\nrithms, making them easily customizable and ready for\nanalysis. For our analysis, we employed MMYolo to ﬁrst\nconvert the weights from .pt (Pytorch model) to .pth (State\ndictionary ﬁle, i.e., weights, bias, etc.) and second visualize\nthe different activation maps of YOLOv8 during inference.\nMMYolo allows you to specify model type, weight ﬁle, tar-\nget layer, and channel reduction.\nYOLOv8 6 uses CSPDarknet53 [13] as its backbone, a\ndeep neural network that extracts features at multiple res-', metadata={'page': 3.0, 'source': '../test-document/yolo.pdf'}), Document(page_content='https://arxiv.org/pdf/1506.02640.pdf . 7\n[13] Joseph Redmon and Ali Farhadi. Yolov3: An i

## LLM RAG Test

In [43]:
from langchain.chains.question_answering import load_qa_chain

In [None]:
chain=load_qa_chain(model ,chain_type="stuff")