## Pip install dependencies

In [None]:
!pip install llama-index
!pip install llama-index-core==0.10.6.post1
!pip install llama-index-postprocessor-flag-embedding-reranker
!pip install git+https://github.com/FlagOpen/FlagEmbedding.git
!pip install llama-parse
%pip install -U llama-index llama-index-embeddings-nomic
%pip install llama-index-llms-ollama

## Import Libraries

In [31]:
# llama-parse is async-first, running the async code in a notebook requires the use of nest_asyncio
import nest_asyncio
nest_asyncio.apply()

import os
from dotenv import load_dotenv
from llama_index.core import VectorStoreIndex
from llama_index.core import Settings
from llama_index.llms.ollama import Ollama
from llama_index.embeddings.nomic import NomicEmbedding
from llama_index.core import VectorStoreIndex
from llama_index.core import Settings

In [15]:
# Read variables from the .env file into the process environment.
load_dotenv()

# API keys for Nomic and LlamaCloud; os.getenv yields None when a key is unset.
nomic_api_key = os.getenv("NOMIC_API_KEY")
llama_cloud_api_key = os.getenv("LLAMA_CLOUD_API_KEY")

# Ollama-served llama3.1 is registered as the default LLM for llama-index.
llm = Ollama(model="llama3.1:latest", request_timeout=120.0)
Settings.llm = llm

# Nomic text embeddings (128 dimensions) are registered as the default embedder.
embed_model = NomicEmbedding(
    model_name="nomic-embed-text-v1.5",
    dimensionality=128,
    api_key=nomic_api_key,
)
Settings.embed_model = embed_model

In [16]:
from llama_parse import LlamaParse

# Parse the PDF into markdown documents with LlamaParse.
# NOTE(review): absolute, machine-specific path — consider a configurable data directory.
documents = LlamaParse(
    result_type="markdown",
).load_data(
    "/Users/angelmurillo/Desktop/OpenSource_RAG_LLM/data/AVD-005-MAB_INT_VIAL_RELEASE_1.pdf"
)

Started parsing the file under job_id a1980be9-5742-4bf9-b1b8-99ed1f48dc8e


In [17]:
# Display the markdown text of the first parsed document as a sanity check.
documents[0].text

'# Lonzzo\n\n# Batch Report\n\n# Batch Header Information\n\nOrder Number: AVD-005-MAB_INT_VIAL_RELEASE_1\n\nQuantity: 1 PC\n\nVersion: 23\n\nProduct: (MC1S1_000010) MR_INT_VIAL_RELEASE_T\n\nRecipe: (MR_INT_VIAL_RELEASE_T)\n\nWD Start: EWI Start: 27-Jul-2023\n\nEWI End: 27-Jul-2023\n\nOrder Signoff: 27-Jul-2023\n\n08:28:42 09:32:35 09:33:54\n\n# Table of Contents\n\n- Review Signatures\n- Critical / Other / Spez Parameters\n- Recipe Signatures (Non-Instruction)\n- Comments\n- Attachments\n- Bill of Materials\n- Output Materials\n- Samples\n- Trends of Critical / Other Process Parameters\n- Instructions\n\n# Linked Workflows - Table of Contents\n\nNone\n\n# Review Signatures - Table of Contents\n\nNone\n\n# QA Signature\n\nNone\n\n# Order Review Signature\n\n|Name|Date|\n|---|---|\n|system (System Account)|27-Jul-2023 09:33:52|\n\n# Last Approval Signature\n\n|Name|Date|\n|---|---|\n|system (System Account)|27-Jul-2023 09:33:54|\n\nAVD-005-MAB_INT_VIAL_RELEASE_1 Page 1 of 26'

## Setting up the Reranker

In [18]:
from llama_index.postprocessor.flag_embedding_reranker import FlagEmbeddingReranker

# Reranker built on the BAAI/bge-reranker-large model; keeps the 5 best nodes.
reranker = FlagEmbeddingReranker(
    model="BAAI/bge-reranker-large",
    top_n=5,
)

tokenizer_config.json:   0%|          | 0.00/443 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.1M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/279 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/801 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.24G [00:00<?, ?B/s]

In [19]:
from llama_index.core import SimpleDirectoryReader
from llama_index.core import VectorStoreIndex

# Baseline pipeline: read the PDF with SimpleDirectoryReader, build a vector
# index from it, and query with 5 retrieved nodes passed through `reranker`.
# NOTE(review): absolute, machine-specific path — consider a configurable data directory.
reader = SimpleDirectoryReader(
    input_files=[
        "/Users/angelmurillo/Desktop/OpenSource_RAG_LLM/data/AVD-005-MAB_INT_VIAL_RELEASE_1.pdf",
    ],
)
base_docs = reader.load_data()
raw_index = VectorStoreIndex.from_documents(base_docs)
raw_query_engine = raw_index.as_query_engine(
    node_postprocessors=[reranker],
    similarity_top_k=5,
)

## Testing Queries

In [20]:
query = "What is the cell line name"

# Ask the baseline engine, then print a banner followed by the answer.
response_1 = raw_query_engine.query(query)
print("\n***********Basic Query Engine***********", response_1, sep="\n")


***********Basic Query Engine***********
AV0122


In [21]:
query = "What is the cell bank ID?"

# Ask the baseline engine, then print a banner followed by the answer.
response_1 = raw_query_engine.query(query)
print("\n***********Basic Query Engine***********", response_1, sep="\n")


***********Basic Query Engine***********
1218 -W.


## Using Redis to cache query results

In [23]:
import redis
import hashlib
import json
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
from llama_index.core.postprocessor import SentenceTransformerRerank

In [27]:
# Set up Redis connection
#
# Connection details are read from the environment (load_dotenv() is called
# earlier in this notebook) so the password is never stored in the notebook.
# REDIS_PASSWORD is required and raises KeyError when missing; host and port
# fall back to the values this notebook used before.
# NOTE(review): the password that used to be hardcoded here has been exposed
# and should be rotated.
redis_client = redis.Redis(
    host=os.getenv(
        "REDIS_HOST", "redis-13600.c274.us-east-1-3.ec2.redns.redis-cloud.com"
    ),
    port=int(os.getenv("REDIS_PORT", "13600")),
    password=os.environ["REDIS_PASSWORD"],
)

# Set up RAG components
#
# Loads the PDF, builds a vector index from it, and creates a query engine
# that retrieves 5 nodes and passes them through the existing `reranker`.
# NOTE(review): this repeats the index build from the earlier baseline cell;
# if that cell has already run, its raw_query_engine could be reused instead.
reader = SimpleDirectoryReader(
    input_files=[
        "/Users/angelmurillo/Desktop/OpenSource_RAG_LLM/data/AVD-005-MAB_INT_VIAL_RELEASE_1.pdf",
    ],
)
base_docs = reader.load_data()
raw_index = VectorStoreIndex.from_documents(base_docs)
raw_query_engine = raw_index.as_query_engine(
    similarity_top_k=5, node_postprocessors=[reranker]
)

def get_cache_key(query):
    """Return the hexadecimal MD5 digest of ``query``, used as the cache key."""
    digest = hashlib.md5()
    digest.update(query.encode())
    return digest.hexdigest()

def get_cached_result(query):
    """Fetch the cached entry for ``query`` from Redis.

    Returns the JSON-decoded value stored under the query's cache key, or
    None when Redis returns nothing (or an empty value) for that key.
    """
    payload = redis_client.get(get_cache_key(query))
    return json.loads(payload) if payload else None

def set_cached_result(query, result, expiration_time=3600):
    """Store ``result`` as JSON under the cache key for ``query``.

    ``expiration_time`` is handed to ``redis_client.setex`` as the entry's
    expiry (3600 by default).
    """
    key = get_cache_key(query)
    payload = json.dumps(result)
    redis_client.setex(key, expiration_time, payload)

def process_query(query):
    """Answer ``query`` with the RAG query engine, using Redis as a cache.

    A cached entry for the query is returned directly. Otherwise the query is
    run through ``raw_query_engine``, the response is reduced to plain
    JSON-compatible values, stored in the cache, and returned.

    Args:
        query: The question text; its hash is used as the cache key.

    Returns:
        A dict with ``'response'`` (str) and ``'source_nodes'`` (a list of
        dicts holding ``'node_id'``, ``'score'`` and ``'text'``).
    """
    # Check if result is in cache
    cached_result = get_cached_result(query)
    if cached_result:
        print("Cache hit!")
        return cached_result

    # If not in cache, perform the query processing
    print("Cache miss. Processing query...")
    result = raw_query_engine.query(query)

    # Convert the result to a serializable format
    serializable_result = {
        'response': str(result.response),
        'source_nodes': [
            {
                'node_id': node.node.node_id,
                # Some rerankers may attach NumPy scalar scores, which
                # json.dumps rejects; coerce to a builtin float so caching
                # cannot fail after the query has already run.
                'score': None if node.score is None else float(node.score),
                'text': node.node.text,
            } for node in result.source_nodes
        ]
    }

    # Cache the result
    set_cached_result(query, serializable_result)

    return serializable_result

In [33]:
# Usage example
if __name__ == "__main__":
    query = "From the cell bank information, what is the generation number?"

    # Issue the same query twice. map() is lazy, so each call runs just before
    # its response is printed: the first call is a cache miss unless an
    # unexpired entry already exists, the second should be a cache hit.
    for result in map(process_query, (query, query)):
        print(result['response'])

Cache miss. Processing query...
The generation number.
Cache hit!
The generation number.
