# DAY 2 : RAG

## Passing documents & citations

### Supported models (https://docs.oracle.com/en-us/iaas/Content/generative-ai/chat-models.htm) 
- cohere.command-r-08-2024
- cohere.command-r-16k
- cohere.command-r-plus
- cohere.command-r-plus-08-2024
- meta.llama-3.1-405b-instruct
- meta.llama-3.1-70b-instruct
- meta.llama-3.2-90b-vision-instruct
  
Questions? Use the #generative-ai-users or #igiu-innovation-lab Slack channels.
If you hit errors running the sample code, reach out for help in #igiu-ai-learning.

### Set up variables

In [1]:
# set up the  variables


from oci.generative_ai_inference import GenerativeAiInferenceClient
from oci.generative_ai_inference.models import *
import oci
import os,json

#####
# Make sure your sandbox.json file is set up for your environment.
# You might have to specify the full path depending on your `cwd`.
#####
SANDBOX_CONFIG_FILE = "sandbox.json"

# Model and regional inference endpoint used for every chat call in this notebook.
LLM_MODEL = "cohere.command-r-plus-08-2024"
llm_service_endpoint = "https://inference.generativeai.us-chicago-1.oci.oraclecloud.com"

# Placeholders; the client is created in a later cell.
llm_client = None
llm_payload = None

# System preamble asking the model to stay grounded in the supplied documents.
# The previous text was garbled ("based of", "nclude", missing sentence break),
# which weakens the instruction the model actually receives.
PREAMBLE = (
    "Provide factual answers based on the documents provided. "
    "Include citations if you can. "
    "Say you can't answer if the answer is not in the provided documents."
)
# The user message is filled in right before each chat call.
MESSAGE = ""

### load the profile

In [2]:
# Start from a known state: if reading the file fails, scfg stays None.
scfg = None

# Parse the sandbox settings (JSON); a leading "~" in the path is expanded first.
with open(os.path.expanduser(SANDBOX_CONFIG_FILE), mode='r') as f:
    scfg = json.load(f)

# Build the OCI SDK config from the config file and profile named in the sandbox settings.
config = oci.config.from_file(
    file_location=os.path.expanduser(scfg["oci"]["configFile"]),
    profile_name=scfg["oci"]["profile"],
)
         

In [3]:
### set up Chat Request

In [None]:


# Build the Cohere chat request. The message and documents are set in later cells;
# the values below control how the answer is sampled and how citations are produced.
llm_chat_request = CohereChatRequest()
#llm_chat_request.preamble_override = PREAMBLE  # uncomment to apply the custom preamble
llm_chat_request.message = MESSAGE
llm_chat_request.is_stream = False
llm_chat_request.max_tokens = 500  # max tokens to generate; can lead to incomplete responses
llm_chat_request.temperature = 1.0  # higher value means more random, default = 0.3
#llm_chat_request.seed = 7555  # makes a best effort to make the answer deterministic, not guaranteed
llm_chat_request.top_p = 0.7  # only tokens within total probability p are considered; max 0.99, min 0.01, default 0.75
llm_chat_request.top_k = 0  # only the top k tokens are considered; 0 turns it off, max = 500
llm_chat_request.frequency_penalty = 0.0  # reduces token repetition; 0.0 applies no penalty (check the OCI docs for the allowed range)
#llm_chat_request.documents = get_documents()  # answer only from the supplied documents, not from the model's own knowledge

# Citation quality must be set on the request object created above. The original code
# assigned it to an undefined name (`cohere_chat_request`), which raises NameError on a
# fresh kernel and would never reach the request that is actually sent.
llm_chat_request.citation_quality = CohereChatRequest.CITATION_QUALITY_FAST  # FAST or ACCURATE
#llm_chat_request.citation_quality = CohereChatRequest.CITATION_QUALITY_ACCURATE  # FAST or ACCURATE



### Add documents

In [6]:
# Documents handed to the model alongside the question.
# Every entry has the same four keys: title, snippet, website and id.
docs = [
    {
        "title": "Oracle",
        "snippet": "Oracle database services and products offer customers cost-optimized and high-performance versions of Oracle Database, the world's leading converged, multi-model database management system, as well as in-memory, NoSQL and MySQL databases. Oracle Autonomous Database, available on premises via Oracle Cloud@Customer or in the Oracle Cloud Infrastructure, enables customers to simplify relational database environments and reduce management workloads.",
        "website": "https://www.oracle.com/database",
        "id": "ORA001",
    },
    {
        "title": "Amazon",
        # Multi-line snippet: the text below is kept flush-left because its
        # whitespace is part of the string sent to the model.
        "snippet": """ AWS provides the broadest selection of purpose-built databases allowing you to save, grow, and innovate faster.
Purpose Built
Choose from 15+ purpose-built database engines including relational, key-value, document, in-memory, graph, time series, wide column, and ledger databases.
Performance at Scale
Get relational databases that are 3-5X faster than popular alternatives, or non-relational databases that give you microsecond to sub-millisecond latency.
Fully Managed
AWS continuously monitors your clusters to keep your workloads running with self-healing storage and automated scaling, so that you can focus on application development.
Secure & Highly Available
AWS databases are built for business-critical, enterprise workloads, offering high availability, reliability, and security.
""",
        "website": "https://aws.amazon.com/free/database/e",
        "id": "AWS001",
    },
]

# Attach the documents to the chat request built earlier.
llm_chat_request.documents = docs

## Set up chat details

In [7]:


# Bundle everything the chat call needs: which model to use (on-demand serving),
# the compartment read from the sandbox settings, and the request itself.
chat_detail = ChatDetails(
    serving_mode=OnDemandServingMode(model_id=LLM_MODEL),
    compartment_id=scfg["oci"]["compartment"],
    chat_request=llm_chat_request,
)


### get the LLM client 

In [8]:

# Create the inference client against the configured endpoint.
# Retries are disabled (NoneRetryStrategy); the timeout tuple is passed through as (10, 240).
llm_client = GenerativeAiInferenceClient(
    config=config,
    service_endpoint=llm_service_endpoint,
    retry_strategy=oci.retry.NoneRetryStrategy(),
    timeout=(10, 240),
)

### Ask the question 

In [None]:
#llm_chat_request.seed = 7555  # try changing this to see if the original response can be reproduced

# Set the question on the shared request object, then send it.
llm_chat_request.message = "tell me more about AWS"
llm_response = llm_client.chat(chat_detail)

# Keep the answer text in its own variable.
llm_text = llm_response.data.chat_response.text

# Show the answer, followed by the citations the model returned.
print("**************************Chat Result**************************")
print(llm_text)
print("************************** Citations**************************")
print(llm_response.data.chat_response.citations)

### update the history 

In [12]:
# Seed the conversation with one earlier user turn and the chatbot's reply.
previous_chat_message = oci.generative_ai_inference.models.CohereUserMessage(
    message="Tell me something about Oracle."
)
previous_chat_reply = oci.generative_ai_inference.models.CohereChatBotMessage(
    message="Oracle is one of the largest vendors in the enterprise IT market and the shorthand name of its flagship product. The database software sits at the center of many corporate IT"
)

# Order matters: the user turn first, then the reply.
llm_chat_request.chat_history = [
    previous_chat_message,
    previous_chat_reply,
]

### ask the question again

In [None]:
#llm_chat_request.documents = []  # uncomment to send the follow-up without any documents

# Follow-up question; "its" relies on the chat history set on the request.
llm_chat_request.message = "tell me more about its databases"
llm_response = llm_client.chat(chat_detail)

# Pull out the answer text and show it.
llm_text = llm_response.data.chat_response.text
print(llm_text)

### Print response & citations

In [None]:
# Print the answer text and its citations from the latest response, one item per line.
print(
    "**************************Chat Result**************************",
    llm_response.data.chat_response.text,
    "************************** Citations**************************",
    llm_response.data.chat_response.citations,
    sep="\n",
)