### OCI Data Science - Useful Tips
<details>
<summary><font size="2">Check for Public Internet Access</font></summary>

```python
import requests
response = requests.get("https://oracle.com")
assert response.status_code==200, "Internet connection failed"
```
</details>
<details>
<summary><font size="2">Helpful Documentation </font></summary>
<ul><li><a href="https://docs.cloud.oracle.com/en-us/iaas/data-science/using/data-science.htm">Data Science Service Documentation</a></li>
<li><a href="https://docs.cloud.oracle.com/iaas/tools/ads-sdk/latest/index.html">ADS documentation</a></li>
</ul>
</details>
<details>
<summary><font size="2">Typical Cell Imports and Settings for ADS</font></summary>

```python
%load_ext autoreload
%autoreload 2
%matplotlib inline

import warnings
warnings.filterwarnings('ignore')

import logging
logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.ERROR)

import ads
from ads.dataset.factory import DatasetFactory
from ads.automl.provider import OracleAutoMLProvider
from ads.automl.driver import AutoML
from ads.evaluations.evaluator import ADSEvaluator
from ads.common.data import ADSData
from ads.explanations.explainer import ADSExplainer
from ads.explanations.mlx_global_explainer import MLXGlobalExplainer
from ads.explanations.mlx_local_explainer import MLXLocalExplainer
from ads.catalog.model import ModelCatalog
from ads.common.model_artifact import ModelArtifact
```
</details>
<details>
<summary><font size="2">Useful Environment Variables</font></summary>

```python
import os
print(os.environ["NB_SESSION_COMPARTMENT_OCID"])
print(os.environ["PROJECT_OCID"])
print(os.environ["USER_OCID"])
print(os.environ["TENANCY_OCID"])
print(os.environ["NB_REGION"])
```
</details>

In [1]:
pip install langchain_nvidia_ai_endpoints

Collecting langchain_nvidia_ai_endpoints
  Downloading langchain_nvidia_ai_endpoints-0.0.17-py3-none-any.whl.metadata (10 kB)
Collecting langchain-core<0.3,>=0.1.27 (from langchain_nvidia_ai_endpoints)
  Downloading langchain_core-0.2.1-py3-none-any.whl.metadata (5.9 kB)
Collecting langsmith<0.2.0,>=0.1.0 (from langchain-core<0.3,>=0.1.27->langchain_nvidia_ai_endpoints)
  Downloading langsmith-0.1.60-py3-none-any.whl.metadata (13 kB)
Collecting packaging<24.0,>=23.2 (from langchain-core<0.3,>=0.1.27->langchain_nvidia_ai_endpoints)
  Downloading packaging-23.2-py3-none-any.whl.metadata (3.2 kB)
Collecting pydantic<3,>=1 (from langchain-core<0.3,>=0.1.27->langchain_nvidia_ai_endpoints)
  Downloading pydantic-2.7.1-py3-none-any.whl.metadata (107 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m107.3/107.3 kB[0m [31m17.9 MB/s[0m eta [36m0:00:00[0m
Collecting orjson<4.0.0,>=3.9.14 (from langsmith<0.2.0,>=0.1.0->langchain-core<0.3,>=0.1.27->langchain_nvidia_ai_endpoints

In [83]:
from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings, ChatNVIDIA

In [2]:
pip install langchain-community langchain-text-splitters faiss-gpu

Collecting langchain-community
  Downloading langchain_community-0.2.0-py3-none-any.whl.metadata (8.8 kB)
Collecting langchain-text-splitters
  Downloading langchain_text_splitters-0.2.0-py3-none-any.whl.metadata (2.2 kB)
Collecting faiss-gpu
  Downloading faiss_gpu-1.7.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.4 kB)
Collecting SQLAlchemy<3,>=1.4 (from langchain-community)
  Downloading SQLAlchemy-2.0.30-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.6 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)
  Downloading dataclasses_json-0.6.6-py3-none-any.whl.metadata (25 kB)
Collecting langchain<0.3.0,>=0.2.0 (from langchain-community)
  Downloading langchain-0.2.0-py3-none-any.whl.metadata (13 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading marshmallow-3.21.2-py3-none-any.whl.metadata (7.1 kB)
Collecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.

In [84]:
from langchain_community.document_loaders import WebBaseLoader
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter

In [85]:
#loader =  WebBaseLoader("https://docs.smith.langchain.com/user_guide")
loader = TextLoader("state_of_the_union.txt") # path adjusted
documents = loader.load()
text_splitter = CharacterTextSplitter(chunk_size=512, chunk_overlap=0)
docs = text_splitter.split_documents(documents)
docs

[Document(page_content='Madam Speaker, Madam Vice President, our First Lady and Second Gentleman. Members of Congress and the Cabinet. Justices of the Supreme Court. My fellow Americans.  \n\nLast year COVID-19 kept us apart. This year we are finally together again.', metadata={'source': 'state_of_the_union.txt'})]

In [86]:
NVIDIA_API_KEY = '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'
OPENAI_API_KEY = 'sk-proj-KJxdo8rCIHFI9Ex2VViHT3BlbkFJdArXniYczoft6puyKd6g'
import os
os.environ['NVIDIA_API_KEY'] = NVIDIA_API_KEY
os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY

In [90]:
#embeddings = NVIDIAEmbeddings()

In [122]:
# Prepare embedding function
from langchain.embeddings.openai import OpenAIEmbeddings
headers = {"x-api-key": os.environ["OPENAI_API_KEY"]}
embeddings = OpenAIEmbeddings(model="text-embedding-3-large", headers=headers)

In [123]:
from langchain_community.vectorstores import FAISS
from langchain.vectorstores import Chroma
#from langchain_text_splitters import RecursiveCharacterTextSplitter

# Try to get vectordb with FAISS
#db = FAISS.from_documents(docs, embeddings)
# Try to get vectordb with Chroma
db = Chroma.from_documents(docs, embeddings)
#documents = text_splitter.split_documents(docs)
#documents
#vector = FAISS.from_documents(documents, embeddings)
retriever = db.as_retriever()
retriever



BadRequestError: Error code: 400 - {'error': {'message': 'invalid model ID', 'type': 'invalid_request_error', 'param': None, 'code': None}}

In [113]:

#model = ChatNVIDIA(model="mistral_7b")
#model = 'model="gpt-3.5-turbo"'
from openai import OpenAI
client = OpenAI()

completion = client.chat.completions.create(
  model="gpt-3.5-turbo",
  messages=[
    {"role": "system", "content": "You are a poetic assistant, skilled in explaining complex programming concepts with creative flair."},
    {"role": "user", "content": "Compose a poem that explains the concept of recursion in programming."}
  ]
)

In [114]:
print(completion.choices[0].message)

ChatCompletionMessage(content="In the realm of code, a concept profound,\nLies recursion, a loop of magic and sound,\nA function that calls itself to explore,\nA labyrinth of patterns like never before.\n\nLike a whirlpool spiraling without end,\nRecursion delves deep, a curious trend,\nDividing tasks into smaller parts to see,\nThe beauty of logic, unleashed and free.\n\nWith elegance and grace, it works its charm,\nSolving puzzles with a mystical arm,\nEach iteration a journey anew,\nUnraveling mysteries, creating breakthrough.\n\nBut beware the depths of recursive might,\nFor infinite loops can cause a plight,\nWith stack overflowing, a memory sea,\nReckless recursion leads to jeopardy.\n\nSo tread with care in this enchanted land,\nWhere patterns dance at a programmer's hand,\nFor recursion's magic, a gift and a test,\nIn the realm of code, where dreams manifest.", role='assistant', function_call=None, tool_calls=None)


In [115]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser


model = ChatNVIDIA(model="gpt-3.5-turbo")

In [116]:
model

ChatNVIDIA(client=NVEModel(base_url='https://api.nvcf.nvidia.com/v2/nvcf', get_session_fn=<class 'requests.sessions.Session'>, get_asession_fn=<class 'aiohttp.client.ClientSession'>, endpoints={'infer': '{base_url}/pexec/functions/{model_id}', 'status': '{base_url}/pexec/status/{request_id}', 'models': '{base_url}/functions'}, api_key=SecretStr('**********'), timeout=60, interval=0.02, last_inputs={}, last_response=None, payload_fn=<function default_payload_fn at 0x7f320f0dc280>, headers_tmpl={'call': {'Accept': 'application/json', 'Authorization': 'Bearer {api_key}', 'User-Agent': 'langchain-nvidia-ai-endpoints'}, 'stream': {'Accept': 'text/event-stream', 'content-type': 'application/json', 'Authorization': 'Bearer {api_key}', 'User-Agent': 'langchain-nvidia-ai-endpoints'}}), model='gpt-3.5-turbo')

In [117]:
hyde_template = """Even if you do not know the full answer, generate a one-paragraph hypothetical answer to the below question:

{question}"""
hyde_prompt = ChatPromptTemplate.from_template(hyde_template)
hyde_query_transformer = hyde_prompt | model | StrOutputParser()

In [118]:
from langchain_core.runnables import chain

@chain
def hyde_retriever(question):
    hypothetical_document = hyde_query_transformer.invoke({"question": question})
    return retriever.invoke(hypothetical_document)

In [119]:
template = """Answer the question based only on the following context:
{context}

Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)
answer_chain = prompt | model | StrOutputParser()

In [120]:
@chain
def final_chain(question):
    documents = hyde_retriever.invoke(question)
    for s in answer_chain.stream({"question": question, "context": documents}):
        yield s

In [121]:
for s in final_chain.stream("instead of relying on foreign supply chains, what should we rely on?"):
    print(s, end="")

IndexError: list index out of range