### OCI Data Science - Useful Tips
<details>
<summary><font size="2">Check for Public Internet Access</font></summary>

```python
import requests
response = requests.get("https://oracle.com")
assert response.status_code==200, "Internet connection failed"
```
</details>
<details>
<summary><font size="2">Helpful Documentation </font></summary>
<ul><li><a href="https://docs.cloud.oracle.com/en-us/iaas/data-science/using/data-science.htm">Data Science Service Documentation</a></li>
<li><a href="https://docs.cloud.oracle.com/iaas/tools/ads-sdk/latest/index.html">ADS documentation</a></li>
</ul>
</details>
<details>
<summary><font size="2">Typical Cell Imports and Settings for ADS</font></summary>

```python
%load_ext autoreload
%autoreload 2
%matplotlib inline

import warnings
warnings.filterwarnings('ignore')

import logging
logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.ERROR)

import ads
from ads.dataset.factory import DatasetFactory
from ads.automl.provider import OracleAutoMLProvider
from ads.automl.driver import AutoML
from ads.evaluations.evaluator import ADSEvaluator
from ads.common.data import ADSData
from ads.explanations.explainer import ADSExplainer
from ads.explanations.mlx_global_explainer import MLXGlobalExplainer
from ads.explanations.mlx_local_explainer import MLXLocalExplainer
from ads.catalog.model import ModelCatalog
from ads.common.model_artifact import ModelArtifact
```
</details>
<details>
<summary><font size="2">Useful Environment Variables</font></summary>

```python
import os
print(os.environ["NB_SESSION_COMPARTMENT_OCID"])
print(os.environ["PROJECT_OCID"])
print(os.environ["USER_OCID"])
print(os.environ["TENANCY_OCID"])
print(os.environ["NB_REGION"])
```
</details>

In [2]:
## Install Conda package

In [4]:
#!pip install FlagEmbedding

In [1]:
import ads
import pandas as pd
import json

from FlagEmbedding import FlagReranker

# to save in the Model Catalog
from ads.model.generic_model import GenericModel
from ads.model.model_metadata import MetadataCustomCategory

In [138]:
# !odsc conda init -b conda_environment_yolov5 -n frqap2zhtzbe -a resource_principal
# !odsc conda publish -s tensorflow28_p38_gpu_v1 --force

In [14]:
#see example payload
f = open('input_json.json')
payload = json.load(f)

## **Download model**

In [2]:
class Reranker:
    def __init__(self, model_id):
        self.model_id = model_id
        self.reranker = FlagReranker(self.model_id, use_fp16=True)

    def predict(self, x):
        # x is expected as a list of list of str
        # [["x1", "x2"]] -> y = [score12]
        scores = self.reranker.compute_score(x)

        return scores
    
    
    
model = Reranker(model_id="BAAI/bge-reranker-large")

2024-01-29 15:25:06.514736: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-01-29 15:25:06.544210: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


## **Model artifacts**

In [3]:

custom_conda = "oci://conda_environment_yolov5@frqap2zhtzbe/conda_environments/gpu/TensorFlow 2.8 for GPU on Python 3.8/1.0/tensorflow28_p38_gpu_v1"

reranker_model = GenericModel(estimator=model, artifact_dir="./reranker_dir")

reranker_model.prepare(
    reload=False,
    inference_conda_env=custom_conda,
    inference_python_version="3.9",
    model_file_name="reranker.pkl",
    force_overwrite=True,
)

                                                                                                                                                                                                                                          ?, ?it/s]

algorithm: null
artifact_dir:
  /home/datascience/1_projects/re_ranking_juan/reranker_dir:
  - - reranker.pkl
    - score.py
    - runtime.yaml
    - test_json_output.json
    - .model-ignore
    - .ipynb_checkpoints
    - .ipynb_checkpoints/score-checkpoint.py
    - .ipynb_checkpoints/test_json_output-checkpoint.json
framework: null
model_deployment_id: null
model_id: null

## **Define full script**

In [10]:
%%writefile ./reranker_dir/score.py

import json
import os
import cloudpickle
import pandas as pd
import numpy as np
from functools import lru_cache



#model_name = './reranker_dir/reranker.pkl'


def load_model():
    class DummyModel:
        def __init__(self):
            pass
    return DummyModel()


def predict(data, model=load_model()):
    
    model_name = 'reranker.pkl'
    
    with open(model_name, "rb") as file:
        model = cloudpickle.load(file)

    payload = data
    
    #get the origial question
    original_question = payload['full_input']['original_question']
    
        ##create payload as expected by rerank model

    payload_list = []
    

    for rowx in payload['full_input']['result']:

        text = rowx['payload']['text']

        payload_list.append([original_question, text])

    #apply list to rerank model
    rerank_predictions = model.predict(payload_list)

    loopx = 0

    #create output list
    output_list = []
    for rowyy in rerank_predictions:

        #get the text
        output_text = payload['full_input']['result'][loopx]['payload']['text']

        #add score
        output_list.append([output_text, rowyy])

        loopx+=1

    ########################
    #load as dataframe and sort on score
    df = pd.DataFrame(output_list, columns=["text","score"])
    df = df.sort_values('score', ascending=False)

    df_to_json = df.to_json(orient = 'records')
    
    return {'prediction': df_to_json}

Overwriting ./reranker_dir/score.py


In [4]:
predict(payload)

{'prediction': '[{"text":"\\nNo databases that run on-premises or in cloud environments today are 100% autonomous - but that is\\nthe goal toward which the industry is headed. To further the evolution of cloud databases toward this\\ntrue utility model, Oracle introduced the Autonomous Database, running on Oracle Database (version\\n18c and later) in the Oracle Cloud. Autonomous Database minimizes or eliminates human labor using\\nself-driving, self-securing and self-repairing functionality. Two key areas that comprise the self-\\nrepairing capabilities of the Autonomous Database are the Oracle Maximum Availability Architecture","score":4.0959706306},{"text":"\\nor eliminate operational disruption.\\nWhat is the Autonomous Database Cloud?\\nAUTONOMOUS\\nDATABASE\\nORACLE\\n-","score":1.7019958496},{"text":"\\nTable of Contents\\nIntroduction\\n4\\nWhat is an Autonomous Database?\\n4","score":1.257212162},{"text":"\\nif you will. As a result, enterprises are unable to realize the full o

In [11]:
reranker_model.introspect()

['.ipynb_checkpoints', 'reranker.pkl', '__pycache__', 'score.py', 'runtime.yaml', 'test_json_output.json', '.model-ignore']


Unnamed: 0,Test key,Test name,Result,Message
0,runtime_env_path,Check that field MODEL_DEPLOYMENT.INFERENCE_ENV_PATH is set,Passed,
1,runtime_env_python,Check that field MODEL_DEPLOYMENT.INFERENCE_PYTHON_VERSION is set to a value of 3.6 or higher,Passed,
2,runtime_path_exist,Check that the file path in MODEL_DEPLOYMENT.INFERENCE_ENV_PATH is correct.,Passed,
3,runtime_version,Check that field MODEL_ARTIFACT_VERSION is set to 3.0,Passed,
4,runtime_yaml,"Check that the file ""runtime.yaml"" exists and is in the top level directory of the artifact directory",Passed,
5,score_load_model,Check that load_model() is defined,Passed,
6,score_predict,Check that predict() is defined,Passed,
7,score_predict_arg,Check that all other arguments in predict() are optional and have default values,Passed,
8,score_predict_data,"Check that the only required argument for predict() is named ""data""",Passed,
9,score_py,"Check that the file ""score.py"" exists and is in the top level directory of the artifact directory",Passed,


In [12]:
# Saving the model artifact to the model catalog. 
catalog_entry = reranker_model.save(display_name='rerank_v3', description='rerank_v3', timeout=600, bucket_uri="oci://conda_environment_yolov5@frqap2zhtzbe/config/")
catalog_entry.id

['reranker.pkl', 'score.py', 'runtime.yaml', 'test_json_output.json', '.model-ignore']


loop1:   0%|          | 0/7 [00:00<?, ?it/s]

/tmp/tmp1ius8yfy.zip has been successfully uploaded to oci://conda_environment_yolov5@frqap2zhtzbe/config/ocid1.datasciencemodel.oc1.eu-frankfurt-1.amaaaaaangencdyaz42hnqocq4hd7wdgm32iumzdx22a3rzkeexz3gmyimaq.zip.


loop1:   0%|          | 0/2 [00:00<?, ?it/s]

ERROR - Exception
Traceback (most recent call last):
  File "/home/datascience/conda/tensorflow28_p38_gpu_v1/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3553, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "/tmp/ipykernel_29148/3289160801.py", line 3, in <cell line: 3>
    catalog_entry.id
AttributeError: 'str' object has no attribute 'id'
AttributeError: 'str' object has no attribute 'id'

In [15]:
catalog_entry

'ocid1.datasciencemodel.oc1.eu-frankfurt-1.amaaaaaangencdyae7qnsghxrr56x7zht57qeriuvodzs5va3wxukpmeh3gq'

In [None]:
#see example payload
f = open('input_json.json')
payload = json.load(f)

In [20]:
payload = {'full_input': {'original_question': 'When do you go home?',
  'result': [{'id': 5469,
    'version': 1,
    'score': 0.7420407,
    'payload': {'creation_date': '2024-01-29Z',
     'document_name': 'autonomous-database-self-repairing-5116047 (1).pdf',
     'hyperlink_url': '',
     'page_number': 3,
     'text': '\nTable of Contents\nIntroduction\n4\nWhat is an Autonomous Database?\n4',
     'type': 'pdf'},
    'vector': None},
   {'id': 5483,
    'version': 2,
    'score': 0.7236079,
    'payload': {'creation_date': '2024-01-29Z',
     'document_name': 'autonomous-database-self-repairing-5116047 (1).pdf',
     'hyperlink_url': '',
     'page_number': 5,
     'text': '\nor eliminate operational disruption.\nWhat is the Autonomous Database Cloud?\nAUTONOMOUS\nDATABASE\nORACLE\n-',
     'type': 'pdf'},
    'vector': None},
   {'id': 5542,
    'version': 5,
    'score': 0.67869973,
    'payload': {'creation_date': '2024-01-29Z',
     'document_name': 'autonomous-database-self-repairing-5116047 (1).pdf',
     'hyperlink_url': '',
     'page_number': 12,
     'text': '\nNo databases that run on-premises or in cloud environments today are 100% autonomous - but that is\nthe goal toward which the industry is headed. To further the evolution of cloud databases toward this\ntrue utility model, Oracle introduced the Autonomous Database, running on Oracle Database (version\n18c and later) in the Oracle Cloud. Autonomous Database minimizes or eliminates human labor using\nself-driving, self-securing and self-repairing functionality. Two key areas that comprise the self-\nrepairing capabilities of the Autonomous Database are the Oracle Maximum Availability Architecture',
     'type': 'pdf'},
    'vector': None},
   {'id': 5496,
    'version': 2,
    'score': 0.67500746,
    'payload': {'creation_date': '2024-01-29Z',
     'document_name': 'autonomous-database-self-repairing-5116047 (1).pdf',
     'hyperlink_url': '',
     'page_number': 6,
     'text': "\nbehind corporate firewalls to meet data sovereignty or control requirements will soon be able to run\nthe Autonomous Database on-premises. Oracle Exadata Cloud at Customer, an Oracle Public Cloud\noffering, can be deployed on-premises, and delivers all of the capabilities of Autonomous Database\nfrom within the enterprise's data center.\nThe Autonomous Database can be deployed in a hybrid cloud or all-cloud model; for example, when\nmultiple databases are deployed for production and test environments or as primary and standby",
     'type': 'pdf'},
    'vector': None},
   {'id': 5476,
    'version': 1,
    'score': 0.6721611,
    'payload': {'creation_date': '2024-01-29Z',
     'document_name': 'autonomous-database-self-repairing-5116047 (1).pdf',
     'hyperlink_url': '',
     'page_number': 4,
     'text': '\nif you will. As a result, enterprises are unable to realize the full operational and financial benefits of the\ncloud.\nWHAT IS AN AUTONOMOUS DATABASE?\nThere is understandably an element of confusion that arises when talking about automatic" versus\n\'autonomous" capabilities. A process for database backup, failover or resizing that can be\naccomplished automatically is still not autonomous if a database administrator has to respond to an',
     'type': 'pdf'},
    'vector': None}],
  'status': 'ok',
  'time': 0.00130104}}

In [21]:
import requests
import oci
from oci.signer import Signer
import json

uri = f"https://modeldeployment.eu-frankfurt-1.oci.customer-oci.com/ocid1.datasciencemodeldeployment.oc1.eu-frankfurt-1.amaaaaaangencdyaeibpd24ie2ap7lmyxeiatfntl6qpoy3nejefqxvtpa5a/predict"


config = oci.config.from_file("~/.oci/config") # replace with the location of your oci config file
auth = Signer(
        tenancy=config['tenancy'],
        user=config['user'],
        fingerprint=config['fingerprint'],
        private_key_file_location=config['key_file'],
        pass_phrase=config['pass_phrase'])


#POST request to the model
response = requests.post(uri, json=payload, auth=auth)
print(response)
full_response = (json.loads(response.content))
print(full_response)

<Response [200]>
{'prediction': '[{"text":"\\nNo databases that run on-premises or in cloud environments today are 100% autonomous - but that is\\nthe goal toward which the industry is headed. To further the evolution of cloud databases toward this\\ntrue utility model, Oracle introduced the Autonomous Database, running on Oracle Database (version\\n18c and later) in the Oracle Cloud. Autonomous Database minimizes or eliminates human labor using\\nself-driving, self-securing and self-repairing functionality. Two key areas that comprise the self-\\nrepairing capabilities of the Autonomous Database are the Oracle Maximum Availability Architecture","score":-8.0765218735},{"text":"\\nbehind corporate firewalls to meet data sovereignty or control requirements will soon be able to run\\nthe Autonomous Database on-premises. Oracle Exadata Cloud at Customer, an Oracle Public Cloud\\noffering, can be deployed on-premises, and delivers all of the capabilities of Autonomous Database\\nfrom within