# **GPT-2 - Deploying the model**

## **Load pre-trained model and save model locally**

In [None]:
from transformers import AutoModelForSequenceClassification, TFAutoModelForSequenceClassification, AutoTokenizer, TFGPT2LMHeadModel, GPT2Tokenizer
from scipy.special import softmax
import pandas as pd
import numpy as np

In [None]:
# Set up git-lfs before cloning (presumably so the large weight files are fetched, not just LFS pointers)
!git lfs install
# Clone the gpt2 model repository from the Hugging Face Hub; git places it in ./gpt2
!git clone https://huggingface.co/gpt2

In [None]:
# Local folder the model and tokenizer are loaded from.
dir_gpt2 = "./gpt2"

# Both objects are read from that same folder.
tokenizer = GPT2Tokenizer.from_pretrained(dir_gpt2)
model = TFGPT2LMHeadModel.from_pretrained(dir_gpt2)

## **Make prediction from locally loaded model**

In [None]:
# Prompt the model is asked to continue.
text = "Please finish my sentence, I'm going home and I will "
input_ids = tokenizer.encode(text, return_tensors='tf')

# Generate a continuation; max_length caps the total length (prompt + new tokens) at 50.
# GPT-2 defines no pad token, so the EOS id is passed explicitly instead of relying on
# generate() falling back to it with a warning.
greedy_output = model.generate(input_ids, max_length=50, pad_token_id=tokenizer.eos_token_id)
print("Output:\n" + 100 * '-')
print(tokenizer.decode(greedy_output[0], skip_special_tokens=True))

## **Local prediction again, with the input wrapped in a JSON-style dict (the format the deployed endpoint will receive)**

In [None]:
# Payload shaped like the JSON body a request would carry (nothing is sent in this cell).
data = {"text": "Hi, how you doing today"}

# Pull the prompt out of the payload.
text_input = data["text"]

# Same encode -> generate -> decode steps as in the previous cell.
input_ids = tokenizer.encode(text_input, return_tensors="tf")
greedy_output = model.generate(input_ids, max_length=50)
results = tokenizer.decode(greedy_output[0], skip_special_tokens=True)

# Only the type of the decoded result is printed here, not the text itself.
print("Output:\n" + "-" * 100)
print(type(results))

-----

# **Create model artifacts**

In [None]:
from ads.model.framework.tensorflow_model import TensorFlowModel
from ads.common.model_metadata import UseCaseType
from ads.common.model_artifact import ModelArtifact
from ads.common.model_export_util import prepare_generic_model
import os

In [None]:
# Folder in which the model artifact is assembled.
path_to_artifacts = "./testgpt2"

In [None]:
# Artifact folder; score.py, runtime.yaml and the model files are placed in it by the cells below.
path_to_artifacts = "./testgpt2"
# Alternative (unused): a custom published conda pack, addressed as oci://<bucket>@<namespace>/<path>
#conda_env = 'oci://conda_environment_yolov5@frqap2zhtzbe/conda_environments/cpu/fdf_conda/1.0/fdf_conda'

# Generate the default artifact files for the given inference conda environment slug.
artifact = prepare_generic_model(
    path_to_artifacts,
    inference_conda_env="tensorflow28_p38_gpu_v1",
    force_overwrite=True,
    fn_artifact_files_included=False,
)

# **Change the score.py manually**

In [None]:
#copy all files in the model artifacts
#copy the downloaded model folder into the model artifacts (lands in <artifacts>/gpt2 when the artifact folder already exists)
!cp -a ./gpt2 "{path_to_artifacts}"
#drop the clone's git metadata (incl. the git-lfs object cache): it is not needed to load the model and only inflates the artifact
!rm -rf "{path_to_artifacts}/gpt2/.git"

In [None]:
%%writefile "{path_to_artifacts}/score.py"
import os
import ads
from transformers import AutoModelForSequenceClassification, TFAutoModelForSequenceClassification, AutoTokenizer, TFGPT2LMHeadModel, GPT2Tokenizer
import pandas as pd
import numpy as np
import tokenizers
import json
import os

#load model and tokenizer
# Resolve the model folder next to this file so loading does not depend on the process'
# working directory; keep the original "./gpt2" as a fallback when that folder is absent.
try:
    _score_dir = os.path.dirname(os.path.realpath(__file__))
except NameError:  # __file__ is undefined when this code is executed from a notebook cell
    _score_dir = os.getcwd()
model_artifacts_folder = os.path.join(_score_dir, "gpt2")
if not os.path.isdir(model_artifacts_folder):
    model_artifacts_folder = "./gpt2"

# Filled by the first predict() call with the keys "model" and "tokenizer".
_GENERATOR_CACHE = {}


def load_model():
    """Return a placeholder object.

    The real GPT-2 model is not loaded here: loading it ahead of predict() was
    reported to fail, so predict() loads it lazily instead. The function is kept
    so that callers importing `load_model` from score.py keep working.
    """
    class DummyModel:
        def __init__(self):
            pass
    return DummyModel()


def _get_generator():
    """Load the GPT-2 model and tokenizer on first use and reuse them afterwards."""
    if not _GENERATOR_CACHE:
        gpt2 = TFGPT2LMHeadModel.from_pretrained(model_artifacts_folder)
        gpt2_tokenizer = GPT2Tokenizer.from_pretrained(model_artifacts_folder)
        # Store both at once so a failed load never leaves a half-filled cache behind.
        _GENERATOR_CACHE.update(model=gpt2, tokenizer=gpt2_tokenizer)
    return _GENERATOR_CACHE["model"], _GENERATOR_CACHE["tokenizer"]


def predict(data, model=load_model()):
    """Continue the text found under data['text'] with GPT-2.

    Args:
        data: mapping with a 'text' entry holding the prompt.
        model: unused placeholder (the object returned by load_model()); the
            actual model is loaded inside this function on the first call and
            cached for later calls instead of being reloaded per request.

    Returns:
        dict: {'prediction': <decoded generated text>}.

    Raises:
        KeyError: if `data` is a dict without a 'text' key.
    """
    gpt2, tokenizer = _get_generator()

    #fetch text from input
    text_input = data['text']

    # process text: encode the prompt, generate up to max_length=50 tokens, decode the first sequence
    input_ids = tokenizer.encode(text_input, return_tensors='tf')
    greedy_output = gpt2.generate(input_ids, max_length=50)
    results = tokenizer.decode(greedy_output[0], skip_special_tokens=True)
    return {'prediction': results}

In [None]:
# %%writefile only saved score.py to disk; it did not define predict() in this kernel.
# Import it from the artifact folder so the cell also works after a kernel restart.
import sys
if path_to_artifacts not in sys.path:
    sys.path.insert(0, path_to_artifacts)
from score import predict

data = {"text": "Hi, Thanks, i'm very sad. You such"}

predict(data)

## **check the artifacts**

In [None]:
### Note: the runtime.yaml below uses the GPU conda env (tensorflow28_p38_gpu_v1) rather than a CPU one

In [None]:
%%writefile "{path_to_artifacts}/runtime.yaml"

# Model runtime environment
# Written into the artifact folder as runtime.yaml by the %%writefile magic of this cell.
MODEL_ARTIFACT_VERSION: '3.0'
MODEL_DEPLOYMENT:
  # Conda environment for inference: same slug as passed to prepare_generic_model (GPU, TensorFlow 2.8, Python 3.8).
  INFERENCE_CONDA_ENV:
    INFERENCE_ENV_PATH: oci://service-conda-packs@id19sfcrra6z/service_pack/gpu/TensorFlow_2.8_for_GPU_on_Python_3.8/1.0/tensorflow28_p38_gpu_v1
    INFERENCE_ENV_SLUG: tensorflow28_p38_gpu_v1
    INFERENCE_ENV_TYPE: data_science
    INFERENCE_PYTHON_VERSION: '3.8'
# NOTE(review): the provenance values below are hard-coded OCIDs and a user e-mail address from one
# specific tenancy/notebook session — replace them for another environment and redact before sharing.
MODEL_PROVENANCE:
  PROJECT_OCID: ocid1.datascienceproject.oc1.eu-frankfurt-1.amaaaaaangencdyaik5ssdqk4as2bhldxprh7vnqpk7yycsm7vymd344cgua
  TENANCY_OCID: ocid1.tenancy.oc1..aaaaaaaabu5fgingcjq3vc7djuwsdcutdxs4gsws6h4kfoldqpjuggxprgoa
  TRAINING_COMPARTMENT_OCID: ocid1.compartment.oc1..aaaaaaaae3n6r6hrjipbap2hojicrsvkzatrtlwvsyrpyjd7wjnw4za3m75q
  # Training environment: the same conda pack as the inference environment above.
  TRAINING_CONDA_ENV:
    TRAINING_ENV_PATH: oci://service-conda-packs@id19sfcrra6z/service_pack/gpu/TensorFlow_2.8_for_GPU_on_Python_3.8/1.0/tensorflow28_p38_gpu_v1
    TRAINING_ENV_SLUG: tensorflow28_p38_gpu_v1
    TRAINING_ENV_TYPE: data_science
    TRAINING_PYTHON_VERSION: '3.8'
  TRAINING_REGION: eu-frankfurt-1
  TRAINING_RESOURCE_OCID: ocid1.datasciencenotebooksession.oc1.eu-frankfurt-1.amaaaaaangencdyacxmsz5ycch762wjc54udhibtl3m4nacuaf7shrvyoktq
  USER_OCID: ocid1.saml2idp.oc1..aaaaaaaar3ydw5hoiob7dfjzoom2dvbhqkkd5fat6m7upe72emlsxhsfrbfa/bob.peulen@oracle.com
  VM_IMAGE_INTERNAL_ID: NB1480-DCGPU131-VMP64-VMA1585-BI681

In [None]:
# Run the artifact's introspection checks; every check is expected to pass before saving.
artifact.introspect()

## **Test model from artifacts**

In [None]:
# Put the artifact folder first on the import path so that `score` resolves to its score.py.
import sys
sys.path.insert(0, path_to_artifacts)
 
# importing load_model() and predict() that are defined in score.py
from score import load_model, predict
 
# Call load_model() once and keep its return value in `_`; the next cell passes `_` to predict().
_ = load_model()

In [None]:
# Sample payload with the prompt under the 'text' key.
data = {"text": "Hi, Thanks, i'm very sad. You such"}

# `_` holds the object returned by load_model() in the previous cell.
predictions_test = predict(data, _)
predictions_test

In [None]:
# Check that the OCI config file used by the following cells is present.
!ls /home/datascience/.oci/config

## **store in catalog**

In [None]:
import oci

# Load the DEFAULT profile from the OCI config file.
config = oci.config.from_file('/home/datascience/.oci/config', 'DEFAULT')
# Show only which settings were loaded: echoing the dict itself would store values such as the
# user/tenancy OCIDs, key fingerprint and key-file path in the saved notebook output.
sorted(config)

In [None]:
#### Deleted some files (tf.. different bits files) to reduce to < 6

In [18]:

# Upload the artifact to the model catalog, passing an Object Storage bucket URI to save();
# the id of the resulting catalog entry is displayed as the cell output.
catalog_entry = artifact.save(
    display_name='gptsaitest',
    description='gptsaitest',
    timeout=600,
    bucket_uri="oci://conda_environment_yolov5@frqap2zhtzbe/model_artifacts_large",
)
catalog_entry.id

                                                                                                         27.0MB/s]

loop1:   0%|          | 0/4 [00:00<?, ?it/s]

'ocid1.datasciencemodel.oc1.eu-frankfurt-1.amaaaaaangencdyauanowud3tyib2c627wqd6c5toehvcmywsfjeczx4wgja'

## **Deploy in the UI**

## **Invoke the model**

In [19]:
import requests
import oci
from oci.signer import Signer

In [20]:
# Prediction endpoint of the model deployment: <regional host>/<model deployment OCID>/predict
uri = (
    "https://modeldeployment.eu-frankfurt-1.oci.customer-oci.com/"
    "ocid1.datasciencemodeldeployment.oc1.eu-frankfurt-1.amaaaaaangencdyasl6dqv3i7lqwh5zhsgamx6n64h5qbjwafqbb7k3nwpla"
    "/predict"
)
print(uri)

https://modeldeployment.eu-frankfurt-1.oci.customer-oci.com/ocid1.datasciencemodeldeployment.oc1.eu-frankfurt-1.amaaaaaangencdyasl6dqv3i7lqwh5zhsgamx6n64h5qbjwafqbb7k3nwpla/predict


In [21]:
# Choose how requests to the model endpoint are signed:
#   using_rps = True  -> resource principal signer
#   using_rps = False -> API key taken from the local OCI config file
using_rps = False

if not using_rps:
    # config + key flow
    config = oci.config.from_file("/home/datascience/.oci/config") # replace with the location of your oci config file
    auth = Signer(
        tenancy=config['tenancy'],
        user=config['user'],
        fingerprint=config['fingerprint'],
        private_key_file_location=config['key_file'],
        pass_phrase=config['pass_phrase'],
    )
else:
    # resource principal flow
    auth = oci.auth.signers.get_resource_principals_signer()

In [23]:
%%time
import json
print(auth)
#input data
data = {"text": "I had breakfast this morning, and now I'm going to "}

#POST request to the model
response = requests.post(uri, json=data, auth=auth)
print(response)
xx = (json.loads(response.content))
print(xx)
print(xx['prediction'])



<oci.signer.Signer object at 0x7f1d5957e7c0>
<Response [200]>
{'prediction': "I had breakfast this morning, and now I'm going to \xa0have a little bit of a break. I'm going to go to the gym and do some work. I'm going to go to the gym and do some work. I'm"}
I had breakfast this morning, and now I'm going to  have a little bit of a break. I'm going to go to the gym and do some work. I'm going to go to the gym and do some work. I'm
CPU times: user 16.3 ms, sys: 1.19 ms, total: 17.5 ms
Wall time: 5.33 s
