# Automation

In [None]:
from kfp import dsl
from kfp import compiler

# Suppress FutureWarning messages raised from modules whose name
# matches the pattern "kfp.*".
import warnings

warnings.filterwarnings(
    action="ignore",
    category=FutureWarning,
    module="kfp.*",
)

In [None]:
# Print pipeline.yaml through the shell. The file must already exist in the
# working directory; no cell visible here creates it.
!cat pipeline.yaml



#### Automation and Orchestration of a Supervised Tuning Pipeline.

- Reuse an existing Kubeflow Pipeline that performs Parameter-Efficient Fine-Tuning (PEFT) on a foundation model from Google called [PaLM 2](https://ai.google/discover/palm2/).


In [None]:
### These are the same jsonl files from the previous lab.
### Time stamps have been removed so that
### the files are consistent for all learners.
TRAINING_DATA_URI = "./tune_data_stack_overflow_python_qa.jsonl"
EVALUATION_DATA_URI = "./tune_eval_data_stack_overflow_python_qa.jsonl"

### Backward-compatible alias for the original, misspelled name, which
### other cells still reference.
EVAUATION_DATA_URI = EVALUATION_DATA_URI

In [None]:
### Location of the pipeline template that is being reused.
### A copy of the file is provided in your workspace as well.
template_path = (
    "https://us-kfp.pkg.dev/ml-pipeline/"
    "large-language-model-pipelines/tune-large-model/v2.0.0"
)

In [None]:
import datetime

In [None]:
### Current local time rendered as hour:day:month:year; it is embedded in
### the model and pipeline display names.
date = format(datetime.datetime.now(), "%H:%d:%m:%Y")

In [None]:
### Display name for the tuned model; the `date` stamp is appended as a suffix.
MODEL_NAME = f"deep-learning-ai-model-{date}"

In [None]:
### Passed to the pipeline as "train_steps".
TRAINING_STEPS = 200
### Passed to the pipeline as "evaluation_interval".
### presumably measured in training steps -- TODO confirm against the pipeline docs
EVALUATION_INTERVAL = 20

In [None]:
### `authenticate` comes from the local `utils` module; it returns a pair that
### is unpacked into the credentials object and the project id used below.
### NOTE(review): how the credentials are obtained is not visible here.
from utils import authenticate
credentials, PROJECT_ID = authenticate() 

In [None]:
### Region used as the "location" of both the pipeline run and vertexai.init.
REGION = "us-central1"

In [None]:
### Pipeline inputs keyed by parameter name; intended to be supplied as
### `parameter_values` when the pipeline job is created.
pipeline_arguments = dict(
    model_display_name=MODEL_NAME,
    location=REGION,
    large_model_reference="text-bison@001",
    project=PROJECT_ID,
    train_steps=TRAINING_STEPS,
    dataset_uri=TRAINING_DATA_URI,
    evaluation_interval=EVALUATION_INTERVAL,
    evaluation_data_uri=EVAUATION_DATA_URI,
)

```Python
pipeline_root = "./"

job = PipelineJob(
        ### path of the yaml file to execute
        template_path=template_path,
        ### name of the pipeline
        display_name=f"deep_learning_ai_pipeline-{date}",
        ### pipeline arguments (inputs)
        parameter_values=pipeline_arguments,
        ### region of execution
        location=REGION,
        ### root is where temporary files are being 
        ### stored by the execution engine
        pipeline_root=pipeline_root,
        ### enable_caching=True will save the outputs 
        ### of components for re-use, and will only re-run those
        ### components for which the code or data has changed.
        enable_caching=True,
)

### submit for execution
job.submit()

### check to see the status of the job
job.state
```

# Safety

In [None]:
import vertexai
from vertexai.language_models import TextGenerationModel

In [None]:
### Initialise the Vertex AI SDK with the project, region and credentials
### defined in the earlier cells.
vertexai.init(
    project=PROJECT_ID,
    location=REGION,
    credentials=credentials,
)

In [None]:
### Handle to the pretrained "text-bison@001" model; used below to list the
### tuned models.
model = TextGenerationModel.from_pretrained("text-bison@001")

In [None]:
import random

### Tuned model names as reported by the base model handle.
list_tuned_models = model.list_tuned_model_names()

### Fail with an actionable message instead of the bare IndexError that
### random.choice raises on an empty sequence.
if not list_tuned_models:
    raise RuntimeError(
        "model.list_tuned_model_names() returned no tuned models; "
        "tune a model before running this cell."
    )

### Pick one of the tuned models at random.
tuned_model_select = random.choice(list_tuned_models)

In [None]:
### Load the randomly selected tuned model so it can serve predictions.
deployed_model = TextGenerationModel.get_tuned_model(
    tuned_model_select
)

In [None]:
### Send a bare question (no instruction prefix) to the tuned model.
PROMPT = "How load file from pickle?"
response = deployed_model.predict(PROMPT)
print(response)
### Retrieve the "content" key from element [0][0] of the raw prediction
### response, i.e. the first entry of its first object.
### NOTE(review): `_prediction_response` is a private attribute of the
### response object -- confirm it is stable across SDK versions.
final_output = response._prediction_response[0][0]["content"]
print(final_output)

In [None]:
### Instruction prefix for the prompt.
### The pieces are joined with explicit spaces: the previous form used
### backslash-newline continuations inside a triple-quoted string, which
### join lines with no separator and produced "Python.Answer", "likeyou"
### and "questions.Question:".
INSTRUCTION = (
    "Please answer the following Stackoverflow question on Python. "
    "Answer it like you are a developer answering Stackoverflow questions. "
    "Question:\n"
)
QUESTION = "How can I store my data as pickle file? Python example?"

### Full prompt: instruction, a single space, then the question, wrapped in
### a leading and a trailing newline.
PROMPT = f"""
{INSTRUCTION} {QUESTION}
"""

In [None]:
### Query the tuned model with the current PROMPT and print the "content"
### field of element [0][0] of the raw prediction response.
final_response = deployed_model.predict(PROMPT)
output = final_response._prediction_response[0][0]["content"]
print(output)

In [None]:
### Read the "blocked" key from the "safetyAttributes" of the response.
### NOTE(review): this inspects `response` (the reply to the bare prompt),
### not `final_response` -- confirm that is the intended object.
blocked = (
    response._prediction_response[0][0]["safetyAttributes"]["blocked"]
)
print(blocked)

In [None]:
from pprint import pprint

### Pull the whole "safetyAttributes" entry of the response and pretty-print it.
### NOTE(review): this inspects `response` (the reply to the bare prompt),
### not `final_response` -- confirm that is the intended object.
safety_attributes = (
    response._prediction_response[0][0]["safetyAttributes"]
)
pprint(safety_attributes)

In [None]:
### Read the "citations" key from the "citationMetadata" of the response.
### NOTE(review): this inspects `response` (the reply to the bare prompt),
### not `final_response` -- confirm that is the intended object.
citation = (
    response._prediction_response[0][0]["citationMetadata"]["citations"]
)
pprint(citation)