# Fine-tune a model with T5X and export it for inference

In [1]:
import os
import json
from datetime import datetime
from google.cloud import aiplatform as vertex_ai
from kfp.v2 import compiler
import time

In [2]:
# Google Cloud project and region used to initialise the Vertex AI SDK.
PROJECT_ID = "renatoleite-dev"  # Change to your project id.
REGION = "us-central1"  # Change to your region.

# Cloud Storage bucket name (without the gs:// prefix) used for all artifacts.
BUCKET = "rl-language"  # Change to your bucket.

In [3]:
# Model naming: the display name is "<model name>-<version>", and the
# workspace is a folder with that name inside the bucket.
MODEL_NAME = "finetune-en-de"
VERSION = "v01"
MODEL_DISPLAY_NAME = "-".join((MODEL_NAME, VERSION))
WORKSPACE = f"gs://{BUCKET}/{MODEL_DISPLAY_NAME}"

# Container image used for training, addressed under this project in gcr.io.
IMAGE_NAME = "t5x-training"
IMAGE_URI = f"gcr.io/{PROJECT_ID}/{IMAGE_NAME}"

In [4]:
# Configure the Vertex AI SDK with the project, region and a staging folder
# inside the notebook's bucket.
vertex_ai.init(
    staging_bucket=f"gs://{BUCKET}/staging",
    location=REGION,
    project=PROJECT_ID,
)

In [None]:
# IPython shell escape: submit a Cloud Build that tags the resulting image as
# IMAGE_URI, with a 2-hour build timeout. No source path is given, so gcloud
# presumably builds from the current directory -- confirm a Dockerfile is there.
! gcloud builds submit --tag {IMAGE_URI} --timeout=2h

In [5]:
# Hardware settings consumed by the worker pool spec further down:
# machine type, accelerator type/count, and number of replicas.
REPLICA_COUNT = 1
MACHINE_TYPE = "cloud-tpu"
ACCELERATOR_TYPE = "TPU_V3"
ACCELERATOR_NUM = 8

In [6]:
# GCS folder for training outputs (logs, checkpoints, ...), as a gs:// URI.
MODEL_DIR = f"gs://{BUCKET}/model/{MODEL_DISPLAY_NAME}"

# GCS folder where the processed dataset is stored, as a gs:// URI.
TFDS_DATA_DIR = f"gs://{BUCKET}/dataset/{MODEL_DISPLAY_NAME}"

# Gin configuration file passed to the training script via --gin_file.
GIN_FILE = "./small_finetune_wmt.gin"

In [7]:
# The pipeline function is imported from the finetune_pipeline module.
from finetune_pipeline import finetune_pipeline

# Compile the pipeline function into "<pipeline_name>.json"; the same file
# name is used later as the PipelineJob template path.
pipeline_name = "t5x-finetune-export"
compiler.Compiler().compile(
    package_path=pipeline_name + ".json",
    pipeline_func=finetune_pipeline,
)



TypeError: the JSON object must be str, bytes or bytearray, not PipelineParam

In [None]:
# Worker pool specification for a custom training job, assembled from its
# three parts: the machine, the container to run, and the replica count.

# Machine and accelerator configuration for each replica.
_machine_spec = {
    "machine_type": MACHINE_TYPE,
    "accelerator_type": ACCELERATOR_TYPE,
    "accelerator_count": ACCELERATOR_NUM,
}

# Command line handed to the T5X training script inside the container.
_train_args = [
    f"--gin_file={GIN_FILE}",
    # The gin binding value is wrapped in double quotes inside the argument.
    f'--gin.MODEL_DIR="{MODEL_DIR}"',
    f"--tfds_data_dir={TFDS_DATA_DIR}",
    "--gin.USE_CACHED_TASKS=False",
]

# Training container: image plus the interpreter/script entry point.
_container_spec = {
    "image_uri": IMAGE_URI,
    "command": ["/opt/conda/envs/t5x/bin/python", "/llm/t5x/t5x/train.py"],
    "args": _train_args,
}

worker_pool_specs = [
    {
        "machine_spec": _machine_spec,
        "replica_count": REPLICA_COUNT,
        "container_spec": _container_spec,
    },
]

In [None]:
# Parameter values handed to the PipelineJob; currently none are set.
params = {}

In [None]:
# Labels attached to the pipeline run. `labels` was referenced but never
# defined in this notebook, so it is built here from the model name/version.
labels = {
    'model_name': MODEL_NAME,
    'version': VERSION,
}

# Create the Vertex AI pipeline job from the compiled JSON template.
pipeline_job = vertex_ai.PipelineJob(
    display_name=pipeline_name,
    template_path=f'{pipeline_name}.json',
    # The notebook defines BUCKET; the previously used BUCKET_NAME does not
    # exist and raised NameError.
    pipeline_root=f'gs://{BUCKET}/pipeline_runs/{pipeline_name}',
    parameter_values=params,
    enable_caching=False,
    labels=labels,
)

# sync=False submits the run without blocking the notebook on completion.
pipeline_job.run(sync=False)

### Step 5: Explore metrics

After fine-tuning has completed, you can parse metrics into CSV format using the following script:

In [None]:
# Shell commands (not Python): run them in a terminal, or adapt the cell to a
# shell cell. ${MODEL_DIR} is read from the shell environment here, so it must
# be set there -- the Python MODEL_DIR variable is not visible to the shell.
# Reads summaries from ${MODEL_DIR}/inference_eval and writes results.csv into
# that same directory.
VAL_DIR="${MODEL_DIR}/inference_eval"
python -m t5.scripts.parse_tb \
  --summary_dir="${VAL_DIR}" \
  --seqio_summaries \
  --out_file="${VAL_DIR}/results.csv" \
  --alsologtostderr