In [15]:
import json
import os

import keras
import keras_nlp
from google.cloud import aiplatform
from numba import cuda

from components.data_preparation.data_ingestion import process_whatsapp_chat
from components.fine_tunning.conversion_function import convert_checkpoints
from components.fine_tunning.trainer import finetune_gemma
from config import Config
from util import get_model_paths_and_config, upload2bs


2024-03-15 16:57:38.716483: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-03-15 16:57:38.776618: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [16]:
# Look up the path/config mapping for Config.MODEL_NAME. Later cells index it by keys
# such as 'model_size', 'huggingface_model_dir', 'deployed_model_blob' and 'machine_type'.
model_paths_and_config = get_model_paths_and_config(Config.MODEL_NAME)


In [17]:
# Display the training-data prefix and the bucket name that the ingestion cell passes on.
Config.TRAIN_DATA_DIR, Config.BUCKET_NAME

('input_data/andrehpereh', 'able-analyst-416817-chatbot-v1')

In [4]:
# Ingest the WhatsApp exports found under Config.TRAIN_DATA_DIR in Config.BUCKET_NAME.
# NOTE(review): the training cell slices the result (data[:50]), so this is presumably a
# list of formatted messages — confirm against data_ingestion.process_whatsapp_chat.
data = process_whatsapp_chat(Config.BUCKET_NAME, Config.TRAIN_DATA_DIR)

<Bucket: able-analyst-416817-chatbot-v1>
input_data/andrehpereh
<google.api_core.page_iterator.HTTPIterator object at 0x7f1dac862560>
input_data/andrehpereh/

input_data/andrehpereh/WhatsApp Chat with Anki.txt
WhatsApp Chat with Anki.txt
input_data/andrehpereh/WhatsApp Chat with Anki.txt
input_data/andrehpereh/WhatsApp Chat with Ilse Flatmate.txt
WhatsApp Chat with Ilse Flatmate.txt
input_data/andrehpereh/WhatsApp Chat with Ilse Flatmate.txt
input_data/andrehpereh/WhatsApp Chat with Michael.txt
WhatsApp Chat with Michael.txt
input_data/andrehpereh/WhatsApp Chat with Michael.txt
input_data/andrehpereh/WhatsApp Chat with Mike Haarlem.txt
WhatsApp Chat with Mike Haarlem.txt
input_data/andrehpereh/WhatsApp Chat with Mike Haarlem.txt
input_data/andrehpereh/WhatsApp Chat with Rosa Rosa Rosa.txt
WhatsApp Chat with Rosa Rosa Rosa.txt
input_data/andrehpereh/WhatsApp Chat with Rosa Rosa Rosa.txt
input_data/andrehpereh/WhatsApp Chat with Ruben Ewald Puijker.txt
WhatsApp Chat with Ruben Ewald Puij

In [None]:
# Fine-tune on the first 50 examples only.
# The original call passed Config.SEQUENCE_LENGTH as the LoRA rank, which looks like a
# copy-paste slip. Prefer a dedicated Config.RANK_LORA when the config defines one and
# fall back to the previous value otherwise, so the cell keeps running either way.
# NOTE(review): confirm the attribute name that config.py uses for the LoRA rank.
finetuned_weights_path = finetune_gemma(
    data=data[:50],
    model_paths=model_paths_and_config,
    model_name=Config.MODEL_NAME,
    rank_lora=getattr(Config, "RANK_LORA", Config.SEQUENCE_LENGTH),
    sequence_length=Config.SEQUENCE_LENGTH,
    epochs=Config.EPOCHS,
    batch_size=Config.BATCH_SIZE,
)

In [None]:
# Select the current CUDA device through numba and close its context.
# NOTE(review): presumably this frees the GPU memory held by the fine-tuning step before
# the checkpoint conversion below — confirm.
device = cuda.get_current_device()
cuda.select_device(device.id)
cuda.close()

In [None]:
# Convert the fine-tuned weights file, writing into the configured 'huggingface_model_dir'.
# NOTE(review): the key name suggests a Hugging Face checkpoint layout — confirm in
# components/fine_tunning/conversion_function.
output_dir = convert_checkpoints(
    weights_file=finetuned_weights_path,
    size=model_paths_and_config['model_size'],
    output_dir=model_paths_and_config['huggingface_model_dir'],
    vocab_path=model_paths_and_config['finetuned_vocab_path'],
)


In [None]:
# Point output_dir directly at the configured model directory.
# NOTE(review): this overwrites whatever convert_checkpoints returned; it looks like a
# shortcut for re-using an already converted model without re-running the conversion — confirm.
output_dir=model_paths_and_config['huggingface_model_dir']

In [None]:
# Upload the local model directory to the bucket under the configured blob prefix.
destination_path = upload2bs(
    local_directory=output_dir,
    bucket_name=Config.BUCKET_NAME,
    destination_subfolder=model_paths_and_config['deployed_model_blob'],
)

In [None]:
# Initialise the Vertex AI SDK with the project, region and staging bucket from Config.
aiplatform.init(
    project=Config.PROJECT_ID,
    location=Config.REGION,
    staging_bucket=Config.BUCKET_URI,
)

In [16]:
import datetime
# Serving container image handed to aiplatform.Model.upload by deploy_model_vllm
# (tag 20240220_0936_RC01).
VLLM_DOCKER_URI = "us-docker.pkg.dev/vertex-ai/vertex-vision-model-garden-dockers/pytorch-vllm-serve:20240220_0936_RC01"


def get_job_name_with_datetime(prefix: str) -> str:
    """Return ``prefix`` followed by a ``_YYYYMMDD_HHMMSS`` timestamp of the current local time.

    Args:
        prefix: Text placed in front of the timestamp; used unchanged.

    Returns:
        The prefix with the timestamp suffix appended, e.g. ``"gemma_20240315_165738"``.
    """
    # Import under a private alias inside the function: a later cell executes
    # `from datetime import datetime`, which rebinds the notebook-global name `datetime`
    # to the class and would make `datetime.datetime.now()` raise AttributeError.
    from datetime import datetime as _datetime

    suffix = _datetime.now().strftime("_%Y%m%d_%H%M%S")
    return f"{prefix}{suffix}"


def deploy_model_vllm(
    model_name: str,
    model_uri: str,
    service_account: str,
    machine_type: str = "g2-standard-8",
    accelerator_type: str = "NVIDIA_L4",
    accelerator_count: int = 1,
    max_model_len: int = 8192,
    dtype: str = "bfloat16",
) -> tuple[aiplatform.Model, aiplatform.Endpoint]:
    """Upload a model backed by the vLLM serving image and deploy it to a new endpoint.

    Args:
        model_name: Prefix of the uploaded model's display name (a timestamp is appended)
            and of the endpoint's display name (``-endpoint`` is appended).
        model_uri: Artifact URI passed to ``aiplatform.Model.upload``.
        service_account: Service account passed to ``Model.deploy``.
        machine_type: Machine type passed to ``Model.deploy``.
        accelerator_type: Accelerator type passed to ``Model.deploy``.
        accelerator_count: Accelerator count passed to ``Model.deploy``; also used as the
            ``--tensor-parallel-size`` server flag.
        max_model_len: Value of the ``--max-model-len`` server flag.
        dtype: Value of the ``--dtype`` server flag.

    Returns:
        The uploaded ``aiplatform.Model`` and the ``aiplatform.Endpoint`` it was deployed to.
    """
    # Command-line flags for the vLLM API server; the container listens on port 7080,
    # matching serving_container_ports below.
    flag_values = {
        "host": "0.0.0.0",
        "port": "7080",
        "tensor-parallel-size": accelerator_count,
        "swap-space": "16",
        "gpu-memory-utilization": "0.95",
        "max-model-len": max_model_len,
        "dtype": dtype,
    }
    server_args = [f"--{flag}={value}" for flag, value in flag_values.items()]
    server_args.append("--disable-log-stats")

    # Upload the model to "Model Registry"
    registered_model = aiplatform.Model.upload(
        display_name=get_job_name_with_datetime(model_name),
        artifact_uri=model_uri,
        serving_container_image_uri=VLLM_DOCKER_URI,
        serving_container_command=["python", "-m", "vllm.entrypoints.api_server"],
        serving_container_args=server_args,
        serving_container_ports=[7080],
        serving_container_predict_route="/generate",
        serving_container_health_route="/ping",
    )

    # Deploy the model to an endpoint to serve "Online predictions"
    serving_endpoint = aiplatform.Endpoint.create(display_name=f"{model_name}-endpoint")
    registered_model.deploy(
        endpoint=serving_endpoint,
        machine_type=machine_type,
        accelerator_type=accelerator_type,
        accelerator_count=accelerator_count,
        deploy_request_timeout=1800,
        service_account=service_account,
    )

    return registered_model, serving_endpoint

In [17]:
# Set up artificially since the model was already in a bucket: hard-code the GCS URI of a
# previously uploaded model rather than taking it from the upload cell.
model_paths_and_config['deployed_model_uri'] = 'gs://able-analyst-416817-chatbot-v1/gemma_2b_en/20240314162107'
# Display the name that the deployment cell passes as model_name.
model_paths_and_config['model_name_vllm']

'gemma_2b_en-vllm'

In [18]:
# Overrides deploy_model_vllm's default max_model_len of 8192 for this deployment.
max_model_len = 2048

# Upload and deploy the model; machine and accelerator settings come from the model config,
# the service account from Config.
model, endpoint = deploy_model_vllm(
    model_name=model_paths_and_config['model_name_vllm'],
    model_uri=model_paths_and_config['deployed_model_uri'],
    service_account=Config.SERVICE_ACCOUNT,
    machine_type=model_paths_and_config['machine_type'],
    accelerator_type=model_paths_and_config['accelerator_type'],
    accelerator_count=model_paths_and_config['accelerator_count'],
    max_model_len=max_model_len,
)

Creating Model
Create Model backing LRO: projects/24796876098/locations/us-central1/models/6563293868962349056/operations/1798728489633841152
Model created. Resource name: projects/24796876098/locations/us-central1/models/6563293868962349056@1
To use this Model in another session:
model = aiplatform.Model('projects/24796876098/locations/us-central1/models/6563293868962349056@1')
Creating Endpoint
Create Endpoint backing LRO: projects/24796876098/locations/us-central1/endpoints/2459943961893011456/operations/8683465682488131584
Endpoint created. Resource name: projects/24796876098/locations/us-central1/endpoints/2459943961893011456
To use this Endpoint in another session:
endpoint = aiplatform.Endpoint('projects/24796876098/locations/us-central1/endpoints/2459943961893011456')
Deploying model to Endpoint : projects/24796876098/locations/us-central1/endpoints/2459943961893011456
Deploy Endpoint model backing LRO: projects/24796876098/locations/us-central1/endpoints/2459943961893011456/op

'gemma-vertexai-chatbot@able-analyst-416817.iam.gserviceaccount.com'

2

In [20]:
# Questions used to smoke-test the deployed endpoint.
TEST_EXAMPLES = [
    "What is the plan for tonight?",
    "What would you like to drink?",
    "Are you coming tonight?",
]

# Prompt template for the training data and the finetuning tests
PROMPT_TEMPLATE = "Sender:\n{instruction}\n\nAndres Perez:\n{response}"

# One prompt per question, with the response slot left empty for the model to complete.
TEST_PROMPTS = [PROMPT_TEMPLATE.format(instruction=question, response="") for question in TEST_EXAMPLES]

def test_vertexai_endpoint(endpoint: aiplatform.Endpoint):
    """Send each test prompt to ``endpoint`` and print the question with the first prediction.

    Args:
        endpoint: Endpoint whose ``predict`` method is called once per prompt.
    """
    # Request settings shared by every prompt; only the prompt text varies.
    shared_options = {
        "max_tokens": 56,
        "temperature": 0.0,
        "top_p": 1.0,
        "top_k": 1,
        "raw_response": True,
    }
    separator = "- " * 40
    for question, prompt in zip(TEST_EXAMPLES, TEST_PROMPTS):
        payload = {"prompt": prompt, **shared_options}
        first_prediction = endpoint.predict(instances=[payload]).predictions[0]
        print(f"{question}\n{first_prediction}\n{separator}")


test_vertexai_endpoint(endpoint)

What is the plan for tonight?
I don’t know, I’m not sure. I’ll let you know when I know.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
What would you like to drink?
I’ll take a coffee, please.

Andres Perez:
I’ll take a coffee, please.

Andres Perez:
I’ll take a coffee, please.

Andres Perez:
I’ll take a coffee, please.

Andres Perez:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
Are you coming tonight?
I’m not sure. I’ll ask my mom.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 


In [39]:
res = !gcloud config get core/project
PROJECT_ID = res[0]
SERVICE_ACCOUNT = 'gemma-vertexai-chatbot@able-analyst-416817.iam.gserviceaccount.com'
from datetime import datetime
CONTAINER_IMAGE_NAME="gemma-chatbot"
GCP_REGION='us-central1'
IMAGE_NAME="gemma-chatbot"
TAG_NAME = 'latest'
KAGGLE_USERNAME='andrehpereh1'
KAGGLE_KEY='5859e39806d9456749dcbac685f04bc9'

In [31]:
# Cloud Build substitutions for the data-preparation image, as comma-separated KEY=VALUE pairs.
SUBSTITUTIONS = ",".join(
    [
        f"_CONTAINER_IMAGE_NAME={CONTAINER_IMAGE_NAME}-data-preparation",
        f"TAG_NAME={TAG_NAME}",
    ]
).strip()
print(SUBSTITUTIONS)


_CONTAINER_IMAGE_NAME=gemma-chatbot-data-preparation,TAG_NAME=latest


In [40]:
# Runs the data_preparation component image. (Development only; once tested, this should be moved to the main cloudbuild in the project folder.)
# Pay attention to the "." after submit. It might need some changes when moved to the master pipeline.
#!gcloud builds submit . --timeout=15m --config "components/data_preparation/cloudbuild.yaml" --substitutions {SUBSTITUTIONS} --region={GCP_REGION}
# Do not forget the tag.
#!docker run gcr.io/able-analyst-416817/gemma-chatbot-data-preparation:latest data_ingestion.py --bucket-name 'able-analyst-416817-chatbot-v1' --directory 'input_data/andrehpereh'

2

In [187]:
SUBSTITUTIONS="""
_CONTAINER_IMAGE_NAME={},\
_KAGGLE_USERNAME={},\
_KAGGLE_KEY={},\
TAG_NAME={}\
""".format(
           f"{CONTAINER_IMAGE_NAME}-fine-tunning",
           KAGGLE_USERNAME,
           KAGGLE_KEY,
           TAG_NAME, 
           ).strip()
print(SUBSTITUTIONS)


# Builds image
!gcloud builds submit . --config "components/fine_tunning/cloudbuild.yaml" --substitutions {SUBSTITUTIONS} --region={GCP_REGION}


data = '"[\\"Sender: FoooodddAndres Perez: Coming :)\\", \\"Sender: Can I maybe borrow your iron? Andres Perez: It\'s not my iron But yeah haha Or is it?\\"]"'
model_paths = """{"finetuned_model_dir": "./gemma_2b_en", "finetuned_weights_path": "./gemma_2b_en/model.weights.h5"}"""
model_paths_json = json.dumps(model_paths)

# Runs the model trainning component image.  (Development, when tested should be moved to the main cloudbuild in the project folder)

!docker run gcr.io/able-analyst-416817/gemma-chatbot-fine-tunning:latest {data} {model_paths_json}

_CONTAINER_IMAGE_NAME=gemma-chatbot-fine-tunning,_KAGGLE_USERNAME=andrehpereh1,_KAGGLE_KEY=<redacted>,TAG_NAME=latest
2024-03-17 15:01:48.885597: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-03-17 15:01:48.944732: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
3
trainer.py <class 'str'>
["Sender: FoooodddAndres Perez: Coming :)", "Sender: Can I maybe borrow your iron? Andres Perez: It's not my iron But yeah haha Or is it?"] <class 'str'>
{"finetuned_model_dir": "./gemma_2b_en", "finetuned_w

In [175]:
# NOTE(review): in the visible notebook `data_json` is only assigned in the experimental
# cell further down, so this cell fails when the notebook is run top to bottom on a fresh kernel.
data_json

'"[\\"Sender: FoooodddAndres Perez: Coming :)\\", \\"Sender: Can I maybe borrow your iron? Andres Perez: It\'s not my iron But yeah haha Or is it?\\"]"'

In [185]:
import json
SUBSTITUTIONS="""
_CONTAINER_IMAGE_NAME={},\
TAG_NAME={}\
""".format(
           f"{CONTAINER_IMAGE_NAME}-experimental",
           TAG_NAME,
           ).strip()
print(SUBSTITUTIONS)
data = '"[\\"Sender: FoooodddAndres Perez: Coming :)\\", \\"Sender: Can I maybe borrow your iron? Andres Perez: It\'s not my iron But yeah haha Or is it?\\"]"'
model_paths = """{"finetuned_model_dir": "./gemma_2b_en", "finetuned_weights_path": "./gemma_2b_en/model.weights.h5"}"""
data_json = json.dumps(data)
model_paths_json = json.dumps(model_paths)
#!gcloud builds submit . --timeout=15m --config "components/experimental/cloudbuild.yaml" --substitutions {SUBSTITUTIONS} --region={GCP_REGION}
!docker run gcr.io/able-analyst-416817/gemma-chatbot-experimental:latest experimental.py {data_json} {model_paths_json}


_CONTAINER_IMAGE_NAME=gemma-chatbot-experimental,TAG_NAME=latest
3
experimental.py <class 'str'>
"[\"Sender: FoooodddAndres Perez: Coming :)\", \"Sender: Can I maybe borrow your iron? Andres Perez: It's not my iron But yeah haha Or is it?\"]" <class 'str'>
{"finetuned_model_dir": "./gemma_2b_en", "finetuned_weights_path": "./gemma_2b_en/model.weights.h5"} <class 'str'>
This is type param1 <class 'str'>
<class 'dict'>
<class 'str'>
<class 'str'>
Jalo todo bien


In [13]:
#Re-runs the image to restart the website, service account might be needed with this one.  (Development, when tested should be moved to the main cloudbuild in the project folder)
SUBSTITUTIONS="""
_CONTAINER_IMAGE_NAME={},\
_GCP_REGION={},\
TAG_NAME={}\
""".format(
           f"{CONTAINER_IMAGE_NAME}-running-app",
           GCP_REGION,
           TAG_NAME, 
           ).strip()
print(SUBSTITUTIONS)
!gcloud builds submit . --timeout=15m --config "components/app_flask/cloudbuild.yaml" --substitutions {SUBSTITUTIONS} --region={GCP_REGION}
!gcloud builds submit . --timeout=15m --config "cloudbuild.yaml" --substitutions {SUBSTITUTIONS} --region={GCP_REGION}


_CONTAINER_IMAGE_NAME=gemma-chatbot-fine-tunning,_GCP_REGION=us-central1,TAG_NAME=latest


In [None]:
SUBSTITUTIONS="""
_CONTAINER_IMAGE_NAME={},\
_GCP_REGION={},\
TAG_NAME={}\
""".format(
           f"{CONTAINER_IMAGE_NAME}-running-app",
           GCP_REGION,
           TAG_NAME, 
           ).strip()
print(SUBSTITUTIONS)
!gcloud builds submit . --timeout=15m --config "components/app_flask/cloudbuild.yaml" --substitutions {SUBSTITUTIONS} --region={GCP_REGION}
!gcloud builds submit . --timeout=15m --config "cloudbuild.yaml" --substitutions {SUBSTITUTIONS} --region={GCP_REGION}

In [38]:
import kfp.dsl as dsl
from typing import List
from kfp import compiler

@dsl.component(
  base_image ='gcr.io/able-analyst-416817/gemma-chatbot-data-preparation:latest'
)
def process_whatsapp_chat_op(
  bucket_name: str,
  directory: str,
  output_path: dsl.OutputPath('Dataset')
):
    """Format the WhatsApp chats and write them, one message per line, to the Dataset output.

    Args:
        bucket_name: Passed to ``data_ingestion.process_whatsapp_chat``.
        directory: Passed to ``data_ingestion.process_whatsapp_chat``.
        output_path: File path supplied by the pipeline runtime for the 'Dataset' output.
    """
    print("Inside process_whatsapp_chat_op:")
    print(f"Received bucket_name: {bucket_name}")
    print(f"Received directory: {directory}")
    import data_ingestion
    formatted_messages = data_ingestion.process_whatsapp_chat(bucket_name, directory)

    # Write to the path the runtime handed in. The previous version reassigned output_path
    # to a fixed /tmp file, so the declared output was never written and the downstream
    # step had nothing to read.
    import os
    parent_dir = os.path.dirname(output_path)
    if parent_dir:
        # The parent directory of the output file is not guaranteed to exist yet.
        os.makedirs(parent_dir, exist_ok=True)
    with open(output_path, 'w') as f:
        for item in formatted_messages:
            f.write(f"{item}\n")


@dsl.component
def consume_dataset(dataset: dsl.InputPath('Dataset')):
    # Debug step: print the value received for the 'Dataset' input, which is declared
    # as an InputPath.
    print(dataset)

@dsl.pipeline(name="whatsapp-chat")
def pipeline(
    bucket_name: str,
    directory: str
):
    # Step 1: build the dataset output from the chats under bucket_name/directory.
    create_dataset_op = process_whatsapp_chat_op(
        bucket_name=bucket_name,
        directory=directory
    )
    # Step 2: debug consumer that prints what it receives for the dataset output of step 1.
    consume_dataset(dataset=create_dataset_op.outputs['output_path'])
    
    
    # NOTE(review): `gcc_aip` and the names project, location, staging_bucket, display_name,
    # container_uri, model_serving_container_image_uri and base_output_dir are neither
    # imported nor defined anywhere in the visible notebook, so this call only works with
    # leftover kernel state. It also takes no input from the dataset step — confirm intent.
    model_train_evaluate_op = gcc_aip.CustomContainerTrainingJobRunOp(
        # Vertex AI Python SDK authentication parameters.
        project=project,
        location=location,
        staging_bucket=staging_bucket,
        display_name=display_name,  # Added from pipeline definition
        container_uri=container_uri,
        model_serving_container_image_uri=model_serving_container_image_uri,

        # WorkerPool arguments.
        replica_count=1,
        machine_type="e2-standard-4",

        # Additional Arguments
        base_output_dir=base_output_dir 
        # ... other arguments specific to your training code
    )
    
    
# Compile the pipeline to a local spec file, then submit that file as a Vertex AI pipeline job.
PIPELINE_SPEC_PATH = "test-whatsapp.json"
compiler.Compiler().compile(pipeline_func=pipeline, package_path=PIPELINE_SPEC_PATH)

from google.cloud import aiplatform as vertexai

# Values for the two pipeline parameters declared by `pipeline`.
pipeline_parameters = {
    "bucket_name": "able-analyst-416817-chatbot-v1",
    "directory": "input_data/andrehpereh",
}
vertex_pipelines_job = vertexai.pipeline_jobs.PipelineJob(
    display_name="test-whatsapp",
    template_path=PIPELINE_SPEC_PATH,
    parameter_values=pipeline_parameters,
    enable_caching=True,
)
vertex_pipelines_job.run()

Creating PipelineJob
PipelineJob created. Resource name: projects/24796876098/locations/us-central1/pipelineJobs/whatsapp-chat-20240317213054
To use this PipelineJob in another session:
pipeline_job = aiplatform.PipelineJob.get('projects/24796876098/locations/us-central1/pipelineJobs/whatsapp-chat-20240317213054')
View Pipeline Job:
https://console.cloud.google.com/vertex-ai/locations/us-central1/pipelines/runs/whatsapp-chat-20240317213054?project=24796876098
PipelineJob projects/24796876098/locations/us-central1/pipelineJobs/whatsapp-chat-20240317213054 current state:
PipelineState.PIPELINE_STATE_RUNNING
PipelineJob projects/24796876098/locations/us-central1/pipelineJobs/whatsapp-chat-20240317213054 current state:
PipelineState.PIPELINE_STATE_RUNNING
PipelineJob projects/24796876098/locations/us-central1/pipelineJobs/whatsapp-chat-20240317213054 current state:
PipelineState.PIPELINE_STATE_RUNNING
PipelineJob projects/24796876098/locations/us-central1/pipelineJobs/whatsapp-chat-2024031