In [22]:
import kfp
import matplotlib.pyplot as plt
import pandas as pd
import requests
from typing import List, Tuple

import os
from kfp import dsl
from kfp.v2 import compiler
from kfp.v2.dsl import (Artifact, Dataset, Input, InputPath, Model, Output,
                        OutputPath, ClassificationMetrics, Metrics, component,pipeline)

In [23]:
# --- Pipeline configuration -------------------------------------------------
# GCS bucket that backs the pipeline; PIPELINE_ROOT is derived from it.
BUCKET_URI = "gs://sbx-196865-genaift-ds-pkgs"
PIPELINE_ROOT = f"{BUCKET_URI}/pipeline_root/"

# GCP project, region and the service account the pipeline job runs as.
project_id = "sbx-196865-genaift-ds-ccd784e6"
location = "us-central1"
service_account = "sa-196865-big-data@sbx-196865-genaift-ds-ccd784e6.iam.gserviceaccount.com"

# Not referenced by any other cell visible in this notebook.
EXPERIMENT_NAME = "test-1"
model_display_name = "tuned_bison001"

In [24]:
# Pin __file__ to this notebook's name. Because that name has no directory
# part, __location__ resolves to the symlink-free current working directory;
# later cells use it to write and read the component YAML.
__file__ = 'kfp_finetuning.ipynb'
__location__ = os.path.realpath(
    os.path.join(
        os.getcwd(),
        os.path.dirname(__file__),
    )
)

In [25]:
import kfp
import matplotlib.pyplot as plt
import pandas as pd
import requests
from typing import List, Tuple

import os
from kfp import dsl
from kfp.v2 import compiler
from kfp.v2.dsl import (Artifact, Dataset, Input, InputPath, Model, Output,
                        OutputPath, ClassificationMetrics, Metrics, component,pipeline)

In [26]:
@component(packages_to_install=['bitsandbytes==0.42.0','peft==0.8.2',
                                'trl==0.7.10','accelerate==0.27.1',
                                'datasets==2.17.0','transformers==4.38.0','huggingface_hub',
                                'google-cloud-storage','google-cloud-aiplatform','google-cloud-pipeline-components',
                                'gcsfs'],
           base_image='gcr.io/deeplearning-platform-release/base-cu113.py310',
           output_component_file=os.path.join(__location__, "model_finetuning.yaml"))
def finetuning():
    """QLoRA fine-tune google/gemma-7b-it on a code-instruction dataset and publish it.

    Steps, in order:
      1. Authenticate to the Hugging Face Hub with the token found in the
         ``HF_TOKEN`` environment variable of the container.
      2. Load the base model in 4-bit NF4 quantization, its tokenizer, and the
         train split of ``TokenBender/code_instructions_122k_alpaca_style``.
      3. Add a ``prompt`` column, shuffle, tokenize, and split 80/20.
      4. Attach LoRA adapters to every 4-bit linear layer except ``lm_head``
         and train for 100 steps with ``SFTTrainer``.
      5. Save the adapter, merge it into an fp16 copy of the base model, then
         save and push the merged model and tokenizer to the Hub.
      6. Save the adapter again under ``transformers/<name>`` and upload that
         folder to GCS.

    Raises:
        RuntimeError: if ``HF_TOKEN`` is not set in the environment.
    """
    # Imports and helpers live inside the function body: a lightweight KFP
    # component is executed from this function's source alone.
    import os
    import posixpath

    import torch
    import transformers
    import bitsandbytes as bnb
    from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
    from datasets import load_dataset
    from peft import LoraConfig, PeftModel, prepare_model_for_kbit_training, get_peft_model
    from trl import SFTTrainer
    from google.cloud import storage
    from huggingface_hub import login

    # Never commit an access token to source. The token that used to be
    # hardcoded here must be treated as leaked and revoked.
    hf_token = os.environ.get("HF_TOKEN")
    if not hf_token:
        raise RuntimeError(
            "HF_TOKEN environment variable is not set; it is required to download "
            "the gated base model and to push the fine-tuned model to the Hub."
        )
    login(token=hf_token)

    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
    )

    model_id = "google/gemma-7b-it"
    model = AutoModelForCausalLM.from_pretrained(model_id, quantization_config=bnb_config, device_map='auto')
    tokenizer = AutoTokenizer.from_pretrained(model_id, add_eos_token=True)
    dataset = load_dataset("TokenBender/code_instructions_122k_alpaca_style", split="train")

    def upload_folder_to_gcs(local_folder_path, gcs_bucket, gcs_folder_path=""):
        """Recursively upload every file under a local folder to a GCS bucket.

        :param local_folder_path: str: local directory to walk
        :param gcs_bucket: str: destination bucket name (without ``gs://``)
        :param gcs_folder_path: str: object-name prefix inside the bucket
        """
        storage_client = storage.Client()
        bucket = storage_client.bucket(gcs_bucket)

        for root, _dirs, files in os.walk(local_folder_path):
            for file_name in files:
                local_path = os.path.join(root, file_name)
                # relpath is correct even when local_folder_path carries a
                # trailing separator; slicing by len(prefix) + 1 is not.
                relative_path = os.path.relpath(local_path, local_folder_path)
                # GCS object names always use '/', whatever the local OS uses.
                gcs_path = posixpath.join(gcs_folder_path, *relative_path.split(os.sep))

                blob = bucket.blob(gcs_path)
                blob.upload_from_filename(local_path)

        print(f"Folder {local_folder_path} uploaded to GCS bucket '{gcs_bucket}' with path '{gcs_folder_path}'")

    def generate_prompt(data_point):
        """Render one dataset record as a single training string.

        :param data_point: dict: record with ``instruction``, ``input`` and ``output`` keys
        :return: str: the formatted (not yet tokenized) prompt text
        """
        prefix_text = 'Below is an instruction that describes a task. Write a response that ' \
                   'appropriately completes the request.\n\n'
        # NOTE(review): there is no separator between "<start_of_turn>model" and
        # the answer text; confirm this matches the prompt format wanted at
        # inference time before changing it (it alters the training data).
        if data_point['input']:
            # Sample comes with additional context.
            text = f"""<start_of_turn>user {prefix_text} {data_point["instruction"]} here are the inputs {data_point["input"]} <end_of_turn>\n<start_of_turn>model{data_point["output"]} <end_of_turn>"""
        else:
            text = f"""<start_of_turn>user {prefix_text} {data_point["instruction"]} <end_of_turn>\n<start_of_turn>model{data_point["output"]} <end_of_turn>"""
        return text

    def find_all_linear_names(model):
        """Return the leaf names of all 4-bit linear modules, excluding ``lm_head``.

        :param model: the quantized model whose modules are inspected
        :return: list[str]: unique module leaf names to use as LoRA targets
        """
        lora_module_names = set()
        for name, module in model.named_modules():
            if isinstance(module, bnb.nn.Linear4bit):
                # 'a.b.q_proj' -> 'q_proj'; a name without dots is kept as is.
                lora_module_names.add(name.split('.')[-1])
        # The output head is never adapted; removing it once after the scan
        # gives the same result as checking on every iteration.
        lora_module_names.discard('lm_head')
        return list(lora_module_names)

    # ---- Data preparation ----
    text_column = [generate_prompt(data_point) for data_point in dataset]
    dataset = dataset.add_column("prompt", text_column)
    dataset = dataset.shuffle(seed=1234)
    dataset = dataset.map(lambda samples: tokenizer(samples["prompt"]), batched=True)
    # Seed the split as well so the train/eval partition is reproducible.
    dataset = dataset.train_test_split(test_size=0.2, seed=1234)
    train_data = dataset["train"]
    test_data = dataset["test"]

    # ---- LoRA setup ----
    model.gradient_checkpointing_enable()
    model = prepare_model_for_kbit_training(model)

    modules = find_all_linear_names(model)
    lora_config = LoraConfig(
        r=64,
        lora_alpha=32,
        target_modules=modules,
        lora_dropout=0.05,
        bias="none",
        task_type="CAUSAL_LM",
    )
    model = get_peft_model(model, lora_config)
    trainable, total = model.get_nb_trainable_parameters()
    print(f"Trainable: {trainable} | total: {total} | Percentage: {trainable/total*100:.4f}%")
    tokenizer.pad_token = tokenizer.eos_token
    torch.cuda.empty_cache()

    # ---- Training ----
    trainer = SFTTrainer(
        model=model,
        train_dataset=train_data,
        eval_dataset=test_data,
        dataset_text_field="prompt",
        peft_config=lora_config,
        args=transformers.TrainingArguments(
            per_device_train_batch_size=1,
            gradient_accumulation_steps=4,
            # 0.03 is a fraction of the run, not a step count: it belongs in
            # warmup_ratio (warmup_steps expects an integer number of steps).
            warmup_ratio=0.03,
            max_steps=100,
            learning_rate=2e-4,
            logging_steps=1,
            output_dir="outputs",
            optim="paged_adamw_8bit",
            save_strategy="epoch",
        ),
        data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
    )
    model.config.use_cache = False  # silence the warnings. Please re-enable for inference!
    trainer.train()
    new_model = "gemma-Code-Instruct-Finetune-test-acn"
    trainer.model.save_pretrained(new_model)

    # ---- Merge the adapter into an fp16 copy of the base model ----
    base_model = AutoModelForCausalLM.from_pretrained(
        model_id,
        low_cpu_mem_usage=True,
        return_dict=True,
        torch_dtype=torch.float16,
        device_map={"": 0},
    )

    merged_model = PeftModel.from_pretrained(base_model, new_model)
    merged_model = merged_model.merge_and_unload()

    merged_model.save_pretrained("merged_model", safe_serialization=True)
    tokenizer.save_pretrained("merged_model")
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.padding_side = "right"

    # ---- Publish ----
    merged_model.push_to_hub(new_model, use_temp_dir=False)
    tokenizer.push_to_hub(new_model, use_temp_dir=False)

    # NOTE(review): this saves trainer.model (the adapter), not merged_model,
    # so only the adapter is uploaded to GCS; confirm that is intended.
    local_path = f"transformers/{new_model}"
    trainer.model.save_pretrained(local_path)

    upload_folder_to_gcs(local_path,'19865_finetuned_models','transformers_pretrained_models')

  @component(packages_to_install=['bitsandbytes==0.42.0','peft==0.8.2',
  def finetuning():


In [27]:
from kfp.v2 import compiler
from kfp import components
# Re-load the component from the YAML spec that the @component decorator
# wrote next to the notebook, giving a factory the pipeline can call.
component_spec_path = os.path.join(__location__, 'model_finetuning.yaml')
transformer_component = components.load_component_from_file(component_spec_path)

In [30]:
@pipeline(
    name='gemma-finetuning',
    description='finetuning gemma 7b',
    # needs to be changed based on region/project
    pipeline_root=PIPELINE_ROOT)
def train_pipeline()->None:
    """Single-step pipeline that runs the fine-tuning component on two T4 GPUs.

    The task asks for 12 CPUs, 16G of memory and 2 x NVIDIA_TESLA_T4.
    """
    # The original chain also called
    # add_node_selector_constraint('cloud-tpus.google.com/v3') just before
    # set_accelerator_type('NVIDIA_TESLA_T4'). Requesting a TPU and a GPU for
    # the same task is contradictory and the later call wins, so only the GPU
    # request is kept.
    finetune_task = (
        transformer_component()
        .set_cpu_limit('12')
        .set_memory_limit('16G')
        .set_accelerator_type('NVIDIA_TESLA_T4')
        .set_accelerator_limit(2)
    )

    # Forward the Hugging Face token from the notebook environment when one is
    # set, so it never has to be written into source. NOTE: the value ends up
    # in the compiled pipeline spec, so do not commit that file.
    hf_token = os.environ.get("HF_TOKEN")
    if hf_token:
        finetune_task.set_env_variable("HF_TOKEN", hf_token)


# Compile the pipeline to the JSON spec that the job-submission cell uploads.
compiler.Compiler().compile(pipeline_func=train_pipeline, package_path="transformer_finetuning.json")

In [None]:
from google.cloud.aiplatform import pipeline_jobs
from google.cloud import aiplatform as vertex_ai
# Initialise the Vertex AI SDK with the project and region from the config
# cell, so the region is defined in exactly one place.
vertex_ai.init(project=project_id, location=location)

# Submit the compiled pipeline spec; run() blocks and prints state updates
# until the job finishes.
job = pipeline_jobs.PipelineJob(
    display_name="transformer-finetuning-pipeline",
    template_path="transformer_finetuning.json",
    location=location,
)
job.run(service_account=service_account)

Creating PipelineJob
PipelineJob created. Resource name: projects/81995035742/locations/us-central1/pipelineJobs/gemma-finetuning-20240306090709
To use this PipelineJob in another session:
pipeline_job = aiplatform.PipelineJob.get('projects/81995035742/locations/us-central1/pipelineJobs/gemma-finetuning-20240306090709')
View Pipeline Job:
https://console.cloud.google.com/vertex-ai/locations/us-central1/pipelines/runs/gemma-finetuning-20240306090709?project=81995035742
PipelineJob projects/81995035742/locations/us-central1/pipelineJobs/gemma-finetuning-20240306090709 current state:
PipelineState.PIPELINE_STATE_RUNNING
PipelineJob projects/81995035742/locations/us-central1/pipelineJobs/gemma-finetuning-20240306090709 current state:
PipelineState.PIPELINE_STATE_RUNNING
PipelineJob projects/81995035742/locations/us-central1/pipelineJobs/gemma-finetuning-20240306090709 current state:
PipelineState.PIPELINE_STATE_RUNNING
PipelineJob projects/81995035742/locations/us-central1/pipelineJobs/gem