## <a name="0">Bias Mitigation for a Translation Service - DIY</a>
    
**Please work through this notebook from top to bottom and don't skip sections, as skipping could lead to error messages due to missing code.**


## <a name="step1">Step 1: Import libraries</a>


In [None]:
%%capture

!pip3 install -r requirements.txt --quiet
!pip install sagemaker --quiet --upgrade --force-reinstall

In [None]:
%%capture

import os
import numpy as np
import pandas as pd
from typing import Any, Dict, List, Tuple, Union
from datasets import Dataset, load_dataset, disable_caching
disable_caching() ## disable huggingface cache

from transformers import AutoModelForCausalLM
from transformers import AutoTokenizer
from transformers import TextDataset

import torch
from torch.utils.data import Dataset, random_split
from transformers import TrainingArguments, Trainer
import accelerate
import bitsandbytes

from IPython.display import Markdown

# `!export ...` runs in a throwaway subshell, so the variable never reaches
# this kernel process. Set it on os.environ (os is imported at the top of this
# cell) so that it is visible to libraries running inside the kernel.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

import warnings
# Suppress all warnings to keep the notebook output readable.
warnings.filterwarnings('ignore')

## <a name="step2">Step 2: Prepare the training dataset</a>


In [None]:
# Read the DIY CSV through the Hugging Face "csv" loader and keep the "train"
# entry of the returned mapping.
DIY_DATA_FILE = 'data/cda_fae_faer_faer_faerself.csv'
diy_splits = load_dataset("csv", data_files=DIY_DATA_FILE)
diy_dataset = diy_splits['train']
diy_dataset

In [None]:
# Display the first record to inspect the raw columns before any prompt formatting.
diy_dataset[0]

## <a name="step2.1">Step 2.1: Prepare the Prompt</a>


In [None]:
from utils.helpers import (
    INTRO_BLURB,
    INSTRUCTION_KEY,
    RESPONSE_KEY,
    END_KEY,
    RESPONSE_KEY_NL,
    DEFAULT_SEED,
    PROMPT,
)

# For reference only — the authoritative definition lives in utils.helpers.
# The template is expected to have this layout (TODO confirm against helpers):
#
#   PROMPT = """{intro}
#               {instruction_key}
#               {instruction}
#               {response_key}
#               {response}
#               {end_key}"""

# Render the imported template so its structure is visible in the notebook.
Markdown(PROMPT)

In [None]:
def _add_text(rec):
    instruction = rec["instruction"]
    response = rec["response"]

    if not instruction:
        raise ValueError(f"Expected an instruction in: {rec}")

    if not response:
        raise ValueError(f"Expected a response in: {rec}")

    rec["text"] = PROMPT.format(
        instruction=instruction, response=response)

    return rec

In [None]:
# Run _add_text over every record (adds the "text" entry), then display the
# first record to check the result.
diy_dataset = diy_dataset.map(_add_text)
diy_dataset[0]

In [None]:
# Render the first formatted prompt as Markdown for a readable preview.
Markdown(diy_dataset[0]['text'])

### <a name="step3">Step 3: Load a pretrained LLM</a>


In [None]:
# Tokenizer for the base checkpoint, configured to pad on the left.
tokenizer = AutoTokenizer.from_pretrained("databricks/dolly-v2-3b", padding_side="left")

# Reuse the EOS token for padding, then register the prompt marker strings as
# additional special tokens.
tokenizer.pad_token = tokenizer.eos_token
prompt_marker_tokens = [END_KEY, INSTRUCTION_KEY, RESPONSE_KEY_NL]
tokenizer.add_special_tokens({"additional_special_tokens": prompt_marker_tokens})

In [None]:
# Loading options for the base causal LM: automatic device placement,
# float16 as the requested torch dtype, and 8-bit loading enabled.
base_model_load_options = dict(
    device_map="auto",
    torch_dtype=torch.float16,
    load_in_8bit=True,
)
model = AutoModelForCausalLM.from_pretrained(
    "databricks/dolly-v2-3b",
    **base_model_load_options,
)

### <a name="step3.1">Step 3.1: Prepare model for training</a>


In [None]:
# Special tokens were added to the tokenizer earlier, so resize the model's
# token embeddings to match the tokenizer's current length.
model.resize_token_embeddings(len(tokenizer))

In [None]:
from functools import partial
from utils.helpers import mlu_preprocess_batch

# Length limit handed to the preprocessing helper; the filter step further
# down compares token counts against the same constant.
MAX_LENGTH = 256

# Bind the tokenizer and the length limit once, so the callable passed to
# Dataset.map only has to receive the batch.
_preprocessing_function = partial(
    mlu_preprocess_batch,
    tokenizer=tokenizer,
    max_length=MAX_LENGTH,
)

In [None]:
def _shorter_than_max_length(rec):
    """Return True when the record's ``input_ids`` has fewer than MAX_LENGTH entries."""
    return len(rec["input_ids"]) < MAX_LENGTH


# Tokenize in batches and drop the raw text columns from the encoded dataset.
encoded_diy_dataset = diy_dataset.map(
    _preprocessing_function,
    remove_columns=["instruction", "response", "text"],
    batched=True,
)

# Keep only the records whose token sequence is strictly shorter than MAX_LENGTH.
processed_dataset = encoded_diy_dataset.filter(_shorter_than_max_length)

In [None]:
# Split into train/test with a fixed seed so the split is repeatable.
# NOTE(review): test_size=14 is presumably an absolute number of held-out
# examples rather than a fraction — confirm against the datasets docs.
split_dataset = processed_dataset.train_test_split(test_size=14, seed=0)
split_dataset

### <a name="step4">Step 4: Define the trainer and fine-tune the LLM</a>


#### <a name="step4.1">Step 4.1: Define the `LoraConfig` and load the LoRA model</a>


In [None]:
# NOTE(review): prepare_model_for_int8_training and TaskType are imported here
# but are not called/used in the visible cells — confirm whether that is intended.
from peft import LoraConfig, get_peft_model, prepare_model_for_int8_training, TaskType

# Batch-size bookkeeping.
# NOTE(review): none of the visible cells pass these three values on to
# TrainingArguments — confirm whether that is intentional.
MICRO_BATCH_SIZE = 8
BATCH_SIZE = 64
GRADIENT_ACCUMULATION_STEPS = BATCH_SIZE // MICRO_BATCH_SIZE

# LoRA hyperparameters.
LORA_R = 256
LORA_ALPHA = 512
LORA_DROPOUT = 0.01

# Adapter configuration for a causal-LM task; bias handling is set to "none".
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=LORA_R,
    lora_alpha=LORA_ALPHA,
    lora_dropout=LORA_DROPOUT,
    bias="none",
)

In [None]:
# Wrap the base model according to lora_config and print the trainable-parameter summary.
# NOTE(review): this cell rebinds `model` to the wrapped model, so running it a
# second time would wrap the already-wrapped model — re-run from the
# model-loading cell instead.
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

#### <a name="step4.2">Step 4.2: Define the data collator</a>


In [None]:
from utils.helpers import MLUDataCollatorForCompletionOnlyLM

# Project-defined collator built with the shared tokenizer; it is passed to the
# Trainer below. mlm=False, PyTorch tensors, and padding to a multiple of 8 are
# requested.
# NOTE(review): the class name suggests the loss covers only the completion
# part of each prompt — confirm in utils.helpers.
data_collator = MLUDataCollatorForCompletionOnlyLM(
        tokenizer=tokenizer, mlm=False, return_tensors="pt", pad_to_multiple_of=8
)

#### <a name="step4.3">Step 4.3: Define the trainer</a>


In [None]:
# Training hyperparameters and the folder that receives checkpoints and the
# final saved model.
EPOCHS = 5
LEARNING_RATE = 2e-4
MODEL_SAVE_FOLDER_NAME = "diy-dolly-3b-lora"

# Logging and evaluation both run every 100 steps; checkpoints are written
# every 20000 steps with at most 10 kept.
# NOTE(review): the per-device batch size is 1 and no gradient accumulation is
# configured here, although MICRO_BATCH_SIZE / GRADIENT_ACCUMULATION_STEPS are
# defined in an earlier cell — confirm which is intended.
training_args = TrainingArguments(
                    output_dir=MODEL_SAVE_FOLDER_NAME,
                    fp16=True,
                    per_device_train_batch_size=1,
                    per_device_eval_batch_size=1,
                    learning_rate=LEARNING_RATE,
                    num_train_epochs=EPOCHS,
                    logging_strategy="steps",
                    logging_steps=100,
                    evaluation_strategy="steps",
                    eval_steps=100, 
                    save_strategy="steps",
                    save_steps=20000,
                    save_total_limit=10,
)

In [None]:
# Assemble the Trainer from the LoRA-wrapped model, the tokenizer, the training
# arguments, the train/test splits and the custom data collator.
trainer = Trainer(
        model=model,
        tokenizer=tokenizer,
        args=training_args,
        train_dataset=split_dataset['train'],
        eval_dataset=split_dataset["test"],
        data_collator=data_collator,
)
# Turn off the model's use_cache flag before training starts.
model.config.use_cache = False
# Start fine-tuning; as the last expression, the return value of train() is displayed.
trainer.train()

#### <a name="step4.4">Step 4.4: Save the finetuned model</a>


In [None]:
# Save the trained model into MODEL_SAVE_FOLDER_NAME; the packaging step later
# copies the adapter files out of this folder.
trainer.model.save_pretrained(MODEL_SAVE_FOLDER_NAME)

In [None]:
# Save the model's config object into the same folder.
trainer.model.config.save_pretrained(MODEL_SAVE_FOLDER_NAME)

In [None]:
# Save the tokenizer (including the added special tokens) next to the model files.
tokenizer.save_pretrained(MODEL_SAVE_FOLDER_NAME)

### <a name="step5">Step 5: Deploy the fine-tuned model</a>


### <a name="step5.1">Step 5.1: Instantiate SageMaker parameters</a>


In [None]:
import boto3
import json
import sagemaker.djl_inference
from sagemaker.session import Session
from sagemaker import image_uris
from sagemaker import Model

# A single SageMaker session provides the caller's role ARN and the region
# used for the container image lookup below.
sagemaker_session = Session()
print("sagemaker_session: ", sagemaker_session)

aws_role = sagemaker_session.get_caller_identity_arn()
print("aws_role: ", aws_role)

aws_region = boto3.Session().region_name
print("aws_region: ", aws_region)

# Resolve the DJL DeepSpeed serving image for the session's region. Read the
# region from the session's public `boto_region_name` attribute instead of the
# private `_region_name`, which is an implementation detail of the SDK.
image_uri = image_uris.retrieve(framework="djl-deepspeed",
                                version="0.22.1",
                                region=sagemaker_session.boto_region_name)
print("image_uri: ", image_uri)

### <a name="step5.2">Step 5.2: Create the model artifact</a> ###


In [None]:
%%bash
# Fail fast: without this, a failed copy of adapter_config.json would go
# unnoticed as long as the last command succeeded, and an incomplete artifact
# would be packaged later.
set -euo pipefail

# Rebuild the staging directory from scratch so files from a previous run
# cannot end up in the artifact.
rm -rf lora_model
mkdir -p lora_model/dolly-3b-lora

# Copy the saved LoRA adapter config and weights into the staging directory.
cp diy-dolly-3b-lora/adapter_config.json lora_model/dolly-3b-lora/
cp diy-dolly-3b-lora/adapter_model.bin lora_model/dolly-3b-lora/

In [None]:
%%writefile lora_model/serving.properties
engine=Python
option.entryPoint=model.py
option.adapter_checkpoint=dolly-3b-lora
option.adapter_name=dolly-lora

In [None]:
%%writefile lora_model/requirements.txt
transformers==4.27.4
accelerate>=0.24.1,<1
peft

### <a name="step5.3">Step 5.3: Create the inference script</a>


In [None]:
%%bash
# Stage the inference handler as model.py, the file name referenced by
# option.entryPoint in serving.properties.
cp utils/deployment_model.py lora_model/model.py

### <a name="step5.4">Step 5.4: Upload the model artifact to S3</a>


In [None]:
%%bash
# Bundle the staged lora_model/ directory into the gzip-compressed tarball that
# the next cell uploads to S3.
tar -cvzf diy_lora_model.tar.gz lora_model/

In [None]:
import boto3
import json
import sagemaker.djl_inference
from sagemaker.session import Session
from sagemaker import image_uris
from sagemaker import Model

s3 = boto3.resource('s3')
s3_client = boto3.client('s3')

# Collect every bucket whose name starts with "artifact".
artifact_buckets = [
    bucket.name for bucket in s3.buckets.all() if bucket.name.startswith('artifact')
]

# Stop with a clear message instead of hitting a NameError on `mybucket` below
# when no matching bucket exists.
if not artifact_buckets:
    raise RuntimeError(
        "No S3 bucket with a name starting with 'artifact' was found; "
        "cannot upload the model artifact."
    )

for bucket_name in artifact_buckets:
    print(bucket_name)

# Same selection rule as before: when several buckets match, the last one wins.
mybucket = artifact_buckets[-1]

# upload_file has no useful return value, so the result is not kept.
s3_client.upload_file("diy_lora_model.tar.gz", mybucket, "diy_lora_model.tar.gz")

### <a name="step5.5">Step 5.5: Deploy the Model</a> ###


In [None]:
from time import gmtime, strftime

# Timestamp built from gmtime(); it suffixes the model name here and the
# endpoint name in the deployment cell.
timestamp_prefix = strftime("%Y-%m-%d-%H-%M-%S", gmtime())

model_name = f"diy-model-{timestamp_prefix}"
model_data = f"s3://{mybucket}/diy_lora_model.tar.gz"

# SageMaker model definition: serving image, uploaded artifact, execution role,
# and the DJL predictor class to use for the deployed endpoint.
model = Model(
    name=model_name,
    image_uri=image_uri,
    model_data=model_data,
    role=aws_role,
    predictor_cls=sagemaker.djl_inference.DJLPredictor,
)

Note: **The deployment should finish within 10 minutes. If it takes longer than that, the endpoint deployment may have failed.**

In [None]:
%%time

#Define the unique name for the endpoint
endpoint_name = f"diy-endpoint-{timestamp_prefix}"

predictor = model.deploy(1, "ml.g4dn.2xlarge", endpoint_name=endpoint_name )