# 使用 Bedrock 导入模型 

In [None]:
!pip install -U sagemaker

In [None]:
import sagemaker
from sagemaker import get_execution_role

# Create a SageMaker session and look up the execution role of this notebook.
sess = sagemaker.Session()
role = get_execution_role()
# Default S3 bucket of the session; later cells upload the merged model under it.
sagemaker_default_bucket = sess.default_bucket()
# Region name taken from the boto3 session that backs the SageMaker session.
region = sess.boto_session.region_name
print("sagemaker_default_bucket:", sagemaker_default_bucket)
print("sagemaker_region:", region)

## 1. 将微调后的模型 checkpoint 与基础模型进行 merge

In [None]:
# Install the libraries needed for merging; transformers and peft are imported
# by merge_model.py, which is later run with `!python` from the same shell PATH.
!pip install transformers peft huggingface_hub

In [4]:
# Inspect a checkpoint saved during training; it will be merged with the
# original base model.
!ls ./deepseek_model_finetuned

adapter_config.json	   rng_state_2.pth  special_tokens_map.json
adapter_model.safetensors  rng_state_3.pth  tokenizer_config.json
global_step10		   rng_state_4.pth  tokenizer.json
latest			   rng_state_5.pth  trainer_state.json
README.md		   rng_state_6.pth  training_args.bin
rng_state_0.pth		   rng_state_7.pth
rng_state_1.pth		   scheduler.pt


### 1.1 merge 模型

In [None]:
%%writefile merge_model.py
import torch
from transformers import AutoConfig, AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer, GenerationConfig
from peft import PeftModel

def apply_lora(model_name_or_path, output_path, lora_path):
    """Merge a LoRA adapter into its base model and save the merged model.

    Args:
        model_name_or_path: Identifier or local directory of the base model,
            passed to ``AutoModelForCausalLM.from_pretrained``.
        output_path: Directory that receives the merged model through
            ``save_pretrained``.
        lora_path: Identifier or local directory of the LoRA adapter, passed
            to ``PeftModel.from_pretrained``.

    Returns:
        None. Only the merged model is written to ``output_path``; this
        function does not write any tokenizer files.
    """
    print(f"Loading the base model from {model_name_or_path}")
    # The base model is loaded in float16 with low_cpu_mem_usage enabled.
    # The base tokenizer is intentionally not loaded here: it was never used
    # or saved by this function, so loading it was only wasted work.
    base = AutoModelForCausalLM.from_pretrained(
        model_name_or_path, torch_dtype=torch.float16, low_cpu_mem_usage=True
    )

    print(f"Loading the LoRA adapter from {lora_path}")

    # Wrap the base model with the adapter weights, using the same dtype.
    lora_model = PeftModel.from_pretrained(
        base,
        lora_path,
        torch_dtype=torch.float16,
    )

    print("Applying the LoRA")
    # merge_and_unload() returns the model that is saved below.
    model = lora_model.merge_and_unload()

    print(f"Saving the target model to {output_path}")
    model.save_pretrained(output_path)


if __name__ == "__main__":
    # Script entry point: merge the fine-tuned adapter directory into the
    # DeepSeek Coder 6.7B base model and write the result to a local folder.
    apply_lora(
        model_name_or_path="deepseek-ai/deepseek-coder-6.7b-base",
        output_path="deepseek_finetuned_merged",
        lora_path="deepseek_model_finetuned",
    )

In [None]:
# Run the merge script written by the previous cell as a separate process.
!python merge_model.py

In [None]:
# Note that the merged model output contains no tokenizer-related files.
!ls deepseek_finetuned_merged

### 1.2 修改 config.json

- 将 `"max_position_embeddings": 16384` 修改成 `"max_position_embeddings": 8192`


![config_change](./images/max_pos_change.png)

In [None]:
# Rewrite max_position_embeddings from 16384 to 8192 in the merged model's
# config.json, in place. The substitution only matches the exact text
# `"max_position_embeddings": 16384`; if the file differs, nothing is changed.
# NOTE(review): presumably 8192 is required by Bedrock model import - confirm.
!sed -i 's/\"max_position_embeddings\"\: 16384/\"max_position_embeddings\"\: 8192/g' deepseek_finetuned_merged/config.json

## 2. 将训练过程中的 tokenizer 相关文件拷贝到合并的模型文件路径下

In [None]:
# Copy the three tokenizer files (special_tokens_map.json, tokenizer_config.json,
# tokenizer.json) from the fine-tuned checkpoint into the merged model directory.
!cp ./deepseek_model_finetuned/special_tokens_map.json ./deepseek_model_finetuned/tokenizer_config.json ./deepseek_model_finetuned/tokenizer.json ./deepseek_finetuned_merged

In [None]:
# List the merged model directory again to check the copied tokenizer files.
!ls ./deepseek_finetuned_merged

## 3. 将 merge 后的完整模型文件上传 S3

In [None]:
# Recursively upload the merged model directory to the default bucket under
# finetuned-model/deepseek_base6.7B_lora_merged. IPython substitutes the
# Python variable `sagemaker_default_bucket` into the command.
!aws s3 cp ./deepseek_finetuned_merged/ s3://{sagemaker_default_bucket}/finetuned-model/deepseek_base6.7B_lora_merged --recursive

In [None]:
# List the S3 prefix to confirm that the model files have been uploaded.
!aws s3 ls s3://{sagemaker_default_bucket}/finetuned-model/deepseek_base6.7B_lora_merged/

## 4. 通过 console 导入到 Bedrock

<!-- ![config_change](./images/import_to_bedrock.png) -->
<img src=./images/import_to_bedrock.png width=800 />

## 5. 测试推理

### 5.1 Console playground 推理

<img src=./images/test_model.jpeg width=800 />

### 5.2 Python SDK 调用测试
- 注意将 model_arn 的值改成自己导入到 Bedrock 的模型 ARN

In [None]:
import boto3
import json

# Replace this placeholder with the ARN of your own model imported into Bedrock.
model_arn = "Your model arn"

# Both the request payload and the expected response are JSON documents.
accept = 'application/json'
contentType = 'application/json'

# Prompt plus the generation settings, serialized as the request body.
body = json.dumps(dict(
    prompt='#write a quick sort algorithm',
    max_tokens=512,
    top_k=200,
    top_p=0.9,
    stop=[],
    temperature=0.5,
))

client = boto3.client(service_name="bedrock-runtime")
response = client.invoke_model(
    modelId=model_arn,
    body=body,
    contentType=contentType,
    accept=accept,
)

# The 'body' entry is read in full with .read() and then parsed as JSON.
response_body = json.loads(
    response.get('body').read()
)

# Print the text of the first entry in 'outputs'.
print(response_body.get('outputs')[0]["text"])

## Reference

 - https://aws.amazon.com/cn/blogs/aws/import-custom-models-in-amazon-bedrock-preview/?nc1=h_ls