# 下载并缓存模型

In [1]:
import os

from modelscope.hub.snapshot_download import snapshot_download
# Fetch the 'langboat/bloom-800m-zh' snapshot through ModelScope, using the
# local 'model' directory as the cache; the call's return value is kept in
# `model_dir`.
model_dir = snapshot_download(
    'langboat/bloom-800m-zh',
    cache_dir='model',
)

2024-05-03 20:35:56,356 - modelscope - INFO - PyTorch version 2.3.0 Found.
2024-05-03 20:35:56,358 - modelscope - INFO - Loading ast index from C:\Users\dafei\.cache\modelscope\ast_indexer
2024-05-03 20:35:56,445 - modelscope - INFO - No valid ast index found from C:\Users\dafei\.cache\modelscope\ast_indexer, generating ast index from prebuilt!
2024-05-03 20:35:56,518 - modelscope - INFO - Loading done! Current index file version is 1.14.0, with md5 c942686ad53b71759433c4324931c7b6 and a total number of 976 components indexed
Downloading: 100%|██████████| 801/801 [00:00<?, ?B/s] 
Downloading: 100%|██████████| 3.54k/3.54k [00:00<00:00, 3.65MB/s]
Downloading: 100%|█████████▉| 3.06G/3.06G [01:13<00:00, 44.5MB/s]
Downloading: 100%|██████████| 1.04k/1.04k [00:00<?, ?B/s]
Downloading: 100%|██████████| 96.0/96.0 [00:00<?, ?B/s]
Downloading: 100%|██████████| 2.54M/2.54M [00:00<00:00, 11.3MB/s]
Downloading: 100%|██████████| 288/288 [00:00<?, ?B/s] 


# 下载数据集

In [14]:
import requests
import os
# URL of the raw dataset file in the GitHub repository, and where to store it locally.
# HTTPS is used (with the default certificate verification) so the download
# cannot be silently tampered with in transit.
data_url = "https://raw.githubusercontent.com/SCIR-HI/Huatuo-Llama-Med-Chinese/main/data/llama_data.json"
data_base_path = "data"
file_path = f"{data_base_path}/llama_data.json"

# exist_ok=True keeps the cell safe to re-run when the directory already exists.
os.makedirs(data_base_path, exist_ok=True)

# Browser-like User-Agent; note the space separating the "Gecko)" and "Chrome/" parts.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                         'Chrome/112.0.0.0 Safari/537.36'}

# (connect, read) timeouts in seconds so a stalled connection cannot hang the kernel.
response = requests.get(data_url, headers=headers, timeout=(10, 60))
# Raise on 4xx/5xx instead of saving an error page as if it were the dataset.
response.raise_for_status()

# The file is opened only after the download succeeded, so a failed request
# never truncates a previously downloaded copy. The `with` block flushes and
# closes the file on exit.
with open(file_path, "wb") as f:
    f.write(response.content)



# 获取模型中支持 LoRA 扩展的层

In [10]:
import torch
from transformers import Conv1D, AutoModelForCausalLM


def get_specific_layer_names(model):
    """Collect a short name for each Linear/Embedding/Conv2d/Conv1D submodule.

    The short name is the fifth dot-separated component (index 4) of the
    submodule's qualified name. A matching submodule whose qualified name has
    fewer than five components contributes an empty string. Entries are not
    de-duplicated and follow the order of ``model.named_modules()``.

    Args:
        model: Object exposing ``named_modules()`` that yields
            ``(qualified_name, module)`` pairs.

    Returns:
        list[str]: One entry per matching submodule.
    """
    wanted_types = (torch.nn.Linear, torch.nn.Embedding, torch.nn.Conv2d, Conv1D)
    short_names = []

    for qualified_name, submodule in model.named_modules():
        if not isinstance(submodule, wanted_types):
            continue
        # Components never contain dots, so index 4 is exactly the text that
        # precedes the first dot once the first four components are removed.
        components = qualified_name.split('.')
        short_names.append(components[4] if len(components) > 4 else '')

    return short_names

# Load the checkpoint from the local 'model' directory in half precision,
# placed on the CUDA device.
model = AutoModelForCausalLM.from_pretrained(
    "model/langboat/bloom-800m-zh",
    low_cpu_mem_usage=True,
    torch_dtype=torch.half,
    device_map="cuda",
)

# De-duplicate the collected short names before printing them.
unique_layer_names = set(get_specific_layer_names(model))
print(list(unique_layer_names))

['', 'dense', 'dense_h_to_4h', 'query_key_value', 'dense_4h_to_h']


# 测试发布模型

In [14]:
from modelscope.hub.snapshot_download import snapshot_download
# Model page: https://www.modelscope.cn/models/dafei1288/Jimmy_Med/summary
# Fetch the published model through ModelScope, using the local 'model'
# directory as the cache.
model_dir = snapshot_download(
    'dafei1288/Jimmy_Med',
    cache_dir='model',
)


from transformers import pipeline,AutoTokenizer,AutoModelForCausalLM
import torch

# Local path of the published model; the tokenizer and the weights are both
# loaded from this one location.
published_model_path = "model/dafei1288/Jimmy_Med"

retokenizer = AutoTokenizer.from_pretrained(published_model_path)
remodel = AutoModelForCausalLM.from_pretrained(published_model_path,
                                               low_cpu_mem_usage=True,
                                               torch_dtype=torch.half,
                                               device_map="cuda")
repipe = pipeline("text-generation", model=remodel, tokenizer=retokenizer, truncation=True)

# Prompt layout: "Human: <question>\n<extra input>" followed by the assistant
# marker. The extra input is empty here, and strip() removes the dangling
# newline it would otherwise leave behind.
question = "关节部位红肿疼痛，排尿困难,怎么办？"
ipt = "Human: {}\n{}".format(question, "").strip() + "\n\nAssistant: "

# truncation=True is repeated at call time: the recorded run still emitted the
# "Truncation was not explicitly activated" warning when the flag was given
# only to pipeline(), which suggests the constructor-level value was not
# applied to this call (NOTE(review): confirm against the installed
# transformers version).
print(repipe(ipt, max_length=400, do_sample=True, truncation=True))

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


[{'generated_text': 'Human: 关节部位红肿疼痛，排尿困难,怎么办？\n\nAssistant: 考虑风湿热引起，治疗首选甲泼尼龙、人血白蛋白等治疗方案，并进行肾功能和电解质等辅助检查。辅助检查可用腹壁肌电图。'}]
