In [8]:
from transformers import pipeline

# 加载下载好的模型方法
model_path = 'D:/hug_model/text_cls'
classifier = pipeline("sentiment-analysis", model=model_path)

classifier("I've been waiting for a Hugging Face course my whole life.")


# 列表输出
result = classifier(["I love programming in Python",
                     "I hate programming in C++"])
for r in result:
    print(f"label:{r['label']}, with score:{r['score']}")

Device set to use cpu


label:love, with score:0.642368495464325
label:anger, with score:0.7687773108482361


In [None]:
from datasets import load_dataset, Audio

dataset = load_dataset("PolyAI/minds14", name="en-US", split="train")


In [6]:
from modelscope.hub.snapshot_download import snapshot_download
from transformers import AutoTokenizer
model_dir = snapshot_download("Qwen/Qwen3-0.6B")
model_path = "D:/hug_model/models/Qwen/Qwen3-0___6B"
tokenizer = AutoTokenizer.from_pretrained(model_path)
encoding = tokenizer("I love Chengdu!")
print(encoding)

Downloading Model from https://www.modelscope.cn to directory: D:\hug_model\models\Qwen\Qwen3-0.6B


2025-05-05 09:20:06,537 - modelscope - INFO - Creating symbolic link [D:\hug_model\models\Qwen\Qwen3-0.6B].


{'input_ids': [40, 2948, 56707, 1054, 0], 'attention_mask': [1, 1, 1, 1, 1]}


In [10]:
# AutoTokenizer的简要使用
# 使用AutoTokenizer加载一个分词器
from transformers import AutoTokenizer

# model_name = "nlptown/bert-base-uncased-finetuned-sst-2-english" # 这种是使用API自动下载预训练模型
# 预先下载到本地的模型
model_name = "D:/hug_model/nlptown"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# 传入待分词的文本
eocoding = tokenizer("I love Chengdu!")
print(eocoding)
# 分词返回字典包括：input_ids:用数字表示的token, attention_mask：应该关注哪些token的指示。


# 也可以接受列表作为输入
pt_batch = tokenizer(
    ["I love Chengdu!", "I hate programming in C++"],
    padding=True,
    truncation=True,
    max_length=512,
    return_tensors="pt"
)
print(pt_batch)

{'input_ids': [101, 151, 11157, 35469, 11652, 106, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1]}
{'input_ids': tensor([[  101,   151, 11157, 35469, 11652,   106,   102,     0,     0],
        [  101,   151, 39487, 23515, 10104,   145,   116,   116,   102]]), 'token_type_ids': tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 0, 0],
        [1, 1, 1, 1, 1, 1, 1, 1, 1]])}


In [None]:
# AutoModel的简要使用
from transformers import AutoModelForSequenceClassification

model_name = "D:/hug_model/nlptown"

# 加载模型
pt_model = AutoModelForSequenceClassification.from_pretrained(model_name)

# 使用模型进行推理
outputs = pt_model(**pt_batch)

from torch.nn import functional as F

# 模型在 logits 属性输出最终的激活结果. 
# 在 logits 上应用 softmax 函数来查询概率:
pt_predictions = F.softmax(outputs.logits, dim=-1)
print(pt_predictions)




tensor([[0.0048, 0.0044, 0.0194, 0.1275, 0.8439],
        [0.6015, 0.2285, 0.0586, 0.0417, 0.0697]], grad_fn=<SoftmaxBackward0>)


In [13]:
# 保存模型
# 当模型微调完成后，可以使用PreTrainedModel.save_pretrained()
# 把模型和它的分词器保存下来。

pt_save_path = "../models/pt_save_pretrained"
tokenizer.save_pretrained(pt_save_path)
pt_model.save_pretrained(pt_save_path)

# 当下次使用这个模型的时候，可以使用PreTrainedModel.from_pretrained()加载
# pt_model = AutoModelForSequenceClassification.from_pretrained("../models/pt_save_pretrained")




In [None]:
# transformers可以将保存模型加载成为另一个框架模型
from transformers import TFAutoModelForSequenceClassification

tokenizer = AutoTokenizer.from_pretrained(pt_save_path)
tf_model = TFAutoModelForSequenceClassification.from_pretrained(pt_save_path, from_pt=True)

In [None]:
# AutoConfig
from transformers import AutoConfig, AutoModel
my_config = AutoConfig.from_pretrained("../models/pt_save_pretrained", n_head=12)

my_model = AutoModel.from_config(my_config) # 从自定义的配置文件加载配置文件


In [None]:
# trainer-pytorch优化训练循环
