In [2]:
from transformers import pipeline

# 使用本地下载的模型文件
BASE_FOLDER = '/Users/mtide/Documents/code/AI/models/'

# 1. 情感分析
MDL_NAME = 'lxyuan/distilbert-base-multilingual-cased-sentiments-student'
pipe = pipeline('sentiment-analysis',
    model=BASE_FOLDER + MDL_NAME
)

r = pipe("今儿上海可真冷啊")
print(r)

r = pipe("我觉得这家店蒜泥白肉的味道一般")
print(r)

r = pipe("你学东西真的好快，理论课一讲就明白了")
print(r)

r = pipe("You learn things really quickly. You understand the theory class as soon as it is taught.")
print(r)

r = pipe("Today Shanghai is really cold.")
print(r)

[{'label': 'negative', 'score': 0.6657696962356567}]
[{'label': 'neutral', 'score': 0.6030055284500122}]
[{'label': 'positive', 'score': 0.9461327791213989}]
[{'label': 'positive', 'score': 0.7639099359512329}]
[{'label': 'negative', 'score': 0.7824517488479614}]


In [3]:
text_list = [
    "Today Shanghai is really cold.",
    "I think the taste of the garlic mashed pork in this store is average.",
    "You learn things really quickly. You understand the theory class as soon as it is taught.",
    "你学东西真的好快，理论课一讲就明白了"
]

pipe(text_list)

[{'label': 'negative', 'score': 0.7824517488479614},
 {'label': 'negative', 'score': 0.37749969959259033},
 {'label': 'positive', 'score': 0.7639099359512329},
 {'label': 'positive', 'score': 0.9461327791213989}]

In [13]:
# 2. 命名实体识别（NER）

# MDL_NAME = 'dbmdz/bert-large-cased-finetuned-conll03-english'
MDL_NAME = 'dslim/bert-base-NER'
classifier = pipeline(task="ner",
                model=BASE_FOLDER + MDL_NAME)

preds = classifier("Hugging Face is a French company based in New York City.")
preds = [
    {
        "entity": pred["entity"],
        "score": round(pred["score"], 4),
        "index": pred["index"],
        "word": pred["word"],
        "start": pred["start"],
        "end": pred["end"],
    }
    for pred in preds
]
print(*preds, sep="\n")

Some layers from the model checkpoint at /Users/mtide/Documents/code/AI/models/dslim/bert-base-NER were not used when initializing TFBertForTokenClassification: ['dropout_37']
- This IS expected if you are initializing TFBertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertForTokenClassification were initialized from the model checkpoint at /Users/mtide/Documents/code/AI/models/dslim/bert-base-NER.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertForTokenClassification for predictions without further training.


{'entity': 'B-ORG', 'score': 0.8935, 'index': 1, 'word': 'Hu', 'start': 0, 'end': 2}
{'entity': 'I-ORG', 'score': 0.915, 'index': 2, 'word': '##gging', 'start': 2, 'end': 7}
{'entity': 'I-ORG', 'score': 0.9777, 'index': 3, 'word': 'Face', 'start': 8, 'end': 12}
{'entity': 'B-MISC', 'score': 0.9996, 'index': 6, 'word': 'French', 'start': 18, 'end': 24}
{'entity': 'B-LOC', 'score': 0.9995, 'index': 10, 'word': 'New', 'start': 42, 'end': 45}
{'entity': 'I-LOC', 'score': 0.9994, 'index': 11, 'word': 'York', 'start': 46, 'end': 50}
{'entity': 'I-LOC', 'score': 0.9996, 'index': 12, 'word': 'City', 'start': 51, 'end': 55}


In [5]:
classifier = pipeline(task="ner", model=BASE_FOLDER + MDL_NAME, grouped_entities=True)
classifier("Hugging Face is a French company based in New York City.")

[{'entity_group': 'positive',
  'score': 0.37662995,
  'word': 'Hu',
  'start': 0,
  'end': 2},
 {'entity_group': 'neutral',
  'score': 0.35554093,
  'word': '##gging',
  'start': 2,
  'end': 7},
 {'entity_group': 'positive',
  'score': 0.38128707,
  'word': 'Face is a French company based in New York City.',
  'start': 8,
  'end': 56}]

In [6]:
# 3. Question Answering
MDL_NAME = 'google-bert/bert-large-cased-whole-word-masking-finetuned-squad'

question_answerer = pipeline("question-answering", model=BASE_FOLDER + MDL_NAME)

preds = question_answerer(
    question="What is the name of the repository?",
    context="The name of the repository is huggingface/transformers",
)
print(
    f"score: {round(preds['score'], 4)}, start: {preds['start']}, end: {preds['end']}, answer: {preds['answer']}"
)

All model checkpoint layers were used when initializing TFBertForQuestionAnswering.

All the layers of TFBertForQuestionAnswering were initialized from the model checkpoint at /Users/mtide/Documents/code/AI/models/google-bert/bert-large-cased-whole-word-masking-finetuned-squad.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertForQuestionAnswering for predictions without further training.


score: 0.9517, start: 30, end: 54, answer: huggingface/transformers


In [7]:
# 4. Summarization
summarizer = pipeline(task="summarization",
                      model="google/flan-t5-xl",
                      min_length=8,
                      max_length=32,
)
summarizer(
    """
    In this work, we presented the Transformer, the first sequence transduction model based entirely on attention, 
    replacing the recurrent layers most commonly used in encoder-decoder architectures with multi-headed self-attention. 
    For translation tasks, the Transformer can be trained significantly faster than architectures based on recurrent or convolutional layers. 
    On both WMT 2014 English-to-German and WMT 2014 English-to-French translation tasks, we achieve a new state of the art. 
    In the former task our best model outperforms even all previously reported ensembles.
    """
)


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

[{'summary_text': 'We present the Transformer, the first sequence transduction model based entirely on attention, replacing the recurrent layers most commonly used in encoder-de'}]

In [12]:
# 5. Audio classification
MDL_NAME = 'superb/hubert-base-superb-ks'

classifier = pipeline(task="audio-classification", model=BASE_FOLDER + MDL_NAME)

preds = classifier(BASE_FOLDER + '../data/mlk.flac')
preds = [{"score": round(pred["score"], 4), "label": pred["label"]} for pred in preds]
preds

Some weights of the model checkpoint at /Users/mtide/Documents/code/AI/models/superb/hubert-base-superb-ks were not used when initializing HubertForSequenceClassification: ['hubert.encoder.pos_conv_embed.conv.weight_g', 'hubert.encoder.pos_conv_embed.conv.weight_v']
- This IS expected if you are initializing HubertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing HubertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of HubertForSequenceClassification were not initialized from the model checkpoint at /Users/mtide/Documents/code/AI/models/superb/hubert-base-superb-ks and are newly initialized: ['hubert.encoder.pos_conv_embed.conv.paramet

[{'score': 0.8031, 'label': '_unknown_'},
 {'score': 0.055, 'label': 'right'},
 {'score': 0.046, 'label': 'down'},
 {'score': 0.0349, 'label': 'go'},
 {'score': 0.0269, 'label': 'no'}]

In [14]:
# 7. Image Classificaiton
MDL_NAME = 'microsoft/resnet-50'

classifier = pipeline(task="image-classification", model=BASE_FOLDER + MDL_NAME)

preds = classifier(
    BASE_FOLDER + '../data/pipeline-cat-chonk.jpeg'
)
preds = [{"score": round(pred["score"], 4), "label": pred["label"]} for pred in preds]
print(*preds, sep="\n")

All model checkpoint layers were used when initializing TFResNetForImageClassification.

All the layers of TFResNetForImageClassification were initialized from the model checkpoint at /Users/mtide/Documents/code/AI/models/microsoft/resnet-50.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFResNetForImageClassification for predictions without further training.


{'score': 0.5874, 'label': 'lynx, catamount'}
{'score': 0.1289, 'label': 'tabby, tabby cat'}
{'score': 0.075, 'label': 'marmot'}
{'score': 0.0382, 'label': 'badger'}
{'score': 0.0131, 'label': 'Egyptian cat'}
