# LLM Examples

In [None]:
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    pipeline,
    set_seed,
)

In [None]:
# Name the checkpoint explicitly: calling pipeline() with only a task name falls
# back to an implicit default model and emits a "not recommended" warning.
classifier = pipeline(
    'sentiment-analysis',   # task you are interested in
    model='distilbert-base-uncased-finetuned-sst-2-english',
)
res = classifier('I think this is not bad at all!')

No model was supplied, defaulted to distilbert-base-uncased-finetuned-sst-2-english and revision af0f99b (https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

In [None]:
# Show the sentiment label and confidence score returned by the classifier.
print(res)

[{'label': 'POSITIVE', 'score': 0.999045193195343}]


## Text Generation

In [None]:
# The continuations are sampled (each returned sequence differs), so fix the
# RNG seed first to make the generated texts repeatable across runs.
set_seed(42)
generator = pipeline('text-generation', model='distilgpt2')
res = generator(
    "I think this is a good day to",
    max_length=30,            # upper bound on prompt + generated tokens
    num_return_sequences=3,   # ask for three alternative continuations
)
print(res)

config.json:   0%|          | 0.00/762 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/353M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


[{'generated_text': "I think this is a good day to get to work, I've been to school and my mom's a huge cheerleader. I'm pretty sure"}, {'generated_text': 'I think this is a good day to be working with as a member of the KA in 2017, and I hope you will like this year as'}, {'generated_text': "I think this is a good day to get to know your wife. I don't want to give her the opportunity to just sit back and watch the"}]


## Zero-shot classification

In [None]:
# Zero-shot classification: score a sentence against an arbitrary list of
# candidate labels. The checkpoint is named explicitly so the cell does not
# depend on the task's implicit default model.
classifier = pipeline('zero-shot-classification', model='facebook/bart-large-mnli')
res = classifier(
    "I think arduino is better that raspberry pi",
    candidate_labels=['informatics', 'fruits', 'politics'],
)

No model was supplied, defaulted to facebook/bart-large-mnli and revision c626438 (https://huggingface.co/facebook/bart-large-mnli).
Using a pipeline without specifying a model name and revision in production is not recommended.


In [None]:
# Inspect the zero-shot result (a dict with 'sequence', 'labels' and 'scores').
# NOTE(review): the saved output echoes the sentence from the following cell, not the
# arduino sentence classified above; it looks stale, so re-run top to bottom to refresh.
res

{'sequence': "When I eat raspberry I'm happy",
 'labels': ['fruits', 'informatics', 'politics'],
 'scores': [0.988967776298523, 0.008594643324613571, 0.0024376523215323687]}

In [None]:
# Reuse the zero-shot classifier on a second sentence with the same candidate
# labels, then display the resulting dict as the cell output.
res = classifier(
    "When I eat raspberry I'm happy",
    candidate_labels=[
        'informatics',
        'fruits',
        'politics',
    ],
)
res

{'sequence': "When I eat raspberry I'm happy",
 'labels': ['fruits', 'informatics', 'politics'],
 'scores': [0.988967776298523, 0.008594643324613571, 0.0024376523215323687]}

Other tasks supported by `pipeline()`: https://huggingface.co/docs/transformers/main_classes/pipelines#transformers.pipeline.task

In [None]:
# Load the model and its tokenizer explicitly and hand both objects to
# pipeline(), instead of passing only a task name. The tokenizer object is
# reused on its own in the cells that follow.
model_name = 'distilbert-base-uncased-finetuned-sst-2-english'
model = AutoModelForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

classifier = pipeline('sentiment-analysis', model=model, tokenizer=tokenizer)

res = classifier('I think this is not bad at all!')
print(res)

[{'label': 'POSITIVE', 'score': 0.999045193195343}]


In [None]:
# Calling the tokenizer directly yields the model inputs for the sentence:
# the token ids ('input_ids') plus an 'attention_mask' of the same length.
sentence = "I'd say Transformer library is cool"
res = tokenizer(sentence)
print(res)

{'input_ids': [101, 1045, 1005, 1040, 2360, 10938, 2121, 3075, 2003, 4658, 102], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}


In [None]:
# Split the sentence into the tokenizer's lower-cased sub-word tokens; a '##'
# prefix marks a piece that continues the previous token ('transform' + '##er').
tokens = tokenizer.tokenize(sentence)
print(tokens)

['i', "'", 'd', 'say', 'transform', '##er', 'library', 'is', 'cool']


In [None]:
# Map each token to its vocabulary id. Unlike calling tokenizer(sentence)
# directly, this does not add the extra 101 / 102 ids at the two ends.
ids = tokenizer.convert_tokens_to_ids(tokens)
print(ids)

[1045, 1005, 1040, 2360, 10938, 2121, 3075, 2003, 4658]


In [None]:
# Turn the ids back into text; the result is the lower-cased sentence.
tokenizer.decode(ids)

"i'd say transformer library is cool"

## Other models
Browse more models on the Hugging Face Hub: https://huggingface.co/models

In [None]:
# Other text-generation checkpoints plug in the same way; this one is Italian.
# Seed the RNG first so the sampled continuations are repeatable across runs.
set_seed(42)
generator = pipeline("text-generation", model="GroNLP/gpt2-medium-italian-embeddings")
generator(
    "Io penso che oggi sia",
    max_length=30,            # upper bound on prompt + generated tokens
    num_return_sequences=3,   # ask for three alternative continuations
)


[{'generated_text': "Io penso che oggi sia il momento in cui l'Europa si puٍ cominciare a fare i conti con la realtà. La crisi del nostro sistema economico"},
 {'generated_text': "Io penso che oggi sia il momento in cui l'Europa si puٍ cominciare a fare i conti con la realtà. La crisi del nostro sistema produttivo"},
 {'generated_text': "Io penso che oggi sia il momento in cui l'Europa si puٍ cominciare a fare i conti con la realtà del mondo e al futuro. E"}]