## Creating Custom Pipelines in Hugging Face

In [None]:
from transformers import pipeline

#* Text classification
my_pipeline = pipeline('text-classification', model="distilbert-base-uncased-finetuned-sst-2-english")
print(my_pipeline("Marin is typically late by 2 hours from the expected time."))
print(my_pipeline("Toni is always on time."))

#* Question Answering
model_name = "deepset/roberta-base-squad2"
context = 'There is hereby created a Professional Regulatory Board of Electronics Engineering, hereinafter referred to as the Board, under the administrative control and supervision of the Professional Regulation Commission, hereinafter referred to as the Commission, composed of a chairman and two (2) members who shall be appointed by the President of the Philippines from the three (3) recommendees per position chosen and ranked by the Commission, which recommendees shall in turn be chosen from the five (5) nominees for each position submitted by the accredited professional organization, in accordance with rules and regulations presently in existence or that may be promulgated for such purpose.'

qa_pipeline = pipeline('question-answering',model=model_name, tokenizer=model_name)
QA_Input = {
    'question':'What is the composition of the board in Electronics Engineering Regulatory Board?',
    'context' :context
}
print(qa_pipeline(QA_Input)['answer'])

QA_Input = {
    'question':'What will the president use as basis in appointing the board?',
    'context' :context
}
print(qa_pipeline(QA_Input)['answer'])

Device set to use cuda:0


[{'label': 'NEGATIVE', 'score': 0.9892070293426514}]
[{'label': 'POSITIVE', 'score': 0.9987523555755615}]


Device set to use cuda:0


a chairman and two (2) members
three (3) recommendees per position chosen and ranked by the Commission


In [16]:
original_text = '\nGreece has many islands, with estimates ranging from somewhere around 1,200 to 6,000, depending on the minimum size to take into account. The number of inhabited islands is variously cited as between 166 and 227.\nThe Greek islands are traditionally grouped into the following clusters: the Argo-Saronic Islands in the Saronic Gulf near Athens; the Cyclades, a large but dense collection occupying the central part of the Aegean Sea; the North Aegean islands, a loose grouping off the west coast of Turkey; the Dodecanese, another loose collection in the southeast between Crete and Turkey; the Sporades, a small tight group off the coast of Euboea; and the Ionian Islands, chiefly located to the west of the mainland in the Ionian Sea. Crete with its surrounding islets and Euboea are traditionally excluded from this grouping.\n'

#* Text summarization

# Create the summarization pipeline
summarizer = pipeline(task="summarization", model="cnicu/t5-small-booksum",max_length=50)

# Summarize the text
summary_text = summarizer(original_text)

# Compare the length
print(f"Original text length: {len(original_text)}")
print(f"Summary length: {summary_text[0]}")

Device set to use cuda:0


Original text length: 829
Summary length: {'summary_text': 'Greece has many islands, with estimates ranging from around 1,200 to 6,000 depending on the minimum size to take into account. The number of inhabited islands is variously cited as between 166 and 227. The Greek islands are'}


## Auto Models and Tokenizers

- More control over behavior. Allows control of configuration.
- The tokenizer clean input and split text into tokens

In [23]:
from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline

tokenizer = AutoTokenizer.from_pretrained("deepset/roberta-base-squad2")
model = AutoModelForQuestionAnswering.from_pretrained("deepset/roberta-base-squad2")

qa_pipeline = pipeline(task='question-answering',model=model,tokenizer=tokenizer)

QA_Input = {
    'question':'What is the composition of the board in Electronics Engineering Regulatory Board?',
    'context' :context
}
print(qa_pipeline(QA_Input)['answer'])

QA_Input = {
    'question':'What will the president use as basis in appointing the board?',
    'context' :context
}
print(qa_pipeline(QA_Input)['answer'])

Device set to use cuda:0


a chairman and two (2) members
three (3) recommendees per position chosen and ranked by the Commission
