In [None]:
from transformers import pipeline 

# Sentiment analysis

In [None]:
# Build the sentiment-analysis pipeline once so the following cells can reuse it.
# No model is named here, so the library selects its default checkpoint for the task.
classifier = pipeline(task="sentiment-analysis")

In [None]:
# Classify a weather-forecast sentence and report the first returned prediction.
result = classifier("Today's weather forecast predicts heavy rain and thunderstorms")[0]
print("label: {}, with score: {}".format(result["label"], round(result["score"], 4)))

In [None]:
# Same check on a short motivational phrase; keep only the first prediction.
predictions = classifier("Dare to be a champion")
result = predictions[0]
print("label: %s, with score: %s" % (result["label"], round(result["score"], 4)))

# Sequence classification model (paraphrase detection)

In [None]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from transformers import XLMRobertaTokenizer, XLMRobertaForSequenceClassification
import torch

# Load the tokenizer and a sequence-classification model from the same checkpoint.
# NOTE(review): "xlm-roberta-base" is a base checkpoint, so its classification head
# is presumably not fine-tuned for paraphrase detection; treat the percentages below
# as illustrative until a paraphrase-fine-tuned checkpoint is used -- confirm.
tokenizer = AutoTokenizer.from_pretrained("xlm-roberta-base")
model = AutoModelForSequenceClassification.from_pretrained("xlm-roberta-base")

# Softmax output at index i is reported under classes[i].
classes = ["not paraphrase", "is paraphrase"]

sequence_0 = "The company Amazon is based in Seattle City"
sequence_1 = "Oranges are basically bad for your health"
sequence_2 = "HuggingFace's headquarters are situated in Manhattan"

# Each sentence pair is encoded together as a single model input.
paraphrase = tokenizer(sequence_0, sequence_2, return_tensors="pt")
# Fixed: this pair used to be (sequence_0, sequence_2) again, which made both
# result sets identical; the contrasting pair is (sequence_0, sequence_1).
not_paraphrase = tokenizer(sequence_0, sequence_1, return_tensors="pt")

# Inference only, so skip autograd bookkeeping.
with torch.no_grad():
    paraphrase_classification_logits = model(**paraphrase).logits
    not_paraphrase_classification_logits = model(**not_paraphrase).logits

# Turn logits into probabilities and unwrap the single batch row.
paraphrase_results = torch.softmax(paraphrase_classification_logits, dim=1).tolist()[0]
not_paraphrase_results = torch.softmax(not_paraphrase_classification_logits, dim=1).tolist()[0]

# Report the first pair, then the second, as whole percentages per class.
for class_name, probability in zip(classes, paraphrase_results):
    print(f"{class_name}: {round(probability * 100)}%")

for class_name, probability in zip(classes, not_paraphrase_results):
    print(f"{class_name}: {round(probability * 100)}%")

# Question answering model

In [None]:
from transformers import pipeline

# Question-answering pipeline; no model is named, so the library's default is used.
question_answerer = pipeline(task="question-answering")

# Passage the questions in the next cell are answered from.
context = (
    "Artificial intelligence (AI) is revolutionizing various industries, including healthcare, finance, and transportation. "
    "Its ability to analyze large datasets and perform complex tasks with minimal human intervention has led to significant advancements. "
    "Companies are investing heavily in AI research and development to improve efficiency, productivity, and decision-making. "
    "However, ethical concerns surrounding AI, such as data privacy and algorithmic bias, remain important considerations in its widespread adoption."
)

In [None]:
# Ask each question against the shared context and print the answer with its
# score and the start/end values the pipeline reports.
for question in (
    "How is artificial intelligence impacting different industries?",
    "What are some examples of tasks that artificial intelligence can perform?",
):
    result = question_answerer(question=question, context=context)
    answer, score = result["answer"], round(result["score"], 4)
    print(f"Answer: '{answer}', score: {score}, start: {result['start']}, end: {result['end']}")

# Example of question answering using a model and a tokenizer

In [None]:
from transformers import AutoTokenizer, AutoModelForQuestionAnswering
import torch

# Load the tokenizer and question-answering model from the same checkpoint.
tokenizer = AutoTokenizer.from_pretrained("bert-large-uncased-whole-word-masking-finetuned-squad")
model = AutoModelForQuestionAnswering.from_pretrained("bert-large-uncased-whole-word-masking-finetuned-squad")

# Passage that every question below is paired with.
text = "The weather department plays a crucial role in forecasting and monitoring meteorological conditions to provide accurate weather predictions. Using advanced technology such as satellites, radar systems, and weather models, meteorologists analyze atmospheric data to forecast temperature, precipitation, wind patterns, and severe weather events. These forecasts are essential for various sectors, including agriculture, aviation, and disaster management, to make informed decisions and mitigate risks associated with adverse weather conditions. Continuous advancements in meteorological science and technology enable the weather department to improve the accuracy and timeliness of weather forecasts, ultimately enhancing public safety and societal resilience to weather-related hazards."

# NOTE: the printed answer is a token span sliced out of the encoded input,
# so these yes/no style questions yield a span of text rather than "yes"/"no".
questions = [
    "Does the weather department use satellites and radar systems for forecasting?",
    "Are accurate weather forecasts important for sectors like agriculture and aviation?",
    "Do advancements in meteorological science help improve the accuracy of weather predictions?",
]

for question in questions:
    # Encode the question and the passage together as one input.
    inputs = tokenizer(question, text, add_special_tokens=True, return_tensors="pt")
    input_ids = inputs["input_ids"].tolist()[0]
    # Inference only: avoid building an autograd graph for this large model.
    with torch.no_grad():
        outputs = model(**inputs)
    answer_start_scores = outputs.start_logits
    answer_end_scores = outputs.end_logits
    # Get the most likely beginning of answer with the argmax of the score
    answer_start = int(torch.argmax(answer_start_scores))
    # Get the most likely end of answer with the argmax of the score
    # (+1 because the slice end below is exclusive)
    answer_end = int(torch.argmax(answer_end_scores)) + 1
    answer_tokens = tokenizer.convert_ids_to_tokens(input_ids[answer_start:answer_end])
    answer = tokenizer.convert_tokens_to_string(answer_tokens)
    print(f"Question: {question}")
    print(f"Answer: {answer}")

# Masked Language Modeling

In [None]:
from transformers import pipeline

In [None]:
# Build the fill-mask pipeline under `unmasker`, the name the following cell calls
# (it was previously bound to `unimask`, leaving `unmasker` undefined).
unmasker = pipeline("fill-mask")

In [None]:
# pprint was called here without ever being imported; bring it into scope.
from pprint import pprint

# Pretty-print the pipeline's suggestions for the masked position in the sentence.
pprint(unmasker(f"HuggingFace's initiative involves building a {unmasker.tokenizer.mask_token} to address NLP challenges, including those related to  weather criteria."))

# Example of masked language modeling using a model and a tokenizer

In [None]:
from transformers import AutoModelForMaskedLM, AutoTokenizer
import torch

# Load the tokenizer and masked-language model from the same checkpoint.
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-cased")
model = AutoModelForMaskedLM.from_pretrained("distilbert-base-cased")

# Sentence with one mask token inserted where a word should be predicted.
sequence = "Distilled models are smaller than the models they mimic. Using them instead of the large " \
    f"versions would benefit the entomology department by {tokenizer.mask_token} our understanding of insect behavior."

inputs = tokenizer(sequence, return_tensors="pt")
# torch.where returns one index tensor per dimension; [1] keeps the positions
# along the sequence dimension where the mask token id occurs.
mask_token_index = torch.where(inputs["input_ids"] == tokenizer.mask_token_id)[1]

# Inference only, so skip autograd bookkeeping.
with torch.no_grad():
    token_logits = model(**inputs).logits
# Keep only the vocabulary scores at the masked position(s) of the first batch row.
mask_token_logits = token_logits[0, mask_token_index, :]

# Ids of the five highest-scoring vocabulary entries for the first masked position.
top_5_tokens = torch.topk(mask_token_logits, 5, dim=1).indices[0].tolist()

# Show the sentence with each candidate substituted for the mask token.
for token in top_5_tokens:
    print(sequence.replace(tokenizer.mask_token, tokenizer.decode([token])))