# HuggingFace Transformers

Solving typical Natural Language Processing tasks with Transformers

Made by [Artem Konevskikh](https://aiculedssul.net/)

In [None]:
#@title Install
!pip install transformers

import os
import torch
import pandas as pd
from transformers import pipeline

from google.colab import data_table
from IPython.display import clear_output 




# Turn on the google.colab `data_table` formatter for DataFrames displayed
# by later cells (e.g. the NER results table).
data_table.enable_dataframe_formatter()

# Prefer the first CUDA GPU, but fall back to the CPU so the constant stays
# usable on runtimes that have no GPU accelerator attached.
DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

## Typical NLP tasks

In [None]:
#@title Named Entity Recognition
#@markdown Named Entity Recognition (NER) assigns a class to every token, for example marking a token as a person, an organisation or a location.
# Build the NER pipeline once; the cells below reuse it for every input text.
ner_pipe = pipeline(
  task="ner",
  aggregation_strategy="simple",
  use_fast=True,
)

In [None]:
#@markdown **Find all entities**
text = "Timothy Bloxam Morton (born 19 June 1968) is a professor and Rita Shea Guffey Chair in English at Rice University."  #@param {type:"string"}
is_textfile = False  #@param {type:"boolean"}

# `text` holds either the text itself or, when `is_textfile` is checked,
# the path of a text file whose contents should be analysed.
if is_textfile:
  # Fail loudly on a bad path: otherwise `sequence` would be undefined on a
  # fresh kernel, or silently keep the value left over from another cell.
  if not os.path.isfile(text):
    raise FileNotFoundError(f"Text file not found: {text}")
  with open(text, "r", encoding="utf-8") as f:
    sequence = f.read()
else:
  sequence = text

# Run NER and show the detected entities as a table (last expression of the cell).
entities = ner_pipe(sequence)
df_entities = pd.DataFrame(entities)
df_entities

In [None]:
#@markdown **Print entities of certain type**
# Maps the human-readable names offered in the form to the tags compared
# against the "entity_group" field of each NER result.
entity_dict = dict(
  Person="PER",
  Organization="ORG",
  Location="LOC",
  Miscellaneous="MISC",
)
entity_group = 'Organization' #@param ["Person", "Organization", "Location", "Miscellaneous"]

# Print the text of every entity whose group matches the selected type.
wanted_tag = entity_dict[entity_group]
for found in entities:
  if found["entity_group"] != wanted_tag:
    continue
  print(found["word"])

In [None]:
#@title Classification
#@markdown **Sentiment Analysis**: deciding whether a text is positive or negative

# Created once here and reused by the classification cell below.
classifier = pipeline(task="sentiment-analysis")

In [None]:
text = "Timothy Bloxam Morton (born 19 June 1968) is a professor and Rita Shea Guffey Chair in English at Rice University."  #@param {type:"string"}
is_textfile = False  #@param {type:"boolean"}

# `text` holds either the text itself or, when `is_textfile` is checked,
# the path of a text file whose contents should be classified.
if is_textfile:
  # Fail loudly on a bad path: otherwise `sequence` would be undefined on a
  # fresh kernel, or silently keep the value left over from another cell.
  if not os.path.isfile(text):
    raise FileNotFoundError(f"Text file not found: {text}")
  with open(text, "r", encoding="utf-8") as f:
    sequence = f.read()
else:
  sequence = text

# The classifier returns a list of results; report the label and score of the first one.
sent = classifier(sequence)
print(f"Text is {sent[0]['label']}, with score: {round(sent[0]['score']*100, 2)}%")

In [None]:
#@title Summarization
#@markdown Summarization condenses a document or an article into a shorter text.
# Created once here and reused by the summarization cell below.
summarizer = pipeline(task="summarization")

In [None]:
#@markdown **Summarize**
text = "/content/ooo.txt"  #@param {type:"string"}
is_textfile = True  #@param {type:"boolean"}
min_length = 30 #@param {type:"integer"}
max_length = 40 #@param {type:"integer"}

# `text` holds either the text itself or, when `is_textfile` is checked,
# the path of a text file whose contents should be summarized.
if is_textfile:
  # Fail loudly on a bad path: otherwise `sequence` would be undefined on a
  # fresh kernel, or silently keep the value left over from another cell.
  if not os.path.isfile(text):
    raise FileNotFoundError(f"Text file not found: {text}")
  with open(text, "r", encoding="utf-8") as f:
    sequence = f.read()
else:
  sequence = text

# Only the first 4096 characters of the input are passed to the summarizer.
summary = summarizer(sequence[:4096], max_length=max_length, min_length=min_length)
print(summary[0]['summary_text'])

In [None]:
#@title Question Answering
#@markdown Question Answering extracts the answer to a given question from a text
# Created once here and reused by the answer cell below.
question_answerer = pipeline(task="question-answering")

In [None]:
#@markdown **Answer**
context = "/content/ooo.txt"  #@param {type:"string"}
is_textfile = True  #@param {type:"boolean"}
question = "Who proposed the term Object-oriented ontology?" #@param {type:"string"}

# `context` holds either the context text itself or, when `is_textfile` is
# checked, the path of a text file to read the context from.
if is_textfile:
  # Fail loudly on a bad path: otherwise `sequence` would be undefined on a
  # fresh kernel, or silently keep the value left over from another cell.
  if not os.path.isfile(context):
    raise FileNotFoundError(f"Context file not found: {context}")
  # Open the path given in `context` -- not `text`, which belongs to other cells.
  with open(context, "r", encoding="utf-8") as f:
    sequence = f.read()
else:
  sequence = context

result = question_answerer(question=question, context=sequence)

# Drop anything already written to this cell's output, then show only the Q/A pair.
clear_output()
print(f"Question: {question}")
print(f"Answer: {result['answer']}")

In [None]:
#@title Text Generation
#@markdown Text generation (a.k.a. open-ended text generation) aims to produce a coherent piece of text that continues the given context. The example below uses a pipeline (GPT-2) to generate text.

# Created once here and reused by the generation cell below.
text_generator = pipeline(task="text-generation")


In [None]:
#@markdown **Generate**
prompt = "Object-oriented ontology is" #@param {type:"string"}
max_length = 50 #@param {type:"integer"}
temperature = 1 #@param {type:"slider", min:0.1, max:2, step:0.1}

# Coerce the form values explicitly: `max_length` must be an int (the field
# used to be declared as a string) and the slider can hand back an int where
# a strictly positive float temperature is expected for sampling.
# The result gets its own name so the `text` input of other cells is not
# overwritten with a list of generation results.
generated = text_generator(
  prompt,
  max_length=int(max_length),
  do_sample=True,
  temperature=float(temperature),
)

# Drop anything already written to this cell's output, then show the generated text.
clear_output()
print(generated[0]['generated_text'])

In [None]:
#@title Machine Translation
languages = "translation_en_to_fr" #@param ["translation_en_to_fr", "translation_en_to_de"]
# The selected value is used directly as the pipeline task name.
translator = pipeline(task=languages)

In [None]:
text = "Cultural practices and values are implicitly built into all computational systems" #@param {type:"string"}
max_length = 50 #@param {type:"integer"}
# `max_length` is a number, so the form field is declared as an integer; the
# int() coercion also covers a value that is still stored as a string.
tr = translator(text, max_length=int(max_length))
print(tr[0]['translation_text'])

## Read more

You can find more information on the [Hugging Face](https://huggingface.co/) website:

- [Docs](https://huggingface.co/docs)
- [Pipelines](https://huggingface.co/docs/transformers/v4.19.2/en/main_classes/pipelines)

A nice notebook about text generation with Hugging Face Transformers and GPT (in Russian): [here](https://colab.research.google.com/drive/1sD_hQJOi3CrHn7Ba-XuKkHRToxDRRSof?usp=sharing)