## Transformers

In [1]:
# pulled 3rd review from https://www.amazon.com/New-Microsoft-Surface-Pro-Touch-Screen/dp/B07YNHYQ5Z/ref=sr_1_3?dchild=1&keywords=microsoft%2Bsurface&qid=1634659964&sr=8-3&th=1

text = r"""
This listing is very misleading and I would not recommend ordering this product from this seller.

The listing clearly says NEW and says it includes a type cover...I did not receive a type cover and the unit looks refurbished, not new. I was expecting to receive it in the manufacturer’s box and shrink wrapped, instead it came in an unmarked brown cardboard box, the surface pro in a flimsy sleeve without even a plastic film on the screen."""

### Classification/Sentiment analysis

In [2]:
from transformers import pipeline
cls = pipeline("sentiment-analysis")

No model was supplied, defaulted to distilbert-base-uncased-finetuned-sst-2-english (https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english)


In [3]:
import pandas as pd
sent = cls(text)

In [4]:
pd.DataFrame.from_records(sent)

Unnamed: 0,label,score
0,NEGATIVE,0.999176


In [27]:
sent[0]["label"]

'NEGATIVE'

### Named Entity Recognition

In [None]:
# Learn the particular product or service
# Named entities: products, places, people (NER)

In [5]:
from transformers import pipeline

nlp = pipeline("ner")

dol_text = "The regulation was enforced in three cities: New York, Tampa, and Miami." \
            "It will be effective if the Department of Labor reconvenes a meeting in Washington with Obama."

tags = nlp(dol_text)

No model was supplied, defaulted to dbmdz/bert-large-cased-finetuned-conll03-english (https://huggingface.co/dbmdz/bert-large-cased-finetuned-conll03-english)


In [6]:
# I-LOC: location
# I-ORG: organization
# I-PER: person
# start end: indices of individual chars
# index: location of word in whole string
# score: confidence
pd.DataFrame.from_records(tags)

Unnamed: 0,entity,score,index,word,start,end
0,I-LOC,0.999523,9,New,45,48
1,I-LOC,0.999212,10,York,49,53
2,I-LOC,0.997739,12,Tampa,55,60
3,I-LOC,0.999371,15,Miami,66,71
4,I-ORG,0.999567,23,Department,100,110
5,I-ORG,0.999361,24,of,111,113
6,I-ORG,0.999293,25,Labor,114,119
7,I-LOC,0.998713,33,Washington,144,154
8,I-PER,0.99874,35,Obama,160,165


## Question Answering

In [7]:
read_text = pipeline("question-answering")
question = "Did the customer like the product?"
outputs = read_text(question=question,
                    context=text)

No model was supplied, defaulted to distilbert-base-cased-distilled-squad (https://huggingface.co/distilbert-base-cased-distilled-squad)


In [8]:
pd.DataFrame([outputs])

Unnamed: 0,score,start,end,answer
0,0.453173,37,97,I would not recommend ordering this product fr...


In [9]:
outputs

{'score': 0.4531731605529785,
 'start': 37,
 'end': 97,
 'answer': 'I would not recommend ordering this product from this seller'}

## Text Summarization

In [None]:
# Long text -> short version with key facts

In [10]:
summarize = pipeline("summarization")
summary = summarize(text, max_length=30, clean_up_tokenization_spaces=True)

No model was supplied, defaulted to sshleifer/distilbart-cnn-12-6 (https://huggingface.co/sshleifer/distilbart-cnn-12-6)


In [11]:
print(summary[0]["summary_text"])

 The listing clearly says NEW and says it includes a type cover. I was expecting to receive it in the manufacturer’s box and


## Translation

In [12]:
translator = pipeline("translation_en_to_es",
                     model="Helsinki-NLP/opus-mt-en-es")

In [13]:
translated_text = translator(dol_text, clean_up_tokenization_spaces=True)

In [14]:
print(translated_text[0]["translation_text"])

La regulación se aplicó en tres ciudades: Nueva York, Tampa y Miami.Será efectiva si el Departamento de Trabajo vuelve a reunirse en Washington con Obama.


## Text generation

In [15]:
from transformers import set_seed
set_seed(987)
text_gen = pipeline("text-generation")
response = "We are sorry that the product did not meet your expectations"
prompt = text + "\n\nCustomer service response:\n" + response
outputs = text_gen(prompt, max_length=200)

No model was supplied, defaulted to gpt2 (https://huggingface.co/gpt2)
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


In [16]:
print(outputs[0]["generated_text"])


This listing is very misleading and I would not recommend ordering this product from this seller.

The listing clearly says NEW and says it includes a type cover...I did not receive a type cover and the unit looks refurbished, not new. I was expecting to receive it in the manufacturer’s box and shrink wrapped, instead it came in an unmarked brown cardboard box, the surface pro in a flimsy sleeve without even a plastic film on the screen.

Customer service response:
We are sorry that the product did not meet your expectations.

We received the package with the cover and packaging for the unit and do not return damaged or defective products.

The screen has a 2 year warranty.

This product can be ordered for $59.70 (as of August 2017) or more.

Click To Order
