In [1]:
!pip install transformers



In [2]:
from transformers import pipeline

# Sentiment analysis

In [3]:
# Name the checkpoint explicitly instead of relying on the library's default,
# so the notebook keeps using the same model if that default ever changes.
classifier = pipeline('sentiment-analysis', model='distilbert-base-uncased-finetuned-sst-2-english')

No model was supplied, defaulted to distilbert-base-uncased-finetuned-sst-2-english (https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english)


Downloading:   0%|          | 0.00/629 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/255M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/226k [00:00<?, ?B/s]

In [4]:
classifier('I like the products, and the service was good.. I have no recommendations to make..')

[{'label': 'POSITIVE', 'score': 0.9992836117744446}]

In [5]:
classifier('CLARINS-FIDELITE. I received a loyality code for 20 euro off an order over 50 euro, i have tried several times to enter the code and it keeps saying code unknown, can you look into this please and get back to me as soon as possible, or send me a new code. Thanks')

[{'label': 'NEGATIVE', 'score': 0.9893026947975159}]

In [6]:
classifier('Fast delivery, easy website. Nothing i thought it was very good.')

[{'label': 'POSITIVE', 'score': 0.7927373051643372}]

# Question answering

In [7]:
# Name the checkpoint explicitly instead of relying on the library's default,
# so the notebook keeps using the same model if that default ever changes.
question_answerer = pipeline('question-answering', model='distilbert-base-cased-distilled-squad')

No model was supplied, defaulted to distilbert-base-cased-distilled-squad (https://huggingface.co/distilbert-base-cased-distilled-squad)


Downloading:   0%|          | 0.00/473 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/249M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/29.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/208k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/426k [00:00<?, ?B/s]

In [8]:
question_answerer({'question': 'What is the name of the repository ?', 'context': 'Pipeline has been included in the huggingface/transformers repository'})

{'score': 0.309701532125473,
 'start': 34,
 'end': 58,
 'answer': 'huggingface/transformers'}

In [9]:
resource = """
Emmanuel Jean-Michel Frédéric Macron (French: [emanɥɛl ʒɑ̃ miʃɛl fʁedeʁik makʁɔ̃]; born 21 December 1977) is a French politician who has been serving as the president of France since 14 May 2017.

Born in Amiens, Macron studied philosophy at Paris Nanterre University, later completing a master's degree in public affairs at Sciences Po and graduating from the École nationale d'administration in 2004. He worked as a senior civil servant at the Inspectorate General of Finances and later became an investment banker at Rothschild & Co.

Macron was appointed a deputy secretary general by President François Hollande shortly after his election in May 2012, making Macron one of Hollande's senior advisers. He was later appointed to the French cabinet as Minister of the Economy, Industry and Digital Affairs in August 2014 by prime minister Manuel Valls. In this role, Macron championed a number of business-friendly reforms. He resigned from the cabinet in August 2016, launching a campaign for the 2017 French presidential election. Although Macron had been a member of the Socialist Party from 2006 to 2009, he ran in the election under the banner of La République En Marche!, a centrist and pro-European political movement he founded in April 2016.

Partly thanks to the Fillon affair, Macron topped the ballot in the first round of voting, and was elected President of France on 7 May 2017 with 66.1% of the vote in the second round, defeating Marine Le Pen. At the age of 39, Macron became the youngest president in French history. He appointed Édouard Philippe as prime minister, and in the 2017 French legislative election a month later Macron's party, renamed La République En Marche (LREM), secured a majority in the National Assembly. During his presidency, Macron has overseen several reforms to labour laws and taxation. Opposition to his reforms, particularly a proposed fuel tax, culminated in the 2018 yellow vests protests and other protests. In 2020, he appointed Jean Castex as prime minister following Philippe's resignation. From 2020, he has led France's ongoing response to the COVID-19 pandemic and vaccination rollout.
"""

In [10]:
question_answerer({'question': 'When is Mr Macron born ?', 'context': resource})

  return array(a, dtype, copy=False, order=order)


{'score': 0.9836931228637695,
 'start': 89,
 'end': 105,
 'answer': '21 December 1977'}

In [11]:
question_answerer({'question': 'Who is Mr Macron ?', 'context': resource})

{'score': 0.056360770016908646,
 'start': 110,
 'end': 177,
 'answer': 'a French politician who has been serving as the president of France'}

In [12]:
question_answerer({'question': 'Where is Mr Macron born ? ', 'context': resource})

{'score': 0.9797683358192444, 'start': 206, 'end': 212, 'answer': 'Amiens'}

# Create a class that answers questions on a subject

In [13]:
# URL template for the French Wikipedia full-text search; `{search}` is the
# placeholder for the search term.
# NOTE(review): no visible cell reads this module-level variable — confirm it
# is still needed before keeping or removing it.
wikipedia_search_page = "https://fr.wikipedia.org/w/index.php?title=Sp%C3%A9cial%3ARecherche&fulltext=1&search={search}&ns0=1"

In [14]:
from urllib.parse import quote_plus

import requests
from bs4 import BeautifulSoup

class WikipediaParser():
    """Fetch the text of the English Wikipedia article best matching a search term.

    The parser queries Wikipedia's full-text search page, follows the first
    result and returns the concatenated text of that article's ``<p>`` elements.

    Attributes:
        wikipedia_search_page: URL template of the search page; ``{search}``
            is replaced by the URL-encoded search term.
        base_url: Site root prepended to the relative link of a search result.
        timeout: Seconds to wait for each HTTP request before giving up.
    """
    def __init__(self, timeout=10):
        self.wikipedia_search_page = "https://en.wikipedia.org/w/index.php?search={search}&title=Special%3ASearch&fulltext=1"
        self.base_url = "https://en.wikipedia.org"
        self.timeout = timeout

    def _get_soup(self, url):
        """Download ``url`` and return its parsed HTML, raising on HTTP errors."""
        response = requests.get(url, timeout=self.timeout)
        response.raise_for_status()
        # Name the parser explicitly so the result does not depend on which
        # optional parser libraries happen to be installed.
        return BeautifulSoup(response.content, 'html.parser')

    def search(self, search):
        """Return the paragraph text of the first article found for ``search``.

        Also prints the length of the returned text.

        Args:
            search: Free-text search term; it is URL-encoded before being
                inserted into the search URL.

        Returns:
            The text of every ``<p>`` element of the first search result,
            concatenated, with newlines and tabs replaced by spaces.

        Raises:
            IndexError: If the search page lists no result to follow.
            requests.HTTPError: If a request is answered with an error status.
        """
        # Encode the term so characters such as '&', '#' or spaces cannot
        # corrupt the query string.
        search_url = self.wikipedia_search_page.format(search=quote_plus(search))
        soup = self._get_soup(search_url)

        # Getting the first result of the search
        headings = soup.find_all('div', class_='mw-search-result-heading')
        links = headings[0].find_all('a', href=True) if headings else []
        if not links:
            # Same exception type as the bare `[0]` lookup used to raise, but
            # with a message that says what actually went wrong.
            raise IndexError(f"No Wikipedia search result found for {search!r}")
        first_result_url = self.base_url + links[0]['href']

        # Getting all the <p>
        soup = self._get_soup(first_result_url)
        page_content = "".join(paragraph.get_text() for paragraph in soup.find_all("p"))

        # Cleaning
        page_content = page_content.replace('\n', ' ').replace('\t', ' ')
        print(f"Content page is {len(page_content)} long.")

        return page_content

In [15]:
class AnswerRobot():
    """Answer questions about a topic, using its Wikipedia article as context.

    On construction the robot loads a question-answering pipeline and
    downloads the article text for ``topic`` through ``WikipediaParser``.

    Attributes:
        question_answerer: The question-answering pipeline used by ``ask``.
        context: Article text returned by ``WikipediaParser.search(topic)``.
    """
    def __init__(self, topic, model='distilbert-base-cased-distilled-squad'):
        """Load the pipeline and fetch the context for ``topic``.

        Args:
            topic: Search term passed to ``WikipediaParser.search``.
            model: Checkpoint name handed to ``pipeline``; named explicitly
                so the robot does not depend on the library's default.
        """
        self.question_answerer = pipeline('question-answering', model=model)

        wp = WikipediaParser()
        self.context = wp.search(topic)

    def ask(self, question):
        """Answer ``question`` from the stored context.

        Args:
            question: The question, as a string.

        Returns:
            Whatever the pipeline returns for the question/context pair.
        """
        # Use this instance's own pipeline rather than a notebook-level
        # global that may not exist or may be a different model.
        return self.question_answerer({'question': question, 'context': self.context})

In [16]:
ar = AnswerRobot('macron')

No model was supplied, defaulted to distilbert-base-cased-distilled-squad (https://huggingface.co/distilbert-base-cased-distilled-squad)


Content page is 74370 long.


In [17]:
ar.ask('Who is Macron ?')

{'score': 0.9869561195373535,
 'start': 74109,
 'end': 74121,
 'answer': 'Damien Tarel'}

In [18]:
ar.ask('How old is Macron ?')

{'score': 0.9451642036437988, 'start': 1547, 'end': 1549, 'answer': '39'}

In [19]:
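# FIXME: both answers above are wrong — 'Damien Tarel' is not who Macron is, and '39' looks like his age when elected rather than his current age.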