In [18]:
from transformers import AutoTokenizer, pipeline, TFDistilBertModel

# Sentiment analysis

In [16]:
# Build a sentiment classifier from the multilingual nlptown BERT checkpoint.
# In the recorded run below, the label it returned was a star rating ("4 stars").
sentiment_analysis = pipeline(
    "sentiment-analysis",
    model="nlptown/bert-base-multilingual-uncased-sentiment",
)

In [24]:
# Sample inputs in English, French and Arabic for the sentiment pipeline.
en_text, fr_text, ar_text = (
    "im so happy today",
    "je suis heureux",
    "أنا سعيد اليوم",
)

# Sample analysed by the following cells; point this at another *_text to switch.
text = fr_text

In [19]:
# Run the sentiment pipeline on the selected sample. Later cells read
# result[0]['label'] and result[0]['score'] from the returned value.
result = sentiment_analysis(text)

In [25]:
# Take the first prediction: its label, and its score scaled to a percentage.
sentiment, score = result[0]["label"], result[0]["score"] * 100

In [23]:
# Report the predicted label and its score as a percentage with two decimals.
print(f"The sentiment is {sentiment}, and score is up to {score:.2f} %")

The sentiment is 4 stars, and score is up to 37.49 %


# Text generation

In [64]:
# English text generation with the XLNet base checkpoint.
text_generator_en = pipeline(
    "text-generation",
    model="xlnet-base-cased",
)

# Sampling is disabled (do_sample=False); in the recorded output the
# continuation keeps repeating the same sentence.
result = text_generator_en(
    "the natural language processing is",
    max_length=50,
    do_sample=False,
)

# Show only the generated string of the first returned sequence.
result[0]["generated_text"]

'the natural language processing is a very important part of the natural language processing. The natural language processing is a very important part of the natural language processing. The natural language processing is a very important part of the natural language processing. The natural language processing is'

In [65]:
# Arabic text generation with a GPT-2 checkpoint trained for Arabic sentences.
text_generator_ar = pipeline(
    "text-generation",
    model="mofawzy/gpt2-arabic-sentence-generator",
)

# Same settings as the English example: sampling disabled, at most 50 tokens.
result = text_generator_ar(
    "مرحبا كيف",
    max_length=50,
    do_sample=False,
)

# Show only the generated string of the first returned sequence.
result[0]["generated_text"]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


'مرحبا كيف يمكن ان تكون في هذه الحياة؟ وكيف كانت هذه الحياة هي الحياة؟ وكيف كانت الحياة غير ذلك؟ وكيف كانت الحياة غير مفهومة. وكيف كانت الحياة غير مفهومة. وكيف كانت الحياة غير مفهومة. وكيف كانت الحياة غير مفهومة. وكيف كانت الحياة غير'

# Named entity recognition

Label list:

    "O",       # Outside of a named entity
    "B-MISC",  # Beginning of a miscellaneous entity right after another miscellaneous entity
    "I-MISC",  # Miscellaneous entity
    "B-PER",   # Beginning of a person's name right after another person's name
    "I-PER",   # Person's name
    "B-ORG",   # Beginning of an organisation right after another organisation
    "I-ORG",   # Organisation
    "B-LOC",   # Beginning of a location right after another location
    "I-LOC"    # Location

In [66]:
# NER pipeline created without an explicit model, so the library default is used.
ner = pipeline("ner")

# NOTE(review): despite the `_en` suffix this sample sentence is written in French.
sequence_en = (
    "Facebook est une société américaine créée en 2004 par Mark Zuckerberg. "
    "Elle est un des géants du Web, regroupés sous l'acronyme GAFAM, "
    "aux côtés de Google, Apple, Amazon et Microsoft."
)

# Entities come back per token; the recorded output shows word pieces such as '##uck'.
ner(sequence_en)

[{'entity': 'I-ORG',
  'score': 0.9969145,
  'index': 1,
  'word': 'Facebook',
  'start': 0,
  'end': 8},
 {'entity': 'I-MISC',
  'score': 0.6294728,
  'index': 8,
  'word': 'am',
  'start': 25,
  'end': 27},
 {'entity': 'I-MISC',
  'score': 0.563962,
  'index': 9,
  'word': '##é',
  'start': 27,
  'end': 28},
 {'entity': 'I-MISC',
  'score': 0.90429735,
  'index': 10,
  'word': '##rica',
  'start': 28,
  'end': 32},
 {'entity': 'I-PER',
  'score': 0.9990637,
  'index': 18,
  'word': 'Mark',
  'start': 54,
  'end': 58},
 {'entity': 'I-PER',
  'score': 0.99688506,
  'index': 19,
  'word': 'Z',
  'start': 59,
  'end': 60},
 {'entity': 'I-PER',
  'score': 0.8452265,
  'index': 20,
  'word': '##uck',
  'start': 60,
  'end': 63},
 {'entity': 'I-PER',
  'score': 0.70506406,
  'index': 21,
  'word': '##er',
  'start': 63,
  'end': 65},
 {'entity': 'I-PER',
  'score': 0.9720863,
  'index': 22,
  'word': '##berg',
  'start': 65,
  'end': 69},
 {'entity': 'I-MISC',
  'score': 0.50998914,
  'inde

# Question answering

In [67]:
# Extractive question-answering pipeline; no model is named, so the library's
# default checkpoint for this task is loaded. It is reused by the cells below.
question_answerer = pipeline('question-answering')

In [68]:
# English passage the answer is extracted from.
context_en = (
    "Extractive Question Answering is the task of extracting an answer from a text given a question. "
    "An example of a question answering dataset is the SQuAD dataset, which is entirely based on that task. "
    "If you would like to fine-tunea model on a SQuAD task, you may leverage the `run_squad.py`."
)

# The result is a dict with 'score', 'start', 'end' and 'answer' (see the output below).
answer_question_en = question_answerer(
    question="What is extractive question answering?",
    context=context_en,
)
answer_question_en

{'score': 0.6209336519241333,
 'start': 33,
 'end': 94,
 'answer': 'the task of extracting an answer from a text given a question'}

In [69]:
# Arabic passage used as the context for the question below.
context_ar = "الحج هو الركن الخامس من أركان الإسلام، لقول النبي محمد: 'بني الإسلام على خمس: شهادة أن لا إله إلا الله وأن محمداً رسول الله، وإقام الصلاة، وإيتاء الزكاة، وصوم رمضان، وحج البيت من استطاع إليه سبيلاً'"

# NOTE(review): question_answerer was created without an explicit model and the
# recorded score for this Arabic example is very low (~0.008). Presumably the
# default checkpoint is not suited to Arabic; consider a multilingual QA model.
answer_question_ar = question_answerer(question = "ما هو الحج ؟", context = context_ar)
answer_question_ar

{'score': 0.008487730287015438,
 'start': 5,
 'end': 20,
 'answer': 'هو الركن الخامس'}

In [70]:
# French passage the answer is extracted from.
context_fr = (
    "Wikipédia en français est l'édition de Wikipédia en langue française. "
    "Elle est fondée le 23 mars 2001, deux mois après la création officielle de Wikipédia."
)

# Ask when Wikipedia was founded; the recorded answer is the date span from the passage.
answer_question_fr = question_answerer(
    question="Quand est ce que la wikipédia a été fondée ?",
    context=context_fr,
)
answer_question_fr

{'score': 0.37040311098098755,
 'start': 86,
 'end': 101,
 'answer': 'le 23 mars 2001'}

# Translation

In [71]:
# English -> French translation with the Helsinki-NLP OPUS-MT checkpoint.
en_to_fr = pipeline(
    "translation_en_to_fr",
    model="Helsinki-NLP/opus-mt-en-fr",
)
result_fr = en_to_fr("hello, how are you?")

# A list with one dict holding 'translation_text' (see the output below).
result_fr

[{'translation_text': 'Bonjour, comment allez-vous ?'}]

In [72]:
# French -> English translation with the Helsinki-NLP OPUS-MT checkpoint.
fr_to_en = pipeline(
    "translation_fr_to_en",
    model="Helsinki-NLP/opus-mt-fr-en",
)
result_en = fr_to_en("comment ça va ?")

# A list with one dict holding 'translation_text' (see the output below).
result_en

[{'translation_text': 'How are you?'}]

In [10]:
# French -> Arabic translation with the Helsinki-NLP OPUS-MT checkpoint.
fr_to_ar = pipeline(
    "translation_fr_to_ar",
    model="Helsinki-NLP/opus-mt-fr-ar",
)
result_ar = fr_to_ar("comment ça va ?")

# NOTE(review): in the recorded output the translated phrase appears twice.
result_ar

[{'translation_text': 'كيف حالك؟ كيف حالك؟'}]

# Summarization

In [73]:
# Summarization pipeline; no model is named, so the library's default
# checkpoint for this task is loaded.
summarizer = pipeline("summarization")

In [74]:
# Text to summarize (opening of a Harvard Class of 2017 commencement speech).
# The passage is written as two adjacent string literals inside parentheses:
# a plain "..." literal cannot contain a raw line break, so the original
# two-line form was a SyntaxError. Python joins adjacent literals at compile
# time; the trailing space of the first part separates the two sentences.
article_en = (
    "I’m honored to be with you today because, let’s face it, you accomplished something I never could. If I get through this speech, it’ll be the first time I actually finish something at Harvard. Class of 2017, congratulations! I’m an unlikely speaker, not just because I dropped out, but because we’re technically in the same generation. We walked this yard less than a decade apart, studied the same ideas and slept through the same Ec10 lectures. We may have taken different paths to get here, especially if you came all the way from the Quad, but today I want to share what I’ve learned about our generation and the world we’re building together. But first, the last couple of days have brought back a lot of good memories. How many of you remember exactly what you were doing when you got that email telling you that you got into Harvard? I was playing Civilization and I ran downstairs, got my dad, and for some reason, his reaction was to video me opening the email. That could have been a really sad video. I swear getting into Harvard is still the thing my parents are most proud of me for. What about your first lecture at Harvard? Mine was Computer Science 121 with the incredible Harry Lewis. I was late so I threw on a t-shirt and didn’t realize until afterwards it was inside out and backwards with my tag sticking out the front. I couldn’t figure out why no one would talk to me — except one guy, KX Jin, he just went with it. We ended up doing our problem sets together, and now he runs a big part of Facebook. And that, Class of 2017, is why you should be nice to people. But my best memory from Harvard was meeting Priscilla. I had just launched this prank website Facemash, and the ad board wanted to “see me”. Everyone thought I was going to get kicked out. My parents came to help me pack. My friends threw me a going away party. As luck would have it, Priscilla was at that party with her friend. "
    "We met in line for the bathroom in the Pfoho Belltower, and in what must be one of the all time romantic lines, I said: “I’m going to get kicked out in three days, so we need to go on a date quickly.” Actually, any of you graduating can use that line. I didn’t end up getting kicked out — I did that to myself. Priscilla and I started dating. And, you know, that movie made it seem like Facemash was so important to creating Facebook. It wasn’t. But without Facemash I wouldn’t have met Priscilla, and she’s the most important person in my life, so you could say it was the most important thing I built in my time here."
)

# Summarize with explicit length bounds; the result is a list with one dict
# holding 'summary_text' (see the output below).
result = summarizer(article_en, max_length = 500, min_length = 30)
result

[{'summary_text': " Class of 2017, congratulations! I'm an unlikely speaker, not just because I dropped out, but because we're technically in the same generation . I'm honored to be with you today because, let’s face it, you accomplished something I never could. If I get through this speech, it’ll be the first time I actually finish something at Harvard. But my best memory from Harvard was meeting Priscilla. I had just launched this prank website Facemash ."}]

# Fill mask

In [75]:
# Masked-word prediction pipeline; no model is named, so the library's
# default checkpoint for this task is loaded.
fill_mask = pipeline("fill-mask")

In [76]:
# Insert the tokenizer's own mask token into the sentence instead of hard-coding
# it, then display the candidate completions with their scores.
result = fill_mask(f"I want to {fill_mask.tokenizer.mask_token} English.")
result

[{'sequence': 'I want to learn English.',
  'score': 0.45623907446861267,
  'token': 1532,
  'token_str': ' learn'},
 {'sequence': 'I want to speak English.',
  'score': 0.44010689854621887,
  'token': 1994,
  'token_str': ' speak'},
 {'sequence': 'I want to teach English.',
  'score': 0.030160952359437943,
  'token': 6396,
  'token_str': ' teach'},
 {'sequence': 'I want to write English.',
  'score': 0.01819874346256256,
  'token': 3116,
  'token_str': ' write'},
 {'sequence': 'I want to understand English.',
  'score': 0.006681192200630903,
  'token': 1346,
  'token_str': ' understand'}]

# Feature extraction

In [12]:
# Feature extraction with DistilRoBERTa; model and tokenizer use the same checkpoint.
feature_extraction = pipeline(
    "feature-extraction",
    model="distilroberta-base",
    tokenizer="distilroberta-base",
)
features = feature_extraction("je suis imad")

# Number of entries for the first input; the recorded run shows 7
# (presumably one vector per token; confirm against the tokenizer).
len(features[0])

Some weights of the model checkpoint at distilroberta-base were not used when initializing RobertaModel: ['lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


7

In [13]:
# Display the nested list of floats extracted for the first input.
# NOTE(review): this dumps a very long output; consider showing only a slice.
features[0]

[[-0.009394753724336624,
  0.06289573013782501,
  -0.003261573612689972,
  -0.1365998387336731,
  0.08454878628253937,
  -0.11255133897066116,
  -0.030160214751958847,
  0.051766373217105865,
  0.04071718826889992,
  -0.04688403755426407,
  -0.034458622336387634,
  0.06681641936302185,
  0.05866669863462448,
  -0.040317073464393616,
  0.07575491070747375,
  0.005419265478849411,
  -0.07873965054750443,
  0.03339861333370209,
  0.03617656230926514,
  -0.05647195130586624,
  -0.01008836179971695,
  0.021149005740880966,
  -0.0690617561340332,
  0.08810517191886902,
  -0.052150823175907135,
  0.010373000055551529,
  0.10880371928215027,
  0.04851650819182396,
  -0.037980321794748306,
  0.0022448748350143433,
  0.01105172373354435,
  -0.006453491747379303,
  0.013437088578939438,
  0.060150451958179474,
  -0.017150910571217537,
  0.04144970327615738,
  0.05802910774946213,
  0.03098256327211857,
  -0.0646669790148735,
  0.029596179723739624,
  -0.022294264286756516,
  -0.015288516879081726