# Transformers

In [1]:
import spacy
from spacy import displacy

# English spaCy pipeline (`en_core_web_lg`); the next cell uses it to annotate
# the sample text.
nlp_en_lg = spacy.load("en_core_web_lg")

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Sample passage (two paragraphs about explainable AI) that the NER cells annotate.
text_sample = """As regulators, official bodies, and general users come to depend on AI-based dynamic systems, clearer accountability will be required for automated decision-making processes to ensure trust and transparency. Evidence of this requirement gaining more momentum can be seen with the launch of the first global conference exclusively dedicated to this emerging discipline, the International Joint Conference on Artificial Intelligence: Workshop on Explainable Artificial Intelligence (XAI).[63]

The European Union introduced a right to explanation in the General Data Protection Right (GDPR) as an attempt to deal with the potential problems stemming from the rising importance of algorithms. The implementation of the regulation began in 2018. However, the right to explanation in GDPR covers only the local aspect of interpretability. In the United States, insurance companies are required to be able to explain their rate and coverage decisions.[64]
"""

# Run the pipeline over the passage and show what kind of object comes back.
doc = nlp_en_lg(text_sample)
print(type(doc))

# Render the text with its recognised entities highlighted.
displacy.render(doc, style="ent")

<class 'spacy.tokens.doc.Doc'>


In [3]:
# Named Entity Recognition (NER) with the `en_core_web_trf` pipeline
# NOTE(review): this cell was originally headed "NER with BERT"; which
# transformer actually backs `en_core_web_trf` is not visible here - confirm
# against the spaCy model card.

nlp_en_trf = spacy.load("en_core_web_trf")

# Same passage as before, so the two English pipelines can be compared.
doc = nlp_en_trf(text_sample)
displacy.render(doc, style="ent")



In [6]:
# NER on Swedish text with the `sv_core_news_sm` pipeline

nlp_swe = spacy.load("sv_core_news_sm")

# Swedish passage used as NER input (a person name and repeated mentions of Norge).
text_sample_swe = """
Grannlandet Norge har kommit långt med att elektrifiera sin bilflotta. Om ett år kommer nybilsförsäljningen i Norge vara uppe i 100 procent bilar med sladd. 
Min kollega , techkorrespondenten Alexander Norén berättar att det som förbluffade honom när han åkte till Norge för att få förklaringen till elbilsboomen där var hur starka de ekonomiska incitamenten är, att det för många är en plånboksfråga att dumpa fossilbilen. 
"""

doc = nlp_swe(text_sample_swe)

# Pass the style by keyword, matching the other render calls in this notebook.
displacy.render(doc, style="ent")

In [10]:
# Map each entity's surface text to its label. Because the text is the key,
# repeated mentions of the same entity end up as a single entry.
entities = {ent.text: ent.label_ for ent in doc.ents}
entities

{'Norge': 'LOC', 'Alexander Norén': 'PRS'}

## Hugging Face

In [2]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Tokenizer and classifier are loaded from the same checkpoint.
SENTIMENT_CHECKPOINT = "marma/bert-base-swedish-cased-sentiment"

tokenizer = AutoTokenizer.from_pretrained(SENTIMENT_CHECKPOINT)
model = AutoModelForSequenceClassification.from_pretrained(SENTIMENT_CHECKPOINT)

# Leave the model as the last expression so the notebook prints its module tree.
model

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(50325, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, element

In [5]:
from transformers import pipeline

# Wrap the Swedish sentiment checkpoint in a ready-to-call pipeline.
sentiment = pipeline(
    task="sentiment-analysis",
    model="marma/bert-base-swedish-cased-sentiment",
)

# Quick smoke test on a single sentence; the result is displayed as the cell output.
sentiment("solen skiner idag!")

[{'label': 'POSITIVE', 'score': 0.9986221790313721}]

In [7]:
# Probe sentences for the Swedish sentiment pipeline.
# NOTE(review): "digf" and "halvromt" look like typos (for "dig" / "halvtomt");
# they are left untouched because the recorded output was produced with these
# exact strings.
sentences = ["jag älskar digf sådär mycket",
             "Du är helt okej",
             "matematik",
             "glaset är halvfullt",
             "glaset är halvromt",
             "jag har ätit pannkaka",
             "när du tar av dig skorna blir allt skönt",
             "gillar du pannkaka?"]

for sentence in sentences:
    # Run the model once per sentence and read both fields from that single
    # result; the previous version called the pipeline twice per sentence,
    # doubling the inference work.
    prediction = sentiment(sentence)[0]
    label, score = prediction["label"], prediction["score"]
    print(f"{sentence}: {label}, {score:.3f}")

jag älskar digf sådär mycket: POSITIVE, 0.999
Du är helt okej: POSITIVE, 0.999
matematik: POSITIVE, 0.984
glaset är halvfullt: NEGATIVE, 0.998
glaset är halvromt: POSITIVE, 0.618
jag har ätit pannkaka: NEGATIVE, 0.998
när du tar av dig skorna blir allt skönt: POSITIVE, 0.998
gillar du pannkaka?: NEGATIVE, 0.997


# GPT-2

In [8]:
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load tokenizer and causal language model from the same checkpoint.
# Note: this rebinds the `tokenizer` and `model` names.
GPT2_CHECKPOINT = "gpt2"

tokenizer = AutoTokenizer.from_pretrained(GPT2_CHECKPOINT)
model = AutoModelForCausalLM.from_pretrained(GPT2_CHECKPOINT)

Downloading: 100%|██████████| 665/665 [00:00<00:00, 664kB/s]
Downloading: 100%|██████████| 0.99M/0.99M [00:00<00:00, 1.76MB/s]
Downloading: 100%|██████████| 446k/446k [00:00<00:00, 930kB/s] 
Downloading: 100%|██████████| 1.29M/1.29M [00:00<00:00, 1.98MB/s]
Downloading: 100%|██████████| 523M/523M [00:18<00:00, 30.2MB/s] 


In [11]:
from transformers import pipeline, set_seed

# Text-generation pipeline for the `gpt2` checkpoint.
gpt2 = pipeline(task="text-generation", model="gpt2")

# Seed after building the pipeline (same order as before) so the sampled
# continuations below are repeatable.
set_seed(42)

# Ask for five alternative continuations of the same prompt.
gpt2(
    "Hello, I'm a language model,",
    max_length=30,
    num_return_sequences=5,
)


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


[{'generated_text': "Hello, I'm a language model, I'm writing a new language for you. But first, I'd like to tell you about the language itself"},
 {'generated_text': "Hello, I'm a language model, and I'm trying to be as expressive as possible. In order to be expressive, it is necessary to know"},
 {'generated_text': "Hello, I'm a language model, so I don't get much of a license anymore, but I'm probably more familiar with other languages on that"},
 {'generated_text': "Hello, I'm a language model, a functional model... It's not me, it's me!\n\nI won't bore you with how"},
 {'generated_text': "Hello, I'm a language model, not an object model.\n\nIn a nutshell, I need to give language model a set of properties that"}]

In [18]:
# Print only the generated text of the first (and only) returned sequence.
print(
    gpt2(
        "Welcome to IT-H;gskolan",
        max_length=100,
    )[0]["generated_text"]
)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Welcome to IT-H;gskolan's work is the only way i can help in this area; i am not a registered user with IT-H.


There are some questions for you in case your questions are not answered and you need to leave some suggestions. Please PM. You can also donate by clicking here.


The donations are for a simple way to get donations for things like equipment and stuff for you; i'll start an account if you want. The donation can


In [20]:
# Longer prompt with more context; the literal is split across lines only for
# readability and concatenates to exactly the same string.
print(
    gpt2(
        "Welcome to IT-Högskolan, we are a school specilized in IT. "
        "Our school has around 500 students., "
        "We are in Gothenburgh and Stockholm.",
        max_length=150,
    )[0]["generated_text"]
)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Welcome to IT-Högskolan, we are a school specilized in IT. Our school has around 500 students., We are in Gothenburgh and Stockholm. Our first class started in 1990. We have around 300 students. All of these schools are working in the IT community at this time. We have over 6,000 students for high-school classes. Our main objective after the school year is to develop a local IT community. Our goal is to improve our knowledge of IT, as well as to introduce ourselves to the local communities.

We are also working as a non-profit agency. We have a small team on staff working on IT-Högskolan's main mission and we have been working as


In [24]:
# Print the text of the first generated sequence for a deliberately odd prompt.
print(
    gpt2(
        "Frontend? backend :( weekend",
        max_length=100,
    )[0]["generated_text"]
)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Frontend? backend :( weekend 2 week. Just a question :)

This is the third chapter of the series...

For your convenience, any translation into other languages (such as German and French) will work! It was a bit awkward to translate English into German because there is a German part to English, and a German part to Italian and French, but you can also translate Chinese and Russian into Chinese.

Any questions are welcome, just let me know in the forum.




In [25]:
# Print the text of the first generated sequence for a short story opener.
print(
    gpt2(
        "Guldis was a wonderful cat",
        max_length=100,
    )[0]["generated_text"]
)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Guldis was a wonderful cat! We've never seen she before. I've never been to the zoo or even seen her before and as soon as she showed up I was like 'oh my gosh...she has these big white eyes and she looks like her mommy!'"

"Well it is actually quite strange having a cat on the beach that I've never seen of yet. All of these pictures I have of her have been so long she's looking rather like someone from the


In [26]:
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load tokenizer and causal language model from the Swedish GPT checkpoint.
# Note: this rebinds the `tokenizer` and `model` names once more.
SWEDISH_GPT_CHECKPOINT = "birgermoell/swedish-gpt"

tokenizer = AutoTokenizer.from_pretrained(SWEDISH_GPT_CHECKPOINT)
model = AutoModelForCausalLM.from_pretrained(SWEDISH_GPT_CHECKPOINT)

Downloading: 100%|██████████| 207/207 [00:00<00:00, 207kB/s]
Downloading: 100%|██████████| 835k/835k [00:00<00:00, 1.59MB/s]
Downloading: 100%|██████████| 501k/501k [00:00<00:00, 1.04MB/s]
Downloading: 100%|██████████| 1.40M/1.40M [00:00<00:00, 2.13MB/s]
Downloading: 100%|██████████| 24.0/24.0 [00:00<00:00, 24.0kB/s]
Downloading: 100%|██████████| 90.0/90.0 [00:00<00:00, 89.9kB/s]
Downloading: 100%|██████████| 863/863 [00:00<00:00, 862kB/s]
Downloading: 100%|██████████| 487M/487M [00:17<00:00, 29.2MB/s] 


In [28]:
# Text-generation pipeline for the Swedish GPT checkpoint.
# NOTE(review): the name reads like a typo of `gpt_swe`; it is kept because the
# following cell calls it under this exact name.
gtp_swe = pipeline(
    task="text-generation",
    model="birgermoell/swedish-gpt",
)

In [29]:
# Continue a Swedish birthday greeting, passing no generation arguments;
# the returned list is shown as the cell output.
gtp_swe(
    "Grattis på din födelsedag!"
)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


[{'generated_text': 'Grattis på din födelsedag!!! Vi firade den med tårta och presenter, men idag mår jag mycket bättre, nästan bättre, så jag fortsätter firandet! Men först lite födelsedagsgubbarna!!!! Nu är jag i alla fall'}]