In [None]:
!pip install -q pyspark spark-nlp

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m317.3/317.3 MB[0m [31m5.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m55.6/55.6 kB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m579.5/579.5 kB[0m [31m31.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for pyspark (setup.py) ... [?25l[?25hdone


In [None]:
import sparknlp

# The wildcard imports bring in the Spark NLP classes that later cells use
# without qualification (e.g. DocumentAssembler, SentenceDetectorDLModel,
# LightPipeline).
from pyspark.ml import PipelineModel
from sparknlp.annotator import *
from sparknlp.base import *

# Start the Spark session through Spark NLP's helper.
spark = sparknlp.start()
# Bare expression: the session object is displayed as the cell output.
spark

In [None]:
# Stage 1: read the "text" column and emit it as "document".
documenter = (
    DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")
)

# Stage 2: pretrained English sentence detector; consumes "document" and
# writes the detected sentences to "sentences".
sentencerDL = (
    SentenceDetectorDLModel.pretrained("sentence_detector_dl", "en")
    .setInputCols(["document"])
    .setOutputCol("sentences")
)

# Assemble the two stages into a pipeline model.
sd_pipeline = PipelineModel(stages=[documenter, sentencerDL])


sentence_detector_dl download started this may take some time.
Approximate size to download 354.6 KB
[OK!]


In [None]:
# Wrap the two-stage pipeline in a LightPipeline; later cells call
# fullAnnotate() on it with plain Python strings.
sd_model = LightPipeline(sd_pipeline)

In [None]:

# Sample with awkward boundaries: no space after a period, a space before a
# period, and a semicolon.
text = """John loves Mary.mary loves Peter
          Peter loves Helen .Helen loves John;
          Total: four. people involved."""

# Take element 0 of the fullAnnotate result and print each detected sentence.
annotations = sd_model.fullAnnotate(text)[0]
for anno in annotations["sentences"]:
    print(anno.result)

John loves Mary.
mary loves Peter
Peter loves Helen .
Helen loves John;
Total: four. people involved.


In [None]:

# Same sample as before, this time printed with positional information.
text = """John loves Mary.mary loves Peter
          Peter loves Helen .Helen loves John;
          Total: four. people involved."""

# One tab-separated row per sentence: sentence id, begin, end, sentence text.
for anno in sd_model.fullAnnotate(text)[0]["sentences"]:
    row = (anno.metadata["sentence"], anno.begin, anno.end, anno.result)
    print("{}\t{}\t{}\t{}".format(*row))

0	0	15	John loves Mary.
1	16	31	mary loves Peter
2	43	61	Peter loves Helen .
3	62	78	Helen loves John;
4	91	119	Total: four. people involved.


##  Testing with a Broken Text (random \n chars added)

In [None]:
# Sample paragraph with stray "\n" escapes inserted mid-sentence (after
# "One method to get" and after "in a form") to simulate broken text.
text = '''
There are many NLP tasks like text summarization, question-answering, sentence prediction to name a few. One method to get\n these tasks done is using a pre-trained model. Instead of training
a model from scratch for NLP tasks using millions of annotated texts each time, a general language representation is created by training a model on a huge amount of data. This is called a pre-trained model. This pre-trained model is
then fine-tuned for each NLP tasks according to need.
Let’s just peek into the pre-BERT world…
For creating models, we need words to be represented in a form \n understood by the training network, ie, numbers. Thus many algorithms were used to convert words into vectors or more precisely, word embeddings.
One of the earliest algorithms used for this purpose is word2vec. However, the drawback of word2vec models was that they were context-free. One problem caused by this is that they cannot accommodate polysemy. For example, the word ‘letter’ has a different meaning according to the context. It can mean ‘single element of alphabet’ or ‘document addressed to another person’. But in word2vec both the letter returns same embeddings.
'''

# One tab-separated row per sentence: sentence id, begin, end, sentence text.
for anno in sd_model.fullAnnotate(text)[0]["sentences"]:
    # Strip embedded newlines so every sentence prints on a single line.
    flattened = anno.result.replace('\n','')
    print("{}\t{}\t{}\t{}".format(
        anno.metadata["sentence"], anno.begin, anno.end, flattened))

0	1	104	There are many NLP tasks like text summarization, question-answering, sentence prediction to name a few.
1	106	170	One method to get these tasks done is using a pre-trained model.
2	172	362	Instead of training a model from scratch for NLP tasks using millions of annotated texts each time, a general language representation is created by training a model on a huge amount of data.
3	364	398	This is called a pre-trained model.
4	400	479	This pre-trained model is then fine-tuned for each NLP tasks according to need.
5	481	520	Let’s just peek into the pre-BERT world…
6	522	634	For creating models, we need words to be represented in a form  understood by the training network, ie, numbers.
7	636	731	Thus many algorithms were used to convert words into vectors or more precisely, word embeddings.
8	734	798	One of the earliest algorithms used for this purpose is word2vec.
9	800	872	However, the drawback of word2vec models was that they were context-free.
10	874	941	One problem caused by t

In [None]:
# Display the sentence detector's parameters (name, doc string, current value).
sentencerDL.extractParamMap()

{Param(parent='SentenceDetectorDLModel_c83c27f46b97', name='customBounds', doc='characters used to explicitly mark sentence bounds'): [],
 Param(parent='SentenceDetectorDLModel_c83c27f46b97', name='engine', doc='Deep Learning engine used for this model'): 'tensorflow',
 Param(parent='SentenceDetectorDLModel_c83c27f46b97', name='explodeSentences', doc='whether to explode each sentence into a different row, for better parallelization. Defaults to false.'): False,
 Param(parent='SentenceDetectorDLModel_c83c27f46b97', name='lazyAnnotator', doc='Whether this AnnotatorModel acts as lazy in RecursivePipelines'): False,
 Param(parent='SentenceDetectorDLModel_c83c27f46b97', name='maxLength', doc='Set the maximum allowed length for each sentence'): 99999,
 Param(parent='SentenceDetectorDLModel_c83c27f46b97', name='minLength', doc='Set the minimum allowed length for each sentence.'): 0,
 Param(parent='SentenceDetectorDLModel_c83c27f46b97', name='splitLength', doc='length at which sentences will b

## setMaxLength

In [None]:

# Sample paragraph with stray "\n" escapes inserted mid-sentence; reused by
# the setMaxLength and setMinLength cells that follow.
text = '''
There are many NLP tasks like text summarization, question-answering, sentence prediction to name a few. One method to get\n these tasks done is using a pre-trained model. Instead of training
a model from scratch for NLP tasks using millions of annotated texts each time, a general language representation is created by training a model on a huge amount of data. This is called a pre-trained model. This pre-trained model is
then fine-tuned for each NLP tasks according to need.
Let’s just peek into the pre-BERT world…
For creating models, we need words to be represented in a form \n understood by the training network, ie, numbers. Thus many algorithms were used to convert words into vectors or more precisely, word embeddings.
One of the earliest algorithms used for this purpose is word2vec. However, the drawback of word2vec models was that they were context-free. One problem caused by this is that they cannot accommodate polysemy. For example, the word ‘letter’ has a different meaning according to the context. It can mean ‘single element of alphabet’ or ‘document addressed to another person’. But in word2vec both the letter returns same embeddings.
'''

In [None]:
# Stage 1: read the "text" column and emit it as "document".
documenter = (
    DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")
)

# Stage 2: pretrained English sentence detector with the maximum allowed
# sentence length set to 80.
sentencerDL = (
    SentenceDetectorDLModel.pretrained("sentence_detector_dl", "en")
    .setInputCols(["document"])
    .setOutputCol("sentences")
    .setMaxLength(80)
)

sd_pipeline2 = PipelineModel(stages=[documenter, sentencerDL])

sentence_detector_dl download started this may take some time.
Approximate size to download 354.6 KB
[OK!]


In [None]:
# LightPipeline wrapper around the setMaxLength(80) pipeline.
sd_model2 = LightPipeline(sd_pipeline2)

In [None]:
# One tab-separated row per sentence: sentence id, begin, end, sentence text.
for anno in sd_model2.fullAnnotate(text)[0]["sentences"]:
    # Strip embedded newlines so every sentence prints on a single line.
    flattened = anno.result.replace('\n','')
    print("{}\t{}\t{}\t{}".format(
        anno.metadata["sentence"], anno.begin, anno.end, flattened))


1	106	170	One method to get these tasks done is using a pre-trained model.
3	364	398	This is called a pre-trained model.
4	400	479	This pre-trained model is then fine-tuned for each NLP tasks according to need.
5	481	520	Let’s just peek into the pre-BERT world…
8	734	798	One of the earliest algorithms used for this purpose is word2vec.
9	800	872	However, the drawback of word2vec models was that they were context-free.
10	874	941	One problem caused by this is that they cannot accommodate polysemy.
11	943	1022	For example, the word ‘letter’ has a different meaning according to the context.
13	1108	1163	But in word2vec both the letter returns same embeddings.


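The setMaxLength parameter limits the number of characters a sentence may have. Sentences longer than the limit (80 characters here) are dropped from the output, which is why fewer sentences are listed above: sentences 0, 2, 6, 7 and 12 are missing.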

## setMinLength

In [None]:
# Stage 1: read the "text" column and emit it as "document".
documenter = (
    DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")
)

# Stage 2: pretrained English sentence detector with the maximum sentence
# length set to 1000 and the minimum sentence length set to 50.
sentencerDL = (
    SentenceDetectorDLModel.pretrained("sentence_detector_dl", "en")
    .setInputCols(["document"])
    .setOutputCol("sentences")
    .setMaxLength(1000)
    .setMinLength(50)
)

sd_pipeline3 = PipelineModel(stages=[documenter, sentencerDL])

sentence_detector_dl download started this may take some time.
Approximate size to download 354.6 KB
[OK!]


In [None]:
# LightPipeline wrapper around the setMinLength(50) pipeline.
sd_model3 = LightPipeline(sd_pipeline3)

In [None]:

# One tab-separated row per sentence: sentence id, begin, end, sentence text.
for anno in sd_model3.fullAnnotate(text)[0]["sentences"]:
    # Strip embedded newlines so every sentence prints on a single line.
    flattened = anno.result.replace('\n','')
    print("{}\t{}\t{}\t{}".format(
        anno.metadata["sentence"], anno.begin, anno.end, flattened))

0	1	104	There are many NLP tasks like text summarization, question-answering, sentence prediction to name a few.
1	106	170	One method to get these tasks done is using a pre-trained model.
2	172	362	Instead of training a model from scratch for NLP tasks using millions of annotated texts each time, a general language representation is created by training a model on a huge amount of data.
4	400	479	This pre-trained model is then fine-tuned for each NLP tasks according to need.
6	522	634	For creating models, we need words to be represented in a form  understood by the training network, ie, numbers.
7	636	731	Thus many algorithms were used to convert words into vectors or more precisely, word embeddings.
8	734	798	One of the earliest algorithms used for this purpose is word2vec.
9	800	872	However, the drawback of word2vec models was that they were context-free.
10	874	941	One problem caused by this is that they cannot accommodate polysemy.
11	943	1022	For example, the word ‘letter’ has a di

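The setMinLength parameter sets the minimum number of characters a sentence must have. Sentences shorter than the limit (50 characters here) are discarded, which is why sentences 3 and 5 are missing from the output above.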

## setCustomBounds and setUseCustomBoundsOnly

In [None]:
# Stage 1: read the "text" column and emit it as "document".
documenter = (
    DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")
)

# Stage 2: pretrained English sentence detector that uses "!!" as an explicit
# sentence bound, with useCustomBoundsOnly switched on.
sentencerDL = (
    SentenceDetectorDLModel.pretrained("sentence_detector_dl", "en")
    .setInputCols(["document"])
    .setOutputCol("sentences")
    .setMaxLength(1000)
    .setMinLength(0)
    .setCustomBounds(["!!"])
    .setUseCustomBoundsOnly(True)
)

sd_pipeline4 = PipelineModel(stages=[documenter, sentencerDL])

sentence_detector_dl download started this may take some time.
Approximate size to download 354.6 KB
[OK!]


In [None]:

# Variant of the sample paragraph for the custom-bounds demo: the first
# sentence ends with "!!" and there are no stray "\n" escapes.
text = '''
There are many NLP tasks like text summarization, question-answering, sentence prediction to name a few!! One method to get these tasks done is using a pre-trained model. Instead of training
a model from scratch for NLP tasks using millions of annotated texts each time, a general language representation is created by training a model on a huge amount of data. This is called a pre-trained model. This pre-trained model is
then fine-tuned for each NLP tasks according to need.
Let’s just peek into the pre-BERT world…
For creating models, we need words to be represented in a form understood by the training network, ie, numbers. Thus many algorithms were used to convert words into vectors or more precisely, word embeddings.
One of the earliest algorithms used for this purpose is word2vec. However, the drawback of word2vec models was that they were context-free. One problem caused by this is that they cannot accommodate polysemy. For example, the word ‘letter’ has a different meaning according to the context. It can mean ‘single element of alphabet’ or ‘document addressed to another person’. But in word2vec both the letter returns same embeddings.
'''

In [None]:
# LightPipeline wrapper around the custom-bounds ("!!") pipeline.
sd_model4 = LightPipeline(sd_pipeline4)

In [None]:
# One tab-separated row per sentence: sentence id, begin, end, sentence text.
for anno in sd_model4.fullAnnotate(text)[0]["sentences"]:
    # Strip embedded newlines so every sentence prints on a single line.
    flattened = anno.result.replace('\n','')
    print("{}\t{}\t{}\t{}".format(
        anno.metadata["sentence"], anno.begin, anno.end, flattened))

0	1	105	There are many NLP tasks like text summarization, question-answering, sentence prediction to name a few!!


In this case, there is one sentence ending with '!!' and the annotator is expected to detect only this sentence.

## setSplitLength

The setSplitLength parameter sets the length at which sentences are forcibly split. If it is not set, no forced splitting is applied.

In [None]:
# Sample paragraph with stray "\n" escapes inserted mid-sentence; reused by
# the setSplitLength and setImpossiblePenultimates cells that follow.
text = '''
There are many NLP tasks like text summarization, question-answering, sentence prediction to name a few. One method to get\n these tasks done is using a pre-trained model. Instead of training
a model from scratch for NLP tasks using millions of annotated texts each time, a general language representation is created by training a model on a huge amount of data. This is called a pre-trained model. This pre-trained model is
then fine-tuned for each NLP tasks according to need.
Let’s just peek into the pre-BERT world…
For creating models, we need words to be represented in a form \n understood by the training network, ie, numbers. Thus many algorithms were used to convert words into vectors or more precisely, word embeddings.
One of the earliest algorithms used for this purpose is word2vec. However, the drawback of word2vec models was that they were context-free. One problem caused by this is that they cannot accommodate polysemy. For example, the word ‘letter’ has a different meaning according to the context. It can mean ‘single element of alphabet’ or ‘document addressed to another person’. But in word2vec both the letter returns same embeddings.
'''

In [None]:
# Stage 1: read the "text" column and emit it as "document".
documenter = (
    DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")
)

# Stage 2: pretrained English sentence detector with splitLength set to 50;
# custom bounds are cleared and useCustomBoundsOnly is switched off.
sentencerDL = (
    SentenceDetectorDLModel.pretrained("sentence_detector_dl", "en")
    .setInputCols(["document"])
    .setOutputCol("sentences")
    .setMaxLength(1000)
    .setMinLength(0)
    .setSplitLength(50)
    .setCustomBounds([])
    .setUseCustomBoundsOnly(False)
)

sd_pipeline5 = PipelineModel(stages=[documenter, sentencerDL])

sentence_detector_dl download started this may take some time.
Approximate size to download 354.6 KB
[OK!]


In [None]:
# LightPipeline wrapper around the setSplitLength(50) pipeline.
sd_model5 = LightPipeline(sd_pipeline5)

In [None]:

# One tab-separated row per sentence: sentence id, begin, end, sentence text.
for anno in sd_model5.fullAnnotate(text)[0]["sentences"]:
    # Strip embedded newlines so every sentence prints on a single line.
    flattened = anno.result.replace('\n','')
    print("{}\t{}\t{}\t{}".format(
        anno.metadata["sentence"], anno.begin, anno.end, flattened))

0	1	49	There are many NLP tasks like text summarization,
1	50	98	question-answering, sentence prediction to name a
2	99	102	few.
3	106	151	One method to get these tasks done is using a
4	152	169	pre-trained model.
5	172	220	Instead of training a model from scratch for NLP
6	221	270	tasks using millions of annotated texts each time,
7	271	317	a general language representation is created by
8	318	359	training a model on a huge amount of data.
9	364	398	This is called a pre-trained model.
10	400	445	This pre-trained model is then fine-tuned for
11	446	478	each NLP tasks according to need.
12	481	520	Let’s just peek into the pre-BERT world…
13	522	561	For creating models, we need words to be
14	562	611	represented in a form  understood by the training
15	612	632	network, ie, numbers.
16	636	682	Thus many algorithms were used to convert words
17	683	730	into vectors or more precisely, word embeddings.
18	734	777	One of the earliest algorithms used for this
19	778	797	purpose is word2vec.
20

The effect of using setSplitLength to limit the length of a sentence to 50 characters can be seen above.

## setImpossiblePenultimates

The setImpossiblePenultimates parameter defines a list of strings that a sentence cannot end with. If it is not set, the default list is used.

In [None]:
# Stage 1: read the "text" column and emit it as "document".
documenter = (
    DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")
)

# Stage 2: pretrained English sentence detector with "few", "data" and "model"
# registered as impossible penultimates; the other limits are set wide
# (max/split length 1000, min length 0) and custom bounds are disabled.
sentencerDL = (
    SentenceDetectorDLModel.pretrained("sentence_detector_dl", "en")
    .setInputCols(["document"])
    .setOutputCol("sentences")
    .setMaxLength(1000)
    .setMinLength(0)
    .setSplitLength(1000)
    .setCustomBounds([])
    .setUseCustomBoundsOnly(False)
    .setImpossiblePenultimates(["few", "data", "model"])
)

sd_pipeline6 = PipelineModel(stages=[documenter, sentencerDL])


sentence_detector_dl download started this may take some time.
Approximate size to download 354.6 KB
[OK!]


In [None]:
# LightPipeline wrapper around the setImpossiblePenultimates pipeline.
sd_model6 = LightPipeline(sd_pipeline6)

In [None]:

# One tab-separated row per sentence: sentence id, begin, end, sentence text.
for anno in sd_model6.fullAnnotate(text)[0]["sentences"]:
    # Strip embedded newlines so every sentence prints on a single line.
    flattened = anno.result.replace('\n','')
    print("{}\t{}\t{}\t{}".format(
        anno.metadata["sentence"], anno.begin, anno.end, flattened))

0	1	479	There are many NLP tasks like text summarization, question-answering, sentence prediction to name a few. One method to get these tasks done is using a pre-trained model. Instead of training a model from scratch for NLP tasks using millions of annotated texts each time, a general language representation is created by training a model on a huge amount of data. This is called a pre-trained model. This pre-trained model is then fine-tuned for each NLP tasks according to need.
1	481	520	Let’s just peek into the pre-BERT world…
2	522	634	For creating models, we need words to be represented in a form  understood by the training network, ie, numbers.
3	636	731	Thus many algorithms were used to convert words into vectors or more precisely, word embeddings.
4	734	798	One of the earliest algorithms used for this purpose is word2vec.
5	800	872	However, the drawback of word2vec models was that they were context-free.
6	874	941	One problem caused by this is that they cannot accommodate polys

We defined a short list of strings which tells the model to never end a sentence when those words/strings are encountered.

Although there is a full stop after them, those words were ignored during sentence boundary detection.

## Multilanguage SentenceDetectorDL

In [None]:
# Multi-language ("xx") variant of the pretrained sentence detector.
sentencerDL_multilang = (
    SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
    .setInputCols(["document"])
    .setOutputCol("sentences")
)

# Reuses the `documenter` stage defined in an earlier cell.
sd_pipeline_multi = PipelineModel(stages=[documenter, sentencerDL_multilang])

# LightPipeline wrapper so plain strings can be passed to fullAnnotate().
sd_model_multi = LightPipeline(sd_pipeline_multi)

sentence_detector_dl download started this may take some time.
Approximate size to download 514.9 KB
[OK!]


In [None]:
# Turkish sample paragraph for the multi-language sentence detector.
tr_text = """
Metin özetleme, soru-cevaplama, cümle tahmini gibi birçok NLP görevi vardır.
Bu görevleri gerçekleştirmek için kullanılan bir yöntem, önceden eğitilmiş bir model kullanmaktır.
Her seferinde milyonlarca etiketlenmiş metinle sıfırdan bir model eğitmek yerine, büyük miktarda veri üzerinde bir model eğitilerek genel bir dil temsili oluşturulur.
Buna önceden eğitilmiş model denir. Bu önceden eğitilmiş model, her NLP görevi için ihtiyaçlara göre ince ayar yapılır. BERT öncesi dünyaya bir göz atalım...
Modeller oluşturmak için, kelimelerin eğitim ağı tarafından anlaşılan bir formda, yani sayılarla temsil edilmesi gerekir.
Bu nedenle, kelimeleri vektörlere veya daha doğrusu kelime yerleştirmelerine dönüştürmek için birçok algoritma kullanılmıştır.
Bu amaçla kullanılan en erken algoritmalardan biri word2vec'tir. Ancak word2vec modellerinin dezavantajı, bağlamdan bağımsız olmalarıydı.
Bu durumun neden olduğu bir sorun, çok anlamlılığı barındıramamalarıdır. Örneğin, 'mektup' kelimesi bağlama göre farklı anlamlar taşır.
'Alfabenin tek bir öğesi' veya 'başka bir kişiye hitaben yazılmış belge' anlamına gelebilir.
Ancak word2vec'te her iki durumda da mektup aynı yerleştirmeleri döndürür.
"""

# One tab-separated row per sentence: sentence id and sentence text.
for anno in sd_model_multi.fullAnnotate(tr_text)[0]["sentences"]:
    # Strip embedded newlines so every sentence prints on a single line.
    flattened = anno.result.replace('\n','')
    print("{}\t{}".format(anno.metadata["sentence"], flattened))

0	Metin özetleme, soru-cevaplama, cümle tahmini gibi birçok NLP görevi vardır.
1	Bu görevleri gerçekleştirmek için kullanılan bir yöntem, önceden eğitilmiş bir model kullanmaktır.
2	Her seferinde milyonlarca etiketlenmiş metinle sıfırdan bir model eğitmek yerine, büyük miktarda veri üzerinde bir model eğitilerek genel bir dil temsili oluşturulur.
3	Buna önceden eğitilmiş model denir.
4	Bu önceden eğitilmiş model, her NLP görevi için ihtiyaçlara göre ince ayar yapılır.
5	BERT öncesi dünyaya bir göz atalım.
6	..
7	Modeller oluşturmak için, kelimelerin eğitim ağı tarafından anlaşılan bir formda, yani sayılarla temsil edilmesi gerekir.
8	Bu nedenle, kelimeleri vektörlere veya daha doğrusu kelime yerleştirmelerine dönüştürmek için birçok algoritma kullanılmıştır.
9	Bu amaçla kullanılan en erken algoritmalardan biri word2vec'tir.
10	Ancak word2vec modellerinin dezavantajı, bağlamdan bağımsız olmalarıydı.
11	Bu durumun neden olduğu bir sorun, çok anlamlılığı barındıramamalarıdır.
12	Örneğin, 

In [None]:


# Greek sample paragraph; sentences are wrapped across physical lines.
gr_text= '''
Όπως ίσως θα γνωρίζει, όταν εγκαθιστάς μια νέα εφαρμογή, θα έχεις διαπιστώσει
λίγο μετά, ότι το PC αρχίζει να επιβραδύνεται. Στη συνέχεια, όταν επισκέπτεσαι την οθόνη ή από την διαχείριση εργασιών, θα διαπιστώσεις ότι η εν λόγω εφαρμογή έχει προστεθεί στη
λίστα των προγραμμάτων που εκκινούν αυτόματα, όταν ξεκινάς το PC.
Προφανώς, κάτι τέτοιο δεν αποτελεί μια ιδανική κατάσταση, ιδίως για τους λιγότερο γνώστες, οι
οποίοι ίσως δεν θα συνειδητοποιήσουν ότι κάτι τέτοιο συνέβη. Όσο περισσότερες εφαρμογές στη λίστα αυτή, τόσο πιο αργή γίνεται η
εκκίνηση, ιδίως αν πρόκειται για απαιτητικές εφαρμογές. Τα ευχάριστα νέα είναι ότι η τελευταία και πιο πρόσφατη preview build της έκδοσης των Windows 10 που θα καταφθάσει στο πρώτο μισό του 2021, οι εφαρμογές θα
ενημερώνουν το χρήστη ότι έχουν προστεθεί στη λίστα των εφαρμογών που εκκινούν μόλις ανοίγεις το PC.
'''

# One tab-separated row per sentence: sentence id and sentence text.
for anno in sd_model_multi.fullAnnotate(gr_text)[0]["sentences"]:
    # Strip embedded newlines so every sentence prints on a single line.
    flattened = anno.result.replace('\n','')
    print("{}\t{}".format(anno.metadata["sentence"], flattened))

0	Όπως ίσως θα γνωρίζει, όταν εγκαθιστάς μια νέα εφαρμογή, θα έχεις διαπιστώσει λίγο μετά, ότι το PC αρχίζει να επιβραδύνεται.
1	Στη συνέχεια, όταν επισκέπτεσαι την οθόνη ή από την διαχείριση εργασιών, θα διαπιστώσεις ότι η εν λόγω εφαρμογή έχει προστεθεί στη λίστα των προγραμμάτων που εκκινούν αυτόματα, όταν ξεκινάς το PC.
2	Προφανώς, κάτι τέτοιο δεν αποτελεί μια ιδανική κατάσταση, ιδίως για τους λιγότερο γνώστες, οι οποίοι ίσως δεν θα συνειδητοποιήσουν ότι κάτι τέτοιο συνέβη.
3	Όσο περισσότερες εφαρμογές στη λίστα αυτή, τόσο πιο αργή γίνεται η εκκίνηση, ιδίως αν πρόκειται για απαιτητικές εφαρμογές.
4	Τα ευχάριστα νέα είναι ότι η τελευταία και πιο πρόσφατη preview build της έκδοσης των Windows 10 που θα καταφθάσει στο πρώτο μισό του 2021, οι εφαρμογές θα ενημερώνουν το χρήστη ότι έχουν προστεθεί στη λίστα των εφαρμογών που εκκινούν μόλις ανοίγεις το PC.


In [None]:

# Cyrillic-script sample paragraph; sentences are wrapped across physical lines.
cyrillic_text = '''
B чeтвъpтъĸ Gооglе oбяви няĸoлĸo aĸтyaлизaции нa cвoятa тъpcaчĸa, зaявявaйĸи чe e
въвeлa изĸycтвeн интeлeĸт (Аl) и мaшиннo oбyчeниe зa пoдoбpявaнe нa пoтpeбитeлcĸoтo изживявaнe.
Πoтpeбитeлитe вeчe мoгaт дa cи тaнaниĸaт, cвиpят или пeят мeлoдия нa пeceн нa Gооglе чpeз мoбилнoтo пpилoжeниe,
ĸaтo дoĸocнaт иĸoнaтa нa миĸpoфoнa и зaдaдaт въпpoca: Koя e тaзи пeceн?
Taнaниĸaнeтo в пpoдължeниe нa 10-15 ceĸyнди щe дaдe шaнc нa aлгopитъмa c мaшиннo oбyчeниe нa Gооglе дa нaмepи и извeдe peзyлтaт ĸoя e пpипявaнaтa пeceн.
Πoнacтoящeм фyнĸциятa e дocтъпнa нa aнглийcĸи eзиĸ зa Іоѕ и нa oĸoлo 20 eзиĸa зa Аndrоіd,
ĸaтo в бъдeщe и зa двeтe oпepaциoнни cиcтeми щe бъдe пpeдлoжeн eднaĸъв нaбop oт пoддъpжaни eзици, ĸaзвaт oт Gооglе.
Al aĸтyaлизaциитe нa тъpceщия гигaнт cъщo oбxвaщaт пpaвoпиca и oбщитe зaявĸи зa тъpceнe.
Cpeд пoдoбpeниятa e вĸлючeн нoв пpaвoпиceн aлгopитъм, ĸoйтo изпoлзвa нeвpoннa мpeжa
c дълбoĸo oбyчeниe, зa ĸoятo Gооglе твъpди, чe идвa cъc знaчитeлнo пoдoбpeнa cпocoбнocт зa
дeшифpиpaнe нa пpaвoпиcни гpeшĸи.
'''

# One tab-separated row per sentence: sentence id and sentence text.
for anno in sd_model_multi.fullAnnotate(cyrillic_text)[0]["sentences"]:
    # Strip embedded newlines so every sentence prints on a single line.
    flattened = anno.result.replace('\n','')
    print("{}\t{}".format(anno.metadata["sentence"], flattened))


0	B чeтвъpтъĸ Gооglе oбяви няĸoлĸo aĸтyaлизaции нa cвoятa тъpcaчĸa, зaявявaйĸи чe e въвeлa изĸycтвeн интeлeĸт (Аl) и мaшиннo oбyчeниe зa пoдoбpявaнe нa пoтpeбитeлcĸoтo изживявaнe.
1	Πoтpeбитeлитe вeчe мoгaт дa cи тaнaниĸaт, cвиpят или пeят мeлoдия нa пeceн нa Gооglе чpeз мoбилнoтo пpилoжeниe, ĸaтo дoĸocнaт иĸoнaтa нa миĸpoфoнa и зaдaдaт въпpoca: Koя e тaзи пeceн?
2	Taнaниĸaнeтo в пpoдължeниe нa 10-15 ceĸyнди щe дaдe шaнc нa aлгopитъмa c мaшиннo oбyчeниe нa Gооglе дa нaмepи и извeдe peзyлтaт ĸoя e пpипявaнaтa пeceн.
3	Πoнacтoящeм фyнĸциятa e дocтъпнa нa aнглийcĸи eзиĸ зa Іоѕ и нa oĸoлo 20 eзиĸa зa Аndrоіd, ĸaтo в бъдeщe и зa двeтe oпepaциoнни cиcтeми щe бъдe пpeдлoжeн eднaĸъв нaбop oт пoддъpжaни eзици, ĸaзвaт oт Gооglе.
4	Al aĸтyaлизaциитe нa тъpceщия гигaнт cъщo oбxвaщaт пpaвoпиca и oбщитe зaявĸи зa тъpceнe.
5	Cpeд пoдoбpeниятa e вĸлючeн нoв пpaвoпиceн aлгopитъм, ĸoйтo изпoлзвa нeвpoннa мpeжa c дълбoĸo oбyчeниe, зa ĸoятo Gооglе твъpди, чe идвa cъc знaчитeлнo пoдoбpeнa cпocoбнocт зa дeш

In [None]:

# Spanish sample paragraph; sentences are wrapped across physical lines.
spanish_text= '''
Actualmente, la Hispanidad se celebra dentro y fuera de España,
aunque es una de las fiestas que más polémica generan.
En muchos países de Latinoamérica el descubrimiento de América
se asocia al comienzo de la colonización española y a la destrucción de las culturas locales nativas.
Por este motivo, en América del Sur la fiesta
se percibe como una reivindicación.
En España la Hispanidad se festeja
con un desfile militar y una recepción, encabezada por los Reyes,
para el cuerpo diplomático en el Palacio Real.
'''

# One tab-separated row per sentence: sentence id and sentence text.
for anno in sd_model_multi.fullAnnotate(spanish_text)[0]["sentences"]:
    # Strip embedded newlines so every sentence prints on a single line.
    flattened = anno.result.replace('\n','')
    print("{}\t{}".format(anno.metadata["sentence"], flattened))


0	Actualmente, la Hispanidad se celebra dentro y fuera de España, aunque es una de las fiestas que más polémica generan.
1	En muchos países de Latinoamérica el descubrimiento de América se asocia al comienzo de la colonización española y a la destrucción de las culturas locales nativas.
2	Por este motivo, en América del Sur la fiesta se percibe como una reivindicación.
3	 En España la Hispanidad se festeja con un desfile militar y una recepción, encabezada por los Reyes, para el cuerpo diplomático en el Palacio Real.
