In [10]:
import sagemaker
from sagemaker import get_execution_role
import boto3
import json

# SageMaker session used by the later cells for the S3 upload, the model
# deployment and the predictor.
my_session = sagemaker.Session()

# Execution role handed to the SageMaker model when it is created below.
role = get_execution_role()

s3 = boto3.resource('s3')
# Alias the existing session instead of constructing a second, identical
# Session(); the name is kept so any code referring to it still works.
sagemaker_session = my_session
s3_bucket = sagemaker_session.default_bucket()
# Key prefix under which the model archive is uploaded in the bucket.
s3_prefix = 'aws-machine-learning-specialty/algorithms/blazing-text/fast-text'

In [5]:
# Resolve the region once and reuse it for both the image lookup and the message.
region = boto3.Session().region_name

# Request version "1" explicitly: the SDK reported that it is the only supported
# BlazingText version and that a request for "latest" is ignored with a warning.
blazing_text_container = sagemaker.image_uris.retrieve("blazingtext", region, version="1")
print('Using SageMaker BlazingText container: {} ({})'.format(blazing_text_container, region))

Defaulting to the only supported framework/algorithm version: 1. Ignoring framework/algorithm version: latest.


Using SageMaker BlazingText container: 811284229777.dkr.ecr.us-east-1.amazonaws.com/blazingtext:1 (us-east-1)


In [6]:
!wget -O model.bin https://dl.fbaipublicfiles.com/fasttext/supervised-models/lid.176.bin

--2021-02-02 20:13:47--  https://dl.fbaipublicfiles.com/fasttext/supervised-models/lid.176.bin
Resolving dl.fbaipublicfiles.com (dl.fbaipublicfiles.com)... 104.22.75.142, 104.22.74.142, 172.67.9.4, ...
Connecting to dl.fbaipublicfiles.com (dl.fbaipublicfiles.com)|104.22.75.142|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 131266198 (125M) [application/octet-stream]
Saving to: ‘model.bin’


2021-02-02 20:13:53 (24.6 MB/s) - ‘model.bin’ saved [131266198/131266198]



In [11]:
!tar -czvf langid.tar.gz model.bin
blazing_text_model_location = my_session.upload_data("langid.tar.gz", bucket=s3_bucket, key_prefix=s3_prefix)
!rm langid.tar.gz model.bin

model.bin


In [None]:
# Describe a SageMaker model that pairs the BlazingText image with the
# uploaded model archive.
language_identifier = sagemaker.Model(
    image_uri=blazing_text_container,
    model_data=blazing_text_model_location,
    role=role,
    sagemaker_session=my_session,
)

# Deploy the model to an endpoint backed by a single ml.m4.xlarge instance.
language_identifier.deploy(instance_type='ml.m4.xlarge', initial_instance_count=1)


In [19]:
from sagemaker.deserializers import JSONDeserializer
from sagemaker.serializers import CSVSerializer, JSONSerializer

# Client for the deployed endpoint: requests are serialized as JSON and
# responses are deserialized from JSON.
language_identifier_predictor = sagemaker.predictor.Predictor(
    endpoint_name=language_identifier.endpoint_name,
    sagemaker_session=my_session,
    serializer=JSONSerializer(),
    deserializer=JSONDeserializer(),
)

In [20]:
# Sample sentences written in several different languages.
some_language_examples = [
    "À quoi sert l'intelligence artificielle",
    "Was ist der Zweck der künstlichen Intelligenz?",
    "Wat is die doel van kunsmatige intelligensie",
    "ما هو الغرض من الذكاء الاصطناعي",
    "Süni intellektin məqsədi nədir",
    "Hvad er formålet med kunstig intelligens",
]

# Request payload: all sentences are sent together under the "instances" key.
prediction_input = dict(instances=some_language_examples)

In [21]:
# Send every sample sentence to the endpoint in a single request and show
# the raw response.
language_predictions = language_identifier_predictor.predict(data=prediction_input)
print(language_predictions)

[{'label': ['__label__fr'], 'prob': [0.8571585416793823]}, {'label': ['__label__de'], 'prob': [0.9994584321975708]}, {'label': ['__label__af'], 'prob': [0.465190052986145]}, {'label': ['__label__ar'], 'prob': [0.9983780980110168]}, {'label': ['__label__az'], 'prob': [0.9949907064437866]}, {'label': ['__label__da'], 'prob': [0.864094614982605]}]


In [22]:
# Remove the '__label__' before each language identifier in the prediction output
# and change the label and prob to more readable values.
LABEL_PREFIX = '__label__'


def to_readable_prediction(prediction):
    """Return a readable version of a single prediction.

    Args:
        prediction: Either a raw prediction dict with a 'label' entry (a list
            whose first item is a string such as '__label__fr') and a 'prob'
            entry, or a dict that this function has already converted.

    Returns:
        A dict with 'language' (the upper-cased identifier without the
        '__label__' prefix) and 'probability' (the original 'prob' value,
        unchanged). An already-converted dict is returned as is.
    """
    if 'label' not in prediction:
        # Already converted by an earlier run of this cell; keeping it as is
        # lets the cell be re-executed without raising KeyError.
        return prediction

    label = prediction['label'][0]
    if label.startswith(LABEL_PREFIX):
        # Strip by the prefix's length instead of a hard-coded offset.
        label = label[len(LABEL_PREFIX):]
    return {'language': label.upper(), 'probability': prediction['prob']}


# Build new dicts rather than mutating the response in place.
language_predictions = [to_readable_prediction(p) for p in language_predictions]

print(language_predictions)

[{'language': 'FR', 'probability': [0.8571585416793823]}, {'language': 'DE', 'probability': [0.9994584321975708]}, {'language': 'AF', 'probability': [0.465190052986145]}, {'language': 'AR', 'probability': [0.9983780980110168]}, {'language': 'AZ', 'probability': [0.9949907064437866]}, {'language': 'DA', 'probability': [0.864094614982605]}]


In [24]:
# Tear down the endpoint through the same session the model was deployed with,
# instead of constructing another throwaway Session().
my_session.delete_endpoint(language_identifier.endpoint_name)
