# Initializing the SageMaker Session, Execution Role, and S3 Locations

In [None]:
import sagemaker
from sagemaker import get_execution_role
import boto3
import json

# S3 destination used by the later cells: the bucket name and the key prefix
# under which the training data and the training output are stored.
bucket = 'slytherins-test'
prefix = 'sagemaker/blazingtext'

# SageMaker session (used for the S3 upload, the estimator and endpoint
# cleanup) and the IAM role returned by get_execution_role(), which is handed
# to the estimator.
sess = sagemaker.Session()
role = get_execution_role()

# Region of the default boto3 session; used to look up the algorithm image.
region_name = boto3.Session().region_name

# Downloading the Text8 Dataset and Decompressing it

In [None]:
# Download the text8 archive, saving it locally under the name text8.gz.
# NOTE(review): the remote file is a .zip; the next command relies on gzip
# being able to decompress it (gzip documents support for single-member zip
# archives) — confirm on the target image.
!wget http://mattmahoney.net/dc/text8.zip -O text8.gz
# Decompress to a local file named 'text8' (the path uploaded to S3 later);
# -f overwrites an existing output file so the cell can be re-run.
!gzip -d text8.gz -f

# Uploading the Data in S3

In [None]:
# Key prefix, inside the bucket, that holds the training corpus.
train_channel = prefix + '/train'

# Push the local 'text8' file to s3://<bucket>/<train_channel>/.
sess.upload_data(
    path='text8',
    bucket=bucket,
    key_prefix=train_channel,
)

# S3 URIs consumed by the later cells: where training reads its input from
# and where the estimator writes its output.
s3_train_data = 's3://{}/{}'.format(bucket, train_channel)
s3_output_location = 's3://{}/{}/output'.format(bucket, prefix)

# Getting the BlazingText Container Image URI

In [None]:
# Look up the "latest" BlazingText algorithm image for the notebook's region.
container = sagemaker.amazon.amazon_estimator.get_image_uri(
    region_name,
    "blazingtext",
    "latest",
)

# Creating the BlazingText Estimator

In [None]:
# Generic estimator wrapping the BlazingText image: two ml.m4.xlarge training
# instances reading their input in 'File' mode and writing the output under
# s3_output_location.
bt_model = sagemaker.estimator.Estimator(
    container,
    role,
    train_instance_count=2,
    train_instance_type='ml.m4.xlarge',
    train_volume_size=5,   # per the SageMaker SDK docs this is in GB — confirm
    train_max_run=360000,  # per the SageMaker SDK docs this is in seconds — confirm
    input_mode='File',
    output_path=s3_output_location,
    sagemaker_session=sess,
)

# Setting Hyper Parameters

In [None]:
# Hyperparameters for the training job, collected in one mapping and then
# forwarded to the estimator as keyword arguments.
bt_hyperparameters = {
    "mode": "batch_skipgram",
    "epochs": 5,
    "min_count": 5,
    "sampling_threshold": 0.0001,
    "learning_rate": 0.05,
    "window_size": 5,
    "vector_dim": 100,
    "negative_samples": 5,
    "batch_size": 11,  # equals 2 * window_size + 1
    "evaluation": True,
    "subwords": False,
}
bt_model.set_hyperparameters(**bt_hyperparameters)

# Defining S3 Input

In [None]:
# Describe the uploaded corpus as a plain-text input located by S3 prefix.
train_data = sagemaker.session.s3_input(
    s3_train_data,
    content_type='text/plain',
    s3_data_type='S3Prefix',
)

# Channel name -> input description, as handed to fit().
data_channels = dict(train=train_data)

# Starting the Training

In [None]:
# Launch the training job on the 'train' channel with logs=True.
bt_model.fit(
    inputs=data_channels,
    logs=True,
)

# Deploying the Model

In [None]:
# Deploy the trained model on a single ml.m4.xlarge instance and keep the
# returned object for inference and for cleanup afterwards.
bt_endpoint = bt_model.deploy(
    initial_instance_count=1,
    instance_type='ml.m4.xlarge',
)

# Getting Inferences

In [None]:
# Words to look up, wrapped in the {"instances": [...]} request body.
words = ["awesome", "blazing"]
payload = dict(instances=words)

# Send the JSON-encoded request to the endpoint, decode the JSON reply and
# print the result.
response = bt_endpoint.predict(json.dumps(payload))
vecs = json.loads(response)
print(vecs)

# Deleting the Endpoint

In [None]:
# Delete the endpoint created by deploy() now that inference is done.
sess.delete_endpoint(
    bt_endpoint.endpoint,
)