In [None]:
import boto3
import sagemaker

# Region configured for the notebook's default boto3 session.
session = boto3.session.Session()
aws_region = session.region_name

# Name of the S3 bucket used for the training data and the model output.
# TODO: fill this in before running the notebook.
s3_bucket = ""  # s3 bucket name
if not s3_bucket:
    raise ValueError("Set 's3_bucket' to the name of your S3 bucket before running this notebook")

# Best-effort sanity check: look the bucket up and report where it lives.
# A failure here is reported but does not stop the notebook.
try:
    s3_client = boto3.client('s3')
    response = s3_client.get_bucket_location(Bucket=s3_bucket)
    # S3 reports a null LocationConstraint for buckets in us-east-1.
    bucket_region = response['LocationConstraint'] or 'us-east-1'
    print(f"Bucket region: {bucket_region}")
except Exception as err:  # narrow enough to let KeyboardInterrupt/SystemExit through
    print(f"Access Error: Check if '{s3_bucket}' S3 bucket is in '{aws_region}' region")
    # Surface the real cause instead of hiding it behind the generic hint.
    print(f"Underlying error: {err!r}")

In [None]:
# Key prefix (inside the bucket) for the model output.
s3_prefix = "models/blazing-text/word2vec/dbpedia"

# Full S3 URI built from the bucket name and the prefix; it is passed to the
# estimator as its output_path.
s3_output_location = f"s3://{s3_bucket}/{s3_prefix}"

print(f"Model output location:{s3_output_location}")

In [None]:
# Look up the image URI of the "blazingtext" algorithm (version "1") for the
# session's region.
container = sagemaker.image_uris.retrieve(
    framework="blazingtext",
    region=aws_region,
    version="1",
)
print(f"Using SageMaker BlazingText container: {container} ({aws_region})")

In [None]:
# SageMaker session and the execution role handed to the estimator.
sess = sagemaker.Session()
role = sagemaker.get_execution_role()
print(role)

# Generic estimator wrapping the BlazingText container image. The tuner
# defined later uses this object as its base estimator.
bt_model = sagemaker.estimator.Estimator(
    image_uri=container,
    role=role,
    sagemaker_session=sess,
    # Compute resources for each training job.
    instance_type="ml.c5.4xlarge",
    instance_count=1,
    volume_size=100,
    # Upper bound on job runtime (presumably seconds, i.e. 100 h - verify against the SDK docs).
    max_run=360000,
    # Data in / artifacts out.
    input_mode="File",
    output_path=s3_output_location,
)

In [None]:
# Hyperparameters set directly on the estimator; the ranges to search over
# are declared separately for the tuner.
bt_model.set_hyperparameters(
    **{
        "min_count": 5,
        "sampling_threshold": 0.0001,
        "learning_rate": 0.05,
        # Perform similarity evaluation on WS-353 dataset at the end of training
        "evaluation": True,
        "subwords": True,
    }
)

In [None]:
from sagemaker.tuner import IntegerParameter
from sagemaker.tuner import CategoricalParameter

# Name of the metric the tuner is asked to optimise.
objective_metric_name = "train:mean_rho"

# Search space: one parameter object per tunable hyperparameter.
vector_dim = IntegerParameter(min_value=100, max_value=200)
window_size = IntegerParameter(min_value=5, max_value=10)
negative_samples = IntegerParameter(min_value=5, max_value=10)
mode = CategoricalParameter(values=["skipgram", "cbow"])
epochs = IntegerParameter(min_value=10, max_value=50)

# Mapping of hyperparameter name -> range, in the form passed to HyperparameterTuner.
hyperparameter_ranges = {
    "vector_dim": vector_dim,
    "window_size": window_size,
    "negative_samples": negative_samples,
    "mode": mode,
    "epochs": epochs,
}

In [None]:
from sagemaker.tuner import HyperparameterTuner

# Tuner over the BlazingText estimator: Bayesian strategy, maximising the
# objective metric, 20 jobs in total with at most one running at a time.
hyperparameter_tuner = HyperparameterTuner(
    estimator=bt_model,
    objective_metric_name=objective_metric_name,
    hyperparameter_ranges=hyperparameter_ranges,
    strategy="Bayesian",
    objective_type="Maximize",
    max_jobs=20,
    max_parallel_jobs=1,
    base_tuning_job_name="blazingtext-tuning",
)

In [None]:
from sagemaker.inputs import TrainingInput
# S3 prefix used as the training data location.
s3_train = f"s3://{s3_bucket}/blazing-text/word2vec/dbpedia"

# Describe how the data under that prefix is delivered to the training job.
train_input = TrainingInput(
    s3_train,
    s3_data_type="S3Prefix",
    distribution="FullyReplicated",
    input_mode="File",
)

# Channel name -> input mapping passed to fit().
data_channels = dict(train=train_input)

In [None]:
# Start the tuning job on the "train" channel; wait=False asks fit() not to
# wait for the job to finish.
# NOTE(review): `logs` is not a named parameter of HyperparameterTuner.fit and
# is presumably forwarded via **kwargs - confirm it has any effect here.
hyperparameter_tuner.fit(
    inputs=data_channels,
    wait=False,
    logs=True,
)