In [None]:
import boto3
import sagemaker

# Region of the active boto3 session; the bucket below is expected to live in it.
session = boto3.session.Session()
aws_region = session.region_name
# TODO: replace the placeholder with the name of your S3 bucket.
s3_bucket = "<your-s3-bucket-name>"

# Best-effort sanity check: look up where the bucket lives. Any failure is
# reported (with its cause) rather than raised, so the notebook keeps running.
try:
    s3_client = boto3.client('s3')
    response = s3_client.get_bucket_location(Bucket=s3_bucket)
    # S3 returns a null LocationConstraint for buckets in us-east-1.
    bucket_region = response['LocationConstraint'] or "us-east-1"
    print(f"Bucket region: {bucket_region}")
except Exception as error:
    # `Exception` (not a bare except) lets KeyboardInterrupt/SystemExit propagate.
    print(f"Access Error: Check if '{s3_bucket}' S3 bucket is in '{aws_region}' region")
    print(f"Details: {error}")

In [None]:
# Key prefix (inside the bucket) under which model artifacts are written.
s3_prefix = "models/blazing-text/classification/dbpedia"

# Full S3 URI handed to the estimator as its output path.
s3_output_location = "s3://{}/{}".format(s3_bucket, s3_prefix)
print("Model output location:{}".format(s3_output_location))

In [None]:
# Look up the BlazingText image URI (version "1") for the session's region.
container = sagemaker.image_uris.retrieve(
    framework="blazingtext",
    region=aws_region,
    version="1",
)
print(f"Using SageMaker BlazingText container: {container} ({aws_region})")

In [None]:
from smexperiments.experiment import Experiment
from smexperiments.trial import Trial
from smexperiments.trial_component import TrialComponent
from smexperiments.tracker import Tracker
import time

# SageMaker session and execution role reused by the estimator and analytics cells.
sess, role = sagemaker.Session(), sagemaker.get_execution_role()
print(role)

In [None]:
# Low-level SageMaker client shared by the experiment-tracking calls.
sm = boto3.client("sagemaker")

# S3 location of the training data; point this at your own dataset.
dataset_s3_uri = f"s3://{s3_bucket}/blazing-text/classification/dbpedia/train"

# Record the dataset location as an input of a "Dataset" trial component so it
# can later be attached to each trial.
with Tracker.create(display_name="Dataset", sagemaker_boto_client=sm) as tracker:
    tracker.log_input(name="dbpedia-dataset", media_type="s3/uri", value=dataset_s3_uri)

In [None]:
# Experiment that groups all trials launched below; the name is suffixed with
# the current epoch second.
experiment_name = f"bt-classify-{int(time.time())}"
bt_classify_exp = Experiment.create(
    experiment_name=experiment_name,
    description="Blazing Text Classification",
    sagemaker_boto_client=sm,
)

In [None]:
# Base hyperparameters shared by every trial. Values are kept as strings;
# per-trial settings are added in the training loop.
hyperparameters = dict(
    mode="supervised",
    word_ngrams="2",
    patience="4",
    min_epochs="15",
    min_count="5",
    learning_rate="0.05",
)

In [None]:
from sagemaker.inputs import TrainingInput
# S3 prefixes holding the training and validation splits.
s3_train = f"s3://{s3_bucket}/blazing-text/classification/dbpedia/train"
s3_validation = f"s3://{s3_bucket}/blazing-text/classification/dbpedia/validation"

# Training channel.
train_input = TrainingInput(s3_data=s3_train,
                            distribution="FullyReplicated",
                            s3_data_type="S3Prefix",
                            input_mode="File")

# Validation channel. It must read the validation prefix: pointing it at
# `s3_train` (as before) reports validation metrics computed on training data.
validation_input = TrainingInput(s3_data=s3_validation,
                                 distribution="FullyReplicated",
                                 s3_data_type="S3Prefix",
                                 input_mode="File")

# Channel name -> input definition, passed to `Estimator.fit`.
data_channels = {"train": train_input, "validation": validation_input}

In [None]:
# One tuple per trial: (vector_dim, epochs, early_stopping).
trial_params = [(100, 30, False), (200, 40, True)]

for vector_dim, epochs, early_stopping in trial_params:
    # Overlay this trial's settings on the shared hyperparameter dict.
    hyperparameters.update(
        epochs=epochs,
        vector_dim=vector_dim,
        early_stopping=early_stopping,
    )

    # The trial name is also reused as the training job name below.
    trial_name = f"bt-classify-{int(time.time())}"
    bt_trial = Trial.create(
        trial_name=trial_name,
        experiment_name=bt_classify_exp.experiment_name,
        sagemaker_boto_client=sm,
    )

    # Associate the preprocessing (dataset) trial component with the current trial.
    bt_trial.add_trial_component(tracker.trial_component)

    bt_model = sagemaker.estimator.Estimator(
        container,
        role,
        instance_count=1,
        instance_type="ml.c5.4xlarge",
        volume_size=100,
        max_run=360000,
        input_mode="File",
        output_path=s3_output_location,
        sagemaker_session=sess,
        hyperparameters=hyperparameters,
    )

    # Start the job without blocking so the next trial can be launched.
    experiment_config = {
        "TrialName": bt_trial.trial_name,
        "TrialComponentDisplayName": "Training",
    }
    bt_model.fit(
        inputs=data_channels,
        job_name=bt_trial.trial_name,
        logs=True,
        experiment_config=experiment_config,
        wait=False,
    )

    # Pause between launches; this also keeps the timestamp-based names distinct.
    time.sleep(2)

In [None]:
# Keep only trial components whose display name is "Training", the name given
# to the training jobs via `experiment_config`.
training_only_filter = dict(Name="DisplayName", Operator="Equals", Value="Training")
search_expression = {"Filters": [training_only_filter]}

In [None]:
from sagemaker.analytics import ExperimentAnalytics

# Hyperparameters to expose as columns in the analytics table.
tracked_parameters = ["vector_dim", "epochs", "early_stopping"]

# Query the experiment's "Training" trial components, sorted descending by
# the maximum validation accuracy.
trial_component_analytics = ExperimentAnalytics(
    experiment_name=bt_classify_exp.experiment_name,
    sagemaker_session=sess,
    search_expression=search_expression,
    parameter_names=tracked_parameters,
    sort_by="metrics.validation:accuracy.max",
    sort_order="Descending",
)

In [None]:
# Materialize the analytics query as a DataFrame and list its column labels,
# one per line, so the available parameters/metrics can be inspected.
analytic_table = trial_component_analytics.dataframe()
for col in analytic_table:  # iterating a DataFrame yields its column labels
    print(col)

In [None]:
# Trial settings alongside the average / standard deviation of training accuracy.
train_columns = [
    "vector_dim",
    "epochs",
    "early_stopping",
    "train:accuracy - Avg",
    "train:accuracy - StdDev",
]
train_map = analytic_table[train_columns]
train_map

In [None]:
# Trial settings alongside the average / standard deviation of validation accuracy.
validation_columns = [
    "vector_dim",
    "epochs",
    "early_stopping",
    "validation:accuracy - Avg",
    "validation:accuracy - StdDev",
]
validation_map = analytic_table[validation_columns]
validation_map