In this tutorial, we build a simple matrix factorization model on the [MovieLens 100K dataset](https://grouplens.org/datasets/movielens/100k/) with TensorFlow Recommenders (TFRS) and train it on Amazon SageMaker.

We will use this model to recommend movies for a given user.

In [1]:
!pip install -q sagemaker==2.9.2
!pip install -q sagemaker-experiments==0.1.24
!pip install -q tensorflow==2.3.0
!pip install -q tensorflow-recommenders==0.2.0
!pip install -q tensorflow-datasets==4.0.0

^C
[31mERROR: Operation cancelled by user[0m
^C
Traceback (most recent call last):
  File "/opt/conda/bin/pip", line 11, in <module>
    sys.exit(main())
  File "/opt/conda/lib/python3.7/site-packages/pip/_internal/cli/main.py", line 73, in main
    command = create_command(cmd_name, isolated=("--isolated" in cmd_args))
  File "/opt/conda/lib/python3.7/site-packages/pip/_internal/commands/__init__.py", line 96, in create_command
    module = importlib.import_module(module_path)
  File "/opt/conda/lib/python3.7/importlib/__init__.py", line 127, in import_module
    return _bootstrap._gcd_import(name[level:], package, level)
  File "<frozen importlib._bootstrap>", line 1006, in _gcd_import
  File "<frozen importlib._bootstrap>", line 983, in _find_and_load
  File "<frozen importlib._bootstrap>", line 967, in _find_and_load_unlocked
  File "<frozen importlib._bootstrap>", line 677, in _load_unlocked
  File "<frozen importlib._bootstrap_external>", line 728, in exec_module
  File "<froze

In [2]:
import boto3
import sagemaker
import pandas as pd

# SageMaker session, plus the execution role and default bucket used by later cells.
sess = sagemaker.Session()
role = sagemaker.get_execution_role()
bucket = sess.default_bucket()

# Region of the current boto3 configuration; also used to build console links later on.
region = boto3.Session().region_name

# Low-level SageMaker client, handed to the Experiments API below.
sm = boto3.Session().client(
    service_name='sagemaker',
    region_name=region,
)

# Specify Input Data S3 URI and `Distribution Strategy`

In [3]:
from sagemaker.inputs import TrainingInput

# Build the training-data URI from the session's default bucket instead of a hard-coded,
# account-specific bucket name, so the notebook runs in any account/region.
# NOTE(review): assumes the dataset was uploaded under `tensorflow_datasets/train/` in the
# default bucket by an earlier step -- confirm against the data-preparation notebook.
input_train_data_s3_uri = 's3://{}/tensorflow_datasets/train/'.format(bucket)

# 'ShardedByS3Key' gives each training instance a subset of the S3 objects under the prefix.
s3_input_train_data = TrainingInput(
    s3_data=input_train_data_s3_uri,
    distribution='ShardedByS3Key',
)
print(s3_input_train_data.config)

{'DataSource': {'S3DataSource': {'S3DataType': 'S3Prefix', 'S3Uri': 's3://sagemaker-us-east-1-835319576252/tensorflow_datasets/train/', 'S3DataDistributionType': 'ShardedByS3Key'}}}


# Setup Metrics To Track Model Performance

This sample log line...
```
499/500 [=====>..] - ETA: 3s - root_mean_squared_error: 1.194 - factorized_top_k/top_10_categorical_accuracy: 0.481 - factorized_top_k/top_50_categorical_accuracy: 0.607 - factorized_top_k/top_100_categorical_accuracy: 0.885
```
...will produce the following metrics in CloudWatch:

`root_mean_squared_error` = 1.194

`top_10_categorical_accuracy` = 0.481

`top_50_categorical_accuracy` = 0.607

`top_100_categorical_accuracy` = 0.885

In [4]:
# Regex capture group for the numeric value that follows each metric name in the training log.
float_capture = r'([0-9\.]+)'

# One CloudWatch metric per logged quantity: RMSE first, then the top-k accuracies.
metrics_definitions = [
    {'Name': 'root_mean_squared_error', 'Regex': 'root_mean_squared_error: ' + float_capture},
] + [
    {
        'Name': 'top_{}_categorical_accuracy'.format(k),
        'Regex': 'factorized_top_k/top_{}_categorical_accuracy: '.format(k) + float_capture,
    }
    for k in (10, 50, 100)
]

# Set Up Hyper-Parameters for the Recommender Model

In [5]:
# --- Model / training hyper-parameters (forwarded to the training script) ---
epochs = 500
learning_rate = 0.5
dataset_variant = '100k'   # movielens 100k, 1m, 20m, 25m, etc
embedding_dimension = 32   # dimension (k) of our user and item embeddings
enable_tensorboard = True

# --- Training infrastructure ---
train_instance_count = 1
train_instance_type = 'ml.p3.2xlarge'

# Setup Our TensorFlow Script to Run on SageMaker
Prepare our TensorFlow model to run on the managed SageMaker service

In [6]:
# Print the training script with syntax highlighting so it can be reviewed before launching the job.
# NOTE(review): absolute path is tied to this workshop's directory layout -- adjust if the repo lives elsewhere.
!pygmentize /root/workshop/02_usecases/sagemaker_recommendations/src/train.py

[34mimport[39;49;00m [04m[36mtime[39;49;00m
[34mimport[39;49;00m [04m[36mrandom[39;49;00m
[34mimport[39;49;00m [04m[36mpandas[39;49;00m [34mas[39;49;00m [04m[36mpd[39;49;00m
[34mfrom[39;49;00m [04m[36mglob[39;49;00m [34mimport[39;49;00m glob
[34mimport[39;49;00m [04m[36mpprint[39;49;00m
[34mimport[39;49;00m [04m[36margparse[39;49;00m
[34mimport[39;49;00m [04m[36mjson[39;49;00m
[34mimport[39;49;00m [04m[36msubprocess[39;49;00m
[34mimport[39;49;00m [04m[36msys[39;49;00m
[34mimport[39;49;00m [04m[36mos[39;49;00m

subprocess.check_call([sys.executable, [33m'[39;49;00m[33m-m[39;49;00m[33m'[39;49;00m, [33m'[39;49;00m[33mpip[39;49;00m[33m'[39;49;00m, [33m'[39;49;00m[33minstall[39;49;00m[33m'[39;49;00m, [33m'[39;49;00m[33mscikit-learn==0.23.1[39;49;00m[33m'[39;49;00m])
subprocess.check_call([sys.executable, [33m'[39;49;00m[33m-m[39;49;00m[33m'[39;49;00m, [33m'[39;49;00m[33mpip[39;49;00m[33m'[39;49;00m,

In [7]:
from sagemaker.tensorflow import TensorFlow

# Hyper-parameters forwarded to the entry-point script.
training_hyperparameters = {
    'epochs': epochs,
    'learning_rate': learning_rate,
    'dataset_variant': dataset_variant,
    'embedding_dimension': embedding_dimension,
    'enable_tensorboard': enable_tensorboard,
}

# TensorFlow estimator describing the training job: which script to run, on what
# hardware, with which framework version, and which log metrics to track.
estimator = TensorFlow(
    entry_point='train.py',
    source_dir='/root/workshop/02_usecases/sagemaker_recommendations/src',
    role=role,
    instance_count=train_instance_count,
    instance_type=train_instance_type,
    framework_version='2.3.0',
    py_version='py37',
    hyperparameters=training_hyperparameters,
    metric_definitions=metrics_definitions,
    debugger_hook_config=False,
)

# Create the Experiment

In [8]:
import time
from smexperiments.experiment import Experiment

# The timestamp suffix keeps the experiment name distinct between notebook runs.
timestamp = int(time.time())
experiment_name = 'MovieLens-Recommender-{}'.format(timestamp)

recommender_experiment = Experiment.create(
    experiment_name=experiment_name,
    description='MovieLens Recommender',
    sagemaker_boto_client=sm,
)

# Read the name back from the created experiment; later cells refer to it.
recommender_experiment_name = recommender_experiment.experiment_name
print('Experiment name: {}'.format(recommender_experiment_name))

Experiment name: MovieLens-Recommender-1604389575


In [9]:
import time
from smexperiments.trial import Trial

# Encode the timestamp and the key hyper-parameters in the trial name so runs are easy to tell apart.
timestamp = int(time.time())
requested_trial_name = 'trial-{}-{}-{}-{}'.format(
    timestamp, epochs, dataset_variant, embedding_dimension
)

trial = Trial.create(
    trial_name=requested_trial_name,
    experiment_name=recommender_experiment_name,
    sagemaker_boto_client=sm,
)

# Keep the name as reported by the created trial.
trial_name = trial.trial_name
print('Trial name: {}'.format(trial_name))

Trial name: trial-1604389575-1000-100k-256


In [10]:
# Associates the upcoming training job with the experiment and trial created above;
# the job is displayed as the 'train' component of the trial.
recommender_experiment_config = dict(
    ExperimentName=recommender_experiment_name,
    TrialName=trial.trial_name,
    TrialComponentDisplayName='train',
)

# Train the Model on SageMaker

In [11]:
# Map the 'train' channel to the S3 training input.
training_channels = {'train': s3_input_train_data}

# wait=False returns right after the job is submitted; a later cell blocks until it finishes.
estimator.fit(
    inputs=training_channels,
    experiment_config=recommender_experiment_config,
    wait=False,
)

INFO:sagemaker:Creating training-job with name: tensorflow-training-2020-11-03-07-46-15-771


In [12]:
# Capture the name of the job that was just launched; it is reused for the console
# links, the model download, and the %store at the end of the notebook.
latest_training_job = estimator.latest_training_job
recommender_training_job_name = latest_training_job.name
print('Training Job Name:  {}'.format(recommender_training_job_name))

Training Job Name:  tensorflow-training-2020-11-03-07-46-15-771


In [13]:
# `IPython.display` is the public import location (`IPython.core.display` is deprecated for `display`).
from IPython.display import display, HTML

# Link to the training job in the SageMaker console; target="_blank" opens it in a new tab.
display(HTML('<b>Review <a target="_blank" href="https://console.aws.amazon.com/sagemaker/home?region={}#/jobs/{}">Training Job</a></b>'.format(region, recommender_training_job_name)))


In [14]:
# `IPython.display` is the public import location (`IPython.core.display` is deprecated for `display`).
from IPython.display import display, HTML

# Link to the job's CloudWatch log streams; target="_blank" opens it in a new tab.
display(HTML('<b>Review <a target="_blank" href="https://console.aws.amazon.com/cloudwatch/home?region={}#logStream:group=/aws/sagemaker/TrainingJobs;prefix={};streamFilter=typeLogStreamPrefix">CloudWatch Logs</a></b>'.format(region, recommender_training_job_name)))


In [15]:
# `IPython.display` is the public import location (`IPython.core.display` is deprecated for `display`).
from IPython.display import display, HTML

# Link to the job's output prefix in the S3 console; target="_blank" opens it in a new tab.
display(HTML('<b>Review <a target="_blank" href="https://s3.console.aws.amazon.com/s3/buckets/{}/{}/?region={}&tab=overview">S3 Output Data</a> After The Training Job Has Completed</b>'.format(bucket, recommender_training_job_name, region)))


# Wait for Training Job to Finish

In [None]:
%%time

estimator.latest_training_job.wait(logs=False)


2020-11-03 07:46:17 Starting - Starting the training job
2020-11-03 07:46:22 Starting - Launching requested ML instances

# Copy the Trained Model from S3

In [None]:
# Download the model archive written by the training job; IPython substitutes
# $bucket and $recommender_training_job_name from the notebook's Python variables.
!aws s3 cp s3://$bucket/$recommender_training_job_name/output/model.tar.gz ./model.tar.gz

In [None]:
# Unpack the downloaded archive into ./model/ (created if it does not exist yet).
!mkdir -p ./model/
!tar -xvzf ./model.tar.gz -C ./model/

# Inspect the Model

In [None]:
# List the tag-sets and signatures (inputs/outputs) of the exported SavedModel.
!saved_model_cli show --all --dir ./model/tensorflow/saved_model/0/

# Make a Sample Prediction

In [None]:
# User to request recommendations for; kept as a string because it is
# substituted into the string array passed to the model in the next cell.
user_id = '42'

In [None]:
# Run the `serving_default` signature on a one-element string array holding the user id
# ($user_id is substituted by IPython from the Python variable defined above).
!saved_model_cli run --input_exprs 'input_1=np.array(["$user_id"])' --tag_set serve --signature_def serving_default --dir ./model/tensorflow/saved_model/0

# Show the Experiment Tracking Lineage

In [None]:
from sagemaker.analytics import ExperimentAnalytics

# Metric names as declared in `metrics_definitions` (the 'Name' fields).
tracked_metric_names = [
    'root_mean_squared_error',
    'top_10_categorical_accuracy',
    'top_50_categorical_accuracy',
    'top_100_categorical_accuracy',
]

# Collect the experiment's trial components, oldest first.
lineage_table = ExperimentAnalytics(
    sagemaker_session=sess,
    experiment_name=recommender_experiment_name,
    metric_names=tracked_metric_names,
    sort_by="CreationTime",
    sort_order="Ascending",
)

# Materialise as a DataFrame and show its (rows, columns) shape.
lineage_df = lineage_table.dataframe()
lineage_df.shape

In [None]:
# Show which fields were collected for each trial component.
lineage_df.columns

In [None]:
# Display the lineage table itself.
lineage_df

In [None]:
# Look up the full description of the first trial component in the lineage table.
first_trial_component_name = lineage_df['TrialComponentName'][0]
sm.describe_trial_component(TrialComponentName=first_trial_component_name)

# Pass Variables to the Next Notebook(s)

In [None]:
# Persist the training job name so follow-up notebooks can restore it with `%store -r`.
%store recommender_training_job_name