# Building the NMF Training Container and Training the NMF Model

In [2]:
import boto3,os



In [4]:
# Static naming for the training image.
ecr_repository = 'sagemaker-nmf-container'
tag = ':nmflatest'  # NOTE(review): no visible cell reads this; the image is pushed as ':latest' — confirm intent

# Region and account id are looked up from the active AWS credentials.
region = boto3.session.Session().region_name
account_id = boto3.client('sts').get_caller_identity().get('Account')

# ECR repository URI (without an image tag).
training_repository_uri = '{}.dkr.ecr.{}.amazonaws.com/{}'.format(account_id, region, ecr_repository)

In [5]:
# Display the assembled ECR repository URI (no image tag appended) as a sanity check.
training_repository_uri

'064258348567.dkr.ecr.us-east-1.amazonaws.com/sagemaker-nmf-container'

In [6]:
from sagemaker import get_execution_role
from sagemaker.tuner import IntegerParameter, HyperparameterTuner
# Look up the execution role via sagemaker.get_execution_role(); it is later
# handed to the Estimator. The bare name shows the value as the cell output.
role = get_execution_role()
role

'arn:aws:iam::064258348567:role/service-role/AmazonSageMaker-ExecutionRole-20200131T104492'

In [7]:
!docker build -t $ecr_repository .

Sending build context to Docker daemon  40.96kB
Step 1/10 : FROM python:3.7-slim-buster
3.7-slim-buster: Pulling from library/python

[1B5a41d630: Pulling fs layer 
[1B8b7b4c32: Pulling fs layer 
[1Ba3eb7bae: Pulling fs layer 
[1Be182f49d: Pulling fs layer 
[1BDigest: sha256:621071df55342b84610ca33de46be6a5a28465b3c368d394144de24689764a651A[2K[3A[2K[1A[2K[3A[2K[5A[2K[3A[2K[5A[2K[3A[2K[5A[2K[3A[2K[5A[2K[5A[2K[5A[2K[5A[2K[5A[2K[5A[2K[5A[2K[5A[2K[5A[2K[5A[2K[5A[2K[5A[2K[5A[2K[5A[2K[5A[2K[4A[2K[4A[2K[4A[2K[4A[2K[3A[2K[3A[2K[3A[2K[3A[2K[3A[2K[3A[2K[2A[2K[1A[2K[1A[2K[1A[2K[1A[2K[1A[2K
Status: Downloaded newer image for python:3.7-slim-buster
 ---> b142ab477505
Step 2/10 : MAINTAINER ASC "mbod@asc.upenn.edu jmparelman@gmail.com"
 ---> Running in 6062ae7c259d
Removing intermediate container 6062ae7c259d
 ---> b02aa2809e8a
Step 3/10 : RUN mkdir -p /data/input /data/output
 ---> Running in e63f2c599014
Removing

In [8]:
!aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin 064258348567.dkr.ecr.us-east-1.amazonaws.com


https://docs.docker.com/engine/reference/commandline/login/#credentials-store

Login Succeeded


In [9]:
!aws ecr create-repository --repository-name $ecr_repository



An error occurred (RepositoryAlreadyExistsException) when calling the CreateRepository operation: The repository with name 'sagemaker-nmf-container' already exists in the registry with id '064258348567'


In [10]:
!docker tag {ecr_repository} "064258348567.dkr.ecr.us-east-1.amazonaws.com/sagemaker-nmf-container:latest"
!docker push "064258348567.dkr.ecr.us-east-1.amazonaws.com/sagemaker-nmf-container:latest"

The push refers to repository [064258348567.dkr.ecr.us-east-1.amazonaws.com/sagemaker-nmf-container]

[1Bb0e67241: Preparing 
[1Be2b410c9: Preparing 
[1Bee7d5f7b: Preparing 
[1B30260ee7: Preparing 
[1B6a810ca1: Preparing 
[1B6bb9ffea: Preparing 
[1B663e622b: Preparing 
[1B845af46d: Preparing 
[7Bee7d5f7b: Pushed   346.8MB/340.1MB[9A[2K[8A[2K[5A[2K[7A[2K[8A[2K[7A[2K[8A[2K[7A[2K[8A[2K[7A[2K[7A[2K[8A[2K[7A[2K[7A[2K[5A[2K[7A[2K[8A[2K[5A[2K[8A[2K[7A[2K[8A[2K[5A[2K[5A[2K[8A[2K[6A[2K[8A[2K[7A[2K[5A[2K[7A[2K[5A[2K[8A[2K[7A[2K[8A[2K[7A[2K[8A[2K[7A[2K[7A[2K[8A[2K[7A[2K[8A[2K[3A[2K[5A[2K[7A[2K[5A[2K[8A[2K[5A[2K[2A[2K[7A[2K[5A[2K[7A[2K[8A[2K[7A[2K[8A[2K[7A[2K[8A[2K[7A[2K[8A[2K[8A[2K[7A[2K[8A[2K[7A[2K[8A[2K[8A[2K[7A[2K[8A[2K[7A[2K[8A[2K[7A[2K[8A[2K[7A[2K[7A[2K[8A[2K[8A[2K[8A[2K[7A[2K[8A[2K[7A[2K[8A[2K[7A[2K[8A[2K[7A[2K[8A[2K[7A

In [11]:
import sagemaker as sage
from time import gmtime,strftime

# SageMaker session object; passed to the Estimator as `sagemaker_session`.
sess = sage.Session()

In [12]:
# S3 object (.pkl) used as the training input; it is passed to tuner.fit().
data_location = 's3://ascsagemaker/JMP_congressional_nmf/dtms/110.pkl'

# Image URI of the pushed training container. Derived from the repository URI
# computed earlier in this notebook so it always matches the active
# account/region instead of a hardcoded registry host.
image = f"{training_repository_uri}:latest"

# Estimator for the custom container. The positional arguments after `image`
# are the IAM role, the instance count (1) and the instance type; they are kept
# positional because their keyword names differ between SageMaker SDK versions.
# Model artifacts are written under `output_path`.
nmf = sage.estimator.Estimator(image,
                               role, 1, 'ml.m4.2xlarge',
                               output_path='s3://ascsagemaker/JMP_congressional_nmf/models',
                               sagemaker_session=sess)

In [21]:
# Search space for the tuner: integer hyperparameter "k" bounded by 30 and 34.
hyperparameter_ranges = dict(k=IntegerParameter(30, 34))
# Name of the metric the tuner optimises; it matches the 'Name' entry below.
objective_metric_name = 'coherence'

# Regex used to pull the metric value out of the training job's log output.
# The capture group takes the whole number (optional minus sign, digits and
# decimal point). Capturing a single digit would turn 'coherence: 0.47' into 0
# and make every tuning job look identical.
# NOTE(review): scientific notation (e.g. 1e-05) is not matched — confirm the
# container never logs the metric in that form.
metric_definitions = [{'Name': 'coherence',
                       'Regex': r'coherence: (-?[0-9\.]+)'}]


In [28]:
# Hyperparameter tuning job over the `nmf` estimator: up to 3 training jobs,
# all 3 allowed to run in parallel, with job names based on 'congress-110'.
tuner = HyperparameterTuner(
    estimator=nmf,
    objective_metric_name=objective_metric_name,
    hyperparameter_ranges=hyperparameter_ranges,
    metric_definitions=metric_definitions,
    max_jobs=3,
    max_parallel_jobs=3,
    base_tuning_job_name='congress-110',
)

In [29]:
# Launch the tuning job with the S3 training input.
tuner.fit(inputs=data_location)

..............................................................................!
