# Notebook to build and run the Docker training image

## Step 1: Build our own Docker image

### Prerequisites

- An AWS account
- Configure credentials for the AWS CLI (the credentials must have SageMaker and ECR permissions)
- Install Docker Engine

In [None]:
# Log the local Docker client in to the ECR registry 763104351884 in us-east-1:
# the AWS CLI prints a temporary password that is piped to `docker login` on stdin.
# NOTE(review): presumably this registry hosts the base image pulled by the
# Dockerfile built in the next cell -- confirm against the Dockerfile.
! aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin 763104351884.dkr.ecr.us-east-1.amazonaws.com

# If you are using an AWS China region, uncomment and run the line below instead
# (it logs in to the cn-north-1 registry 727897471807).
#! aws ecr get-login-password --region cn-north-1 | docker login --username AWS --password-stdin 727897471807.dkr.ecr.cn-north-1.amazonaws.com.cn

In [None]:
# Local tag for the training image; the Estimator cell further down passes this
# same name as `image_uri`.
image_name = 'fraud-detection-with-gnn-on-dgl/training'
# IPython substitutes $image_name with the Python variable above before the
# shell command runs. The build context is ./FD_SL_DGL/gnn_fraud_detection_dgl.
! docker build -t $image_name ./FD_SL_DGL/gnn_fraud_detection_dgl

# If you are using an AWS China region, uncomment and run the line below instead;
# it sets the IMAGE_REPO build argument to the cn-north-1 registry.
# ! docker build --build-arg=IMAGE_REPO=727897471807.dkr.ecr.cn-north-1.amazonaws.com.cn -t fraud-detection-with-gnn-on-dgl/training ./FD_SL_DGL/gnn_fraud_detection_dgl

## Step 2: Test this docker image

**IMPORTANT**: Restore the variables from the previous notebook

In [None]:
# Restore every variable previously persisted with IPython's %store magic.
# The cells below read `default_bucket` and `processed_data`, which are
# presumably restored here -- verify they were stored by the previous notebook.
%store -r

In [None]:
from sagemaker import Session, get_execution_role

# Create a SageMaker session and look up the execution role that the
# training job will run under.
sess = Session()
role = get_execution_role()

# Show both values so a misconfigured environment is visible immediately.
print(role, sess, sep='\n')

In [None]:
from os import path
from sagemaker.s3 import S3Downloader

# Prefix inside the default bucket that receives the training output.
model_output_folder = 'model_output'
output_path = f's3://{default_bucket}/{model_output_folder}'

# Echo the input and output S3 locations before doing any work.
print(processed_data, output_path, sep='\n')

# List the objects stored under the processed-data location, one per line.
processed_files = S3Downloader.list(processed_data)
print("===== Processed Files =====")
print(*processed_files, sep='\n')

In [None]:
# Comma-separated base names of every processed file whose path contains "relation".
# NOTE(review): `edges` is built here but the 'edges' hyperparameter below uses
# the literal pattern 'relation*' instead -- confirm which form the training
# entry point expects.
edges = ",".join(
    file.split("/")[-1] for file in processed_files if "relation" in file
)

# Hyperparameters handed to the training container.
params = {
    'nodes': 'features.csv',
    'edges': 'relation*',
    'labels': 'tags.csv',
    'embedding-size': 64,
    'n-layers': 2,
    'n-epochs': 10,
    'optimizer': 'adam',
    'lr': 1e-2,
}

In [None]:
from time import gmtime, strftime

from sagemaker.estimator import Estimator

# Run the locally built image through the SageMaker SDK with instance_type
# 'local', passing the hyperparameters and S3 output location defined above.
# `instance_count` / `instance_type` are the SageMaker Python SDK v2 names; the
# previous `train_instance_count` / `train_instance_type` spellings are the
# deprecated v1 names and were inconsistent with the v2-only `image_uri`.
estimator = Estimator(image_uri=image_name,
                      role=role,
                      instance_count=1,
                      instance_type='local',
                      hyperparameters=params,
                      output_path=output_path)

# Timestamp the job name (UTC) so each run gets a unique name.
# NOTE(review): the CreateTrainingJob API restricts job names to alphanumerics
# and hyphens; the underscores here are presumably tolerated only because
# instance_type is 'local' -- confirm before switching to a hosted instance type.
training_job_name = "{}-{}".format('GNN_FD_SL_DGL_Train', strftime("%Y-%m-%d-%H-%M-%S", gmtime()))
print(training_job_name)

# Train using the processed data as the 'train' channel.
estimator.fit({'train': processed_data}, job_name=training_job_name)

In [None]:
# S3 location built as <output_path>/<training_job_name>.
# NOTE(review): assumes the training run places its output under a folder named
# after the job -- confirm against the actual S3 layout after a run.
model_path = f'{output_path}/{training_job_name}'
# Persist model_path with IPython's %store so another notebook can reload it
# via `%store -r`.
%store model_path