# Train on the Market1501 dataset

Train a model on the Market1501 dataset using Amazon SageMaker.

### Set up

#### 1. Set up accounts and role

In [None]:
import sagemaker
import boto3
import os
import sys
import pandas as pd
import matplotlib.pyplot as plt
# Make the local `src` directory importable from this notebook.
sys.path.append('./src')

# AWS region and account id of the credentials this notebook is running with.
region = boto3.session.Session().region_name
account_id = boto3.client('sts').get_caller_identity().get('Account')

# SageMaker session reused by the cells that follow.
sagemaker_session = sagemaker.Session()

# The execution role ARN is assembled from the caller's account id and a fixed
# role name. Disabled alternative: role = sagemaker.get_execution_role()
role = "arn:aws:iam::{}:role/service-role/AmazonSageMaker-ExecutionRole-20190118T115449".format(
    account_id
)


#### 2. Set up image and instance type

In [2]:
# Instance type for the training job and the repository:tag of the custom image.
instance_type = "ml.p3.2xlarge"
pytorch_custom_image_name = "image-embedding:gpu-1.1.0-201909300947"

# Full image URI: <account_id>.dkr.ecr.<region>.amazonaws.com/<repository:tag>
docker_repo = "{}.dkr.ecr.{}.amazonaws.com/{}".format(
    account_id,
    region,
    pytorch_custom_image_name,
)

#### 3. Configure train/test and validation datasets

In [3]:
# Bucket used for the raw data location.
raw_bucket = "aegovansagemaker"

# Default bucket of the SageMaker session; the train/val/output paths are built on it.
bucket = sagemaker_session.default_bucket()

In [4]:
# S3 location of the raw `bounding_box_train` data inside `raw_bucket`.
# NOTE(review): the prefix is spelled "merket1501" while every other path in this
# notebook uses "market1501" — confirm it matches the actual key in the bucket.
s3_train_raw = "s3://{}/merket1501/bounding_box_train/".format(raw_bucket)

In [5]:
# Training data location and its companion `_lst` location.
s3_train = "s3://{}/market1501/train3/".format(bucket)
s3_train_lst = "s3://{}/market1501/train3_lst/".format(bucket)

# Validation data location and its companion `_lst` location.
s3_val = "s3://{}/market1501/val3/".format(bucket)
s3_val_lst = "s3://{}/market1501/val3_lst/".format(bucket)

# Location passed to the estimator as its output path.
s3_output_path = "s3://{}/market1501_output/".format(bucket)

## Start training

In [6]:
# Channel name -> S3 location, handed to `estimator.fit`.
inputs = dict(train=s3_train, val=s3_val)

In [7]:
# Hyperparameters handed to the estimator. Value types are deliberately left
# as they were (some strings, some numbers).
hyperparameters = {
    # Dataset selection and run length
    "dataset": "Market1501TripletFactory",
    "batchsize": "32",
    "epochs": "1000",

    # Optimiser settings
    "learning_rate": 0.0001,
    "weight_decay": 0.00005,
    "momentum": 0.9,

    # Remaining training options
    "patience": 20,
    "log-level": "INFO",
    "tripletloss_margin": 500,
}

In [8]:
# Metric definitions passed to the estimator: each entry pairs a metric name
# with a regex whose single capture group holds the numeric value that follows
# a "###score: <tag>###" marker.
#
# The patterns are raw strings so that `\d` reaches the regex engine as written;
# in an ordinary string literal `\d` is an invalid escape sequence and triggers
# a DeprecationWarning/SyntaxWarning on recent Python versions.
metric_definitions = [
    {"Name": "TrainLoss",
     "Regex": r"###score: train_loss### (\d*[.]?\d*)"},
    {"Name": "ValidationLoss",
     "Regex": r"###score: val_loss### (\d*[.]?\d*)"},
    {"Name": "TrainScore",
     "Regex": r"###score: train_score### (\d*[.]?\d*)"},
    {"Name": "ValidationScore",
     "Regex": r"###score: val_score### (\d*[.]?\d*)"},
    {"Name": "trainVariance",
     "Regex": r"###score: train_loss_std### (\d*[.]?\d*)"},
    {"Name": "ValVariance",
     "Regex": r"###score: val_loss_std### (\d*[.]?\d*)"},
]

In [9]:
# Print the first and the last line of `git log -1` for the local checkout,
# so the notebook output records which commit was current when it was run.
# NOTE(review): git_config trains from branch 'master' with no commit pinned,
# so the job's code is not necessarily this commit — confirm.
!git log -1| head -1
!git log -1| tail -1

commit 08c62c86cc39da71b003269cdd219c15d801f314
    Revert to resnet 50


In [10]:
# Git source handed to the estimator: repository URL and branch.
# A specific commit is not pinned; the previously used (now disabled) entry was
#   'commit': 'd14df4a3847e74c5672dae42304c0caa5a5c1ae2'
git_config = dict(
    repo='https://github.com/elangovana/image-embedding.git',
    branch='master',
)

In [11]:
from sagemaker.pytorch import PyTorch

# PyTorch estimator for the training job, wired up from the values configured
# in the earlier cells (role, image, instance type, hyperparameters, metrics).
# NOTE(review): framework_version is "1.0.0" while the custom image tag reads
# "gpu-1.1.0-..." — confirm which one is intended to apply.
estimator = PyTorch(
    # Training code: entry script, source directory and extra dependencies,
    # resolved through `git_config`.
    entry_point='experiment_train.py',
    source_dir='src',
    dependencies=['src/datasets', 'src/evaluators'],
    git_config=git_config,

    # Runtime: role, framework/python versions and the custom container image.
    role=role,
    framework_version="1.0.0",
    py_version='py3',
    image_name=docker_repo,

    # Compute resources.
    train_instance_count=1,
    train_instance_type=instance_type,
    # train_use_spot_instances=True   (currently disabled)

    # Job configuration and outputs.
    hyperparameters=hyperparameters,
    metric_definitions=metric_definitions,
    output_path=s3_output_path,
    base_job_name="image-embedding",
)

In [None]:
# Start the training job, passing the "train" and "val" S3 locations from `inputs`.
estimator.fit(inputs)

2019-10-04 00:14:58 Starting - Starting the training job...
2019-10-04 00:15:31 Starting - Launching requested ML instances..