In [None]:
import json
import boto3
import random
import sagemaker
import boto3
from IPython.display import display
from IPython.display import Markdown as md

from src.sm_utils import get_cognito_configs, get_signup_domain

In [None]:
# Read the deployment outputs stored in stack_outputs.json; the cells below
# look up bucket names, ARNs and Cognito identifiers in this dict.
with open("stack_outputs.json") as config_file:
    sagemaker_config = json.load(config_file)

# Bucket that holds the custom dataset, its manifest and the labelling output.
s3_bucket = sagemaker_config["S3Bucket"]

## Import your own dataset
In this section, you can import your own dataset and start an Amazon SageMaker Ground Truth labelling job.
We will first initialise the custom data folder in the S3 bucket.

In [None]:
!aws s3api put-object --bucket $s3_bucket --key custom_data/
!aws s3api put-object --bucket $s3_bucket --key custom_data/input/
!aws s3api put-object --bucket $s3_bucket --key custom_data/output/

Run the following cell to see the options for uploading your dataset.

In [None]:
# Markdown instructions for copying data into the input prefix. The bucket
# name is substituted into both the console link and the CLI command.
upload_instructions_template = '''
# Copy your dataset into s3.

There are 2 options for you to import your own dataset. 

1. Manually drag your data into https://s3.console.aws.amazon.com/s3/buckets/{s3_bucket}/custom_data/input/
2. Copy the bucket with following command on your computer:

``` aws s3 cp --recursive <path to data> s3://{s3_bucket}/custom_data/input``` 
'''
upload_instructions = upload_instructions_template.format(s3_bucket=s3_bucket)

# Leaving the Markdown object as the last expression renders it as cell output.
md(upload_instructions)

### Create a manifest file from the data in S3
Iterate through the S3 bucket to automatically create a manifest file for Ground Truth.

In [None]:
image_path_list = []

s3 = boto3.resource('s3')
input_bucket = s3.Bucket(s3_bucket)

input_bucket_itt = input_bucket.objects.filter(Prefix="custom_data/input/", Delimiter='/').all()
assert len(list(input_bucket_itt)) > 0, "Cannot create manifest for an empty directory"
for image in input_bucket_itt:    
    # Check if key is a directory
    if image.key[-1] == "/":
        continue
    image_path_list.append(image.key)
    
# Create and upload manifest file
with open("data.manifest", "w") as f:
    for image_path in image_path_list:
        string = '{{"source-ref": "s3://{}/{}"}}\n'.format(s3_bucket, image_path)
        f.write(string)

!aws s3 cp data.manifest s3://$s3_bucket/custom_data/data.manifest
!rm data.manifest

# Create a labelling job

In [None]:
# AWS clients used by the cells below: SageMaker for the work team and the
# labelling job, Cognito user pools for invitations and group membership.
sm_client = boto3.client(service_name='sagemaker')
co_client = boto3.client(service_name='cognito-idp')

### Create a work team if you don't already have one

In [None]:
# get_cognito_configs() is a project helper. A None result is handled here as
# "no work team yet" — presumably it returns the existing workforce's Cognito
# settings otherwise; confirm in src.sm_utils.
cognito_config = get_cognito_configs()

if cognito_config is None:
    # No work team found: create one backed by the solution's Cognito user pool.
    workteam_name = sagemaker_config["SolutionPrefix"] + "-workteam"
    response = sm_client.create_workteam(
        WorkteamName=workteam_name,
        Description="Labelling team for " + workteam_name,
        MemberDefinitions=[{
            "CognitoMemberDefinition": {
                'UserPool': sagemaker_config["CognitoUserPool"],
                'UserGroup': '{}-userpool-group'.format(sagemaker_config["SolutionPrefix"]),
                'ClientId': sagemaker_config["CongitoClientID"]}
        }]
    )

    # create_workteam already returns the ARN of the new team, so no extra
    # describe_workteam round-trip is needed.
    workteam_arn = response["WorkteamArn"]

else:
    # A configuration already exists: fill in the two values below by hand.
    workteam_name = ""
    workteam_arn = ""

    # Both values are needed by the following cells, so prompt if either is blank.
    if len(workteam_name) == 0 or len(workteam_arn) == 0:
        # Built without leading indentation: Markdown renders lines indented by
        # four spaces as a code block, which would hide the heading and link.
        error_msg = (
            "## Error\n"
            "Please navigate to\n"
            "https://{region}.console.aws.amazon.com/sagemaker/groundtruth?region={region}#/labeling-workforces\n"
            "and enter your workteam name in `workteam_name` and workteam_arn in `workteam_arn`\n"
        ).format(region=boto3.session.Session().region_name)
        display(md(error_msg))

### Update your user invitation email with the new workteam

In [None]:
# Look up the sign-up URL for the work team via the project helper and show it.
signup_domain = get_signup_domain(workteam_name=workteam_name)
print("signup domain {}".format(signup_domain))

# Only the first part is passed through str.format (to insert the sign-up URL).
# The second part keeps "{username}" and "{####}" literally, as template
# placeholders in the message sent to Cognito.
invite_intro = "Hi there, \n\nYou are invited to work on a labelling project:\n\nSign up here: {}\n\n".format(signup_domain)
invite_credentials = "Your username is '<b>{username}</b>' and your temporary password is '<b>{####}</b>'."

# NOTE(review): update_user_pool may reset pool settings that are not passed
# here to their defaults — confirm against the Cognito documentation.
co_client.update_user_pool(
    UserPoolId=sagemaker_config["CognitoUserPool"],
    AdminCreateUserConfig={
        'InviteMessageTemplate': {"EmailMessage": invite_intro + invite_credentials}
    }
)

### Create your labelling job

In [None]:
# SageMaker client for submitting the labelling job.
sm_client = boto3.client('sagemaker')

# Input: the manifest uploaded to custom_data/ in the bucket.
labeling_input_config = {
    'DataSource': {
        'S3DataSource': {
            'ManifestS3Uri': "s3://{}/custom_data/data.manifest".format(s3_bucket)
        }
    },
    'DataAttributes': {
        'ContentClassifiers': ['FreeOfAdultContent']
    }
}

# Output: annotations are written under custom_data/output/ in the same bucket.
labeling_output_config = {
    'S3OutputPath': "s3://{}/custom_data/output/".format(s3_bucket)
}

# Human task: who labels (work team), which UI template they see, and the
# pre-/post-annotation Lambda functions taken from the stack outputs.
ui_template_uri = 's3://{}/sagemaker/ground_truth/groundtruth.html'.format(
    sagemaker_config["SolutionS3BucketName"])
human_task_config = {
    'WorkteamArn': workteam_arn,
    'UiConfig': {'UiTemplateS3Uri': ui_template_uri},
    'PreHumanTaskLambdaArn': sagemaker_config["PreLabelLambdaArn"],
    'TaskTitle': 'Annotate the texts in images',
    'TaskDescription': 'have fun',
    'NumberOfHumanWorkersPerDataObject': 1,
    'TaskTimeLimitInSeconds': 600,
    'AnnotationConsolidationConfig': {
        'AnnotationConsolidationLambdaArn': sagemaker_config["PostLabelLambdaArn"]
    }
}

# The call is the last expression, so its response is shown as the cell output.
sm_client.create_labeling_job(
    LabelingJobName="{}-labelling-job".format(sagemaker_config["SolutionPrefix"]),
    LabelAttributeName='annotations',
    InputConfig=labeling_input_config,
    OutputConfig=labeling_output_config,
    RoleArn=sagemaker_config["SageMakerIamRole"],
    StoppingConditions={'MaxPercentageOfInputDatasetLabeled': 100},
    HumanTaskConfig=human_task_config
)

In [None]:
# Region of the default boto3 session; used twice in the console link below.
my_session = boto3.session.Session()
my_region = my_session.region_name

next_step_template = '''

## Next step

Invite people to help you label your dataset. Click on the following link to enter their email
addresses to get an invite.

https://{}.console.aws.amazon.com/sagemaker/groundtruth?region={}#/labeling-workforces/add-workers
'''

# Leaving the Markdown object as the last expression renders it as cell output.
md(next_step_template.format(my_region, my_region))

# Add your invitees to the job

In [None]:
# Add every user in the Cognito user pool to the labelling user group.
# list_users returns results one page at a time, so iterate with the paginator;
# a single call would silently skip users beyond the first page.
user_pool_id = sagemaker_config["CognitoUserPool"]
user_pages = co_client.get_paginator("list_users").paginate(UserPoolId=user_pool_id)

for page in user_pages:
    for user in page["Users"]:
        co_client.admin_add_user_to_group(
            UserPoolId=user_pool_id,
            Username=user["Username"],
            GroupName=sagemaker_config["CognitoUserPoolGroup"]
        )

Now wait until they accept the invite and start on the job :)

# Navigation
- Click [here](./3_data_visualisation.ipynb) to visualise your own data
- Click [here](./4_model_training.ipynb) to train a network with your labelled dataset