### Get the Personalize boto3 Client

In [256]:
import boto3

import json
import numpy as np
import pandas as pd
import time
import datetime

personalize = boto3.client('personalize')
personalize_runtime = boto3.client('personalize-runtime')

In [183]:
def createBucket(bucketname):
    s3 = boto3.client('s3')
    response = s3.list_buckets()
    existingbuckets = [d['Name'] for d in response["Buckets"]]
    #print(existingbuckets)
    if bucketname not in existingbuckets:
        print("creating bucket " + bucketname)
        s3.create_bucket(Bucket=bucketname)
    else:
        print("bucket exists! " + bucketname)


### Specify a Bucket and Data Output Location

In [184]:
accountid = boto3.client('sts').get_caller_identity().get('Account')
bucket = "aimlbootcamp" + accountid

createBucket(bucket)

#bucket = "personalize-demo"       # replace with the name of your S3 bucket
filename = "movie-lens-100k.csv"  # replace with a name that you want to save the dataset under

bucket exists! aimlbootcamp485483564801


### Download, Prepare, and Upload Training Data

#### Download and Explore the Dataset

In [185]:
!wget -N http://files.grouplens.org/datasets/movielens/ml-100k.zip
!unzip -o ml-100k.zip
data = pd.read_csv('./ml-100k/u.data', sep='\t', names=['USER_ID', 'ITEM_ID', 'RATING', 'TIMESTAMP'])
pd.set_option('display.max_rows', 5)
data

--2019-11-02 13:49:01--  http://files.grouplens.org/datasets/movielens/ml-100k.zip
Resolving files.grouplens.org (files.grouplens.org)... 128.101.65.152
Connecting to files.grouplens.org (files.grouplens.org)|128.101.65.152|:80... connected.
HTTP request sent, awaiting response... 304 Not Modified
File ‘ml-100k.zip’ not modified on server. Omitting download.

Archive:  ml-100k.zip
  inflating: ml-100k/allbut.pl       
  inflating: ml-100k/mku.sh          
  inflating: ml-100k/README          
  inflating: ml-100k/u.data          
  inflating: ml-100k/u.genre         
  inflating: ml-100k/u.info          
  inflating: ml-100k/u.item          
  inflating: ml-100k/u.occupation    
  inflating: ml-100k/u.user          
  inflating: ml-100k/u1.base         
  inflating: ml-100k/u1.test         
  inflating: ml-100k/u2.base         
  inflating: ml-100k/u2.test         
  inflating: ml-100k/u3.base         
  inflating: ml-100k/u3.test         
  inflating: ml-100k/u4.base         
  inflat

Unnamed: 0,USER_ID,ITEM_ID,RATING,TIMESTAMP
0,196,242,3,881250949
1,186,302,3,891717742
...,...,...,...,...
99998,13,225,2,882399156
99999,12,203,3,879959583


#### Prepare and Upload Data

In [186]:
data = data[data['RATING'] > 3.6]                # keep only movies rated 3.6 and above
data = data[['USER_ID', 'ITEM_ID', 'TIMESTAMP']] # select columns that match the columns in the schema below
data.to_csv(filename, index=False)

boto3.Session().resource('s3').Bucket(bucket).Object(filename).upload_file(filename)

### Create Schema

In [187]:
def createschema(schema, name):
    
    response = personalize.list_schemas(
        maxResults=100
    )

    print("response: ", response)
    
    for item in response["schemas"]:
        if item["name"] == name:
            return item["schemaArn"]

    create_schema_response = personalize.create_schema(
        name = name,
        schema = json.dumps(schema)
    )

    schema_arn = create_schema_response['schemaArn']
    #print(json.dumps(create_schema_response, indent=2))
    return schema_arn

In [188]:
schema = {
    "type": "record",
    "name": "Interactions",
    "namespace": "com.amazonaws.personalize.schema",
    "fields": [
        {
            "name": "USER_ID",
            "type": "string"
        },
        {
            "name": "ITEM_ID",
            "type": "string"
        },
        {
            "name": "TIMESTAMP",
            "type": "long"
        }
    ],
    "version": "1.0"
}

schema_arn = createschema(schema, "aimlbootcamp-schema-personalize-20191102")
print("schema_arn: ", schema_arn)

response:  {'schemas': [{'name': 'DEMO-schema', 'schemaArn': 'arn:aws:personalize:us-east-1:485483564801:schema/DEMO-schema', 'creationDateTime': datetime.datetime(2019, 11, 2, 2, 31, 53, 455000, tzinfo=tzlocal()), 'lastUpdatedDateTime': datetime.datetime(2019, 11, 2, 2, 31, 53, 455000, tzinfo=tzlocal())}, {'name': 'aimlbootcamp-DEMO-schema', 'schemaArn': 'arn:aws:personalize:us-east-1:485483564801:schema/aimlbootcamp-DEMO-schema', 'creationDateTime': datetime.datetime(2019, 11, 2, 2, 52, 49, 340000, tzinfo=tzlocal()), 'lastUpdatedDateTime': datetime.datetime(2019, 11, 2, 2, 52, 49, 340000, tzinfo=tzlocal())}], 'ResponseMetadata': {'RequestId': 'cc60cb53-8350-40b5-afd5-4715735ad978', 'HTTPStatusCode': 200, 'HTTPHeaders': {'content-type': 'application/x-amz-json-1.1', 'date': 'Sat, 02 Nov 2019 13:49:03 GMT', 'x-amzn-requestid': 'cc60cb53-8350-40b5-afd5-4715735ad978', 'content-length': '385', 'connection': 'keep-alive'}, 'RetryAttempts': 0}}
{
  "schemaArn": "arn:aws:personalize:us-east-

### Create and Wait for Dataset Group

#### Create Dataset Group

In [189]:
def createdatasetGroup(name):
    response = personalize.list_dataset_groups(
        maxResults=100
    )
    print("response: ", response)
    
    for item in response["datasetGroups"]:
        if item["name"] == name:
            return item["datasetGroupArn"]
    create_dataset_group_response = personalize.create_dataset_group(
        name = name
    )

    dataset_group_arn = create_dataset_group_response['datasetGroupArn']
    #print(json.dumps(create_dataset_group_response, indent=2))   
    return dataset_group_arn

In [190]:
dataset_group_arn = createdatasetGroup("aimlbootcamp-20191102")
print("dataset_group_arn: ", dataset_group_arn)

response:  {'datasetGroups': [{'name': 'DEMO-dataset-group', 'datasetGroupArn': 'arn:aws:personalize:us-east-1:485483564801:dataset-group/DEMO-dataset-group', 'status': 'ACTIVE', 'creationDateTime': datetime.datetime(2019, 11, 2, 2, 31, 53, 504000, tzinfo=tzlocal()), 'lastUpdatedDateTime': datetime.datetime(2019, 11, 2, 2, 32, 22, 889000, tzinfo=tzlocal())}, {'name': 'aimldatasetgroupname', 'datasetGroupArn': 'arn:aws:personalize:us-east-1:485483564801:dataset-group/aimldatasetgroupname', 'status': 'ACTIVE', 'creationDateTime': datetime.datetime(2019, 11, 2, 2, 56, 51, 110000, tzinfo=tzlocal()), 'lastUpdatedDateTime': datetime.datetime(2019, 11, 2, 2, 57, 19, 831000, tzinfo=tzlocal())}, {'name': 'daniel', 'datasetGroupArn': 'arn:aws:personalize:us-east-1:485483564801:dataset-group/daniel', 'status': 'ACTIVE', 'creationDateTime': datetime.datetime(2019, 7, 7, 2, 39, 9, 916000, tzinfo=tzlocal()), 'lastUpdatedDateTime': datetime.datetime(2019, 7, 7, 2, 39, 30, 409000, tzinfo=tzlocal())}],

#### Wait for Dataset Group to Have ACTIVE Status

In [191]:
max_time = time.time() + 3*60*60 # 3 hours
while time.time() < max_time:
    describe_dataset_group_response = personalize.describe_dataset_group(
        datasetGroupArn = dataset_group_arn
    )
    status = describe_dataset_group_response["datasetGroup"]["status"]
    print("DatasetGroup: {}".format(status))
    
    if status == "ACTIVE" or status == "CREATE FAILED":
        break
        
    time.sleep(5)

DatasetGroup: CREATE PENDING
DatasetGroup: CREATE PENDING
DatasetGroup: CREATE PENDING
DatasetGroup: CREATE PENDING
DatasetGroup: CREATE PENDING
DatasetGroup: ACTIVE


### Create Dataset

In [212]:
def createdataset(name, dataset_type, dataset_group_arn, schema_arn):
    response = personalize.list_datasets(
        datasetGroupArn=dataset_group_arn,
        maxResults=100
    )
    
    #print("response: ", response)
    for item in response["datasets"]:
        print("inspecting: ", item)
        if item["name"] == name or item["datasetType"] == dataset_type:
            #return item["datasetArn"]
            response = personalize.delete_dataset(
                datasetArn=item["datasetArn"]
            )
            max_time = time.time() + 2*60 # 10 minutes
            while time.time() < max_time:
                try:
                    response = personalize.describe_dataset(datasetArn=item["datasetArn"])
                except Exception as e:
                    if "ResourceNotFoundException".lower() in str(e).lower():
                        print("delete completed")
                        break
                except:
                    raise
                                    
                status = response["dataset"]["status"]
                print("DatasetGroup: {}".format(status))

                time.sleep(5)
            
            
    create_dataset_response = personalize.create_dataset(
        name = name,
        datasetType = dataset_type,
        datasetGroupArn = dataset_group_arn,
        schemaArn = schema_arn
    )
    print("dataset created....")
    dataset_arn = create_dataset_response['datasetArn']
    #print(json.dumps(create_dataset_response, indent=2))
    return dataset_arn

In [214]:
dataset_type = "INTERACTIONS"
dataset_arn = createdataset("aiml-bootcamp-dataset-20191102", dataset_type, dataset_group_arn, schema_arn)


print("datasetarn: ", dataset_arn)

inspecting:  {'name': 'aiml-bootcamp-dataset-20191102', 'datasetArn': 'arn:aws:personalize:us-east-1:485483564801:dataset/aimlbootcamp-20191102/INTERACTIONS', 'datasetType': 'INTERACTIONS', 'status': 'ACTIVE', 'creationDateTime': datetime.datetime(2019, 11, 2, 14, 11, 27, 823000, tzinfo=tzlocal()), 'lastUpdatedDateTime': datetime.datetime(2019, 11, 2, 14, 11, 27, 823000, tzinfo=tzlocal())}
DatasetGroup: DELETE PENDING
DatasetGroup: DELETE PENDING
DatasetGroup: DELETE PENDING
delete completed
dataset created....
datasetarn:  arn:aws:personalize:us-east-1:485483564801:dataset/aimlbootcamp-20191102/INTERACTIONS


### Prepare, Create, and Wait for Dataset Import Job

#### Attach Policy to S3 Bucket

In [249]:
s3 = boto3.client("s3")

policy = {
    "Version": "2012-10-17",
    "Id": "PersonalizeS3BucketAccessPolicyBootcamp",
    "Statement": [
        {
            "Sid": "PersonalizeS3BucketAccessPolicyAIMLBootcamp",
            "Effect": "Allow",
            "Principal": {
                "Service": "personalize.amazonaws.com"
            },
            "Action": [
                "s3:GetObject",
                "s3:ListBucket",
                "s3:PutObject",
                "s3:*"
            ],
            "Resource": [
                "arn:aws:s3:::{}".format(bucket),
                "arn:aws:s3:::{}/*".format(bucket)
            ]
        }
    ]
}

policycreateresponse = s3.put_bucket_policy(Bucket=bucket, Policy=json.dumps(policy))

#### Create Personalize Role

In [250]:
def createPersonalizeIAMRole(role_name):
    iam = boto3.client("iam")
    assume_role_policy_document = {
        "Version": "2012-10-17",
        "Statement": [
            {
              "Effect": "Allow",
              "Principal": {
                "Service": "personalize.amazonaws.com"
              },
              "Action": "sts:AssumeRole"
            }
        ]
    }
    

    #print("response: ", response)
    #return
    try:
        create_role_response = iam.create_role(
            RoleName = role_name,
            AssumeRolePolicyDocument = json.dumps(assume_role_policy_document)
        )
        role_arn = create_role_response["Role"]["Arn"]
    except Exception as e:
        if "EntityAlreadyExists".lower() in str(e).lower():
            print("the role already exists!")
            response = iam.list_roles(
                PathPrefix="/",
                MaxItems=1000
            )
            #print("all roles: ", response)
            for item in response["Roles"]:
                if item["RoleName"] == role_name:
                    role_arn = item["Arn"]
                    break
                    
    except:
        raise

    # AmazonPersonalizeFullAccrole_arness provides access to any S3 bucket with a name that includes "personalize" or "Personalize" 
    # if you would like to use a bucket with a different name, please consider creating and attaching a new policy
    # that provides read access to your bucket or attaching the AmazonS3ReadOnlyAccess policy to the role
    policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonPersonalizeFullAccess"
    iam.attach_role_policy(
        RoleName = role_name,
        PolicyArn = policy_arn
    )
    policy_arn = "arn:aws:iam::aws:policy/AmazonS3FullAccess"
    iam.attach_role_policy(
        RoleName = role_name,
        PolicyArn = policy_arn
    )   
    print("pausing execution to allow for IAM role propagation.....")
    time.sleep(30) # wait to allow IAM role policy attachment to propagate

    return role_arn   

In [251]:
role_name = "PersonalizeRoleAIMLBootcamp-imports-2"
role_arn = createPersonalizeIAMRole(role_name)
#json.dumps(create_dataset_response, indent=2)
print(role_arn)


the role already exists!
pausing execution to allow for IAM role propagation.....
arn:aws:iam::485483564801:role/PersonalizeRoleAIMLBootcamp-imports-1


#### Create Dataset Import Job

In [266]:
create_dataset_import_job_response = personalize.create_dataset_import_job(
    jobName = "bootcamp-dataset-import-job-2",
    datasetArn = dataset_arn,
    dataSource = {
        "dataLocation": "s3://{}/{}".format(bucket, filename)
    },
    roleArn = role_arn
)

dataset_import_job_arn = create_dataset_import_job_response['datasetImportJobArn']
print(json.dumps(create_dataset_import_job_response, indent=2))

{
  "datasetImportJobArn": "arn:aws:personalize:us-east-1:485483564801:dataset-import-job/bootcamp-dataset-import-job-2",
  "ResponseMetadata": {
    "RequestId": "138fe748-766f-4b3d-9d98-5511e1610c8e",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "content-type": "application/x-amz-json-1.1",
      "date": "Sat, 02 Nov 2019 15:16:55 GMT",
      "x-amzn-requestid": "138fe748-766f-4b3d-9d98-5511e1610c8e",
      "content-length": "117",
      "connection": "keep-alive"
    },
    "RetryAttempts": 0
  }
}


#### Wait for Dataset Import Job to Have ACTIVE Status

In [267]:
max_time = time.time() + 3*60*60 # 3 hours
showme = " "
while time.time() < max_time:
    describe_dataset_import_job_response = personalize.describe_dataset_import_job(
        datasetImportJobArn = dataset_import_job_arn
    )
    status = describe_dataset_import_job_response["datasetImportJob"]['status']
    print(showme, datetime.datetime.now(), " DatasetImportJob: {}".format(status), "             ", end='\r')
    showme += "*"
    if len(showme)> 10:
        showme = " "
    if status == "ACTIVE" or status == "CREATE FAILED":
        break
        
    time.sleep(3.5719)

 ***** 2019-11-02 15:20:53.867965  DatasetImportJob: ACTIVE                              

### Select Recipe

In [272]:
list_recipes_response = personalize.list_recipes()
recipe_arn = "arn:aws:personalize:::recipe/aws-hrnn" # aws-hrnn selected for demo purposes
list_recipes_response

{'recipes': [{'name': 'aws-hrnn',
   'recipeArn': 'arn:aws:personalize:::recipe/aws-hrnn',
   'status': 'ACTIVE',
   'creationDateTime': datetime.datetime(2019, 6, 10, 0, 0, tzinfo=tzlocal()),
   'lastUpdatedDateTime': datetime.datetime(2019, 6, 20, 0, 39, 17, 65000, tzinfo=tzlocal())},
  {'name': 'aws-hrnn-coldstart',
   'recipeArn': 'arn:aws:personalize:::recipe/aws-hrnn-coldstart',
   'status': 'ACTIVE',
   'creationDateTime': datetime.datetime(2019, 6, 10, 0, 0, tzinfo=tzlocal()),
   'lastUpdatedDateTime': datetime.datetime(2019, 6, 20, 0, 39, 17, 64000, tzinfo=tzlocal())},
  {'name': 'aws-hrnn-metadata',
   'recipeArn': 'arn:aws:personalize:::recipe/aws-hrnn-metadata',
   'status': 'ACTIVE',
   'creationDateTime': datetime.datetime(2019, 6, 10, 0, 0, tzinfo=tzlocal()),
   'lastUpdatedDateTime': datetime.datetime(2019, 6, 20, 0, 39, 17, 64000, tzinfo=tzlocal())},
  {'name': 'aws-personalized-ranking',
   'recipeArn': 'arn:aws:personalize:::recipe/aws-personalized-ranking',
   'stat

### Create and Wait for Solution

#### Create Solution

In [284]:
def createSolution(name, dataset_group_arn, recipe_arn):
    response = personalize.list_solutions(
        datasetGroupArn=dataset_group_arn,
        maxResults=100
    )
    print(response)
    for item in response["solutions"]:
        if item["name"] == name:
            print("solution with the same name already exists. Deleting the existing one.")
            response = personalize.delete_solution(solutionArn=item["solutionArn"])
            
            max_time = time.time() + 10*60 # 10 minutes
            showme = " "
            while time.time() < max_time:
                response = personalize.describe_solution(solutionArn=item["solutionArn"])
                status = response["solution"]['status']
                print(showme, datetime.datetime.now(), " solution: {}".format(status), "             ", end='\r')
                showme += "*"
                if len(showme)> 10:
                    showme = " "
                if status == "DELETED":
                    break

                time.sleep(3.5719)
            break
    
    create_solution_response = personalize.create_solution(
        name = name,
        datasetGroupArn = dataset_group_arn,
        recipeArn = recipe_arn
    )

    solution_arn = create_solution_response['solutionArn']
    print(json.dumps(create_solution_response, indent=2))
    return solution_arn

In [285]:
solution_arn = createSolution("aimlbootcampExampleSolution", dataset_group_arn, recipe_arn)
print(solution_arn)

{'solutions': [{'name': 'DEMO-solution', 'solutionArn': 'arn:aws:personalize:us-east-1:485483564801:solution/DEMO-solution', 'status': 'ACTIVE', 'creationDateTime': datetime.datetime(2019, 11, 2, 21, 26, 12, 120000, tzinfo=tzlocal()), 'lastUpdatedDateTime': datetime.datetime(2019, 11, 2, 21, 26, 12, 120000, tzinfo=tzlocal())}, {'name': 'aimlbootcampExampleSolution', 'solutionArn': 'arn:aws:personalize:us-east-1:485483564801:solution/aimlbootcampExampleSolution', 'status': 'ACTIVE', 'creationDateTime': datetime.datetime(2019, 11, 2, 21, 34, 23, 275000, tzinfo=tzlocal()), 'lastUpdatedDateTime': datetime.datetime(2019, 11, 2, 21, 34, 23, 275000, tzinfo=tzlocal())}], 'ResponseMetadata': {'RequestId': '6ee11582-73d6-4734-82f7-7b9fe141d689', 'HTTPStatusCode': 200, 'HTTPHeaders': {'content-type': 'application/x-amz-json-1.1', 'date': 'Sat, 02 Nov 2019 21:53:48 GMT', 'x-amzn-requestid': '6ee11582-73d6-4734-82f7-7b9fe141d689', 'content-length': '441', 'connection': 'keep-alive'}, 'RetryAttempts

ResourceNotFoundException: An error occurred (ResourceNotFoundException) when calling the DescribeSolution operation: The given solution does not exist: arn:aws:personalize:us-east-1:485483564801:solution/aimlbootcampExampleSolution

#### Create Solution Version

In [None]:


create_solution_version_response = personalize.create_solution_version(
    solutionArn = solution_arn
)

solution_version_arn = create_solution_version_response['solutionVersionArn']
print(json.dumps(create_solution_version_response, indent=2))

#### Wait for Solution Version to Have ACTIVE Status

In [None]:
max_time = time.time() + 3*60*60 # 3 hours
while time.time() < max_time:
    describe_solution_version_response = personalize.describe_solution_version(
        solutionVersionArn = solution_version_arn
    )
    status = describe_solution_version_response["solutionVersion"]["status"]
    print("SolutionVersion: {}".format(status))
    
    if status == "ACTIVE" or status == "CREATE FAILED":
        break
        
    time.sleep(60)

#### Get Metrics of Solution

In [None]:
get_solution_metrics_response = personalize.get_solution_metrics(
    solutionVersionArn = solution_version_arn
)

print(json.dumps(get_solution_metrics_response, indent=2))

### Create and Wait for Campaign

#### Create Campaign

In [None]:
create_campaign_response = personalize.create_campaign(
    name = "DEMO-campaign",
    solutionVersionArn = solution_version_arn,
    minProvisionedTPS = 1
)

campaign_arn = create_campaign_response['campaignArn']
print(json.dumps(create_campaign_response, indent=2))

#### Wait for Campaign to Have ACTIVE Status

In [None]:
max_time = time.time() + 3*60*60 # 3 hours
while time.time() < max_time:
    describe_campaign_response = personalize.describe_campaign(
        campaignArn = campaign_arn
    )
    status = describe_campaign_response["campaign"]["status"]
    print("Campaign: {}".format(status))
    
    if status == "ACTIVE" or status == "CREATE FAILED":
        break
        
    time.sleep(60)

### Get Recommendations

#### Select a User and an Item

In [None]:
items = pd.read_csv('./ml-100k/u.item', sep='|', usecols=[0,1], encoding='latin-1')
items.columns = ['ITEM_ID', 'TITLE']

user_id, item_id, _ = data.sample().values[0]
item_title = items.loc[items['ITEM_ID'] == item_id].values[0][-1]
print("USER: {}".format(user_id))
print("ITEM: {}".format(item_title))

items

#### Call GetRecommendations

In [None]:
get_recommendations_response = personalize_runtime.get_recommendations(
    campaignArn = campaign_arn,
    userId = str(user_id),
    itemId = str(item_id)
)

item_list = get_recommendations_response['itemList']
title_list = [items.loc[items['ITEM_ID'] == np.int(item['itemId'])].values[0][-1] for item in item_list]

print("Recommendations: {}".format(json.dumps(title_list, indent=2)))