# Deploy personalize and ranking campaign

#### To deploy with your own data, change the following 4 parameters:
1. `dataset_group_name`: the name of the dataset group

2. `dataset_name`: the name of the dataset

3. `interactions_schema`: the schema of the interaction data

4. `ranking_campaign_name`: the name of the ranking campaign

### Import Personalize and S3

In [2]:
import boto3
import json
import numpy as np
import pandas as pd
import time
import datetime

# Clients for the Amazon Personalize control-plane and runtime APIs.
personalize = boto3.client('personalize')
personalize_runtime = boto3.client('personalize-runtime')

# The region is taken from the notebook's resource metadata file: it is the
# fourth colon-separated field of the 'ResourceArn' entry.
with open('/opt/ml/metadata/resource-metadata.json') as notebook_info:
    data = json.load(notebook_info)
resource_arn = data['ResourceArn']
region = resource_arn.split(':')[3]
print('region:', region)

# The bucket name is built from the caller's account id and the region.
s3 = boto3.client('s3')
account_id = boto3.client('sts').get_caller_identity().get('Account')
bucket_name = "-".join([account_id, region, "personalizemanagedretailers"])
print('bucket_name:', bucket_name)

# A LocationConstraint is sent for every region except us-east-1.
create_bucket_kwargs = {'Bucket': bucket_name}
if region != "us-east-1":
    create_bucket_kwargs['CreateBucketConfiguration'] = {'LocationConstraint': region}

try:
    s3.create_bucket(**create_bucket_kwargs)
except s3.exceptions.BucketAlreadyOwnedByYou:
    # Re-running the cell is fine: keep using the bucket we already own.
    print("Bucket already exists. Using bucket", bucket_name)

region: us-east-1
bucket_name: 513489159680-us-east-1-personalizemanagedretailers


### Upload interaction data to S3

In [15]:
# Local CSV holding the interaction records; its base name doubles as the S3 object key.
interactions_file = '../retail_data/interactions.csv'
filename = interactions_file.rsplit('/', 1)[-1]
print(filename)

# Upload the file to the bucket under that key.
s3_resource = boto3.Session().resource('s3')
s3_resource.Bucket(bucket_name).Object(filename).upload_file(interactions_file)

# s3:// URI of the uploaded object.
interactions_s3DataPath = "s3://{}/{}".format(bucket_name, filename)

interactions.csv


### Set the S3 bucket policy

In [5]:
# Attach a bucket policy that allows the personalize.amazonaws.com service
# principal to list the bucket named by bucket_name and read its objects.
s3 = boto3.client("s3")

bucket_arn = "arn:aws:s3:::{}".format(bucket_name)
personalize_read_statement = {
    "Sid": "PersonalizeS3BucketAccessPolicy",
    "Effect": "Allow",
    "Principal": {"Service": "personalize.amazonaws.com"},
    "Action": ["s3:GetObject", "s3:ListBucket"],
    # The bucket itself, followed by every object inside it.
    "Resource": [bucket_arn, bucket_arn + "/*"],
}
policy = {
    "Version": "2012-10-17",
    "Id": "PersonalizeS3BucketAccessPolicy",
    "Statement": [personalize_read_statement],
}

# Kept as the final expression so the notebook displays the API response.
s3.put_bucket_policy(Bucket=bucket_name, Policy=json.dumps(policy))

{'ResponseMetadata': {'RequestId': 'AP31HT9WB5PN4N4H',
  'HostId': 'oB/00KZAXP7XSc5t2XiETR275yaPwa7piloxDSsb/lBTWQAB8v4OtcjsjZgR4XsaUJjAnLvoWQQ=',
  'HTTPStatusCode': 204,
  'HTTPHeaders': {'x-amz-id-2': 'oB/00KZAXP7XSc5t2XiETR275yaPwa7piloxDSsb/lBTWQAB8v4OtcjsjZgR4XsaUJjAnLvoWQQ=',
   'x-amz-request-id': 'AP31HT9WB5PN4N4H',
   'date': 'Mon, 20 Nov 2023 02:33:25 GMT',
   'server': 'AmazonS3'},
  'RetryAttempts': 0}}

### Create Dataset Group

In [7]:
# NOTE(review): "ecomemerce" looks like a misspelling of "ecommerce"; it is kept
# as-is because the name is sent to the service and appears in the resulting ARN.
dataset_group_name = 'personalize_ecomemerce_ds_group'

# Create a dataset group in the ECOMMERCE domain.
create_dataset_group_kwargs = {
    'name': dataset_group_name,
    'domain': 'ECOMMERCE',
}
response = personalize.create_dataset_group(**create_dataset_group_kwargs)

# Keep the ARN for later cells and show the full response.
dataset_group_arn = response['datasetGroupArn']
print(json.dumps(response, indent=2))

{
  "datasetGroupArn": "arn:aws:personalize:us-east-1:513489159680:dataset-group/personalize_ecomemerce_ds_group",
  "domain": "ECOMMERCE",
  "ResponseMetadata": {
    "RequestId": "2ff871ec-c9c5-4bfa-8e38-77e714201aba",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "date": "Mon, 20 Nov 2023 02:34:46 GMT",
      "content-type": "application/x-amz-json-1.1",
      "content-length": "131",
      "connection": "keep-alive",
      "x-amzn-requestid": "2ff871ec-c9c5-4bfa-8e38-77e714201aba"
    },
    "RetryAttempts": 0
  }
}


In [8]:
%%time

# Poll the dataset group once a minute, for up to three hours, until it is ACTIVE.
# A failed creation or a timeout raises instead of falling through silently, so
# "Run All" stops here rather than continuing against an unusable dataset group.
max_time = time.time() + 3*60*60 # 3 hours
while time.time() < max_time:
    describe_dataset_group_response = personalize.describe_dataset_group(
        datasetGroupArn = dataset_group_arn
    )
    dataset_group_description = describe_dataset_group_response["datasetGroup"]
    status = dataset_group_description["status"]
    print("DatasetGroup: {}".format(status))

    if status == "ACTIVE":
        break
    if status == "CREATE FAILED":
        raise RuntimeError(
            "Dataset group creation failed: {}".format(
                dataset_group_description.get("failureReason", "no failure reason reported")
            )
        )

    time.sleep(60)
else:
    # The while condition became false without a break: the deadline passed.
    raise TimeoutError("Timed out waiting for the dataset group to become ACTIVE")

DatasetGroup: ACTIVE
CPU times: user 5.33 ms, sys: 0 ns, total: 5.33 ms
Wall time: 36.6 ms


### Create Interactions Schema

In [9]:
# (field name, Avro type) pairs of the interactions schema, in declaration order.
interaction_field_types = [
    ("USER_ID", "string"),
    ("ITEM_ID", "string"),
    ("TIMESTAMP", "long"),
    ("EVENT_TYPE", "string"),
]

# Avro record definition that is serialized to JSON and sent to Personalize.
interactions_schema = {
    "type": "record",
    "name": "Interactions",
    "namespace": "com.amazonaws.personalize.schema",
    "fields": [
        {"name": field_name, "type": field_type}
        for field_name, field_type in interaction_field_types
    ],
    "version": "1.0"
}
# Second name bound to the same dict object.
schema = interactions_schema

# Register the schema in the ECOMMERCE domain.
create_schema_kwargs = {
    "name": "personalize-ecommerce-interatn_group",
    "domain": "ECOMMERCE",
    "schema": json.dumps(interactions_schema),
}
create_schema_response = personalize.create_schema(**create_schema_kwargs)

# Keep the schema ARN for dataset creation and show the full response.
interaction_schema_arn = create_schema_response['schemaArn']
print(json.dumps(create_schema_response, indent=2))

{
  "schemaArn": "arn:aws:personalize:us-east-1:513489159680:schema/personalize-ecommerce-interatn_group",
  "ResponseMetadata": {
    "RequestId": "cdf27516-ab63-491f-b67c-ba168bd88c48",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "date": "Mon, 20 Nov 2023 02:34:53 GMT",
      "content-type": "application/x-amz-json-1.1",
      "content-length": "102",
      "connection": "keep-alive",
      "x-amzn-requestid": "cdf27516-ab63-491f-b67c-ba168bd88c48"
    },
    "RetryAttempts": 0
  }
}


### Create Interactions Dataset

In [10]:
# Create the INTERACTIONS dataset inside the dataset group, using the
# interactions schema ARN.
dataset_name = "personalize_ecommerce_demo_interactions"
dataset_type = "INTERACTIONS"

create_dataset_kwargs = {
    "name": dataset_name,
    "datasetType": dataset_type,
    "datasetGroupArn": dataset_group_arn,
    "schemaArn": interaction_schema_arn,
}
create_dataset_response = personalize.create_dataset(**create_dataset_kwargs)

# Keep the dataset ARN for the import job and show the full response.
interactions_dataset_arn = create_dataset_response['datasetArn']
print(json.dumps(create_dataset_response, indent=2))

{
  "datasetArn": "arn:aws:personalize:us-east-1:513489159680:dataset/personalize_ecomemerce_ds_group/INTERACTIONS",
  "ResponseMetadata": {
    "RequestId": "9356ce9e-2306-4c82-a6e3-35e27906b694",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "date": "Mon, 20 Nov 2023 02:34:57 GMT",
      "content-type": "application/x-amz-json-1.1",
      "content-length": "112",
      "connection": "keep-alive",
      "x-amzn-requestid": "9356ce9e-2306-4c82-a6e3-35e27906b694"
    },
    "RetryAttempts": 0
  }
}


### Create Personalize Role

In [13]:
# Create (or reuse) the IAM role that Amazon Personalize assumes, and attach the
# managed policies it needs. The cell is safe to re-run: an already existing
# role is looked up instead of failing on creation.
iam = boto3.client("iam")

role_name = "PersonalizeRoleEcommerceDemoRecommender"
# Trust policy: only the Personalize service principal may assume this role.
assume_role_policy_document = {
    "Version": "2012-10-17",
    "Statement": [
        {
          "Effect": "Allow",
          "Principal": {
            "Service": "personalize.amazonaws.com"
          },
          "Action": "sts:AssumeRole"
        }
    ]
}

try:
    create_role_response = iam.create_role(
        RoleName = role_name,
        AssumeRolePolicyDocument = json.dumps(assume_role_policy_document)
    )
except iam.exceptions.EntityAlreadyExistsException:
    # Mirrors the bucket handling: reuse the role left by a previous run.
    # get_role returns the same {"Role": {"Arn": ...}} shape as create_role.
    print("Role already exists. Using role", role_name)
    create_role_response = iam.get_role(RoleName = role_name)

policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonPersonalizeFullAccess"
iam.attach_role_policy(
    RoleName = role_name,
    PolicyArn = policy_arn
)

# Now add S3 support
# NOTE(review): AmazonS3FullAccess is broader than the read access the import
# job needs; consider a policy scoped to the data bucket outside of a demo.
iam.attach_role_policy(
    PolicyArn='arn:aws:iam::aws:policy/AmazonS3FullAccess',
    RoleName=role_name
)
time.sleep(60) # wait for a minute to allow IAM role policy attachment to propagate

role_arn = create_role_response["Role"]["Arn"]
print(role_arn)


arn:aws:iam::513489159680:role/PersonalizeRoleEcommerceDemoRecommender


## Import the data

### Create Interactions Dataset Import Job

In [17]:
# Start importing the uploaded CSV into the interactions dataset, using the
# IAM role given by role_arn.
interactions_data_source = {
    "dataLocation": "s3://{}/{}".format(bucket_name, filename)
}
create_interactions_dataset_import_job_response = personalize.create_dataset_import_job(
    jobName="personalize_ecommerce_demo_interactions_import",
    datasetArn=interactions_dataset_arn,
    dataSource=interactions_data_source,
    roleArn=role_arn,
)

# Keep the job ARN for status polling and show the full response.
dataset_interactions_import_job_arn = (
    create_interactions_dataset_import_job_response['datasetImportJobArn']
)
print(json.dumps(create_interactions_dataset_import_job_response, indent=2))

{
  "datasetImportJobArn": "arn:aws:personalize:us-east-1:513489159680:dataset-import-job/personalize_ecommerce_demo_interactions_import",
  "ResponseMetadata": {
    "RequestId": "b5a55bcd-6da6-419d-99ec-da866e956af6",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "date": "Mon, 20 Nov 2023 02:42:58 GMT",
      "content-type": "application/x-amz-json-1.1",
      "content-length": "134",
      "connection": "keep-alive",
      "x-amzn-requestid": "b5a55bcd-6da6-419d-99ec-da866e956af6"
    },
    "RetryAttempts": 0
  }
}


In [18]:
%%time

# Poll the dataset import job once a minute, for up to three hours, until it is
# ACTIVE. A failed import or a timeout raises instead of falling through
# silently, so "Run All" does not go on to train on a dataset that never loaded.
max_time = time.time() + 3*60*60 # 3 hours
while time.time() < max_time:
    describe_dataset_import_job_response = personalize.describe_dataset_import_job(
        datasetImportJobArn = dataset_interactions_import_job_arn
    )
    dataset_import_job_description = describe_dataset_import_job_response["datasetImportJob"]
    status = dataset_import_job_description['status']
    print("DatasetImportJob: {}".format(status))

    if status == "ACTIVE":
        break
    if status == "CREATE FAILED":
        raise RuntimeError(
            "Dataset import job failed: {}".format(
                dataset_import_job_description.get("failureReason", "no failure reason reported")
            )
        )

    time.sleep(60)
else:
    # The while condition became false without a break: the deadline passed.
    raise TimeoutError("Timed out waiting for the dataset import job to become ACTIVE")

DatasetImportJob: CREATE PENDING
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: ACTIVE
CPU times: user 83.5 ms, sys: 4.32 ms, total: 87.8 ms
Wall time: 3min


## Build the ranking solution (aws-personalized-ranking recipe)

In [19]:
# Create a solution in the dataset group using the personalized-ranking recipe.
rank_recipe_arn = "arn:aws:personalize:::recipe/aws-personalized-ranking"

create_solution_kwargs = {
    "name": "personalize-ranking",
    "datasetGroupArn": dataset_group_arn,
    "recipeArn": rank_recipe_arn,
}
rank_create_solution_response = personalize.create_solution(**create_solution_kwargs)

# Keep the solution ARN for version creation and show the full response.
rank_solution_arn = rank_create_solution_response['solutionArn']
print(json.dumps(rank_create_solution_response, indent=2))

{
  "solutionArn": "arn:aws:personalize:us-east-1:513489159680:solution/personalize-ranking",
  "ResponseMetadata": {
    "RequestId": "98b8749c-a2b2-494b-aa26-a87cf0470176",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "date": "Mon, 20 Nov 2023 02:46:12 GMT",
      "content-type": "application/x-amz-json-1.1",
      "content-length": "89",
      "connection": "keep-alive",
      "x-amzn-requestid": "98b8749c-a2b2-494b-aa26-a87cf0470176"
    },
    "RetryAttempts": 0
  }
}


In [20]:
# Create a version of the ranking solution; its status is polled separately.
rank_create_solution_version_response = personalize.create_solution_version(
    solutionArn=rank_solution_arn,
)

# Keep the version ARN for polling and campaign creation, then show the response.
rank_solution_version_arn = (
    rank_create_solution_version_response['solutionVersionArn']
)
print(json.dumps(rank_create_solution_version_response, indent=2))

{
  "solutionVersionArn": "arn:aws:personalize:us-east-1:513489159680:solution/personalize-ranking/197c10e1",
  "ResponseMetadata": {
    "RequestId": "ecd9a1ed-f53a-4a98-b8b7-2b9ac82cc8bb",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "date": "Mon, 20 Nov 2023 02:46:14 GMT",
      "content-type": "application/x-amz-json-1.1",
      "content-length": "105",
      "connection": "keep-alive",
      "x-amzn-requestid": "ecd9a1ed-f53a-4a98-b8b7-2b9ac82cc8bb"
    },
    "RetryAttempts": 0
  }
}


In [21]:
# Solution versions whose build has not finished yet.
in_progress_solution_versions = [
    rank_solution_version_arn
]

# Poll once a minute, for up to three hours, until every listed solution
# version has either become ACTIVE or failed.
max_time = time.time() + 3*60*60 # 3 hours
while time.time() < max_time:
    # Iterate over a snapshot of the list: removing items from the list that is
    # being iterated makes the loop skip the element after each removal.
    for solution_version_arn in list(in_progress_solution_versions):
        version_response = personalize.describe_solution_version(
            solutionVersionArn = solution_version_arn
        )
        status = version_response["solutionVersion"]["status"]

        if status == "ACTIVE":
            print("Build succeeded for {}".format(solution_version_arn))
            in_progress_solution_versions.remove(solution_version_arn)
        elif status == "CREATE FAILED":
            print("Build failed for {}".format(solution_version_arn))
            in_progress_solution_versions.remove(solution_version_arn)

    if len(in_progress_solution_versions) <= 0:
        break
    else:
        print("At least one solution build is still in progress")

    time.sleep(60)

At least one solution build is still in progress
At least one solution build is still in progress
At least one solution build is still in progress
At least one solution build is still in progress
At least one solution build is still in progress
At least one solution build is still in progress
At least one solution build is still in progress
At least one solution build is still in progress
At least one solution build is still in progress
At least one solution build is still in progress
At least one solution build is still in progress
At least one solution build is still in progress
At least one solution build is still in progress
At least one solution build is still in progress
At least one solution build is still in progress
At least one solution build is still in progress
At least one solution build is still in progress
At least one solution build is still in progress
At least one solution build is still in progress
At least one solution build is still in progress
At least one solutio

### Build ranking campaign

In [22]:
ranking_campaign_name = "personalize-poc-rerank"
rank_create_campaign_response = personalize.create_campaign(
    name = ranking_campaign_name,
    solutionVersionArn = rank_solution_version_arn,
    minProvisionedTPS = 1
)
rank_campaign_arn = rank_create_campaign_response['campaignArn']
print(json.dumps(rank_create_campaign_response, indent=2))

{
  "campaignArn": "arn:aws:personalize:us-east-1:513489159680:campaign/personalize-poc-rerank",
  "ResponseMetadata": {
    "RequestId": "2d565355-9d32-4ff5-8cc2-d05a493b6162",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "date": "Mon, 20 Nov 2023 03:11:27 GMT",
      "content-type": "application/x-amz-json-1.1",
      "content-length": "92",
      "connection": "keep-alive",
      "x-amzn-requestid": "2d565355-9d32-4ff5-8cc2-d05a493b6162"
    },
    "RetryAttempts": 0
  }
}


In [23]:
# Campaigns whose creation has not finished yet.
in_progress_campaigns = [
    rank_campaign_arn
]

# Poll once a minute, for up to three hours, until every listed campaign has
# either become ACTIVE or failed.
max_time = time.time() + 3*60*60 # 3 hours
while time.time() < max_time:
    # Iterate over a snapshot of the list: removing items from the list that is
    # being iterated makes the loop skip the element after each removal.
    for campaign_arn in list(in_progress_campaigns):
        version_response = personalize.describe_campaign(
            campaignArn = campaign_arn
        )
        status = version_response["campaign"]["status"]

        if status == "ACTIVE":
            print("Build succeeded for {}".format(campaign_arn))
            in_progress_campaigns.remove(campaign_arn)
        elif status == "CREATE FAILED":
            print("Build failed for {}".format(campaign_arn))
            in_progress_campaigns.remove(campaign_arn)

    if len(in_progress_campaigns) <= 0:
        break
    else:
        print("At least one campaign build is still in progress")

    time.sleep(60)

At least one campaign build is still in progress
At least one campaign build is still in progress
At least one campaign build is still in progress
At least one campaign build is still in progress
At least one campaign build is still in progress
At least one campaign build is still in progress
At least one campaign build is still in progress
At least one campaign build is still in progress
At least one campaign build is still in progress
At least one campaign build is still in progress
Build succeeded for arn:aws:personalize:us-east-1:513489159680:campaign/personalize-poc-rerank


### Test

In [24]:
# Smoke test: ask the ranking campaign to rank three item ids for one user.
user_id = '3156'
item_id_list = [
    'b93b7b15-9bb3-407c-b80b-517e7c45e090',
    '3946f4c8-1b5b-4161-b794-70b33affb671',
    'b98a3579-2a92-47e5-a9ae-65d776c76ac3',
]

personalize_runtime = boto3.client('personalize-runtime')
ranking_request = {
    'campaignArn': rank_campaign_arn,
    'userId': user_id,
    'inputList': item_id_list,
}
result = personalize_runtime.get_personalized_ranking(**ranking_request)

# The ranked items are returned under the 'personalizedRanking' key.
ranking_result = result['personalizedRanking']
print("ranking_result:",ranking_result)

ranking_result: [{'itemId': 'b93b7b15-9bb3-407c-b80b-517e7c45e090', 'score': 0.9976291}, {'itemId': '3946f4c8-1b5b-4161-b794-70b33affb671', 'score': 0.0020539}, {'itemId': 'b98a3579-2a92-47e5-a9ae-65d776c76ac3', 'score': 0.000317}]
