In [41]:
import time

import numpy as np
import pandas as pd

import boto3
import json

In [4]:
personalize = boto3.client('personalize')
personalize_runtime = boto3.client('personalize-runtime')

# CloudformationのStackから情報を取得

In [74]:
stack = boto3.resource('cloudformation').Stack(STACK_NAME)

STACK_NAME = "amazon-personalize-example-stack"
outputs = {o["OutputKey"]: o["OutputValue"] for o in stack.outputs}

S3_BUCKET_NAME = outputs["S3BucketName"]
IAM_ROLE_ARN = outputs["IAMRoleArn"]

# データセットグループ作成

In [70]:
create_dataset_group_response = personalize.create_dataset_group(
    name = "amzon-personalize-example-dataset-group"
)

dataset_group_arn = create_dataset_group_response['datasetGroupArn']

# データセットのSchema作成

In [73]:
schema = {
    "type": "record",
    "name": "Interactions",
    "namespace": "com.amazonaws.personalize.schema",
    "fields": [
        {
            "name": "USER_ID",
            "type": "string"
        },
        {
            "name": "ITEM_ID",
            "type": "string"
        },
        {
            "name": "EVENT_TYPE",
            "type": "string"
        },
        {
            "name": "EVENT_VALUE",
            "type": "string"
        },
        {
            "name": "TIMESTAMP",
            "type": "long"
        }
    ],
    "version": "1.0"
}

create_schema_response = personalize.create_schema(
    name = "amzon-personalize-example-schema",
    schema = json.dumps(schema)
)

schema_arn = create_schema_response['schemaArn']

# データを整形＆アップロード

In [None]:
filename = "amazon-personalize-example"
data = pd.read_csv('./ml-100k/u.data', sep='\t', names=['USER_ID', 'ITEM_ID', 'RATING', 'TIMESTAMP'])

# Amazon Personalizeに合わせた形に整形
data["EVENT_VALUE"] = data["RATING"]
data["EVENT_TYPE"] = "rating"
data = data[['USER_ID', 'ITEM_ID', 'EVENT_TYPE', 'EVENT_VALUE', 'TIMESTAMP']]

# csvファイルとして書き出し
data.to_csv(filename, index=False)

# アップロード
boto3.Session().resource('s3').Bucket(S3_BUCKET_NAME).Object(filename).upload_file(filename)

In [94]:
data.head()

Unnamed: 0,USER_ID,ITEM_ID,EVENT_TYPE,EVENT_VALUE,TIMESTAMP
0,196,242,rating,3,881250949
1,186,302,rating,3,891717742
2,22,377,rating,1,878887116
3,244,51,rating,2,880606923
4,166,346,rating,1,886397596


# データセットを作成＆S3からデータ読み込み

### データセット作成

In [75]:
dataset_type = "INTERACTIONS"

create_dataset_response = personalize.create_dataset(
    name = "amazon-personalize-dataset",
    datasetType = dataset_type,
    datasetGroupArn = dataset_group_arn,
    schemaArn = schema_arn
)

dataset_arn = create_dataset_response['datasetArn']

### S3からインポート

In [76]:
create_dataset_import_job_response = personalize.create_dataset_import_job(
    jobName = "amazon-personalize-dataset-import-job",
    datasetArn = dataset_arn,
    dataSource = {
        "dataLocation": f"s3://{S3_BUCKET_NAME}/{filename}"
    },
    roleArn = IAM_ROLE_ARN
)

dataset_import_job_arn = create_dataset_import_job_response['datasetImportJobArn']

### インポート状況をチェック＆待機

In [29]:
max_time = time.time() + 3*60*60 # 3 hours
while time.time() < max_time:
    describe_dataset_import_job_response = personalize.describe_dataset_import_job(
        datasetImportJobArn = dataset_import_job_arn
    )
    status = describe_dataset_import_job_response["datasetImportJob"]['status']
    print(f"DatasetImportJob: {status}")
    
    if status == "ACTIVE" or status == "CREATE FAILED":
        break
        
    time.sleep(60)


DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: ACTIVE


# Solutionの作成＆Versionの作成

### Solutionの作成

In [77]:
recipe_arn = "arn:aws:personalize:::recipe/aws-hrnn"
create_solution_response = personalize.create_solution(
    name = "amazon-personalize-dataset-solution",
    datasetGroupArn = dataset_group_arn,
    recipeArn = recipe_arn
)

solution_arn = create_solution_response['solutionArn']

### Versionの作成

In [78]:
create_solution_version_response = personalize.create_solution_version(
    solutionArn = solution_arn
)

solution_version_arn = create_solution_version_response['solutionVersionArn']

### SolutionのVersion作成状況をチェック＆待機

In [34]:
max_time = time.time() + 3*60*60 # 3 hours
while time.time() < max_time:
    describe_solution_version_response = personalize.describe_solution_version(
        solutionVersionArn = solution_version_arn
    )
    status = describe_solution_version_response["solutionVersion"]["status"]
    print(f"SolutionVersion: {status}")
    
    if status == "ACTIVE" or status == "CREATE FAILED":
        break
        
    time.sleep(60)

SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_P

# キャンペーン作成

In [79]:
create_campaign_response = personalize.create_campaign(
    name = "amazon-personalize-dataset-campaign",
    solutionVersionArn = solution_version_arn,
    minProvisionedTPS = 1
)

campaign_arn = create_campaign_response['campaignArn']

### キャンペーン作成状況をチェック＆待機

In [36]:
max_time = time.time() + 3*60*60 # 3 hours
while time.time() < max_time:
    describe_campaign_response = personalize.describe_campaign(
        campaignArn = campaign_arn
    )
    status = describe_campaign_response["campaign"]["status"]
    print(f"Campaign: {status}")
    
    if status == "ACTIVE" or status == "CREATE FAILED":
        break
        
    time.sleep(60)

Campaign: CREATE PENDING
Campaign: CREATE IN_PROGRESS
Campaign: CREATE IN_PROGRESS
Campaign: CREATE IN_PROGRESS
Campaign: CREATE IN_PROGRESS
Campaign: CREATE IN_PROGRESS
Campaign: CREATE IN_PROGRESS
Campaign: CREATE IN_PROGRESS
Campaign: CREATE IN_PROGRESS
Campaign: ACTIVE


# レコメンデーションの取得(モデルの予測結果取得)

In [95]:
items = pd.read_csv('./ml-100k/u.item', sep='|', usecols=[0,1], header=None, encoding="ISO-8859-1")
items.columns = ['ITEM_ID', 'TITLE']
item_maps = items.set_index("ITEM_ID").to_dict()['TITLE']

print(f"USER: {user_id}")

USER: 384


In [93]:
user_id = 384
get_recommendations_response = personalize_runtime.get_recommendations(
    campaignArn = campaign_arn,
    userId = str(user_id)
)

item_list = get_recommendations_response['itemList']
title_list = [item_maps[int(item["itemId"])] for item in item_list]

print(f"Recommendations: {json.dumps(title_list, indent=2)}")

Recommendations: [
  "George of the Jungle (1997)",
  "Murder at 1600 (1997)",
  "Devil's Own, The (1997)",
  "Spawn (1997)",
  "Volcano (1997)",
  "Crash (1996)",
  "G.I. Jane (1997)",
  "Dante's Peak (1997)",
  "Starship Troopers (1997)",
  "Event Horizon (1997)",
  "Jackal, The (1997)",
  "Cop Land (1997)",
  "Kiss the Girls (1997)",
  "Alien: Resurrection (1997)",
  "As Good As It Gets (1997)",
  "Bean (1997)",
  "Midnight in the Garden of Good and Evil (1997)",
  "Fly Away Home (1996)",
  "Peacemaker, The (1997)",
  "Mother (1996)",
  "Mad City (1997)",
  "Jungle2Jungle (1997)",
  "In & Out (1997)",
  "Saint, The (1997)",
  "Lost Highway (1997)"
]


# 別のレシピ

In [50]:
recipe_arn = "arn:aws:personalize:::recipe/aws-personalized-ranking"
create_solution_response = personalize.create_solution(
    name = "amazon-personalize-dataset-solution-ranking",
    datasetGroupArn = dataset_group_arn,
    recipeArn = recipe_arn
)

solution_arn = create_solution_response['solutionArn']
print(json.dumps(create_solution_response, indent=2))

{
  "solutionArn": "arn:aws:personalize:ap-northeast-1:081200852128:solution/amazon-personalize-dataset-solution-ranking",
  "ResponseMetadata": {
    "RequestId": "44699cbf-3194-477b-94b1-566dd8da67dc",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "content-type": "application/x-amz-json-1.1",
      "date": "Thu, 13 Jun 2019 09:29:11 GMT",
      "x-amzn-requestid": "44699cbf-3194-477b-94b1-566dd8da67dc",
      "content-length": "118",
      "connection": "keep-alive"
    },
    "RetryAttempts": 0
  }
}


In [51]:
create_solution_version_response = personalize.create_solution_version(
    solutionArn = solution_arn
)

solution_version_arn = create_solution_version_response['solutionVersionArn']
print(json.dumps(create_solution_version_response, indent=2))

{
  "solutionVersionArn": "arn:aws:personalize:ap-northeast-1:081200852128:solution/amazon-personalize-dataset-solution-ranking/e58d04e7",
  "ResponseMetadata": {
    "RequestId": "85ea3c25-1b03-4140-b48a-be1e376a1466",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "content-type": "application/x-amz-json-1.1",
      "date": "Thu, 13 Jun 2019 09:29:26 GMT",
      "x-amzn-requestid": "85ea3c25-1b03-4140-b48a-be1e376a1466",
      "content-length": "134",
      "connection": "keep-alive"
    },
    "RetryAttempts": 0
  }
}


In [52]:
max_time = time.time() + 3*60*60 # 3 hours
while time.time() < max_time:
    describe_solution_version_response = personalize.describe_solution_version(
        solutionVersionArn = solution_version_arn
    )
    status = describe_solution_version_response["solutionVersion"]["status"]
    print(f"SolutionVersion: {status}")
    
    if status == "ACTIVE" or status == "CREATE FAILED":
        break
        
    time.sleep(60)

SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_P

In [53]:
create_campaign_response = personalize.create_campaign(
    name = "amazon-personalize-dataset-campaign-ranking",
    solutionVersionArn = solution_version_arn,
    minProvisionedTPS = 1
)

campaign_arn = create_campaign_response['campaignArn']
print(json.dumps(create_campaign_response, indent=2))

{
  "campaignArn": "arn:aws:personalize:ap-northeast-1:081200852128:campaign/amazon-personalize-dataset-campaign-ranking",
  "ResponseMetadata": {
    "RequestId": "f18833b0-e2ca-4906-840d-0503bb674642",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "content-type": "application/x-amz-json-1.1",
      "date": "Thu, 13 Jun 2019 10:02:38 GMT",
      "x-amzn-requestid": "f18833b0-e2ca-4906-840d-0503bb674642",
      "content-length": "118",
      "connection": "keep-alive"
    },
    "RetryAttempts": 0
  }
}


In [59]:
item_list

[{'itemId': '127'},
 {'itemId': '100'},
 {'itemId': '340'},
 {'itemId': '923'},
 {'itemId': '197'},
 {'itemId': '50'},
 {'itemId': '272'},
 {'itemId': '172'},
 {'itemId': '300'},
 {'itemId': '56'},
 {'itemId': '168'},
 {'itemId': '603'},
 {'itemId': '661'},
 {'itemId': '315'},
 {'itemId': '259'},
 {'itemId': '269'},
 {'itemId': '307'},
 {'itemId': '357'},
 {'itemId': '302'},
 {'itemId': '275'},
 {'itemId': '7'},
 {'itemId': '638'},
 {'itemId': '165'},
 {'itemId': '166'},
 {'itemId': '258'}]

In [66]:
get_recommendations_response = personalize_runtime.get_personalized_ranking(
    campaignArn = campaign_arn,
    userId = str(user_id),
    inputList = [str(item_id), "259"]
)

item_list = get_recommendations_response['itemList']

KeyError: 'itemList'

In [67]:
get_recommendations_response

{'ResponseMetadata': {'RequestId': '6f6b2842-e021-409b-85c6-eaf077d007fb',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'content-type': 'application/json',
   'date': 'Thu, 13 Jun 2019 10:52:59 GMT',
   'x-amzn-requestid': '6f6b2842-e021-409b-85c6-eaf077d007fb',
   'content-length': '59',
   'connection': 'keep-alive'},
  'RetryAttempts': 0},
 'personalizedRanking': [{'itemId': '259'}, {'itemId': '434'}]}