# Module 3. Personalize 캠페인 생성 하기

이 노트북은 Module2에서 생성한 솔류션을 바탕으로 아래와 같은 작업을 합니다.
* 캠페인 생성
* 캠페인을 통해 특정 유저에 대한 추천 영화 리스트 얻기


## 라이브러리 임포트

파이썬에는 광범위한 라이브러리 모음이 포함되어 있으며, 본 핸즈온을 위해서 핵심 데이터 과학 도구인 boto3 (AWS SDK) 및 Pandas/Numpy와 같은 라이브러리를 가져와야 합니다.

In [3]:
# Imports
import boto3
import json
import numpy as np
import pandas as pd
import time
from datetime import datetime

다음으로 여러분의 환경이 Amazon Personalize와 성공적으로 통신할 수 있는지 확인해야 합니다.

In [4]:
# Configure the SDK to Personalize:
personalize = boto3.client('personalize')
personalize_runtime = boto3.client('personalize-runtime')

아래 코드 셀은 이전 notebook에서 저장했던 공유 변수들을 불러옵니다.

In [5]:
%store -r

생성할 오브젝트의 끝에 임의의 숫자를 부여하기 위해 suffix 정의

In [6]:
suffix = str(np.random.uniform())[4:9]

### 캠페인 생성 및 대기

작동하는 솔루션 버전을 보유하고 있으므로, 이제 애플리케이션과 함께 사용할 캠페인을 작성해야 합니다. 캠페인은 단순히 모델의 호스팅된 사본입니다. 물론 인프라가 프로비저닝되기까지의 시간이 소요됩니다.

#### 캠페인 생성

In [7]:
create_campaign_response = personalize.create_campaign(
    name = "DEMO-hrnn-campaign" + suffix,
    solutionVersionArn = hrnn_solution_version_arn,
    minProvisionedTPS = 1
)

hrnn_campaign_arn = create_campaign_response['campaignArn']
print(json.dumps(create_campaign_response, indent=2))

{
  "campaignArn": "arn:aws:personalize:ap-northeast-2:870180618679:campaign/DEMO-hrnn-campaign95530",
  "ResponseMetadata": {
    "RequestId": "bd9abd8f-48b3-4742-a155-faa1d4a1c80a",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "content-type": "application/x-amz-json-1.1",
      "date": "Tue, 09 Jun 2020 01:34:30 GMT",
      "x-amzn-requestid": "bd9abd8f-48b3-4742-a155-faa1d4a1c80a",
      "content-length": "98",
      "connection": "keep-alive"
    },
    "RetryAttempts": 0
  }
}


In [8]:
create_campaign_response = personalize.create_campaign(
    name = "DEMO-hrnn-coldstart-campaign-" + suffix,
    solutionVersionArn = hrnn_coldstart_solution_version_arn,
    minProvisionedTPS = 1
)

hrnn_coldstart_campaign_arn = create_campaign_response['campaignArn']
print(json.dumps(create_campaign_response, indent=2))

{
  "campaignArn": "arn:aws:personalize:ap-northeast-2:870180618679:campaign/DEMO-hrnn-coldstart-campaign-95530",
  "ResponseMetadata": {
    "RequestId": "1b15bf34-27bb-4259-9080-63fe7df76a50",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "content-type": "application/x-amz-json-1.1",
      "date": "Tue, 09 Jun 2020 01:34:31 GMT",
      "x-amzn-requestid": "1b15bf34-27bb-4259-9080-63fe7df76a50",
      "content-length": "109",
      "connection": "keep-alive"
    },
    "RetryAttempts": 0
  }
}


In [9]:
create_campaign_response = personalize.create_campaign(
    name = "DEMO-sims-campaign-" + suffix,
    solutionVersionArn = sims_solution_version_arn,
    minProvisionedTPS = 1
)

sims_campaign_arn = create_campaign_response['campaignArn']
print(json.dumps(create_campaign_response, indent=2))

{
  "campaignArn": "arn:aws:personalize:ap-northeast-2:870180618679:campaign/DEMO-sims-campaign-95530",
  "ResponseMetadata": {
    "RequestId": "f98b67ce-384b-408b-b5a2-a5cedba72e4c",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "content-type": "application/x-amz-json-1.1",
      "date": "Tue, 09 Jun 2020 01:34:31 GMT",
      "x-amzn-requestid": "f98b67ce-384b-408b-b5a2-a5cedba72e4c",
      "content-length": "99",
      "connection": "keep-alive"
    },
    "RetryAttempts": 0
  }
}


In [10]:
create_campaign_response = personalize.create_campaign(
    name = "DEMO-ranking-campaign-" + suffix,
    solutionVersionArn = ranking_solution_version_arn,
    minProvisionedTPS = 1
)

ranking_campaign_arn = create_campaign_response['campaignArn']
print(json.dumps(create_campaign_response, indent=2))

{
  "campaignArn": "arn:aws:personalize:ap-northeast-2:870180618679:campaign/DEMO-ranking-campaign-95530",
  "ResponseMetadata": {
    "RequestId": "9b7ef2ea-b740-4781-ab01-f9a855633ef7",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "content-type": "application/x-amz-json-1.1",
      "date": "Tue, 09 Jun 2020 01:34:33 GMT",
      "x-amzn-requestid": "9b7ef2ea-b740-4781-ab01-f9a855633ef7",
      "content-length": "102",
      "connection": "keep-alive"
    },
    "RetryAttempts": 0
  }
}


#### 캠페인이 활성화 상태가 될 때까지 대기
소요시간은 약 20분 걸립니다.

In [17]:
%%time

max_time = time.time() + 3*60*60 # 3 hours
while time.time() < max_time:
    
    describe_campaign_response = personalize.describe_campaign(
        campaignArn = hrnn_campaign_arn
    )
    status_hrnn = describe_campaign_response["campaign"]["status"]
    print("HRNN_Campaign: {}".format(status_hrnn))
    
    describe_campaign_response = personalize.describe_campaign(
        campaignArn = hrnn_coldstart_campaign_arn
    )
    status_hrnn_cs = describe_campaign_response["campaign"]["status"]
    print("HRNN_Coldstart_Campaign: {}".format(status_hrnn_cs))
    
    describe_campaign_response = personalize.describe_campaign(
    campaignArn = sims_campaign_arn
    )
    status_sims = describe_campaign_response["campaign"]["status"]
    print("Sims_Campaign: {}".format(status_sims))
 
    describe_campaign_response = personalize.describe_campaign(
    campaignArn = ranking_campaign_arn
    )
    status_ranking = describe_campaign_response["campaign"]["status"]
    print("Ranking_Campaign: {}".format(status_ranking))
    
        
    
    if (status_hrnn == "ACTIVE" or status_hrnn == "CREATE FAILED")&\
       (status_hrnn_cs == "ACTIVE" or status_hrnn_cs == "CREATE FAILED")&\
       (status_sims == "ACTIVE" or status_sims == "CREATE FAILED")&\
       (status_ranking == "ACTIVE" or status_ranking == "CREATE FAILED"):
        break
    print("-------------------------------------->")
    time.sleep(60)

print("All Campaign creation completed")   

HRNN_Campaign: CREATE IN_PROGRESS
HRNN_Coldstart_Campaign: CREATE IN_PROGRESS
Sims_Campaign: CREATE IN_PROGRESS
Ranking_Campaign: CREATE IN_PROGRESS
-------------------------------------->
HRNN_Campaign: CREATE IN_PROGRESS
HRNN_Coldstart_Campaign: CREATE IN_PROGRESS
Sims_Campaign: CREATE IN_PROGRESS
Ranking_Campaign: CREATE IN_PROGRESS
-------------------------------------->
HRNN_Campaign: CREATE IN_PROGRESS
HRNN_Coldstart_Campaign: CREATE IN_PROGRESS
Sims_Campaign: CREATE IN_PROGRESS
Ranking_Campaign: CREATE IN_PROGRESS
-------------------------------------->
HRNN_Campaign: CREATE IN_PROGRESS
HRNN_Coldstart_Campaign: CREATE IN_PROGRESS
Sims_Campaign: CREATE IN_PROGRESS
Ranking_Campaign: CREATE IN_PROGRESS
-------------------------------------->
HRNN_Campaign: CREATE IN_PROGRESS
HRNN_Coldstart_Campaign: CREATE IN_PROGRESS
Sims_Campaign: CREATE IN_PROGRESS
Ranking_Campaign: CREATE IN_PROGRESS
-------------------------------------->
HRNN_Campaign: CREATE IN_PROGRESS
HRNN_Coldstart_Campai

## 샘플 추천 결과 얻기

캠페인이 활성화되면 추천 결과를 받을 수 있습니다. 먼저 컬렉션에서 임의의 사용자를 선택해야 합니다. 그런 다음, ID 대신 추천을 위해 영화 정보를 표시하는 몇 가지 헬퍼 함수를 만듭니다.

In [18]:
df=pd.read_csv(interaction_filename)

# Getting a random user:
user_id, item_id, _,_,_ = df.sample().values[0]
print("USER: {}".format(user_id))

USER: 5213


In [41]:
items_all = pd.read_csv('./ml-1m/movies.dat',sep='::', encoding='latin1',names=['ITEM_ID', 'TITLE', 'GENRE'],)
items=items_all.copy()
items['to_keep'] = items['ITEM_ID'].apply(lambda x:x in unique_items)
items=items[items['to_keep']]
#items=items.set_index('ITEM_ID')
del items['to_keep']
items.tail()
#items[items['ITEM_ID']==2363]

  if __name__ == '__main__':


Unnamed: 0,ITEM_ID,TITLE,GENRE
2294,2363,Godzilla (Gojira) (1954),Action|Sci-Fi


In [20]:
def get_movie_title(movie_id):
    """
    Takes in an ID, returns a title
    """
    movie_id = int(movie_id)
    movie_title=items[items['ITEM_ID']==movie_id]['TITLE']
    return (movie_title.tolist())


#### HRNN GetRecommendations 호출

아래 코드 셀을 실행하면 특정 사용자에 대한 추천 사항이 표시되고 추천 영화 목록이 반환됩니다.

In [21]:
get_recommendations_response = personalize_runtime.get_recommendations(
    campaignArn = hrnn_campaign_arn,
    userId = str(user_id),
)
# Update DF rendering
pd.set_option('display.max_rows', 30)

print("Recommendations for user: ", user_id)

item_list = get_recommendations_response['itemList']
recommendation_title_list = []
recommendation_id_list=[]
for item in item_list:
    title = get_movie_title(item['itemId'])
    recommendation_title_list.append(title)
    recommendation_id_list.append(item['itemId'])
recommendations_df = pd.DataFrame(recommendation_title_list, columns = ['OriginalRecs'])
recommendations_df

Recommendations for user:  5213


Unnamed: 0,OriginalRecs
0,Man on the Moon (1999)
1,Chasing Amy (1997)
2,Lethal Weapon 2 (1989)
3,"Talented Mr. Ripley, The (1999)"
4,Interview with the Vampire (1994)
5,"Big Lebowski, The (1998)"
6,Mumford (1999)
7,"Firm, The (1993)"
8,Desperately Seeking Susan (1985)
9,Mary Poppins (1964)


#### Sims GetRecommendations 호출
아래 코드 셀을 실행하면 특정 아이템과 유사한 추천 영화 목록이 반환됩니다.

In [22]:
# Getting a random user:
user_id, item_id, _,_,_ = df.sample().values[0]
print("ITEM ID: {}".format(item_id))


get_recommendations_response = personalize_runtime.get_recommendations(
    campaignArn = sims_campaign_arn,
    itemId = str(item_id),
)
# Update DF rendering
pd.set_option('display.max_rows', 30)

print("Recommendations for item_id: ", item_id)

item_list = get_recommendations_response['itemList']
recommendation_title_list = []
recommendation_id_list=[]
for item in item_list:
    title = get_movie_title(item['itemId'])
    recommendation_title_list.append(title)
    recommendation_id_list.append(item['itemId'])
recommendations_df = pd.DataFrame(recommendation_title_list, columns = ['OriginalRecs'])
recommendations_df

ITEM ID: 1617
Recommendations for item_id:  1617


Unnamed: 0,OriginalRecs
0,"Usual Suspects, The (1995)"
1,Reservoir Dogs (1992)
2,"Simple Plan, A (1998)"
3,"Grifters, The (1990)"
4,Chinatown (1974)
5,Devil in a Blue Dress (1995)
6,Sling Blade (1996)
7,"Manchurian Candidate, The (1962)"
8,Rear Window (1954)
9,"Silence of the Lambs, The (1991)"



## Personalized Ranking

Personalized Ranking의 핵심 사용 사례는 아이템 리스트를 가져 와서 사용자에게 우선 순위 또는 사용자 관심 순서로 표시하는 것입니다. 이 기능에 대해 알아보기 위해 이번 파트에서는 한명의 사용자와 랜덤하게 뽑은 25개의 아이템 항목을 가지고 테스트 해 봅니다. 

In [45]:
#Get the user list
df=pd.read_csv(interaction_filename)
df_users = df['USER_ID'].unique()
df_users=pd.DataFrame(df_users)
df_items=df['ITEM_ID'].unique()
df_items=pd.DataFrame(df_items)

rerank_user = df_users.sample(1).index.tolist()[0]
rerank_items = df_items.sample(25).index.tolist()

In [46]:
rerank_list = []
for item in rerank_items:
    title = get_movie_title(item)
    rerank_list.append(title)
rerank_df = pd.DataFrame(rerank_list, columns = [rerank_user])
rerank_df

Unnamed: 0,1534
0,"House of Yes, The (1997)"
1,Wide Awake (1998)
2,Lightning Jack (1994)
3,"Sunchaser, The (1996)"
4,"Great Day in Harlem, A (1994)"
5,Rent-a-Kid (1995)
6,Buddy (1997)
7,Dangerous Beauty (1998)
8,Shall We Dance? (1937)
9,Grease (1978)


In [47]:
# Convert user to string:
user_id = str(rerank_user)
rerank_item_list = []
for item in rerank_items:
    rerank_item_list.append(str(item))
    
# Get recommended reranking
get_recommendations_response_rerank = personalize_runtime.get_personalized_ranking(
        campaignArn = ranking_campaign_arn,
        userId = user_id,
        inputList = rerank_item_list
)

get_recommendations_response_rerank

{'ResponseMetadata': {'RequestId': 'ddb39058-c6ad-49c0-a3d9-da1ee0df1c47',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'content-type': 'application/json',
   'date': 'Tue, 09 Jun 2020 02:41:32 GMT',
   'x-amzn-requestid': 'ddb39058-c6ad-49c0-a3d9-da1ee0df1c47',
   'content-length': '1358',
   'connection': 'keep-alive'},
  'RetryAttempts': 0},
 'personalizedRanking': [{'itemId': '1721', 'score': 0.8059301},
  {'itemId': '1758', 'score': 0.0800434},
  {'itemId': '1788', 'score': 0.0284955},
  {'itemId': '104', 'score': 0.0271772},
  {'itemId': '1648', 'score': 0.0220383},
  {'itemId': '1380', 'score': 0.0076381},
  {'itemId': '1812', 'score': 0.0071237},
  {'itemId': '1551', 'score': 0.0070304},
  {'itemId': '586', 'score': 0.0052192},
  {'itemId': '1066', 'score': 0.0044143},
  {'itemId': '602', 'score': 0.0026924},
  {'itemId': '294', 'score': 0.000657},
  {'itemId': '1169', 'score': 0.00058},
  {'itemId': '725', 'score': 0.0004885},
  {'itemId': '310', 'score': 0.0003071},
  {'itemId'

In [48]:
ranked_list = []
item_list = get_recommendations_response_rerank['personalizedRanking']
for item in item_list:
    title = get_movie_title(item['itemId'])
    ranked_list.append(title)
ranked_df = pd.DataFrame(ranked_list, columns = ['Re-Ranked'])
rerank_df = pd.concat([rerank_df, ranked_df], axis=1)
rerank_df

Unnamed: 0,1534,Re-Ranked
0,"House of Yes, The (1997)",Titanic (1997)
1,Wide Awake (1998),Dangerous Beauty (1998)
2,Lightning Jack (1994),Men With Guns (1997)
3,"Sunchaser, The (1996)",Happy Gilmore (1996)
4,"Great Day in Harlem, A (1994)","House of Yes, The (1997)"
5,Rent-a-Kid (1995),Grease (1978)
6,Buddy (1997),Wide Awake (1998)
7,Dangerous Beauty (1998),Buddy (1997)
8,Shall We Dance? (1937),Home Alone (1990)
9,Grease (1978),Shall We Dance? (1937)


## Amazon Personalize Batch Export 작업 


Amazon Personalize Batch기능을 활용하려면 json 파일 형식으로 추천 받고하 자는 사용자 또는 아이템 아이디를 json 형태의 파일로 s3에 저장하여야 합니다. Output의 형식도 json형태로 저장되며 지정한 S3 bucket 경로에 저장 되게 됩니다. 

HRNN 솔루션  Batch Input 예제: 

```JSON,
    {"userId": "4638"},
    {"userId": "663"},
    {"userId": "3384"},
```


Batch Output 예제: 
```JSON,
{"input":{"userId":"4638"}, "output": {"recommendedItems": ["296", "1", "260", "318"]}}
{"input":{"userId":"663"}, "output": {"recommendedItems": ["1393", "3793", "2701", "3826"]}}
{"input":{"userId":"3384"}, "output": {"recommendedItems": ["8368", "5989", "40815", "48780"]}}
```


In [53]:
#Get the user list
#batch_users = df_users.sample(3).index.tolist()
batch_users=df_users.index.tolist()
data_dir="dataset/"
# Write the file to disk
json_input_filename = "json_input.json"
with open(data_dir+json_input_filename, 'w') as json_input:
    for user_id in batch_users:
        json_input.write('{"userId": "' + str(user_id) + '"}\n')

In [54]:
# Showcase the input file:
!cat $data_dir$json_input_filename

{"userId": "0"}
{"userId": "1"}
{"userId": "2"}
{"userId": "3"}
{"userId": "4"}
{"userId": "5"}
{"userId": "6"}
{"userId": "7"}
{"userId": "8"}
{"userId": "9"}
{"userId": "10"}
{"userId": "11"}
{"userId": "12"}
{"userId": "13"}
{"userId": "14"}
{"userId": "15"}
{"userId": "16"}
{"userId": "17"}
{"userId": "18"}
{"userId": "19"}
{"userId": "20"}
{"userId": "21"}
{"userId": "22"}
{"userId": "23"}
{"userId": "24"}
{"userId": "25"}
{"userId": "26"}
{"userId": "27"}
{"userId": "28"}
{"userId": "29"}
{"userId": "30"}
{"userId": "31"}
{"userId": "32"}
{"userId": "33"}
{"userId": "34"}
{"userId": "35"}
{"userId": "36"}
{"userId": "37"}
{"userId": "38"}
{"userId": "39"}
{"userId": "40"}
{"userId": "41"}
{"userId": "42"}
{"userId": "43"}
{"userId": "44"}
{"userId": "45"}
{"userId": "46"}
{"userId": "47"}
{"userId": "48"}
{"userId": "49"}
{"userId": "50"}
{"userId": "51"}
{"userId": "52"}
{"userId": "53"}
{"userId": "54"}
{"userId": "55"}
{"

In [55]:
# Upload files to S3
boto3.Session().resource('s3').Bucket(bucket).Object(data_dir+json_input_filename).upload_file(data_dir+json_input_filename)
s3_input_path = "s3://" + bucket + "/" + data_dir+json_input_filename
print(s3_input_path)

s3://sagemaker-ap-northeast-2-870180618679/dataset/json_input.json


In [56]:
# Define the output path
s3_output_path = "s3://" + bucket + "/"+data_dir
print(s3_output_path)

s3://sagemaker-ap-northeast-2-870180618679/dataset/


In [57]:
print(role_arn)

arn:aws:iam::870180618679:role/PersonalizeRoleDemo40706


In [59]:
batchInferenceJobArn = personalize.create_batch_inference_job (
    solutionVersionArn = hrnn_solution_version_arn,
    jobName = "POC-Batch-Inference-Job-HRNN-"+suffix,
    roleArn = role_arn,
    jobInput = 
     {"s3DataSource": {"path": s3_input_path}},
    jobOutput = 
     {"s3DataDestination":{"path": s3_output_path}}
)
batchInferenceJobArn = batchInferenceJobArn['batchInferenceJobArn']

In [None]:
current_time = datetime.now()
print("Import Started on: ", current_time.strftime("%I:%M:%S %p"))

max_time = time.time() + 3*60*60 # 3 hours
while time.time() < max_time:
    describe_dataset_inference_job_response = personalize.describe_batch_inference_job(
        batchInferenceJobArn = batchInferenceJobArn
    )
    status = describe_dataset_inference_job_response["batchInferenceJob"]['status']
    print("DatasetInferenceJob: {}".format(status))
    
    if status == "ACTIVE" or status == "CREATE FAILED":
        break
        
    time.sleep(60)
    
current_time = datetime.now()
print("Import Completed on: ", current_time.strftime("%I:%M:%S %p"))

Import Started on:  02:45:15 AM
DatasetInferenceJob: CREATE PENDING


In [50]:
s3 = boto3.client('s3')
export_name = json_input_filename + ".out"
s3.download_file(bucket,data_dir+export_name,data_dir+export_name)

# Update DF rendering
pd.set_option('display.max_rows', 30)
with open("dataset/"+export_name) as json_file:
    # Get the first line and parse it
    line = json.loads(json_file.readline())
    # Do the same for the other lines
    while line:
        # extract the user ID 
        col_header = "User: " + line['input']['userId']
        # Create a list for all the artists
        recommendation_list = []
        # Add all the entries
        for item in line['output']['recommendedItems']:
            title = get_movie_title(item)
            recommendation_list.append(title)
        if 'bulk_recommendations_df' in locals():
            new_rec_DF = pd.DataFrame(recommendation_list, columns = [col_header])
            bulk_recommendations_df = bulk_recommendations_df.join(new_rec_DF)
        else:
            bulk_recommendations_df = pd.DataFrame(recommendation_list, columns=[col_header])
        try:
            line = json.loads(json_file.readline())
        except:
            line = None
bulk_recommendations_df

Unnamed: 0,User: 0
0,Titanic (1997)
1,Back to the Future (1985)
2,Saving Private Ryan (1998)
3,Star Wars: Episode V - The Empire Strikes Back...
4,E.T. the Extra-Terrestrial (1982)
5,"Godfather, The (1972)"
6,Raiders of the Lost Ark (1981)
7,Fatal Attraction (1987)
8,Schindler's List (1993)
9,L.A. Confidential (1997)


## 리뷰

캠페인을 생성하고 실제적으로 특정 유저의 추천 영화 목록도 얻었습니다.
이제 다음 노트북으로 넘어갈 준비가 되었습니다. (`4.View_Campaign_And_Interactions.ipynb`)


## 다음 노트북에 대한 참고 사항

다음 실습에 필요한 몇 가지 값들이 있습니다. 아래 셀을 실행하여 저장한 후, 다음 주피터 노트북에서 그대로 사용할 수 있습니다.

In [1]:
%store hrnn_campaign_arn
%store hrnn_coldstart_campaign_arn
%store sims_campaign_arn
%store ranking_campaign_arn
%store recommendations_df
%store user_id

UsageError: Unknown variable 'hrnn_campaign_arn'
