# Module 3. Personalize 캠페인 생성 하기

이 노트북은 Module2에서 생성한 솔류션을 바탕으로 아래와 같은 작업을 합니다.
* 캠페인 생성
* 캠페인을 통해 특정 유저에 대한 추천 영화 리스트 얻기


## 라이브러리 임포트

파이썬에는 광범위한 라이브러리 모음이 포함되어 있으며, 본 핸즈온을 위해서 핵심 데이터 과학 도구인 boto3 (AWS SDK) 및 Pandas/Numpy와 같은 라이브러리를 가져와야 합니다.

In [1]:
# Imports
import boto3
import json
import numpy as np
import pandas as pd
import time

다음으로 여러분의 환경이 Amazon Personalize와 성공적으로 통신할 수 있는지 확인해야 합니다.

In [2]:
# Configure the SDK to Personalize:
personalize = boto3.client('personalize')
personalize_runtime = boto3.client('personalize-runtime')

아래 코드 셀은 이전 notebook에서 저장했던 공유 변수들을 불러옵니다.

In [3]:
%store -r


생성할 오브젝트의 끝에 임의의 숫자를 부여하기 위해 suffix 정의

In [4]:
suffix = str(np.random.uniform())[4:9]

### 캠페인 생성 및 대기

작동하는 솔루션 버전을 보유하고 있으므로, 이제 애플리케이션과 함께 사용할 캠페인을 작성해야 합니다. 캠페인은 단순히 모델의 호스팅된 사본입니다. 물론 인프라가 프로비저닝되기까지의 시간이 소요됩니다.

#### 캠페인 생성

In [5]:
create_campaign_response = personalize.create_campaign(
    name = "DEMO-hrnn-campaign" + suffix,
    solutionVersionArn = hrnn_solution_version_arn,
    minProvisionedTPS = 1
)

hrnn_campaign_arn = create_campaign_response['campaignArn']
print(json.dumps(create_campaign_response, indent=2))

{
  "campaignArn": "arn:aws:personalize:ap-northeast-2:870180618679:campaign/DEMO-hrnn-campaign91032",
  "ResponseMetadata": {
    "RequestId": "b4536446-7c81-43db-9b9c-43c92a4bc0df",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "content-type": "application/x-amz-json-1.1",
      "date": "Mon, 08 Jun 2020 05:19:51 GMT",
      "x-amzn-requestid": "b4536446-7c81-43db-9b9c-43c92a4bc0df",
      "content-length": "98",
      "connection": "keep-alive"
    },
    "RetryAttempts": 0
  }
}


In [6]:
create_campaign_response = personalize.create_campaign(
    name = "DEMO-hrnn-coldstart-campaign-" + suffix,
    solutionVersionArn = hrnn_coldstart_solution_version_arn,
    minProvisionedTPS = 1
)

hrnn_coldstart_campaign_arn = create_campaign_response['campaignArn']
print(json.dumps(create_campaign_response, indent=2))

{
  "campaignArn": "arn:aws:personalize:ap-northeast-2:870180618679:campaign/DEMO-hrnn-coldstart-campaign-91032",
  "ResponseMetadata": {
    "RequestId": "792eadd4-5c67-477b-95ef-b9db6de51114",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "content-type": "application/x-amz-json-1.1",
      "date": "Mon, 08 Jun 2020 05:19:51 GMT",
      "x-amzn-requestid": "792eadd4-5c67-477b-95ef-b9db6de51114",
      "content-length": "109",
      "connection": "keep-alive"
    },
    "RetryAttempts": 0
  }
}


In [7]:
create_campaign_response = personalize.create_campaign(
    name = "DEMO-sims-campaign-" + suffix,
    solutionVersionArn = sims_solution_version_arn,
    minProvisionedTPS = 1
)

sims_campaign_arn = create_campaign_response['campaignArn']
print(json.dumps(create_campaign_response, indent=2))

{
  "campaignArn": "arn:aws:personalize:ap-northeast-2:870180618679:campaign/DEMO-sims-campaign-91032",
  "ResponseMetadata": {
    "RequestId": "35be21dc-5371-4d5e-9b80-0b2e753cc09d",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "content-type": "application/x-amz-json-1.1",
      "date": "Mon, 08 Jun 2020 05:19:51 GMT",
      "x-amzn-requestid": "35be21dc-5371-4d5e-9b80-0b2e753cc09d",
      "content-length": "99",
      "connection": "keep-alive"
    },
    "RetryAttempts": 0
  }
}


In [8]:
create_campaign_response = personalize.create_campaign(
    name = "DEMO-ranking-campaign-" + suffix,
    solutionVersionArn = ranking_solution_version_arn,
    minProvisionedTPS = 1
)

ranking_campaign_arn = create_campaign_response['campaignArn']
print(json.dumps(create_campaign_response, indent=2))

{
  "campaignArn": "arn:aws:personalize:ap-northeast-2:870180618679:campaign/DEMO-ranking-campaign-91032",
  "ResponseMetadata": {
    "RequestId": "97b37e95-741b-4e1b-8b37-fc756c461207",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "content-type": "application/x-amz-json-1.1",
      "date": "Mon, 08 Jun 2020 05:19:52 GMT",
      "x-amzn-requestid": "97b37e95-741b-4e1b-8b37-fc756c461207",
      "content-length": "102",
      "connection": "keep-alive"
    },
    "RetryAttempts": 0
  }
}


#### 캠페인이 활성화 상태가 될 때까지 대기
소요시간은 약 20분 걸립니다.

In [9]:
%%time

max_time = time.time() + 3*60*60 # 3 hours
while time.time() < max_time:
    
    describe_campaign_response = personalize.describe_campaign(
        campaignArn = hrnn_campaign_arn
    )
    status_hrnn = describe_campaign_response["campaign"]["status"]
    print("HRNN_Campaign: {}".format(status_hrnn))
    
    describe_campaign_response = personalize.describe_campaign(
        campaignArn = hrnn_coldstart_campaign_arn
    )
    status_hrnn_cs = describe_campaign_response["campaign"]["status"]
    print("HRNN_Coldstart_Campaign: {}".format(status_hrnn_cs))
    
    describe_campaign_response = personalize.describe_campaign(
    campaignArn = sims_campaign_arn
    )
    status_sims = describe_campaign_response["campaign"]["status"]
    print("Sims_Campaign: {}".format(status_sims))
 
    describe_campaign_response = personalize.describe_campaign(
    campaignArn = ranking_campaign_arn
    )
    status_ranking = describe_campaign_response["campaign"]["status"]
    print("Ranking_Campaign: {}".format(status_ranking))
    
        
    
    if (status_hrnn == "ACTIVE" or status_hrnn == "CREATE FAILED")&\
       (status_hrnn_cs == "ACTIVE" or status_hrnn_cs == "CREATE FAILED")&\
       (status_sims == "ACTIVE" or status_sims == "CREATE FAILED")&\
       (status_ranking == "ACTIVE" or status_ranking == "CREATE FAILED"):
        break
    print("-------------------------------------->")
    time.sleep(60)

print("All Campaign creation completed")   

HRNN_Campaign: CREATE PENDING
HRNN_Coldstart_Campaign: CREATE PENDING
Sims_Campaign: CREATE PENDING
Ranking_Campaign: CREATE PENDING
-------------------------------------->
HRNN_Campaign: CREATE IN_PROGRESS
HRNN_Coldstart_Campaign: CREATE IN_PROGRESS
Sims_Campaign: CREATE IN_PROGRESS
Ranking_Campaign: CREATE IN_PROGRESS
-------------------------------------->
HRNN_Campaign: CREATE IN_PROGRESS
HRNN_Coldstart_Campaign: CREATE IN_PROGRESS
Sims_Campaign: CREATE IN_PROGRESS
Ranking_Campaign: CREATE IN_PROGRESS
-------------------------------------->
HRNN_Campaign: CREATE IN_PROGRESS
HRNN_Coldstart_Campaign: CREATE IN_PROGRESS
Sims_Campaign: CREATE IN_PROGRESS
Ranking_Campaign: CREATE IN_PROGRESS
-------------------------------------->
HRNN_Campaign: CREATE IN_PROGRESS
HRNN_Coldstart_Campaign: CREATE IN_PROGRESS
Sims_Campaign: CREATE IN_PROGRESS
Ranking_Campaign: CREATE IN_PROGRESS
-------------------------------------->
HRNN_Campaign: CREATE IN_PROGRESS
HRNN_Coldstart_Campaign: CREATE IN_PR

## 샘플 추천 결과 얻기

캠페인이 활성화되면 추천 결과를 받을 수 있습니다. 먼저 컬렉션에서 임의의 사용자를 선택해야 합니다. 그런 다음, ID 대신 추천을 위해 영화 정보를 표시하는 몇 가지 헬퍼 함수를 만듭니다.

In [10]:
df=pd.read_csv(interaction_filename)

# Getting a random user:
user_id, item_id, _,_,_ = df.sample().values[0]
print("USER: {}".format(user_id))

USER: 1749


In [11]:
items_all = pd.read_csv('./ml-1m/movies.dat',sep='::', encoding='latin1',names=['ITEM_ID', 'TITLE', 'GENRE'],)
items=items_all.copy()
items['to_keep'] = items['ITEM_ID'].apply(lambda x:x in unique_items)
items=items[items['to_keep']]
#items=items.set_index('ITEM_ID')
del items['to_keep']
items.tail()

  if __name__ == '__main__':


Unnamed: 0,ITEM_ID,TITLE,GENRE
3878,3948,Meet the Parents (2000),Comedy
3879,3949,Requiem for a Dream (2000),Drama
3880,3950,Tigerland (2000),Drama
3881,3951,Two Family House (2000),Drama
3882,3952,"Contender, The (2000)",Drama|Thriller


In [12]:
def get_movie_title(movie_id):
    """
    Takes in an ID, returns a title
    """
    movie_id = int(movie_id)
    movie_title=items[items['ITEM_ID']==movie_id]['TITLE']
    return (movie_title.tolist())


#### GetRecommendations 호출

아래 코드 셀을 실행하면 특정 사용자에 대한 추천 사항이 표시되고 추천 영화 목록이 반환됩니다.

In [13]:
get_recommendations_response = personalize_runtime.get_recommendations(
    campaignArn = hrnn_campaign_arn,
    userId = str(user_id),
)
# Update DF rendering
pd.set_option('display.max_rows', 30)

print("Recommendations for user: ", user_id)

item_list = get_recommendations_response['itemList']

recommendation_list = []

for item in item_list:
    title = get_movie_title(item['itemId'])
    recommendation_list.append(title)
    
recommendations_df = pd.DataFrame(recommendation_list, columns = ['OriginalRecs'])
recommendations_df

Recommendations for user:  1749


Unnamed: 0,OriginalRecs
0,"Slums of Beverly Hills, The (1998)"
1,Singles (1992)
2,So I Married an Axe Murderer (1993)
3,Notting Hill (1999)
4,"House of Yes, The (1997)"
5,Unstrung Heroes (1995)
6,"Wedding Singer, The (1998)"
7,Manhattan Murder Mystery (1993)
8,Drop Dead Gorgeous (1999)
9,Bob Roberts (1992)


## Amazon Personalize Batch Export 작업 


Amazon Personalize Batch기능을 활용하려면 json 파일 형식으로 추천 받고하 자는 사용자 또는 아이템 아이디를 json 형태의 파일로 s3에 저장하여야 합니다. Output의 형식도 json형태로 저장되며 지정한 S3 bucket 경로에 저장 되게 됩니다. 

HRNN 솔루션  Batch Input 예제: 

'{"userId": "4638"}'
'{"userId": "663"}'
'{"userId": "3384"}'

Batch Output 예제: 

{"input":{"userId":"4638"}, "output": {"recommendedItems": ["296", "1", "260", "318"]}}
{"input":{"userId":"663"}, "output": {"recommendedItems": ["1393", "3793", "2701", "3826"]}}
{"input":{"userId":"3384"}, "output": {"recommendedItems": ["8368", "5989", "40815", "48780"]}}



In [None]:
 Get the user list
batch_users = users_df.sample(3).index.tolist()

# Write the file to disk
json_input_filename = "json_input.json"
with open(data_dir + "/" + json_input_filename, 'w') as json_input:
    for user_id in batch_users:
        json_input.write('{"userId": "' + str(user_id) + '"}\n')

In [None]:
# Showcase the input file:
!cat $data_dir"/"$json_input_filename

In [None]:
# Upload files to S3
boto3.Session().resource('s3').Bucket(bucket_name).Object(json_input_filename).upload_file(data_dir+"/"+json_input_filename)
s3_input_path = "s3://" + bucket_name + "/" + json_input_filename
print(s3_input_path)

In [None]:

# Define the output path
s3_output_path = "s3://" + bucket_name + "/"
print(s3_output_path)

In [None]:
batchInferenceJobArn = personalize.create_batch_inference_job (
    solutionVersionArn = hrnn_solution_version_arn,
    jobName = "POC-Batch-Inference-Job-HRNN",
    roleArn = role_arn,
    jobInput = 
     {"s3DataSource": {"path": s3_input_path}},
    jobOutput = 
     {"s3DataDestination":{"path": s3_output_path}}
)
batchInferenceJobArn = batchInferenceJobArn['batchInferenceJobArn

In [None]:
current_time = datetime.now()
print("Import Started on: ", current_time.strftime("%I:%M:%S %p"))

max_time = time.time() + 3*60*60 # 3 hours
while time.time() < max_time:
    describe_dataset_inference_job_response = personalize.describe_batch_inference_job(
        batchInferenceJobArn = batchInferenceJobArn
    )
    status = describe_dataset_inference_job_response["batchInferenceJob"]['status']
    print("DatasetInferenceJob: {}".format(status))
    
    if status == "ACTIVE" or status == "CREATE FAILED":
        break
        
    time.sleep(60)
    
current_time = datetime.now()
print("Import Completed on: ", current_time.strftime("%I:%M:%S %p"))

In [None]:
s3 = boto3.client('s3')
export_name = json_input_filename + ".out"
s3.download_file(bucket_name, export_name, data_dir+"/"+export_name)

# Update DF rendering
pd.set_option('display.max_rows', 30)
with open(data_dir+"/"+export_name) as json_file:
    # Get the first line and parse it
    line = json.loads(json_file.readline())
    # Do the same for the other lines
    while line:
        # extract the user ID 
        col_header = "User: " + line['input']['userId']
        # Create a list for all the artists
        recommendation_list = []
        # Add all the entries
        for item in line['output']['recommendedItems']:
            artist = get_artist_by_id(item)
            recommendation_list.append(artist)
        if 'bulk_recommendations_df' in locals():
            new_rec_DF = pd.DataFrame(recommendation_list, columns = [col_header])
            bulk_recommendations_df = bulk_recommendations_df.join(new_rec_DF)
        else:
            bulk_recommendations_df = pd.DataFrame(recommendation_list, columns=[col_header])
        try:
            line = json.loads(json_file.readline())
        except:
            line = None
bulk_recommendations_df

## 리뷰

캠페인을 생성하고 실제적으로 특정 유저의 추천 영화 목록도 얻었습니다.
이제 다음 노트북으로 넘어갈 준비가 되었습니다. (`4.View_Campaign_And_Interactions.ipynb`)


## 다음 노트북에 대한 참고 사항

다음 실습에 필요한 몇 가지 값들이 있습니다. 아래 셀을 실행하여 저장한 후, 다음 주피터 노트북에서 그대로 사용할 수 있습니다.

In [15]:
%store hrnn_campaign_arn
%store hrnn_coldstart_campaign_arn
%store sims_campaign_arn
%store recommendations_df
%store user_id

Stored 'hrnn_campaign_arn' (str)
Stored 'hrnn_coldstart_campaign_arn' (str)
Stored 'sims_campaign_arn' (str)
Stored 'recommendations_df' (DataFrame)
Stored 'user_id' (int)
