## Movie Recommendations 

### Imports

In [51]:
import pandas as pd
import numpy as np
import boto3
import uuid
import json
import time

### 1. Essentials

In [2]:
personalize_runtime = boto3.client('personalize-runtime')
personalize = boto3.client('personalize')
s3 = boto3.Session().resource('s3')

<p><b>Note:</b> Under IAM, find the execution role that we created for this SageMaker notebook <br>
    and attach the following policies to it.<br><br>
    1. AmazonForecastFullAccess<br>
    2. AmazonPersonalizeFullAccess<br>
</p>

### 2. Prep Data

In [3]:
data = pd.read_csv('./DATA/data.csv')
data.head(3)

Unnamed: 0,USER_ID,ITEM_ID,TIMESTAMP
0,298,474,884182806
1,253,465,891628467
2,286,1014,879781125


In [4]:
data.shape

(55375, 3)

In [5]:
# CHANGE BUCKET NAME BELOW
s3.Bucket('personalize-demo-892313895307').Object('data.csv').upload_file('./DATA/data.csv')

### 3. Create Schema to Map Data

In [6]:
schema = {
    "type": "record",
    "name": "Interactions",
    "namespace": "com.amazonaws.personalize.schema",
    "fields": [
        {
            "name": "USER_ID",
            "type": "string"
        },
        {
            "name": "ITEM_ID",
            "type": "string"
        },
        {
            "name": "TIMESTAMP",
            "type": "long"
        }
    ],
    "version": "1.0"
}

In [9]:
schema_response = personalize.create_schema(
    name = 'personalize-demo-schema',
    schema = json.dumps(schema)
)

In [10]:
print(json.dumps(schema_response, indent=2))

{
  "schemaArn": "arn:aws:personalize:us-east-1:892313895307:schema/personalize-demo-schema",
  "ResponseMetadata": {
    "RequestId": "df0075ca-2f8d-46fa-bd12-8d0a118d30d2",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "content-type": "application/x-amz-json-1.1",
      "date": "Wed, 14 Oct 2020 21:20:11 GMT",
      "x-amzn-requestid": "df0075ca-2f8d-46fa-bd12-8d0a118d30d2",
      "content-length": "89",
      "connection": "keep-alive"
    },
    "RetryAttempts": 0
  }
}


In [11]:
schema_arn = schema_response['schemaArn']
schema_arn

'arn:aws:personalize:us-east-1:892313895307:schema/personalize-demo-schema'

### 4. Create Dataset Group

In [14]:
create_dataset_group_response = personalize.create_dataset_group(name='personalize-demo-dataset-group')
print(json.dumps(create_dataset_group_response, indent=2))

{
  "datasetGroupArn": "arn:aws:personalize:us-east-1:892313895307:dataset-group/personalize-demo-dataset-group",
  "ResponseMetadata": {
    "RequestId": "f93db570-5149-46ed-8ac1-0955ed21cbef",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "content-type": "application/x-amz-json-1.1",
      "date": "Wed, 14 Oct 2020 21:20:35 GMT",
      "x-amzn-requestid": "f93db570-5149-46ed-8ac1-0955ed21cbef",
      "content-length": "109",
      "connection": "keep-alive"
    },
    "RetryAttempts": 0
  }
}


In [15]:
dataset_group_arn = create_dataset_group_response['datasetGroupArn']
dataset_group_arn

'arn:aws:personalize:us-east-1:892313895307:dataset-group/personalize-demo-dataset-group'

### 5. Create Dataset

In [17]:
dataset_type = 'INTERACTIONS'
create_dataset_response = personalize.create_dataset(
    name = 'personalize-demo-dataset',
    datasetType = dataset_type,
    datasetGroupArn = dataset_group_arn,
    schemaArn = schema_arn
)
print(json.dumps(create_dataset_response, indent=2))
dataset_arn = create_dataset_response['datasetArn']
dataset_arn

{
  "datasetArn": "arn:aws:personalize:us-east-1:892313895307:dataset/personalize-demo-dataset-group/INTERACTIONS",
  "ResponseMetadata": {
    "RequestId": "74f35ee0-38fa-404d-84cb-d15d6c280a76",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "content-type": "application/x-amz-json-1.1",
      "date": "Wed, 14 Oct 2020 21:21:39 GMT",
      "x-amzn-requestid": "74f35ee0-38fa-404d-84cb-d15d6c280a76",
      "content-length": "111",
      "connection": "keep-alive"
    },
    "RetryAttempts": 0
  }
}


'arn:aws:personalize:us-east-1:892313895307:dataset/personalize-demo-dataset-group/INTERACTIONS'

### 6. Create a Dataset Import Job (Copy Data from S3 to Personalize)

<p>
    <b>IMPORTANT:</b> Under IAM > Roles > Create Role <br>
    Create a role for service <b>Personalize</b> and attach <b>AmazonS3FullAccess</b> if not attached by default.
</p>

In [24]:
create_dataset_import_job_response = personalize.create_dataset_import_job(
    jobName = "personalize-create-dataset-import-job",
    datasetArn = dataset_arn,
    dataSource = {
        'dataLocation': 's3://personalize-demo-892313895307/data.csv'
    },
    roleArn = 'arn:aws:iam::892313895307:role/PersonalizeRole'
)
print(json.dumps(create_dataset_import_job_response, indent=2))
dataset_import_job_arn = create_dataset_import_job_response['datasetImportJobArn']
dataset_import_job_arn

{
  "datasetImportJobArn": "arn:aws:personalize:us-east-1:892313895307:dataset-import-job/personalize-create-dataset-import-job",
  "ResponseMetadata": {
    "RequestId": "e74a20e8-88cf-452b-81ce-c225eee600a6",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "content-type": "application/x-amz-json-1.1",
      "date": "Wed, 14 Oct 2020 21:32:43 GMT",
      "x-amzn-requestid": "e74a20e8-88cf-452b-81ce-c225eee600a6",
      "content-length": "125",
      "connection": "keep-alive"
    },
    "RetryAttempts": 0
  }
}


'arn:aws:personalize:us-east-1:892313895307:dataset-import-job/personalize-create-dataset-import-job'

In [25]:
max_time = time.time() + 3*60*60 # 3 hours
while time.time() < max_time:
    describe_dataset_import_job_response = personalize.describe_dataset_import_job(
        datasetImportJobArn = dataset_import_job_arn
    )
    status = describe_dataset_import_job_response["datasetImportJob"]['status']
    print("DatasetImportJob: {}".format(status))
    if status == "ACTIVE" or status == "CREATE FAILED":
        break
    time.sleep(60)

DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: ACTIVE


### 7. Create the Solution & Version

In [26]:
list_recipes_response = personalize.list_recipes()
list_recipes_response

{'recipes': [{'name': 'aws-hrnn',
   'recipeArn': 'arn:aws:personalize:::recipe/aws-hrnn',
   'status': 'ACTIVE',
   'creationDateTime': datetime.datetime(2019, 6, 10, 0, 0, tzinfo=tzlocal()),
   'lastUpdatedDateTime': datetime.datetime(2020, 8, 25, 16, 57, 0, 148000, tzinfo=tzlocal())},
  {'name': 'aws-hrnn-coldstart',
   'recipeArn': 'arn:aws:personalize:::recipe/aws-hrnn-coldstart',
   'status': 'ACTIVE',
   'creationDateTime': datetime.datetime(2019, 6, 10, 0, 0, tzinfo=tzlocal()),
   'lastUpdatedDateTime': datetime.datetime(2020, 8, 25, 16, 57, 0, 148000, tzinfo=tzlocal())},
  {'name': 'aws-hrnn-metadata',
   'recipeArn': 'arn:aws:personalize:::recipe/aws-hrnn-metadata',
   'status': 'ACTIVE',
   'creationDateTime': datetime.datetime(2019, 6, 10, 0, 0, tzinfo=tzlocal()),
   'lastUpdatedDateTime': datetime.datetime(2020, 8, 25, 16, 57, 0, 148000, tzinfo=tzlocal())},
  {'name': 'aws-personalized-ranking',
   'recipeArn': 'arn:aws:personalize:::recipe/aws-personalized-ranking',
   's

In [27]:
recipe_arn = 'arn:aws:personalize:::recipe/aws-user-personalization'

In [28]:
create_solution_response = personalize.create_solution(
    name = 'personalize-demo-soln-user-personalization',
    datasetGroupArn = dataset_group_arn,
    recipeArn = recipe_arn
)

print(json.dumps(create_solution_response, indent=2))
solution_arn = create_solution_response['solutionArn']
solution_arn

{
  "solutionArn": "arn:aws:personalize:us-east-1:892313895307:solution/personalize-demo-soln-user-personalization",
  "ResponseMetadata": {
    "RequestId": "d812409c-c319-4054-bdb1-57d51ad1c743",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "content-type": "application/x-amz-json-1.1",
      "date": "Wed, 14 Oct 2020 21:50:58 GMT",
      "x-amzn-requestid": "d812409c-c319-4054-bdb1-57d51ad1c743",
      "content-length": "112",
      "connection": "keep-alive"
    },
    "RetryAttempts": 0
  }
}


'arn:aws:personalize:us-east-1:892313895307:solution/personalize-demo-soln-user-personalization'

In [29]:
create_solution_version_response = personalize.create_solution_version(
    solutionArn = solution_arn
)

print(json.dumps(create_solution_version_response, indent=2))
solution_version_arn = create_solution_version_response['solutionVersionArn']
solution_version_arn

{
  "solutionVersionArn": "arn:aws:personalize:us-east-1:892313895307:solution/personalize-demo-soln-user-personalization/69f6b0b3",
  "ResponseMetadata": {
    "RequestId": "b575aeb5-000e-4dee-ae99-25342c961775",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "content-type": "application/x-amz-json-1.1",
      "date": "Wed, 14 Oct 2020 21:56:58 GMT",
      "x-amzn-requestid": "b575aeb5-000e-4dee-ae99-25342c961775",
      "content-length": "128",
      "connection": "keep-alive"
    },
    "RetryAttempts": 0
  }
}


'arn:aws:personalize:us-east-1:892313895307:solution/personalize-demo-soln-user-personalization/69f6b0b3'

In [None]:
max_time = time.time() + 3*60*60 # 3 hours
while time.time() < max_time:
    describe_solution_version_response = personalize.describe_solution_version(
        solutionVersionArn = solution_version_arn
    )
    status = describe_solution_version_response['solutionVersion']['status']
    print('SolutionVersion: {}'.format(status))
    if status == "ACTIVE" or status == "CREATE FAILED":
        break
    time.sleep(60)

SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS


### 8. Get the Metrics 

In [31]:
solution_version_arn = 'arn:aws:personalize:us-east-1:892313895307:solution/personalize-demo-soln-user-personalization/69f6b0b3'

In [32]:
get_solution_metrics_response = personalize.get_solution_metrics(
    solutionVersionArn = solution_version_arn
)

print(json.dumps(get_solution_metrics_response, indent=2))

{
  "solutionVersionArn": "arn:aws:personalize:us-east-1:892313895307:solution/personalize-demo-soln-user-personalization/69f6b0b3",
  "metrics": {
    "coverage": 0.3101,
    "mean_reciprocal_rank_at_25": 0.1206,
    "normalized_discounted_cumulative_gain_at_10": 0.1278,
    "normalized_discounted_cumulative_gain_at_25": 0.1961,
    "normalized_discounted_cumulative_gain_at_5": 0.0764,
    "precision_at_10": 0.0427,
    "precision_at_25": 0.0333,
    "precision_at_5": 0.0404
  },
  "ResponseMetadata": {
    "RequestId": "a3b88934-310f-4acc-b987-322269d25f20",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "content-type": "application/x-amz-json-1.1",
      "date": "Thu, 15 Oct 2020 02:00:37 GMT",
      "x-amzn-requestid": "a3b88934-310f-4acc-b987-322269d25f20",
      "content-length": "426",
      "connection": "keep-alive"
    },
    "RetryAttempts": 0
  }
}


### 9. Setup a Campaign (Host the Model)

In [33]:
create_campaign_response = personalize.create_campaign(
    name = 'personalize-demo-campaign',
    solutionVersionArn = solution_version_arn,
    minProvisionedTPS = 1,
    campaignConfig = {
        'itemExplorationConfig': {
            'explorationWeight': '0.5'
        }
    }
)
print(json.dumps(create_campaign_response, indent=2))
campaign_arn = create_campaign_response['campaignArn']
campaign_arn

{
  "campaignArn": "arn:aws:personalize:us-east-1:892313895307:campaign/personalize-demo-campaign",
  "ResponseMetadata": {
    "RequestId": "9f95829b-dd04-4b82-a87e-0e7f26dd009f",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "content-type": "application/x-amz-json-1.1",
      "date": "Thu, 15 Oct 2020 02:02:32 GMT",
      "x-amzn-requestid": "9f95829b-dd04-4b82-a87e-0e7f26dd009f",
      "content-length": "95",
      "connection": "keep-alive"
    },
    "RetryAttempts": 0
  }
}


'arn:aws:personalize:us-east-1:892313895307:campaign/personalize-demo-campaign'

In [34]:
max_time = time.time() + 3*60*60 # 3 hours
while time.time() < max_time:
    describe_campaign_response = personalize.describe_campaign(
        campaignArn = campaign_arn
    )
    status = describe_campaign_response["campaign"]["status"]
    print("Campaign: {}".format(status))
    if status == "ACTIVE" or status == "CREATE FAILED":
        break
    time.sleep(60)

Campaign: CREATE IN_PROGRESS
Campaign: CREATE IN_PROGRESS
Campaign: CREATE IN_PROGRESS
Campaign: CREATE IN_PROGRESS
Campaign: CREATE IN_PROGRESS
Campaign: CREATE IN_PROGRESS
Campaign: CREATE IN_PROGRESS
Campaign: CREATE IN_PROGRESS
Campaign: ACTIVE


### 10. Test Inference - Get Recommendations

In [35]:
# Get a random user
user_id, item_id, _ = data.sample().values[0]
print("USER: {}".format(user_id))

USER: 716


In [38]:
# Load Items (Movie Titles)

items = pd.read_csv('./DATA/movies', sep='|', usecols=[0, 1], encoding='latin-1', names=['ITEM_ID', 'TITLE'], index_col='ITEM_ID')

In [40]:
items.head(3)

Unnamed: 0_level_0,TITLE
ITEM_ID,Unnamed: 1_level_1
1,Toy Story (1995)
2,GoldenEye (1995)
3,Four Rooms (1995)


In [41]:
def get_movie_title(movie_id):
    """
    Takes in an ID, returns a title.
    """
    movie_id = int(movie_id)
    return items.iloc[movie_id]['TITLE']

In [42]:
get_recommendations_response = personalize_runtime.get_recommendations(
    campaignArn = campaign_arn,
    userId = str(user_id),
)

In [43]:
pd.set_option('display.max_rows', 30)

In [44]:
print("Recommendations for user: ", user_id)

item_list = get_recommendations_response['itemList']

recommendation_list = []

for item in item_list:
    title = get_movie_title(item['itemId'])
    recommendation_list.append(title)
    
recommendations_df = pd.DataFrame(recommendation_list, columns=['OriginalRecs'])
recommendations_df

Recommendations for user:  716


Unnamed: 0,OriginalRecs
0,"Nightmare on Elm Street, A (1984)"
1,Jean de Florette (1986)
2,Blade Runner (1982)
3,Amityville 1992: It's About Time (1992)
4,Jaws 2 (1978)
5,Outbreak (1995)
6,Just Cause (1995)
7,"Good, The Bad and The Ugly, The (1966)"
8,Radioland Murders (1994)
9,William Shakespeare's Romeo and Juliet (1996)


### 11. Create an Event Tracker 

In [45]:
# Establish a connection to Personalize's Event Streaming
personalize_events = boto3.client(service_name='personalize-events')

In [46]:
response = personalize.create_event_tracker(
    name='MovieClickTracker',
    datasetGroupArn=dataset_group_arn
)
print(response['eventTrackerArn'])
print(response['trackingId'])
TRACKING_ID = response['trackingId']
TRACKING_ID

arn:aws:personalize:us-east-1:892313895307:event-tracker/41179ef9
65d1388b-ccab-41b7-b258-637306002dbe


'65d1388b-ccab-41b7-b258-637306002dbe'

In [47]:
event_tracker_arn = response['eventTrackerArn']

### 12. Simulate User Behavior

In [48]:
session_dict = {}

In [52]:
def send_movie_click(USER_ID, ITEM_ID):
    """
    Simulates a click as an event
    to send an event to Amazon Personalize's Event Tracker
    """
    # Configure Session
    try:
        session_ID = session_dict[USER_ID]
    except:
        session_dict[USER_ID] = str(uuid.uuid1())
        session_ID = session_dict[USER_ID]
        
    # Configure Properties:
    event = {
    "itemId": str(ITEM_ID),
    }
    event_json = json.dumps(event)
        
    # Make Call
    personalize_events.put_events(
    trackingId = TRACKING_ID,
    userId= USER_ID,
    sessionId = session_ID,
    eventList = [{
        'sentAt': int(time.time()),
        'eventType': 'EVENT_TYPE',
        'properties': event_json
        }]
)

In [53]:
# Pick a movie, we will use ID 270 or Gattaca
movie_to_click = 270
movie_title_clicked = get_movie_title(movie_to_click)
send_movie_click(USER_ID=str(user_id), ITEM_ID=movie_to_click)

In [54]:
get_recommendations_response = personalize_runtime.get_recommendations(
    campaignArn = campaign_arn,
    userId = str(user_id),
)

print("Recommendations for user: ", user_id)

item_list = get_recommendations_response['itemList']

recommendation_list = []

for item in item_list:
    title = get_movie_title(item['itemId'])
    recommendation_list.append(title)
    
new_rec_DF = pd.DataFrame(recommendation_list, columns = [movie_title_clicked])

recommendations_df = recommendations_df.join(new_rec_DF)
recommendations_df

Recommendations for user:  716


Unnamed: 0,OriginalRecs,Starship Troopers (1997)
0,"Nightmare on Elm Street, A (1984)",Heat (1995)
1,Jean de Florette (1986),"Nightmare on Elm Street, A (1984)"
2,Blade Runner (1982),Outbreak (1995)
3,Amityville 1992: It's About Time (1992),Blade Runner (1982)
4,Jaws 2 (1978),Good Will Hunting (1997)
5,Outbreak (1995),Desperate Measures (1998)
6,Just Cause (1995),3 Ninjas: High Noon At Mega Mountain (1998)
7,"Good, The Bad and The Ugly, The (1966)",Beverly Hills Cop III (1994)
8,Radioland Murders (1994),Austin Powers: International Man of Mystery (1...
9,William Shakespeare's Romeo and Juliet (1996),In the Name of the Father (1993)
