# Imports

In [1]:
import boto3
import json
import time
from botocore.exceptions import ClientError

# Create a session

Remember to active the secret keys in "Add-ons"

In [2]:
from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()

In [3]:
region = input("Type in your region: ")

Type in your region:  eu-west-1


In [4]:
try:
    session = boto3.Session(
        aws_access_key_id=user_secrets.get_secret("access_key"),
        aws_secret_access_key=user_secrets.get_secret("secret_access_key"),
        region_name=region
    )
except Exception as e:
    print("Error occurred while creating session: ", e)

In [5]:
try:
    personalize = session.client('personalize')
    personalize_runtime = session.client('personalize-runtime')
except Exception as e:
    print("Error creating client: ", e)

# Create Dataset Group

In [6]:
dataset_group_name = input('Type in desired dataset-group-name: ')

Type in desired dataset-group-name:  fast-bachelor-project


In [7]:
try:
    response = personalize.create_dataset_group(
        name=dataset_group_name,
        domain='VIDEO_ON_DEMAND'
    )
    print(f'Dataset group {dataset_group_name} created')
except Exception as e:
    print(f'Error creating dataset group {dataset_group_name}: {e}')

Dataset group fast-bachelor-project created


In [8]:
dataset_group_arn = response['datasetGroupArn']
print(json.dumps(response, indent=2))

{
  "datasetGroupArn": "arn:aws:personalize:eu-west-1:267355461125:dataset-group/fast-bachelor-project",
  "domain": "VIDEO_ON_DEMAND",
  "ResponseMetadata": {
    "RequestId": "752f90ea-1323-43eb-9528-e2bb2e3a710c",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "date": "Thu, 27 Apr 2023 14:45:43 GMT",
      "content-type": "application/x-amz-json-1.1",
      "content-length": "127",
      "connection": "keep-alive",
      "x-amzn-requestid": "752f90ea-1323-43eb-9528-e2bb2e3a710c"
    },
    "RetryAttempts": 0
  }
}


# Create Schemas

## Create Interactions Schema

In [9]:
schema_name = input('Type in desired schema-name: ')

Type in desired schema-name:  interactions-schema-v1


In [10]:
schema = {
  "type": "record",
  "name": "Interactions",
  "namespace": "com.amazonaws.personalize.schema",
  "fields": [
      {
          "name": "USER_ID",
          "type": "string"
      },
      {
          "name": "ITEM_ID",
          "type": "string"
      },
      {
          "name": "EVENT_TYPE",
          "type": "string"
      },
      {
          "name": "eventValue",
          "type": "float"
      },
      {
          "name": "TIMESTAMP",
          "type": "long"
      }
  ],
  "version": "1.0"
}

In [11]:
try: 
    create_interactions_schema_response = personalize.create_schema(
        name=schema_name,
        schema=json.dumps(schema),
        domain='VIDEO_ON_DEMAND'
    )
except ClientError as e:
    if e.response['Error']['Code'] == 'ResourceAlreadyExistsException':
        print("Schema already exists")
    else:
        print("Unexpected error: %s" % e)

In [12]:
interactions_schema_arn = create_interactions_schema_response['schemaArn']
print(json.dumps(create_interactions_schema_response, indent=2))

{
  "schemaArn": "arn:aws:personalize:eu-west-1:267355461125:schema/interactions-schema-v1",
  "ResponseMetadata": {
    "RequestId": "9157b918-b7ac-49c3-b6f2-b9b563736378",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "date": "Thu, 27 Apr 2023 14:46:41 GMT",
      "content-type": "application/x-amz-json-1.1",
      "content-length": "88",
      "connection": "keep-alive",
      "x-amzn-requestid": "9157b918-b7ac-49c3-b6f2-b9b563736378"
    },
    "RetryAttempts": 0
  }
}


## Create Items Schema

In [8]:
schema_name = input('Type in desired schema-name: ')

Type in desired schema-name:  items-schema-5


In [9]:
schema = {
  "type": "record",
  "name": "Items",
  "namespace": "com.amazonaws.personalize.schema",
  "fields": [
    {
      "name": "ITEM_ID",
      "type": "string"
    },
    {
      "name": "ACTORS",
      "type": [
        "string"
      ],
      "categorical": True
    },
    {
      "name": "DIRECTOR",
      "type": [
        "string"   
      ],
      "categorical": True
    },
    {
      "name": "GENRES",
      "type": [
        "string"
      ],
      "categorical": True
    },
    {
      "name": "TITLE",
      "type": "string"
    },
    {
      "name": "RELEASE_YEAR",
      "type": "string",
      "categorical": True
    },
    {
      "name": "eventValue",
      "type": "float",
    },
    {
      "name": "CREATION_TIMESTAMP",
      "type": "long"
    }
  ],
  "version": "1.0"
}

In [10]:
try: 
    create_items_schema_response = personalize.create_schema(
        name=schema_name,
        schema=json.dumps(schema),
        domain='VIDEO_ON_DEMAND'
    )
except ClientError as e:
    if e.response['Error']['Code'] == 'ResourceAlreadyExistsException':
        print("Schema already exists")
    else:
        print("Unexpected error: %s" % e)

In [11]:
items_schema_arn = create_items_schema_response['schemaArn']
print(json.dumps(create_items_schema_response, indent=2))

{
  "schemaArn": "arn:aws:personalize:eu-west-1:267355461125:schema/items-schema-5",
  "ResponseMetadata": {
    "RequestId": "352f5e9a-64d9-400f-9de9-5e59f5385467",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "date": "Fri, 28 Apr 2023 07:37:01 GMT",
      "content-type": "application/x-amz-json-1.1",
      "content-length": "80",
      "connection": "keep-alive",
      "x-amzn-requestid": "352f5e9a-64d9-400f-9de9-5e59f5385467"
    },
    "RetryAttempts": 0
  }
}


## Create Users Schema

In [17]:
schema_name = input('Type in desired schema-name: ')

Type in desired schema-name:  users-schema-v1


In [18]:
schema = {
    "type": "record",
    "name": "Users",
    "namespace": "com.amazonaws.personalize.schema",
    "fields": [
      {
          "name": "USER_ID",
          "type": "string"
      },
        {
          "name": "AGE",
          "type": "string",
          "categorical": True
      },
      {
          "name": "SEX",
          "type": "string",
          "categorical": True
      }
    ],
    "version": "1.0"
}

In [19]:
try: 
    create_users_schema_response = personalize.create_schema(
        name=schema_name,
        schema=json.dumps(schema),
        domain='VIDEO_ON_DEMAND'
    )
except ClientError as e:
    if e.response['Error']['Code'] == 'ResourceAlreadyExistsException':
        print("Schema already exists")
    else:
        print("Unexpected error: %s" % e)

In [20]:
users_schema_arn = create_users_schema_response['schemaArn']
print(json.dumps(create_users_schema_response, indent=2))

{
  "schemaArn": "arn:aws:personalize:eu-west-1:267355461125:schema/users-schema-v1",
  "ResponseMetadata": {
    "RequestId": "e8ffb1ff-7586-4761-8012-2c99c9a8649b",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "date": "Thu, 27 Apr 2023 14:47:18 GMT",
      "content-type": "application/x-amz-json-1.1",
      "content-length": "81",
      "connection": "keep-alive",
      "x-amzn-requestid": "e8ffb1ff-7586-4761-8012-2c99c9a8649b"
    },
    "RetryAttempts": 0
  }
}


# Create Personalize Role

In [21]:
role_name = input('Type in desired role-name: ')

Type in desired role-name:  personalize-role


In [22]:
iam = session.client("iam")
role_name = role_name
assume_role_policy_document = {
    "Version": "2012-10-17",
    "Statement": [
        {
          "Effect": "Allow",
          "Principal": {
            "Service": "personalize.amazonaws.com"
          },
          "Action": "sts:AssumeRole"
        }
    ]
}

In [23]:
try:
    create_role_response = iam.create_role(
        RoleName = role_name,
        AssumeRolePolicyDocument = json.dumps(assume_role_policy_document)
    )
except Exception as e:
    print(f"Error creating IAM role: {e}")

## Add S3 support

In [24]:
iam.attach_role_policy(
    PolicyArn='arn:aws:iam::aws:policy/AmazonS3FullAccess',
    RoleName=role_name
)
time.sleep(60) # wait for a minute to allow IAM role policy attachment to propagate

In [25]:
role_arn = create_role_response["Role"]["Arn"]
print(role_arn)

arn:aws:iam::267355461125:role/personalize-role


# Create Datasets

## Create Interactions Dataset

In [26]:
name = input('Type in desired dataset-name: ')

Type in desired dataset-name:  interactions-dataset


In [27]:
try:
    create_dataset_response = personalize.create_dataset(
        name = name,
        datasetType = "INTERACTIONS",
        datasetGroupArn = dataset_group_arn,
        schemaArn = interactions_schema_arn
    )
except Exception as e:
    print(f"Error creating dataset: {e}")

In [28]:
interactions_dataset_arn = create_dataset_response['datasetArn']
print(json.dumps(create_dataset_response, indent=2))

{
  "datasetArn": "arn:aws:personalize:eu-west-1:267355461125:dataset/fast-bachelor-project/INTERACTIONS",
  "ResponseMetadata": {
    "RequestId": "3aec72c4-3e9b-463a-9bbb-dc6147021561",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "date": "Thu, 27 Apr 2023 14:49:31 GMT",
      "content-type": "application/x-amz-json-1.1",
      "content-length": "102",
      "connection": "keep-alive",
      "x-amzn-requestid": "3aec72c4-3e9b-463a-9bbb-dc6147021561"
    },
    "RetryAttempts": 0
  }
}


## Create Items Dataset

In [12]:
name = input('Type in desired dataset-name: ')

Type in desired dataset-name:  items-dataset


In [13]:
try:
    create_dataset_response = personalize.create_dataset(
        name = name,
        datasetType = "ITEMS",
        datasetGroupArn = dataset_group_arn,
        schemaArn = items_schema_arn
    )
except Exception as e:
    print(f"Error creating dataset: {e}")

In [14]:
items_dataset_arn = create_dataset_response['datasetArn']
print(json.dumps(create_dataset_response, indent=2))

{
  "datasetArn": "arn:aws:personalize:eu-west-1:267355461125:dataset/fast-bachelor-project/ITEMS",
  "ResponseMetadata": {
    "RequestId": "c809579b-9ef2-44fe-b1c0-628e81267251",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "date": "Fri, 28 Apr 2023 07:37:24 GMT",
      "content-type": "application/x-amz-json-1.1",
      "content-length": "95",
      "connection": "keep-alive",
      "x-amzn-requestid": "c809579b-9ef2-44fe-b1c0-628e81267251"
    },
    "RetryAttempts": 0
  }
}


## Create Users Dataset

In [32]:
name = input('Type in desired dataset-name: ')

Type in desired dataset-name:  users-dataset


In [33]:
try:
    create_dataset_response = personalize.create_dataset(
        name = name,
        datasetType = "USERS",
        datasetGroupArn = dataset_group_arn,
        schemaArn = users_schema_arn
    )
except Exception as e:
    print(f"Error creating dataset: {e}")

In [34]:
users_dataset_arn = create_dataset_response['datasetArn']
print(json.dumps(create_dataset_response, indent=2))

{
  "datasetArn": "arn:aws:personalize:eu-west-1:267355461125:dataset/fast-bachelor-project/USERS",
  "ResponseMetadata": {
    "RequestId": "e4f9d6a3-2c96-4217-a5c4-86f3b2edb41a",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "date": "Thu, 27 Apr 2023 14:49:53 GMT",
      "content-type": "application/x-amz-json-1.1",
      "content-length": "95",
      "connection": "keep-alive",
      "x-amzn-requestid": "e4f9d6a3-2c96-4217-a5c4-86f3b2edb41a"
    },
    "RetryAttempts": 0
  }
}


# Create Import Jobs

In [35]:
bucket_name = input('Type in bucket name: ')

Type in bucket name:  fast-bachelor-project-personalize


## Create Interactions Dataset Import Job

In [36]:
job_name = input('Type in desired job-name: ')

Type in desired job-name:  interactions-dataset-import-job


In [37]:
try:
    create_interactions_dataset_import_job_response = personalize.create_dataset_import_job(
        jobName = job_name,
        datasetArn = interactions_dataset_arn,
        dataSource = {
            "dataLocation": "s3://{}/{}".format(bucket_name, 'interactions.csv')
        },
        roleArn = role_arn
    )
    print(f'Created interactions dataset import job with name "{job_name}"')
except Exception as e:
    print(f'Error creating interactions dataset import job: {e}')

Created interactions dataset import job with name "interactions-dataset-import-job"


In [38]:
dataset_interactions_import_job_arn = create_interactions_dataset_import_job_response['datasetImportJobArn']
print(json.dumps(create_interactions_dataset_import_job_response, indent=2))

{
  "datasetImportJobArn": "arn:aws:personalize:eu-west-1:267355461125:dataset-import-job/interactions-dataset-import-job",
  "ResponseMetadata": {
    "RequestId": "dc9a5fa4-37fa-4c2f-a073-32041111d294",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "date": "Thu, 27 Apr 2023 14:50:34 GMT",
      "content-type": "application/x-amz-json-1.1",
      "content-length": "119",
      "connection": "keep-alive",
      "x-amzn-requestid": "dc9a5fa4-37fa-4c2f-a073-32041111d294"
    },
    "RetryAttempts": 0
  }
}


## Create Items Dataset Import Job

In [15]:
job_name = input('Type in desired job-name: ')

Type in desired job-name:  items-dataset-import-job


In [18]:
bucket_name = "fast-bachelor-project-personalize"

In [20]:
role_arn = "arn:aws:iam::267355461125:role/service-role/AmazonPersonalize-ExecutionRole-1682602731902"

In [21]:
create_items_dataset_import_job_response = personalize.create_dataset_import_job(
        jobName = job_name,
        datasetArn = items_dataset_arn,
        dataSource = {
            "dataLocation": "s3://{}/{}".format(bucket_name, 'items.csv')
        },
        roleArn = role_arn
    )

In [22]:
try:
    create_items_dataset_import_job_response = personalize.create_dataset_import_job(
        jobName = job_name,
        datasetArn = items_dataset_arn,
        dataSource = {
            "dataLocation": "s3://{}/{}".format(bucket_name, 'items.csv')
        },
        roleArn = role_arn
    )
    print(f'Created items dataset import job with name "{job_name}"')
except Exception as e:
    print(f'Error creating items dataset import job: {e}')

Error creating items dataset import job: An error occurred (ResourceInUseException) when calling the CreateDatasetImportJob operation: dataset import job is in PENDING or IN_PROGRESS status: arn:aws:personalize:eu-west-1:267355461125:dataset-import-job/items-dataset-import-job


In [23]:
dataset_items_import_job_arn = create_items_dataset_import_job_response['datasetImportJobArn']
print(json.dumps(create_items_dataset_import_job_response, indent=2))

{
  "datasetImportJobArn": "arn:aws:personalize:eu-west-1:267355461125:dataset-import-job/items-dataset-import-job",
  "ResponseMetadata": {
    "RequestId": "9faf322f-d678-4d3f-b602-335ed3c55fac",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "date": "Fri, 28 Apr 2023 07:40:18 GMT",
      "content-type": "application/x-amz-json-1.1",
      "content-length": "112",
      "connection": "keep-alive",
      "x-amzn-requestid": "9faf322f-d678-4d3f-b602-335ed3c55fac"
    },
    "RetryAttempts": 0
  }
}


## Create Users Dataset Import Job

In [62]:
job_name = input('Type in desired job-name: ')

Type in desired job-name:  users-dataset-import-job


In [63]:
try:
    create_users_dataset_import_job_response = personalize.create_dataset_import_job(
        jobName = job_name,
        datasetArn = users_dataset_arn,
        dataSource = {
            "dataLocation": "s3://{}/{}".format(bucket_name, 'users.csv')
        },
        roleArn = role_arn
    )
    print(f'Created users dataset import job with name "{job_name}"')
except Exception as e:
    print(f'Error creating users dataset import job: {e}')

Created users dataset import job with name "users-dataset-import-job"


In [64]:
dataset_users_import_job_arn = create_users_dataset_import_job_response['datasetImportJobArn']
print(json.dumps(create_users_dataset_import_job_response, indent=2))

{
  "datasetImportJobArn": "arn:aws:personalize:eu-west-1:267355461125:dataset-import-job/users-dataset-import-job",
  "ResponseMetadata": {
    "RequestId": "0460424d-774c-4563-b4f6-9a8510922468",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "date": "Thu, 27 Apr 2023 15:07:49 GMT",
      "content-type": "application/x-amz-json-1.1",
      "content-length": "112",
      "connection": "keep-alive",
      "x-amzn-requestid": "0460424d-774c-4563-b4f6-9a8510922468"
    },
    "RetryAttempts": 0
  }
}
