***Generating Personalized Emails with Amazon Bedrock and Amazon Personalize***


In this notebook, we implement product recommendations for users based on their past ratings and reviews. We then use Amazon Bedrock to produce marketing emails, which are sent out to the users.

In [None]:
import time
from time import sleep
import json
from datetime import datetime
import boto3
import pandas as pd
from io import StringIO

***Creating the Product Review Dataset***

Starting from the initial amazon_data.csv file, we filter out the columns that do not describe a product to create the product dataset. The remaining fields include the timestamp, product ID, product name, and description.

In [None]:


# Create the product review dataset filtering out respective columns

bucket_name = 'personalizeproductreviewdata'
review_data_key = 'amazon_data.csv'

data_s3_location1 = "s3://{}/{}".format(bucket_name, review_data_key)  # S3 URL
product_data = pd.read_csv(data_s3_location1)

# Pricing, reviewer and review-text fields are not part of the product dataset.
# A single drop() raises KeyError if any of them is missing, exactly like the
# one-column-at-a-time version did.
non_product_columns = [
    'discounted_price',
    'actual_price',
    'discount_percentage',
    'user_id',
    'user_name',
    'review_id',
    'review_title',
    'review_content',
    'img_link',
    'product_link',
    'rating',
    'age',
]
product_data = product_data.drop(columns=non_product_columns)

product_data = product_data.rename(
    columns={
        'timestamp': 'CREATION_TIMESTAMP',
        'product_id': 'ITEM_ID',
        'about_product': 'DESCRIPTION',
    }
)

# Convert to Unix timestamp (seconds). Subtracting the epoch and floor-dividing
# by one second does not depend on the datetime resolution pandas picked when
# parsing (astype(int) // 10**9 is only correct for nanosecond data) nor on the
# platform's default integer width.
creation_time = pd.to_datetime(product_data['CREATION_TIMESTAMP'], utc=True)
unix_epoch = pd.Timestamp('1970-01-01', tz='UTC')
product_data['CREATION_TIMESTAMP'] = (creation_time - unix_epoch) // pd.Timedelta(seconds=1)

product_data.head()

***Uploading the data to DynamoDB***

Using the boto3 DynamoDB resource, we create a table (if it does not already exist) with a schema where ITEM_ID is the primary key. After ensuring the table is ready, we convert the rating_count column in the product_data DataFrame to strings and iterate through the DataFrame, inserting each row as an item into the DynamoDB table.

In [None]:
# Upload the product data to DynamoDB

session = boto3.Session()
dynamodb = session.resource('dynamodb')

table_name = 'productdata'

# Reuse the table when it already exists; otherwise create it keyed on ITEM_ID.
try:
    table = dynamodb.Table(table_name)
    table.load()  # raises ResourceNotFoundException when the table is missing
    print(f"Table {table_name} already exists.")
except dynamodb.meta.client.exceptions.ResourceNotFoundException:
    # Define the table schema
    table = dynamodb.create_table(
        TableName=table_name,
        KeySchema=[
            {
                'AttributeName': 'ITEM_ID',
                'KeyType': 'HASH'
            }
        ],
        AttributeDefinitions=[
            {
                'AttributeName': 'ITEM_ID',
                'AttributeType': 'S'
            }
        ],
        BillingMode='PAY_PER_REQUEST'
    )
    # Block until the new table can accept writes.
    table.meta.client.get_waiter('table_exists').wait(TableName=table_name)
    print(f"Table {table_name} created successfully.")

# rating_count is stored as text in DynamoDB (missing values become 'nan').
product_data['rating_count'] = product_data['rating_count'].astype(str)

# Insert data into DynamoDB.
# batch_writer groups the puts into BatchWriteItem requests instead of one
# round trip per row. overwrite_by_pkeys de-duplicates rows that share an
# ITEM_ID inside a buffered batch (last one wins), which matches what repeated
# put_item calls would leave in the table and avoids the duplicate-key error
# a plain batch would raise.
with table.batch_writer(overwrite_by_pkeys=['ITEM_ID']) as batch:
    for _, row in product_data.iterrows():
        batch.put_item(
            Item={
                # Plain int: the DynamoDB serializer rejects numpy scalars.
                'timestamp': int(row['CREATION_TIMESTAMP']),
                'ITEM_ID': row['ITEM_ID'],
                'product_name': row['product_name'],
                'category': row['category'],
                'rating_count': row['rating_count'],
                'description': row['DESCRIPTION']
            }
        )

print("Data inserted successfully.")

***Create the user dataset***

We then load the user review data from an Amazon S3 bucket into a DataFrame, filtering out certain columns and renaming others for consistency. Finally, we convert the TIMESTAMP column to a Unix timestamp in seconds.

In [None]:

# Create the user review dataset filtering out respective columns

bucket_name = 'personalizeproductreviewdata'
user_data_key = 'amazon_data.csv'
data_s3_location1 = "s3://{}/{}".format(bucket_name, user_data_key)  # S3 URL
user_data = pd.read_csv(data_s3_location1)

# Catalogue and pricing fields are not part of the user review dataset.
# A single drop() raises KeyError if any of them is missing, exactly like the
# one-column-at-a-time version did.
non_user_columns = [
    'rating_count',
    'category',
    'about_product',
    'img_link',
    'product_link',
    'discounted_price',
    'actual_price',
    'discount_percentage',
]
user_data = user_data.drop(columns=non_user_columns)

user_data = user_data.rename(
    columns={'timestamp': 'TIMESTAMP', 'user_id': 'USER_ID', 'age': 'AGE'}
)

# Convert to Unix timestamp (seconds). Subtracting the epoch and floor-dividing
# by one second does not depend on the datetime resolution pandas picked when
# parsing (astype(int) // 10**9 is only correct for nanosecond data) nor on the
# platform's default integer width.
event_time = pd.to_datetime(user_data['TIMESTAMP'], utc=True)
unix_epoch = pd.Timestamp('1970-01-01', tz='UTC')
user_data['TIMESTAMP'] = (event_time - unix_epoch) // pd.Timedelta(seconds=1)

user_data.info()
user_data.head()


***Write back user data to DynamoDB***


Using the boto3 DynamoDB resource, we create a table (if it does not already exist) with a schema where review_id is the primary key. After ensuring the table is ready, we convert the rating and review_content columns in the user_data DataFrame to strings. We then iterate through the DataFrame, inserting each row as an item into the DynamoDB table.

In [None]:
# Write the user data to DynamoDB

session = boto3.Session()

# Get the DynamoDB resource
dynamodb = session.resource('dynamodb')

# Define the table
table_name = 'userdata'

# Check if the table exists, if not, create it keyed on review_id
try:
    table = dynamodb.Table(table_name)
    table.load()  # raises ResourceNotFoundException when the table is missing
    print(f"Table {table_name} already exists.")
except dynamodb.meta.client.exceptions.ResourceNotFoundException:
    # Define the table schema
    table = dynamodb.create_table(
        TableName=table_name,
        KeySchema=[
            {
                'AttributeName': 'review_id',
                'KeyType': 'HASH'
            }
        ],
        AttributeDefinitions=[
            {
                'AttributeName': 'review_id',
                'AttributeType': 'S'
            }
        ],
        BillingMode='PAY_PER_REQUEST'
    )
    # Block until the new table can accept writes.
    table.meta.client.get_waiter('table_exists').wait(TableName=table_name)
    print(f"Table {table_name} created successfully.")

# rating and review_content are stored as text in DynamoDB
# (missing values become the string 'nan').
user_data['rating'] = user_data['rating'].astype(str)
user_data['review_content'] = user_data['review_content'].astype(str)

# batch_writer groups the puts into BatchWriteItem requests instead of one
# round trip per row. overwrite_by_pkeys de-duplicates rows that share a
# review_id inside a buffered batch (last one wins), which matches what
# repeated put_item calls would leave in the table and avoids the
# duplicate-key error a plain batch would raise.
with table.batch_writer(overwrite_by_pkeys=['review_id']) as batch:
    for _, row in user_data.iterrows():
        batch.put_item(
            Item={
                # Plain int: the DynamoDB serializer rejects numpy scalars.
                'timestamp': int(row['TIMESTAMP']),
                'age': row['AGE'],
                'product_id': row['product_id'],
                'review_id': row['review_id'],
                'review_title': row['review_title'],
                'rating': row['rating'],
                'user_id': row['USER_ID'],
                'user_name': row['user_name'],
                'review_content': row['review_content']
            }
        )

print("Data inserted successfully.")




***Creating the interaction dataset***

We perform sentiment analysis on user reviews using Amazon Comprehend, breaking down long reviews into manageable chunks for analysis. We then calculate an average sentiment score for each review and assign an event type based on the rating, creating a new DataFrame with the relevant columns for user interactions, sentiment, and event types.

In [None]:
# Perform sentiment analysis on review using Amazon Comprehend

import boto3
import pandas as pd

# Amazon Comprehend client used by get_comprehend_sentiment below.
# NOTE(review): the region is pinned to us-west-2 here, while the other boto3
# clients/resources in this notebook use the session's default region — confirm
# this is intentional.
comprehend = boto3.client('comprehend', region_name='us-west-2')

def chunk_text(text, chunk_size=4000):
    """Split ``text`` into consecutive pieces of at most ``chunk_size`` characters.

    The pieces are returned in order; the last one may be shorter than
    ``chunk_size``. An empty ``text`` yields an empty list.
    """
    pieces = []
    for start in range(0, len(text), chunk_size):
        pieces.append(text[start:start + chunk_size])
    return pieces

def get_comprehend_sentiment(text):
    """Return the overall sentiment label and its confidence for a review.

    The text is split with ``chunk_text`` and every chunk is sent to
    ``comprehend.detect_sentiment`` (language ``'en'``). The per-label
    confidence scores are averaged over all chunks and the label with the
    highest mean is returned together with that mean, so the score always
    describes the returned label. Previously the label came from the first
    chunk only, while the score mixed confidences of whatever label each
    chunk happened to get.

    Args:
        text: Review text. Anything that is not a non-empty string
            (``None``, ``NaN``, ``""``) is treated as missing.

    Returns:
        ``(label, score)`` where ``label`` is upper-case (for example
        ``'POSITIVE'``) and ``score`` is the mean confidence for that label,
        or ``(None, None)`` when the text is missing.
    """
    # NaN is truthy, so a plain `not text` check would let it through and
    # crash in len(); require an actual, non-empty string.
    if not isinstance(text, str) or not text:
        return None, None

    chunks = chunk_text(text)
    score_totals = {}
    for chunk in chunks:
        response = comprehend.detect_sentiment(Text=chunk, LanguageCode='en')
        for label, score in response['SentimentScore'].items():
            score_totals[label] = score_totals.get(label, 0.0) + score

    best_label = max(score_totals, key=score_totals.get)
    # SentimentScore keys are capitalized ('Positive'); the Sentiment field
    # callers received before is upper-case ('POSITIVE').
    return best_label.upper(), score_totals[best_label] / len(chunks)

# Work on a copy so the sentiment and event columns are not added to user_data.
processed_data = user_data.copy()
# Unparseable ratings become NaN and therefore match neither threshold below.
processed_data['rating'] = pd.to_numeric(processed_data['rating'], errors='coerce')

# Apply sentiment analysis to the 'review_content' column
processed_data[['Sentiment', 'SentimentScore']] = processed_data['review_content'].apply(
    lambda review: pd.Series(get_comprehend_sentiment(review))
)

# Add the EVENT_TYPE column.
# The broader threshold is applied first so the stricter one can override it.
# In the opposite order every 'read' row is overwritten by 'click', because
# rating > 4.0 implies rating > 3.0.
processed_data['EVENT_TYPE'] = None
processed_data.loc[processed_data['rating'] > 3.0, 'EVENT_TYPE'] = 'click'
processed_data.loc[processed_data['rating'] > 4.0, 'EVENT_TYPE'] = 'read'

# Keep only rows that have an EVENT_TYPE assigned, select the relevant columns
# and rename them. Selecting with .loc and using the non-inplace rename builds
# a new frame instead of mutating a slice of processed_data.
# NOTE(review): USER_ID is populated from user_name, not from the USER_ID
# column of user_data — confirm this is intended.
interaction_columns = [
    'TIMESTAMP',
    'user_name',
    'product_id',
    'product_name',
    'EVENT_TYPE',
    'Sentiment',
    'SentimentScore',
]
interactions_df = processed_data.loc[
    processed_data['EVENT_TYPE'].notna(), interaction_columns
].rename(
    columns={
        'user_name': 'USER_ID',
        'product_id': 'ITEM_ID',
        'product_name': 'ITEM_NAME',
    }
)

# Preview the result instead of printing the whole frame
interactions_df.head()


***Write datasets to S3***

Upload the interaction, product, and user datasets as CSV files to the S3 bucket.

In [None]:
# Write the interaction data to S3 as CSV

interactions_filename = "interactions.csv"

# Serialize in memory; the index is not part of the dataset.
csv_buffer = StringIO()
interactions_df.to_csv(csv_buffer, index=False)

s3_client = boto3.client('s3')
s3_client.put_object(
    Bucket='personalizeproductreviewdata',
    Key=interactions_filename,  # previously defined but unused; the key was repeated as a literal
    Body=csv_buffer.getvalue(),
)

# Column names, dtypes and non-null counts of the frame that was uploaded.
interactions_df.info()


In [None]:
# Write the product data to S3 as CSV

# Serialize the frame in memory, without the index column.
csv_buffer = StringIO()
product_data.to_csv(path_or_buf=csv_buffer, index=False)

s3_client = boto3.client(service_name='s3')
s3_client.put_object(
    Bucket='personalizeproductreviewdata',
    Key='product_data.csv',
    Body=csv_buffer.getvalue(),
)




In [None]:
# Write the user data to S3 as CSV

# The exported file does not carry the review text. errors='ignore' keeps this
# cell re-runnable: on a second run the column is already gone and an
# unguarded drop would raise KeyError.
user_data = user_data.drop(columns=['review_content'], errors='ignore')

# Serialize in memory; the index is not part of the dataset.
csv_buffer = StringIO()
user_data.to_csv(csv_buffer, index=False)

s3_client = boto3.client('s3')
s3_client.put_object(
    Bucket='personalizeproductreviewdata',
    Key='user_data.csv',
    Body=csv_buffer.getvalue(),
)

