In [18]:
# import necessary libraries
import pandas as pd
from openai import OpenAI
# Import the token counter from openai
import tiktoken
from textblob import TextBlob
from IPython.display import Image
from PIL import Image as PILImage
import requests
from io import BytesIO
import nltk
from nltk.tokenize import sent_tokenize
import numpy as np
from numpy.linalg import norm

In [2]:
# get product description and details
with open("La Roche-Posay Toleriane Double Repair Face Moisturizer_description.txt", encoding="utf-8") as f:
    description = f.read()
# collapse every run of whitespace (newlines, tabs, repeated spaces) into one space;
# chaining str.replace("\n", " ") and str.replace("  ", " ") leaves double spaces
# behind whenever three or more whitespace characters are adjacent (e.g. blank lines)
description = " ".join(description.split())
description

"La Roche-Posay Toleriane Double Repair Face Moisturizer Highlights Hydrates and maintains moisture barrier Fortifies skin with ceramides Calms skin with niacinamide Product Overview La Roche-Posay Toleriane Double Repair Face Moisturizer soothes and strengthens irritated skin. La Roche-Posay Toleriane Double Repair Face Moisturizer is a daily oil-free formula that helps restore healthy, balanced skin. This cream works to strengthen the skin's natural protective barrier, providing up to 48 hours of moisture. Glycerin and Prebiotic Thermal Water gently hydrate, while essential ceramides work to prevent moisture loss. This lightweight La Roche-Posay moisturizer also contains niacinamide, a form of vitamin B3, to soothe a red, irritated complexion. La Roche-Posay Toleriane Double Repair Face Moisturizer is perfect for sensitive skin because it is formulated without parabens, fragrance or dyes. Key Features: Provides up to 48 hours of moisture for normal to dry skin Strengthens the skin's 

In [4]:
# load the product reviews from disk into a dataframe
df = pd.read_csv(filepath_or_buffer='reviews.csv')
# preview the first five rows
df.head(n=5)

Unnamed: 0,Date,Title,Review
0,"April 13, 2024",Mi crema favorita,It's the best cream I've had in years!
1,"April 11, 2024",My Go To!,This is my go to daily morning moisturizer! It...
2,"April 10, 2024",Great,My daughter uses the cream day and night and s...
3,"April 06, 2024",very hydrating,I use this mixed with a small amount of rosehi...
4,"April 06, 2024",Irritation,I started using this moisturizer and another s...


In [5]:
# keep a handle on the column labels of the review dataframe
features: pd.Index = df.columns

In [6]:
# summary statistics for every review column
review_stats = df.describe()
review_stats

Unnamed: 0,Date,Title,Review
count,448,448,448
unique,394,381,448
top,"December 28, 2021",Great moisturizer,It's the best cream I've had in years!
freq,3,12,1


In [7]:
# parse the date strings into datetime values
df['Date'] = pd.to_datetime(df['Date'])
# normalise both free-text columns the same way: lowercase, then newlines -> spaces
for text_column in ('Review', 'Title'):
    df[text_column] = df[text_column].str.lower().str.replace('\n', ' ')
# title followed by the review body, separated by a single space
df['Combined_Title_Review'] = df['Title'] + ' ' + df['Review']

In [8]:
# preview the cleaned dataframe
df.head(n=5)

Unnamed: 0,Date,Title,Review,Combined_Title_Review
0,2024-04-13,mi crema favorita,it's the best cream i've had in years!,mi crema favorita it's the best cream i've had...
1,2024-04-11,my go to!,this is my go to daily morning moisturizer! it...,my go to! this is my go to daily morning moist...
2,2024-04-10,great,my daughter uses the cream day and night and s...,great my daughter uses the cream day and night...
3,2024-04-06,very hydrating,i use this mixed with a small amount of rosehi...,very hydrating i use this mixed with a small a...
4,2024-04-06,irritation,i started using this moisturizer and another s...,irritation i started using this moisturizer an...


In [9]:
# calculate review sentiment
def get_sentiment(review):
    '''Classify a review text as 'positive', 'negative' or 'neutral'.

    The label follows the sign of the TextBlob polarity score of the text:
    above zero is 'positive', below zero is 'negative', anything else is 'neutral'.
    '''
    polarity = TextBlob(review).sentiment.polarity
    if polarity > 0:
        return 'positive'
    if polarity < 0:
        return 'negative'
    return 'neutral'

# label every combined title + review text with its sentiment
sentiment_labels = df['Combined_Title_Review'].apply(get_sentiment)
df['Sentiment'] = sentiment_labels
# count how many reviews fall into each sentiment class
df['Sentiment'].value_counts()

Sentiment
positive    426
negative     14
neutral       8
Name: count, dtype: int64

In [10]:
# Embedding model to use; large seems to deliver the best empirical results
model = "text-embedding-3-large"

# OpenAI client shared by the helper functions below
client = OpenAI()

# Create a function to get the embeddings (one by one)
def get_embedding(text):
    '''Request an embedding for a single text input.

    Uses the module-level `client` and `model` and returns the whole response
    object from the embeddings endpoint (not just the vector).
    '''
    return client.embeddings.create(model=model, input=text)


In [11]:
# Create a function to get the embeddings (batch by batch)
def get_batch_embedding(list_of_texts):
    '''Request embeddings for a list of text inputs in one call.

    Uses the module-level `client` and `model` and returns the whole response
    object from the embeddings endpoint.
    '''
    return client.embeddings.create(model=model, input=list_of_texts)

In [12]:
# Create a function to get the number of tokens in a text string
def num_tokens_from_string(string: str, encoding_name: str = "cl100k_base") -> int:
    '''Count the tokens that tiktoken produces for a text string.

    Args:
        string: the text to tokenize.
        encoding_name: name of the tiktoken encoding to use (default "cl100k_base").

    Returns:
        The number of tokens in the encoded string.
    '''
    tokenizer = tiktoken.get_encoding(encoding_name)
    return len(tokenizer.encode(string))

In [14]:
# count the tokens in every review title
df['title_token_count'] = df['Title'].apply(num_tokens_from_string)
# group the rows into batches of 8000 tokens (the actual limit is 8192 tokens, but we leave some wiggleroom) and embed them
def get_column_embeddings(df, column_name_to_embed, column_token_length, TOKEN_LIMIT=8000,
                          embed_fn=None, max_batch_items=2048):
    '''Embed a text column of a dataframe, batching the rows by token count.

    Rows are accumulated into a batch until adding the next row would reach
    TOKEN_LIMIT tokens (or the batch already holds max_batch_items texts); the
    batch is then embedded and a new one is started. An empty batch is never
    sent, so an empty dataframe or a first row that alone reaches TOKEN_LIMIT
    no longer triggers a request with no inputs.

    Args:
        df: dataframe holding the text and its token counts.
        column_name_to_embed: name of the column whose values are embedded.
        column_token_length: name of the column with the token count of each row.
        TOKEN_LIMIT: token budget per batch.
        embed_fn: callable taking a list of texts and returning a response whose
            `.data` items expose `.embedding`; defaults to `get_batch_embedding`.
        max_batch_items: maximum number of texts per request (the OpenAI API
            reference documents a limit of 2048 inputs per embeddings request).

    Returns:
        The same dataframe, with a new column `<column_name_to_embed>_embeddings`
        added in place.
    '''
    if embed_fn is None:
        embed_fn = get_batch_embedding

    def embed(texts):
        # unwrap the response into a plain list of embedding vectors
        return [item.embedding for item in embed_fn(texts).data]

    embeddings = []
    batch = []
    token_size = 0
    # iterate the two columns directly; no need to build a Series per row
    for text, n_tokens in zip(df[column_name_to_embed], df[column_token_length]):
        batch_is_full = token_size + n_tokens >= TOKEN_LIMIT or len(batch) >= max_batch_items
        # only flush when there is something to send
        if batch and batch_is_full:
            embeddings.extend(embed(batch))
            batch = []
            token_size = 0
        batch.append(text)
        token_size += n_tokens
    # embed whatever is left over after the loop
    if batch:
        embeddings.extend(embed(batch))
    # add the embeddings to the dataframe
    df[column_name_to_embed + '_embeddings'] = embeddings
    return df

In [15]:
# embed the review titles and preview the result
df = get_column_embeddings(df, column_name_to_embed='Title', column_token_length='title_token_count')
df.head(n=5)

Unnamed: 0,Date,Title,Review,Combined_Title_Review,Sentiment,title_token_count,Title_embeddings
0,2024-04-13,mi crema favorita,it's the best cream i've had in years!,mi crema favorita it's the best cream i've had...,positive,5,"[-0.007223212625831366, 0.0020467855501919985,..."
1,2024-04-11,my go to!,this is my go to daily morning moisturizer! it...,my go to! this is my go to daily morning moist...,positive,4,"[0.006269732490181923, -0.001512124203145504, ..."
2,2024-04-10,great,my daughter uses the cream day and night and s...,great my daughter uses the cream day and night...,positive,1,"[0.02907642163336277, 0.009133954532444477, -0..."
3,2024-04-06,very hydrating,i use this mixed with a small amount of rosehi...,very hydrating i use this mixed with a small a...,positive,3,"[-0.005457052495330572, 0.006788135971873999, ..."
4,2024-04-06,irritation,i started using this moisturizer and another s...,irritation i started using this moisturizer an...,positive,3,"[-0.026834426447749138, -0.0214895810931921, 0..."


In [16]:
# count the tokens in every review body
df['review_token_count'] = df['Review'].apply(num_tokens_from_string)
# embed the review bodies and preview the result
df = get_column_embeddings(df, column_name_to_embed='Review', column_token_length='review_token_count')
df.head(n=5)

Unnamed: 0,Date,Title,Review,Combined_Title_Review,Sentiment,title_token_count,Title_embeddings,review_token_count,Review_embeddings
0,2024-04-13,mi crema favorita,it's the best cream i've had in years!,mi crema favorita it's the best cream i've had...,positive,5,"[-0.007223212625831366, 0.0020467855501919985,...",11,"[-0.01565069518983364, 0.01878083497285843, -0..."
1,2024-04-11,my go to!,this is my go to daily morning moisturizer! it...,my go to! this is my go to daily morning moist...,positive,4,"[0.006269732490181923, -0.001512124203145504, ...",59,"[0.016155187040567398, -0.00991598516702652, -..."
2,2024-04-10,great,my daughter uses the cream day and night and s...,great my daughter uses the cream day and night...,positive,1,"[0.02907642163336277, 0.009133954532444477, -0...",24,"[-0.016059421002864838, -0.016612326726317406,..."
3,2024-04-06,very hydrating,i use this mixed with a small amount of rosehi...,very hydrating i use this mixed with a small a...,positive,3,"[-0.005457052495330572, 0.006788135971873999, ...",53,"[-0.012470364570617676, 0.0065993270836770535,..."
4,2024-04-06,irritation,i started using this moisturizer and another s...,irritation i started using this moisturizer an...,positive,3,"[-0.026834426447749138, -0.0214895810931921, 0...",58,"[-0.00850603450089693, -0.02239195443689823, -..."


In [17]:
# count the tokens in every combined title + review text
df['combined_token_count'] = df['Combined_Title_Review'].apply(num_tokens_from_string)
# embed the combined texts and preview the result
df = get_column_embeddings(df, column_name_to_embed='Combined_Title_Review', column_token_length='combined_token_count')
df.head(n=5)

Unnamed: 0,Date,Title,Review,Combined_Title_Review,Sentiment,title_token_count,Title_embeddings,review_token_count,Review_embeddings,combined_token_count,Combined_Title_Review_embeddings
0,2024-04-13,mi crema favorita,it's the best cream i've had in years!,mi crema favorita it's the best cream i've had...,positive,5,"[-0.007223212625831366, 0.0020467855501919985,...",11,"[-0.01565069518983364, 0.01878083497285843, -0...",16,"[-0.012092910706996918, 0.029175709933042526, ..."
1,2024-04-11,my go to!,this is my go to daily morning moisturizer! it...,my go to! this is my go to daily morning moist...,positive,4,"[0.006269732490181923, -0.001512124203145504, ...",59,"[0.016155187040567398, -0.00991598516702652, -...",63,"[0.010534127242863178, -0.013197249732911587, ..."
2,2024-04-10,great,my daughter uses the cream day and night and s...,great my daughter uses the cream day and night...,positive,1,"[0.02907642163336277, 0.009133954532444477, -0...",24,"[-0.016059421002864838, -0.016612326726317406,...",25,"[-0.013218075037002563, -0.013568880036473274,..."
3,2024-04-06,very hydrating,i use this mixed with a small amount of rosehi...,very hydrating i use this mixed with a small a...,positive,3,"[-0.005457052495330572, 0.006788135971873999, ...",53,"[-0.012470364570617676, 0.0065993270836770535,...",56,"[-0.015843305736780167, 0.001076939981430769, ..."
4,2024-04-06,irritation,i started using this moisturizer and another s...,irritation i started using this moisturizer an...,positive,3,"[-0.026834426447749138, -0.0214895810931921, 0...",58,"[-0.00850603450089693, -0.02239195443689823, -...",61,"[-0.016827985644340515, -0.01946531981229782, ..."


In [22]:
# keyword list describing strongly negative reviews
keywords_neg = "hate, worst, terrible, bad, awful, disappointed, regret, never again, avoid"

# keyword list describing strongly positive reviews
keywords_pos = "love, best, favorite, holy grail, amazing, must-have, can't live without, ride or die, top"

def keyword_embedding(keywords):
    '''Return the embedding vector for a keyword string.

    The keywords are sent as one text to `get_embedding`; the vector of the
    first (only) item in the response is returned.
    '''
    response = get_embedding(keywords)
    first_item = response.data[0]
    return first_item.embedding

def calc_similarity(df, embedding, column_name):
    '''Rank the rows of a dataframe by cosine similarity to a reference embedding.

    Args:
        df: dataframe whose `column_name` column holds one embedding vector per row.
        embedding: the reference embedding vector.
        column_name: name of the column containing the row embeddings.

    Returns:
        A new dataframe with an added 'similarity' column, sorted from most to
        least similar. The dataframe passed in is not modified.
    '''
    # the reference vector and its norm are the same for every row: compute them once
    reference = np.asarray(embedding)
    reference_norm = norm(reference)
    similarity = df[column_name].apply(
        lambda vector: np.dot(reference, vector) / (reference_norm * norm(vector))
    )
    # assign() works on a copy, so repeated calls with different keyword
    # embeddings do not overwrite a column on the caller's dataframe
    return df.assign(similarity=similarity).sort_values(by='similarity', ascending=False)

In [20]:
# persist the dataframe, embeddings included, as a csv file (row index omitted)
df.to_csv(path_or_buf='reviews_with_embeddings.csv', index=False)

In [21]:
# form the context from the top 10 similar reviews
def get_context(df, n=10):
    '''Build a text context from the first `n` rows of a (pre-sorted) review dataframe.

    Each review is rendered as "Review Date: ... \n Title: ... \n Review: ..." and
    consecutive reviews are separated by a blank line so they do not run together.

    Args:
        df: dataframe with 'Date', 'Title' and 'Review' columns, already ordered
            so that the most relevant reviews come first.
        n: maximum number of reviews to include (default 10). Fewer rows are
            used when the dataframe is shorter.

    Returns:
        The concatenated context string ("" for an empty dataframe).
    '''
    top = df.head(n)
    entries = [
        "Review Date: " + str(date) + " \n " + "Title: " + title + " \n " + "Review: " + review
        for date, title, review in zip(top['Date'], top['Title'], top['Review'])
    ]
    return "\n\n".join(entries)

In [23]:
# rank the reviews against the positive keyword embedding (most similar first)
df_pos = calc_similarity(
    df=df,
    embedding=keyword_embedding(keywords_pos),
    column_name='Combined_Title_Review_embeddings',
)
# rank the reviews against the negative keyword embedding (most similar first)
df_neg = calc_similarity(
    df=df,
    embedding=keyword_embedding(keywords_neg),
    column_name='Combined_Title_Review_embeddings',
)

In [24]:
# build the prompt context from the top-ranked positive and negative reviews
context_pos, context_neg = get_context(df_pos), get_context(df_neg)

In [25]:
def send_prompt(prompt, chat_model="gpt-3.5-turbo", system_prompt=None):
    '''Send a prompt to the OpenAI chat completions API and return the reply text.

    Args:
        prompt: the user message to send.
        chat_model: name of the chat model to query (default "gpt-3.5-turbo").
        system_prompt: system message that sets the assistant's role; when None,
            the skincare brand-specialist persona below is used.

    Returns:
        The content of the first choice in the response. The text is also
        printed so it appears in the cell output.
    '''
    if system_prompt is None:
        system_prompt = "You are a brand specialist for a company that makes skincare products with expertise in product differentiation techniques and marketing."
    # uses the module-level `client`
    completion = client.chat.completions.create(
        model=chat_model,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt},
        ],
    )
    answer = completion.choices[0].message.content
    print(answer)
    return answer

In [28]:
def save_prompt_response(response, filename):
    '''Save the prompt response to a text file.

    The file is written as UTF-8 (matching how the product description is read)
    so that non-ASCII characters in the response do not depend on the
    platform's default encoding.

    Args:
        response: the text to write.
        filename: path of the file to create or overwrite.
    '''
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(response)
    print(f"Response saved to {filename}")

In [26]:
# Image generation using DALL·E 3
def generate_image(client, prompt, size="1024x1024", quality="standard", n=1):
    '''Request image generation from the "dall-e-3" model.

    Args:
        client: object exposing `images.generate(...)`.
        prompt: text prompt describing the image.
        size: image size string passed through to the API.
        quality: quality setting passed through to the API.
        n: number of images requested.

    Returns:
        Whatever `client.images.generate` returns.
    '''
    request = {
        "model": "dall-e-3",
        "prompt": prompt,
        "size": size,
        "quality": quality,
        "n": n,
    }
    return client.images.generate(**request)

In [27]:
def get_image(response):
    '''Generate an image from a text response and return it with its URL.

    The text passed in is used verbatim as the image-generation prompt.

    Returns:
        A tuple `(image, image_url)` where `image` is an IPython `Image`
        pointing at the URL of the first generated image.
    '''
    # a fresh client is created for the image request
    client = OpenAI()
    generation = generate_image(client, response)
    # URL of the first generated image
    image_url = generation.data[0].url
    return Image(url=image_url), image_url

In [29]:
def save_image(url, file_name, timeout=30):
    '''Download an image from a URL and save it to a file.

    Args:
        url: address of the image to download.
        file_name: destination path; PIL infers the format from the extension.
        timeout: seconds to wait for the server before giving up (default 30),
            so a stalled download cannot hang the notebook indefinitely.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    '''
    response = requests.get(url, timeout=timeout)
    # fail with a clear HTTP error instead of handing an error page to PIL,
    # which would otherwise report an unidentifiable image
    response.raise_for_status()
    # wrap the downloaded bytes in a file-like object and open it with PIL
    pil_img = PILImage.open(BytesIO(response.content))
    pil_img.save(file_name)
    print(f"Image saved to {file_name}")

In [30]:
def get_image_description(image_url):
    '''Ask the "gpt-4-vision-preview" model to describe the image at a URL.

    Sends one user message containing a text instruction and the image URL via
    the module-level `client`, with the reply capped at 300 tokens.

    Returns:
        The content of the first choice in the response.
    '''
    text_part = {"type": "text", "text": "Give me a description of this image."}
    image_part = {
        "type": "image_url",
        "image_url": {
            "url": image_url,
        },
    }
    messages = [{"role": "user", "content": [text_part, image_part]}]
    response = client.chat.completions.create(
        model="gpt-4-vision-preview",
        messages=messages,
        max_tokens=300,
    )
    return response.choices[0].message.content

In [32]:
def display_image_description(response, filename):
    '''Display the image description sentence by sentence and save it to a text file.

    The file is written as UTF-8 so that non-ASCII characters in the description
    do not depend on the platform's default encoding.

    Args:
        response: the description text to display.
        filename: path of the text file to create or overwrite; one sentence
            is written per line.
    '''
    # split the description into sentences with NLTK
    sentences = sent_tokenize(response)
    # display the sentences
    for sentence in sentences:
        print(sentence)
    # save the sentences to a text file, one per line
    with open(filename, 'w', encoding='utf-8') as f:
        f.writelines(sentence + '\n' for sentence in sentences)
    print(f"Description saved to {filename}")

### Prompt 1: Text Summarization with Product Description, Product Details, and Top 10 Positive Reviews

In [34]:
# summarisation prompt: instruction + product description + positive-review context
prompt1 = "".join([
    "Provide a brief summary of the product given its description, details, and the provided reviews for context. \n Description: ",
    description,
    "\n",
    context_pos,
])
# query the chat model and persist its answer
response1 = send_prompt(prompt1)
save_prompt_response(response1, 'product_summary.txt')
# generate an image from the answer, then describe and save that image
image1, image_url1 = get_image(response1)
display_image_description(
    get_image_description(image_url1),
    'image1_summary.txt',
)
save_image(image_url1, 'product_image1.png')
image1

The La Roche-Posay Toleriane Double Repair Face Moisturizer is a highly praised skincare product that effectively hydrates, strengthens the skin's moisture barrier, and calms irritated skin with its key ingredients like ceramides and niacinamide. It is suitable for normal to dry skin types and is particularly beneficial for those with sensitive skin as it is formulated without parabens, fragrance, or dyes. The moisturizer offers up to 48 hours of moisture and is lightweight, making it a popular choice among users.

Reviews for the product are overwhelmingly positive, with users describing it as their "holy grail," "favorite," and "best moisturizer ever." They appreciate its rich but non-greasy formula, how it absorbs well into the skin, and its effectiveness in providing hydration without feeling heavy. Many users with various skin concerns such as dryness, acne-prone skin, and sensitivity have found success with this moisturizer, commenting on its gentle yet effective nature.

Overall

### Prompt 2: Feature Extraction with Product Description, Product Details, and Top 10 Positive Reviews

In [43]:
# feature-extraction prompt: instruction + product description + positive-review context
prompt2 = "".join([
    "List notable features of the product given its description, details, and the provided reviews for context. \n Description: ",
    description,
    "\n",
    context_pos,
])
# query the chat model and persist its answer
response2 = send_prompt(prompt2)
save_prompt_response(response2, 'product_features.txt')
# generate an image from the answer, then describe and save that image
image2, image_url2 = get_image(response2)
display_image_description(
    get_image_description(image_url2),
    'image2_features.txt',
)
save_image(image_url2, 'product_image2.png')
image2

Notable Features of La Roche-Posay Toleriane Double Repair Face Moisturizer:

1. **Hydrating Formula**: Provides up to 48 hours of moisture, suitable for normal to dry skin types.
2. **Skin Barrier Strengthening**: Fortifies skin with essential ceramides to maintain a healthy moisture barrier.
3. **Soothing Niacinamide**: Contains niacinamide (vitamin B3) to calm and soothe red, reactive skin.
4. **Oil-Free Formula**: Lightweight and non-greasy, making it suitable for all skin types.
5. **Key Ingredient - Niacinamide**: Known for reducing the appearance of wrinkles and improving skin texture.
6. **Free from Irritants**: Formulated without parabens, fragrance, or dyes, making it ideal for sensitive skin.
7. **Dermatologist Recommended**: Virtually a dupe for expensive moisturizers, endorsed by dermatologists.
8. **Versatile Usage**: Can be used as a daily facial moisturizer, day or night, to hydrate and nourish skin.
9. **Positive Customer Reviews**: Highly praised by users for its effe

### Prompt 3: Review Sentiment Analysis with Product Description, Product Details, and Top 10 Positive Reviews

In [49]:
# sentiment-analysis prompt: instruction + product description + positive-review context
prompt3 = "".join([
    "Conduct sentiment analysis of the product reviews given its description, details, and the provided reviews for context. \n Description: ",
    description,
    "\n",
    context_pos,
])
# query the chat model and persist its answer
response3 = send_prompt(prompt3)
save_prompt_response(response3, 'product_sentiment_analysis.txt')
# generate an image from the answer, then describe and save that image
image3, image_url3 = get_image(response3)
display_image_description(
    get_image_description(image_url3),
    'image3_sentiment.txt',
)
save_image(image_url3, 'product_image3.png')
image3

Based on the sentiment analysis of the provided reviews for the La Roche-Posay Toleriane Double Repair Face Moisturizer, the overall sentiment appears to be predominantly positive. The reviewers express satisfaction and praise for various aspects of the product, highlighting its effectiveness and suitability for different skin types.

1. "my new holy grail" Review: This review indicates high satisfaction and loyalty to the product, describing it as rich, moisturizing, and non-clogging. The reviewer emphasizes its perfect formulation and suitability for all women's skincare routines.

2. "love, love, love" Review: The reviewer expresses admiration for the moisturizer, describing it as wonderful and easy to layer with other products. This positive sentiment indicates a high level of satisfaction with the product's performance.

3. "holy grail!" Review: This review indicates strong loyalty to the product, with the reviewer appreciating its dermatologist-recommended status and effectivenes

### Prompt 4: DALL-E 3 Prompt Generation with Product Description, Product Details, and Top 10 Positive Reviews

In [54]:
# image-prompt generation: instruction + product description + labelled positive-review context
prompt4 = "".join([
    "Using the provided product description, details, and reviews for context. Generate a prompt with specific product visuals to use as input to DALL-E 3 with the intention of creating an image of the product described. \n Description: ",
    description,
    "\n",
    "Reviews: ",
    context_pos,
])
# query the chat model and persist the generated image prompt
response4 = send_prompt(prompt4)
save_prompt_response(response4, 'product_visuals_prompt.txt')
# generate an image from that prompt, then describe and save the image
image4, image_url4 = get_image(response4)
display_image_description(
    get_image_description(image_url4),
    'image4_visuals.txt',
)
save_image(image_url4, 'product_image4.png')
image4

Create an image of a sleek, modern skincare product with a minimalist design featuring a white tube or jar labeled "La Roche-Posay Toleriane Double Repair Face Moisturizer." The label should prominently display key features such as "Strengthens skin's moisture barrier," "Provides up to 48 hours of moisture," and "Calms skin with niacinamide." Include subtle graphics or representations of ceramides and the ingredient niacinamide to highlight their benefits. The overall aesthetic should convey a sense of luxury, effectiveness, and dermatologist-recommended quality in line with the brand's reputation.
Response saved to product_visuals_prompt.txt
You're looking at an image of a cosmetic product layout showcasing a tube of Toleriane Double Repair Face Moisturizer.
The composition is arranged methodically against a plain, beige background, giving it a clean and clinical aesthetic.
Various elements that are possibly associated with the product's ingredients or benefits are strategically place

### Prompt 5: Ad Campaign Highlights with Product Description, Product Details, and Top 10 Negative Reviews

In [39]:
# ad-campaign prompt: instruction + product description + labelled negative-review context
prompt5 = "".join([
    "Generate a list of product details to highlight for this product in a ad campaign according to negative reviews of the product and its description. \n Description: ",
    description,
    "\n",
    "Reviews: ",
    context_neg,
])
# query the chat model and persist its answer
response5 = send_prompt(prompt5)
save_prompt_response(response5, 'product_ad_campaign.txt')
# generate an image from the answer, then describe and save that image
image5, image_url5 = get_image(response5)
display_image_description(
    get_image_description(image_url5),
    'image5_ad_campaign.txt',
)
save_image(image_url5, 'product_image5.png')
image5

Based on the negative reviews and feedback provided for the La Roche-Posay Toleriane Double Repair Face Moisturizer, here are some product details that can be highlighted in an ad campaign to address these concerns and differentiate the product:

1. Lightweight Formula: The La Roche-Posay Toleriane Double Repair Face Moisturizer features a lightweight formula that absorbs easily into the skin without leaving a greasy residue.
2. Non-Comedogenic: Formulated to be non-comedogenic, this moisturizer is suitable for sensitive skin types and will not clog pores, reducing the risk of breakouts.
3. Fragrance-Free: Unlike other products that may have strong scents, this moisturizer is fragrance-free, making it ideal for those sensitive to scents or particular smells.
4. Nourishing Ceramides: Enriched with ceramides, this moisturizer helps fortify the skin's natural barrier, aiding in moisture retention and skin protection.
5. Skin Soothing Niacinamide: With the inclusion of niacinamide (Vitamin