# Setting Up SkinClaire's Environment ✨




**Imports, Installations, Initializations**

In [1]:
import pandas as pd
import string
# import os
from tabulate import tabulate

In [2]:
#!pip install nltk
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer

In [3]:
# Fetch the 'vader_lexicon' NLTK data package; the VADER analyzer imported above is
# expected to need it (presumably a no-op when the package is already installed).
nltk.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/aoluwolerotimi/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

In [4]:
# Initialize VADER Analyzer
# One shared instance; sentiment_comparison() calls sia.polarity_scores() on every review.
sia = SentimentIntensityAnalyzer()

# Loading Previously Cleaned Sephora Datasets 🧼

# Pre-Processing Sephora Datasets 🧼

In [5]:
# Folder that holds the cleaned Sephora CSV files. Every path below is built from
# this single constant, so it is the only line to edit on a new machine.
DATA_DIR = '/Users/aoluwolerotimi/Code from Courses/INSY660 - Coding Foundations/SkinClaire for Git' # Change to your local path

# Loading reviews dataframe for sentiment analysis
prod_reviews = pd.read_csv(f'{DATA_DIR}/clean_product_reviews.csv')

# Loading reviews dataframe for product recommendation
# This is the same file as above, so copy the frame already in memory instead of
# parsing the CSV from disk a second time. The copy keeps the two frames independent.
product_review_new_df = prod_reviews.copy()

# Loading product info dataframe for skin condition screening
product_info = pd.read_csv(f'{DATA_DIR}/product_info.csv')


In [6]:
# Creating list of product IDs which have reviews for sentiment analysis
# sentiment_comparison() validates user-entered product codes against these distinct values.
reviewed_prods = prod_reviews['product_id'].unique()

# Defining Functions and Global Variables

**Assistive Functions**

In [7]:
# Validate user input function for sentiment analysis
def validate_input(user_input, valid_list):
    """Report whether ``user_input`` is one of the entries of ``valid_list``.

    Args:
        user_input: The (already cleaned) value typed by the user.
        valid_list: Container of accepted values.

    Returns:
        The result of the membership test ``user_input in valid_list``.
    """
    is_accepted = user_input in valid_list
    return is_accepted

# Validate user input function for product recommendation
def validate_input2(user_input, allowed_values):
    """Case-insensitively check ``user_input`` against ``allowed_values``.

    The input is first normalized with ``clean_input2``; the allowed values are
    lower-cased before the comparison.

    Args:
        user_input: Raw or pre-cleaned text typed by the user.
        allowed_values: Iterable of accepted strings.

    Returns:
        True when the cleaned input equals one of the lower-cased allowed values.
    """
    candidate = clean_input2(user_input)
    lowered_options = [option.lower() for option in allowed_values]  # Compare in lowercase
    return candidate in lowered_options

In [8]:
# Clean user input function for sentiment analysis
def clean_input(user_input):
    """Normalize user text for menu choices and product codes.

    Every character in ``string.punctuation`` is removed, surrounding whitespace
    is trimmed, and the result is upper-cased. Trimming matters because an
    accidental leading or trailing space (e.g. ``"P311143 "``) would otherwise
    make a valid entry fail validation.

    Args:
        user_input: Raw text typed by the user.

    Returns:
        The cleaned, upper-cased string.
    """
    translator = str.maketrans('', '', string.punctuation)
    # Strip after removing punctuation so input such as "2 ." is trimmed as well.
    cleaned_input = user_input.translate(translator).strip().upper()
    return cleaned_input

# Clean user input function for product recommendation
def clean_input2(user_input):
    """Normalize user text for the product-recommendation prompts.

    Every character in ``string.punctuation`` is removed, surrounding whitespace
    is trimmed, and the result is lower-cased. Trimming matters because an
    accidental leading or trailing space (e.g. ``" oily"``) would otherwise
    make a valid entry fail validation.

    Args:
        user_input: Raw text typed by the user.

    Returns:
        The cleaned, lower-cased string.
    """
    translator = str.maketrans('', '', string.punctuation)
    # Strip after removing punctuation so input such as "dry !" is trimmed as well.
    cleaned_input2 = user_input.translate(translator).strip().lower()  # Convert to lowercase
    return cleaned_input2

In [9]:
# Global variable for user menu selection
menu_selection = ""

# Menu selection function
def main_menu():
  """Print the SkinClaire menu and keep prompting until a valid option is entered.

  The typed answer is normalized with ``clean_input`` and accepted either as a
  digit ("1"-"4") or as the spelled-out number ("ONE"-"FOUR").

  Returns:
      The chosen option as one of the strings "1", "2", "3" or "4".
  """
  print("""SkinClaire: Here are some ways I can help. Would any of these interest you today? If so, enter the corresponding number.

1. Help me pick a product for my skin type
2. Help me decide between products based on previous customer reviews
3. Help me check if the product I'm interested in is safe for my skin condition
4. I don't need help

SkinClaire: Please enter the number for your selection.
""")

  # Every accepted spelling, mapped to the canonical option it stands for
  spellings = {
      "1": "1", "ONE": "1",
      "2": "2", "TWO": "2",
      "3": "3", "THREE": "3",
      "4": "4", "FOUR": "4",
  }
  # Acknowledgement printed for each option; option 4 has none
  acknowledgements = {
      "1": "SkinClaire: Sure, I can help you pick a product for your skin type! Let's get started.",
      "2": "SkinClaire: Sure, I can help you decide between those products! Let's get started.",
      "3": "SkinClaire: Sure, I can help you check if a product is safe for your skin condition! Let's get started.",
  }

  # Loop for input validation
  while True:
      typed = clean_input(input("You: "))
      option = spellings.get(typed)

      if option is None:
          print("SkinClaire: Sorry, I didn't understand. Please enter a valid option (1, 2, 3, or 4).")
          continue

      if option in acknowledgements:
          print(acknowledgements[option])
      return option

**Offering 1: Product Recommendation by Skin Type**

In [10]:


def prod_recommendation():
    """Run the interactive product-recommendation conversation.

    Asks for the user's skin type and, optionally, a product type. It then
    averages the ratings in ``product_review_new_df`` per product for the
    matching reviews and prints the ten best-rated products as a grid table.
    Nothing is returned.
    """

    def get_skin_type():
        """Prompt until the user names a skin type that occurs in the review data."""
        known_types = [
            str(entry).strip().lower()
            for entry in product_review_new_df['skin_type'].unique()
            if isinstance(entry, str)
        ]

        print("Skinclaire: Welcome to Skinclaire's recommendation system!")

        while True:
            # Ask for the skin type and normalize the answer before validating it.
            answer = clean_input2(input(
                """Skinclaire: What's your skin type? Please enter one of the following: Oily, Combination, Dry, or Normal.
                You: """))

            if not validate_input2(answer, known_types):
                print(f"Skinclaire: Sorry, I can't seem to identify that skin type {answer} ")
                continue
            return answer

    def get_product_type():
        """Return the product type the user wants, or None when they decline to pick one."""
        allowed_types = ['moisturizers', 'cleansers', 'sunscreens']

        while True:
            # First find out whether the user wants to narrow by product type at all.
            wants_type = input("Skinclaire: Would you like to specify a product type? (Yes/No): ").strip().lower()

            if wants_type == 'no':
                print("Skinclaire: Alright! Finding the top-rated products across all types.")
                return None  # None signals "no product type filter"

            if wants_type != 'yes':
                print("Skinclaire: Please answer with Yes or No.")
                continue

            while True:
                # Keep asking until one of the allowed product types is entered.
                requested = clean_input2(input(
                    """Skinclaire: What type of product are you looking for? Please enter one of the following: Moisturizers, Cleansers, or Sunscreens.
                        You: """))

                if validate_input2(requested, allowed_types):
                    return requested
                print(f"Skinclaire: Sorry, I can't seem to identify that product type {requested} ")

    skin_type = get_skin_type()
    if not skin_type:
        return

    product_type = get_product_type()

    # Keep only the reviews written by users with the requested skin type
    skin_mask = product_review_new_df['skin_type'].str.strip().str.lower() == skin_type
    matches = product_review_new_df[skin_mask]

    if product_type is not None:
        # Narrow further to the requested product type
        type_mask = matches['product_type'].str.strip().str.lower() == product_type
        matches = matches[type_mask]

    # Average rating per product, best first, limited to the top 10
    top_ten = (
        matches
        .groupby(['product_type', 'product_id', 'product_name'])['rating']
        .mean()
        .reset_index()
        .sort_values(by='rating', ascending=False)
        .head(10)
    )

    if top_ten.empty:
        print("Skinclaire: No products found for the given input. Please try again.")
        return

    # One table row per recommended product
    table_rows = [
        [row['product_name'], row['product_id'], row['rating'], skin_type, row['product_type']]
        for _, row in top_ten.iterrows()
    ]
    column_titles = ['Product Name', 'Product ID', 'Rating', 'Skin Type', 'Product Type']

    print(tabulate(table_rows, headers=column_titles, tablefmt='grid', colalign=("left", "center", "center", "center", "center")))

# # Call the function when needed
# prod_recommendation()


**Offering 2: Sentiment Analysis for Tipping Point Purchase Decision**


In [11]:
def _ask_for_reviewed_product(first_prompt):
    """Prompt until the user enters a product code listed in ``reviewed_prods``; return it cleaned."""
    code = clean_input(input(first_prompt))
    while not validate_input(code, reviewed_prods):
        print(f"SkinClaire: Sorry, I can't seem to find any reviews under {code} ")
        code = clean_input(input("""SkinClaire: Please try again.
You:"""))
    return code


def _mean_compound_score(reviews):
    """Return the average VADER compound score over a non-empty collection of review texts."""
    total = sum(sia.polarity_scores(review)["compound"] for review in reviews)
    return total / len(reviews)


def sentiment_comparison():
    """Compare two products by the average VADER sentiment of their written reviews.

    The user is asked for two product codes, each validated against
    ``reviewed_prods``. The non-null ``review_text`` entries of each product in
    ``prod_reviews`` are scored with ``sia``, and the mean compound score of
    each product is reported, scaled by 10 and rounded to one decimal.

    If either product has no written review text, the comparison is skipped
    with an explanatory message instead of dividing by zero.

    NOTE(review): VADER compound scores are documented as ranging from -1 to 1,
    so the "out of 10" figure can be negative - confirm this wording is intended.

    Returns:
        None. All results are printed.
    """
    # Introduce conversation
    print("SkinClaire: Alright, let's find out how other skincare lovers are feeling about the products you're considering")

    # Prompt for, and validate, both product codes
    op_1 = _ask_for_reviewed_product(
        """SkinClaire: What's the product code for the first product you're considering? Please enter the product code.
You:""")
    op_2 = _ask_for_reviewed_product(
        """SkinClaire: Noted! What's the product code for the other product you're considering?
You:""")

    print(f"SkinClaire: Thank you! Now let's see what other skin care lovers have to say about {op_1} and {op_2}")

    # Non-null review text for each product
    op_1_revs = prod_reviews.loc[prod_reviews['product_id'] == op_1, 'review_text'].dropna()
    op_2_revs = prod_reviews.loc[prod_reviews['product_id'] == op_2, 'review_text'].dropna()

    # A product can be listed in reviewed_prods while all of its review_text values
    # are null; averaging would then divide by zero, so stop with a clear message.
    for code, reviews in ((op_1, op_1_revs), (op_2, op_2_revs)):
        if len(reviews) == 0:
            print(f"SkinClaire: Sorry, none of the reviews for {code} include written text, so I can't compare these products.")
            print("SkinClaire: Thanks for chatting with me today. Goodbye!")
            return

    # Average VADER compound score per product
    op_1_sent = _mean_compound_score(op_1_revs)
    op_2_sent = _mean_compound_score(op_2_revs)

    # Determine score comparison; None marks a tie
    if op_1_sent > op_2_sent:
        max_prod = op_1
    elif op_1_sent < op_2_sent:
        max_prod = op_2
    else:
        max_prod = None

    # Output results
    if max_prod is None:
        print(f"""
SkinClaire: Looks like it's a tie!
SkinClaire: Based on the {len(op_1_revs)} reviews I analyzed for {op_1} and {len(op_2_revs)} I analyzed for {op_2}, previous customers feel the same about both products.
SkinClaire: On average, the sentiment from the reviews is about a {round((op_1_sent * 10), 1)} out of 10""")
    else:
        print(f"""
SkinClaire: Based on the {len(op_1_revs)} reviews I analyzed for {op_1} and {len(op_2_revs)} I analyzed for {op_2}, previous customers feel more positive about {max_prod}.
SkinClaire: On average, the sentiment from the reviews for {op_1} is about a {round((op_1_sent * 10), 1)} out of 10, while for {op_2}, it's about a {round((op_2_sent * 10), 1)} out of 10 """)

    print("SkinClaire: I hope this helps!")
    print("SkinClaire: Thanks for chatting with me today. Goodbye!")

**Offering 3: Product Screening by Skin Condition**


In [12]:
def main_program(product_info):
    """Stand-alone entry point for the skin-condition screening conversation.

    Repeatedly asks whether the user has a product to test and, on "yes", runs
    ``condition_screening``. After each screening the user is asked whether to
    check another product. Both answers are trimmed and lower-cased before
    comparison, so stray whitespace such as "yes " is still understood.

    Args:
        product_info: Product table forwarded unchanged to ``condition_screening``.

    Returns:
        None.
    """
    print("SkinClaire: Hello! Welcome to SkinClaire, your personalized skin product checker.")

    while True:
        choice = input("SkinClaire: Do you have a product you'd like to run a skin risk test on? (yes/no): ").strip().lower()

        if choice == 'yes':
            condition_screening(product_info)

            # Normalize exactly like `choice` above; without strip() an answer
            # such as "yes " fell through to the "didn't understand" exit.
            cont = input("SkinClaire: Would you like to check another product? (yes/no): ").strip().lower()
            if cont == 'no':
                print("SkinClaire: Thank you for using our service. Stay safe and take care of your skin!")
                break
            elif cont != 'yes':
                print("SkinClaire: I didn't understand that response. Exiting the program. Feel free to start again!")
                break
        elif choice == 'no':
            print("SkinClaire: That's okay! Feel free to come back anytime. Take care!")
            break
        else:
            print("SkinClaire: I'm sorry, I didn't understand that. Please answer with 'yes' or 'no'.")


def condition_screening(product_info):
    """Ask for a product ID, confirm the product with the user, then screen it.

    The loop repeats until the user enters an ID found in
    ``product_info['product_id']`` and confirms the product shown. The
    confirmed product name is then handed to ``get_skin_condition``.

    The typed ID is trimmed and upper-cased before the lookup, matching how
    product codes are normalized in the sentiment-comparison flow.
    NOTE(review): this assumes product IDs in ``product_info`` are stored in
    upper case (e.g. "P311143") - confirm against the dataset.

    Args:
        product_info: DataFrame with at least the columns ``product_id``,
            ``product_name`` and ``brand_name``.

    Returns:
        None.
    """
    while True:  # This loop will keep running until we break out of it
        # Prompt the user for a product ID
        product_id = input("""SkinClaire: Please enter the product ID you want to check:
        You: """).strip().upper()

        # Filter the DataFrame for the given product ID
        product_row = product_info[product_info['product_id'] == product_id]

        # If the product ID does not exist in the DataFrame, ask again
        if product_row.empty:
            print(f"SkinClaire: No product found with ID {product_id}. Please check and try again.")
            continue

        product_name = product_row['product_name'].iloc[0]
        brand_name = product_row['brand_name'].iloc[0]

        confirmation = input(f"""SkinClaire: The product name for ID {product_id} is '{product_name}' by '{brand_name}'. Is this correct? (yes/no):
            You: """)

        # Trim the answer so that "yes " is still accepted as a confirmation
        if confirmation.strip().lower() == 'yes':
            print("SkinClaire: Thank you for confirming!")
            break  # This will exit the loop and continue to the next part of the function
        else:
            print("SkinClaire: Please check the product ID and try again.")

    # Calling get_skin_condition and passing the required parameters to it
    get_skin_condition(product_name, product_info)



def get_skin_condition(product_name, product_info):
    """Check a product's ingredients against a chosen skin sensitivity.

    The first row of ``product_info`` whose ``product_name`` equals
    ``product_name`` supplies the ingredient list, which is split on ", ".
    The user is prompted until they name one of the supported conditions.
    Ingredient names are then compared with that condition's list after
    trimming and lower-casing both sides, so "alcohol" matches "Alcohol".
    Matching is by whole ingredient name, not by substring.

    If the product has no usable ingredient text, a message is printed and the
    function returns without prompting.

    NOTE(review): this assumes ``ingredients`` holds a plain ", "-separated
    string - confirm against the cleaned dataset.

    Args:
        product_name: Name of the product to screen.
        product_info: DataFrame with at least the columns ``product_name`` and
            ``ingredients``.

    Returns:
        None. The verdict is printed.
    """
    # Three entries that had been fused by a missing separator are split here:
    #   "...extrait de levureacai"    -> "...extrait de levure", "acai"
    #   "white lupinUndecylenic Acid" -> "white lupin", "Undecylenic Acid"
    #   "Nervonic Acidgalactomyces"   -> "Nervonic Acid", "galactomyces"
    # The fragments "saccharomyces (lysate", "copper", "zinc) ferment" are left
    # untouched: splitting a product's ingredient text on ", " yields the same
    # fragments for "saccharomyces (lysate, copper, zinc) ferment".
    sensitivity_ingredients = {
        'Malasezia': [
            "aspergillus ferment", "bacillus ferment", "alteromonas ferment extract",
            "lactococcus ferment lysate", "lactococcus ferment",
            "pseudoalteromonas exopolysaccharides", "pseudoalteromonas ferment extract",
            "thermus thermophillus ferment", "leuconostoc/radish root ferment filtrate",
            "leuconostoc/radish root fermentfiltrate", "rice ferment filtrate (sake)",
            "rice ferment filtrate", "rice filtrate ferment",
            "saccharomyces xylinum (kombucha) black tea ferment",
            "saccharomyces/camellia sinensis leaf/cladosiphon okamuranus/rice ferment filtrate",
            "saccharomyces/xylinum/black tea ferment",
            "saccharomyces/rice ferment filtrate extract", "saccharomyces/rice ferment filtrate",
            "aspergillus/rice ferment filtrate", "bacillus/soybean/folic acid ferment",
            "lactobacillus/eriodictyon californicum ferment extract",
            "lactobacillus/pumpkin fruit ferment filtrate", "lactobacillus/rice ferment filtrate",
            "lactobacillus/tomato fruit ferment extract",
            "lactobacillus/wasabia japonica root ferment extract",
            "lactobacillus/water hyacinth ferment", "saccharomyces cerevisiae extract",
            "saccharomyces cerevisiae", "saccharomyces", "saccharomyces ferment filtrate",
            "saccharomyces ferment", "saccharomyces ferments", "saccharomyces (lysate", "copper",
            "zinc) ferment", "saccharomyces (lysate copper zinc) ferment",
            "saccharomyces/copper ferment", "saccharomyces/iron ferment",
            "saccharomyces/magnesium ferment", "saccharomyces/silicon ferment",
            "saccharomyces/zinc ferment", "yeast", "yeast extract", "faex (yeast extract)",
            "hydrolyzed yeast extract", "yeast extract/faex/extrait de levure", "acai",
            "sweet almond", "andiroba", "apple", "apricot", "argan", "avocado", "baobab",
            "babassu", "badger", "barbary fig", "bear", "beefsteak", "bees", "blackberry",
            "black sea rod", "european blueberry", "borage", "brazil nut", "broccoli", "buriti",
            "cacay", "candelilla", "carnauba", "carrot", "castor", "celery", "wild celery",
            "bitter cherry", "sweet cherry", "chia", "chinaberry", "cloudberry",
            "cocoa", "coconut", "cod liver", "coffee", "cotton", "corn", "crambe", "cranberry",
            "cucumber", "cumin", "black cumin", "cupuacu", "black currant", "egg", "emu",
            "evening primrose", "fish", "flower", "galip", "ginseng", "goji", "grapeseed",
            "hazelnut", "horse", "chilean hazelnut", "harakeke", "hemp", "illipe", "kiwi",
            "jojoba", "karanja", "kukui", "lingonberry", "linseed", "lanolin", "lard",
            "mushroom", "milk", "mink", "morel", "macadamia", "mango", "marula", "meadowfoam",
            "mongongo", "moringa", "musk mallow", "mustard", "neem", "olive", "olus", "tsubaki",
            "palm", "brazillian palm", "south american palm", "papaya", "passion fruit",
            "passion flower", "pataua", "peach", "peanut", "pecan", "black pepper", "pequi",
            "japanese pine", "korean pine", "siberian pine", "pistachio", "common plum",
            "hog plum", "pomegranate", "pongamia", "poppy", "opium poppy", "pracaxi", "pumpkin",
            "purcellin", "purple viper's bugloss", "radish", "rapeseed", "raspberry", "rice",
            "dog rose", "musk rose", "rosehip", "rose", "safflower", "seabuckthorn", "sesame",
            "shea", "shorea", "east african shea", "soybean", "sponge gourd", "sunflower",
            "sweet brier", "tamanu", "tallow", "tall", "truffle", "tomato", "vegetable",
            "watermelon", "wheat", "white lupin", "Undecylenic Acid", "Lauric Acid",
            "Tridecylic Acid", "Myristic Acid", "Pentadecylic acid", "Palmitic Acid",
            "Palmitoleic Acid", "Margaric Acid", "Stearic Acid", "Vaccenic Acid", "Oleic Acid",
            "Elaidic Acid", "Linoleic Acid", "Linolelaidic Acid", "Alpha-linolenic Acid",
            "Gamma-linolenic Acid", "Stearidonic Acid", "Nonadecylic Acid", "Arachidic Acid",
            "Gondoic Acid", "Dihomo-Y-linolenic Acid", "Mead Acid", "Arachidonic Acid",
            "Eicosapentaenoic Acid", "Heneicosylic Acid", "Behenic Acid", "Erucic Acid",
            "Docosatetraenoic Acid", "Docosahexaenoic Acid", "Tricosylic Acid",
            "Lignoceric Acid", "Nervonic Acid", "galactomyces", "galactomyces ferment filtrate",
            "galactomyces filtrate ferment",
            "saccharomyces/rosa canina fruit oil ferment filtrate",
            "saccharomyces/linseed oil ferment", "saccharomyces/linseed oil ferment filtrate",
            "saccharomyces/argania spinosa kernel oil ferment filtrate",
            "monascus/brassica napus seed oil ferment",
            "monascus/brassica napus seed oil ferment extract",
            "pseudozyma epicola/soybean flour/argania spinosa kernel oil ferment filtrate",
            "pseudozyma epicola/soybean flour/olive fruit oil ferment filtrate",
            "saccharomyces/sunflower seed oil ferment filtrate",
        ],
        'Rosacea': ["Alcohol", "Fragrance", "Esters", "Retinol"],
        'Eczema': ["Alcohol", "Fragrance", "Glycol"],
    }
    available_conditions = {key.capitalize() for key in sensitivity_ingredients}

    # Find the row for the given product_name
    product_row = product_info[product_info['product_name'] == product_name]

    # Guard against an unknown product or a missing (NaN) ingredient cell, both of
    # which previously crashed on .iloc[0] / .split().
    raw_ingredients = None if product_row.empty else product_row['ingredients'].iloc[0]
    if not isinstance(raw_ingredients, str):
        print(f"SkinClaire: Sorry, I don't have an ingredient list for {product_name}, so I can't run this check.")
        return

    # Map each normalized ingredient name to the spelling used on the product,
    # so matching is case-insensitive but the report shows the original text.
    product_ingredients = {}
    for item in raw_ingredients.split(', '):
        label = item.strip()
        if label:
            product_ingredients.setdefault(label.lower(), label)

    while True:  # Keep asking until a valid input is received
        skin_condition = input("""SkinClaire: Which skin sensitivity do you want to test the product against? (Options: Malasezia, Rosacea, Eczema):
        You:""").strip().capitalize()

        if skin_condition not in available_conditions:
            print("SkinClaire: We don't have data for that skin sensitivity. Please try another one.")
            continue

        # Compare product ingredients with sensitivity ingredients (case-insensitive)
        flagged = {name.strip().lower() for name in sensitivity_ingredients[skin_condition]}
        # Sorted so the report is stable from run to run
        offending_ingredients = sorted(
            (label for key, label in product_ingredients.items() if key in flagged),
            key=str.lower,
        )

        if offending_ingredients:
            print(f"""SkinClaire: {product_name} contains {len(offending_ingredients)} ingredient(s) known to promote {skin_condition}.\nThese ingredients may not be suitable for {skin_condition}: {', '.join(offending_ingredients)}""")
        else:
            print(f"SkinClaire: This product seems safe for {skin_condition}.")
        break  # A valid skin condition was handled, so stop prompting


#main_program(product_info)

# SkinClaire in Action 🎬

In [13]:
# Introduction message
print("SkinClaire: Hi there, I'm SkinClaire, your skincare shopping assistant.")

menu_selection = main_menu()

# Route the conversation to the offering the user picked; anything other than
# "1", "2" or "3" ends the interaction.
if menu_selection not in ("1", "2", "3"):
  print("SkinClaire: Goodbye, and happy shopping!") #End user interaction
elif menu_selection == "3":
  condition_screening(product_info)
elif menu_selection == "2":
  sentiment_comparison()
else:
  prod_recommendation()


SkinClaire: Hi there, I'm SkinClaire, your skincare shopping assistant.
SkinClaire: Here are some ways I can help. Would any of these interest you today? If so, enter the corresponding number.

1. Help me pick a product for my skin type
2. Help me decide between products based on previous customer reviews
3. Help me check if the product I'm interested in is safe for my skin condition
4. I don't need help

SkinClaire: Please enter the number for your selection.

You: 2
SkinClaire: Sure, I can help you decide between those products! Let's get started.
SkinClaire: Alright, let's find out how other skincare lovers are feeling about the products you're considering
SkinClaire: What's the product code for the first product you're considering? Please enter the product code.
You:P311143
SkinClaire: Noted! What's the product code for the other product you're considering?
You:P500019
SkinClaire: Thank you! Now let's see what other skin care lovers have to say about P311143 and P500019

SkinClaire