In [1]:
import numpy as np
import pandas as pd
import random
from collections import Counter

In [3]:
df = pd.read_csv('../Dataset/skincare_new.csv', delimiter = ',')
df.shape

(1138, 6)

In [4]:
list_ingredients = []

for i in df['ingredients']:
    ingreds_list = i.split(', ')
    for j in ingreds_list:
        list_ingredients.append(j)

In [5]:
list_ingredients = sorted(set(list_ingredients))
list_ingredients.remove('')
for i in range(len(list_ingredients)):
    if list_ingredients[i][-1] == ' ':
        list_ingredients[i] = list_ingredients[i][0:-1]
        
list_ingredients = sorted(set(list_ingredients))
list_ingredients[10:20]

['7-dehydrocholesterol',
 'abies alba leaf oil',
 'abies balsamea extract',
 'abies sibirica oil',
 'acacia concinna fruit extract',
 'acacia decurrens wax',
 'acacia senegal gum',
 'acacia seyal gum extract',
 'acer saccharum extract',
 'acetate']

In [6]:
one_hot_list = [[0] * 0 for i in range(len(list_ingredients))]

for i in df['ingredients']:
    k=0
    for j in list_ingredients:
        if j in i:
            one_hot_list[k].append(1)
        else:
            one_hot_list[k].append(0)
        k+=1
        
matrix_ingredients = pd.DataFrame(one_hot_list).transpose()
matrix_ingredients.columns = [sorted(set(list_ingredients))]

matrix_ingredients

Unnamed: 0,"1,10-decanediol","1,2-hexanediol",1-methylhydantoin-2-imide,10-hydroxydecanoic acid,"2,6-dimethyl-7-octen-2-ol","2-bromo-2-nitropropane-1,3-diol",2-oleamido-1,3-o-ethyl ascorbic acid,3-octadecanediol,4-t-butylcyclohexanol,...,zinc glycine,zinc laurate,zinc oxide,zinc pca,zinc sulfate,zingiber aromaticus extract,zingiber cassumunar root oil,zingiber officinale root extract,zingiber officinale root oil,zizyphus jujuba seed extract
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1133,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1134,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1135,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1136,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


-----


salicylic acid = oily <br>
ceramide = sensitive <br>
niacinamide = combination <br>
squalene = dry skin <br>
sodium hyaluronate = redness <br>
benzoyl = acne <br>
retinol = aging <br>

In [7]:
def calculate_cosine_similarity(p1, p2):
    dot_product = np.dot(p1, p2)
    norm_1 = np.linalg.norm(p1)
    norm_2 = np.linalg.norm(p2)
    cos_sim = dot_product / (norm_1 * norm_2)
    return cos_sim

In [8]:
def get_product_vector(product):
    binary_list = []
    idx = df[df['skincare_name'] == product].index.item()
    for i in matrix_ingredients.iloc[idx][1:]:
        binary_list.append(i)
    p = np.array(binary_list).reshape(1, -1)
    p = [val for sublist in p for val in sublist]
    return p

In [9]:
def calculate_similarity_scores(p1, input_type):
    similarity_scores = []
    for j in range(input_type.index[0], input_type.index[0] + len(input_type)):
        binary_list2 = []
        for k in matrix_ingredients.iloc[j][1:]:
            binary_list2.append(k)
        p2 = np.array(binary_list2).reshape(1, -1)
        p2 = [val for sublist in p2 for val in sublist]
        cos_sim = calculate_cosine_similarity(p1, p2)
        similarity_scores.append(cos_sim)
    return similarity_scores

In [10]:
def recommend_products(product):
    similarity_scores = []

    p1 = get_product_vector(product)
    prod_type = df['skincare_type'][df['skincare_name'] == product].iat[0]
    input_type = df[df['skincare_type'] == prod_type]
    similarity_scores = calculate_similarity_scores(p1, input_type)

    input_type = pd.DataFrame(input_type)
    input_type['cos_sim'] = similarity_scores
    input_type = input_type.sort_values('cos_sim', ascending=False)
    input_type = input_type[input_type.skincare_name != product] 
    
    return input_type

----

In [14]:
def get_top_ingredients(input_type, product):
    x = 0
    brands = []
    output = []
    ingredients_list = []
    brand_search = df['brand'][df['skincare_name'] == product].iat[0]

    for m in range(len(input_type)):
        brand = input_type['brand'].iloc[x]
        if len(brands) == 0:
            if brand != brand_search:
                brands.append(brand)
                output.append(input_type.iloc[x])
                ingredients_list.append(input_type['ingredients'].iloc[x])
        elif brands.count(brand) < 2:
            if brand != brand_search:
                brands.append(brand)
                output.append(input_type.iloc[x])
                ingredients_list.append(input_type['ingredients'].iloc[x])
        x += 1

    df5 = pd.DataFrame(output)['ingredients'].head(5)

    # Split the ingredients into separate values and flatten the list
    ingredients_list = df5.str.split(', ').sum()

    # Count the occurrences of each ingredient
    ingredient_counts = pd.Series(ingredients_list).value_counts()

    return ingredient_counts.head(5)

In [15]:
def recommend_products_by_ingredient(search_term):
    matching_products = df[df['ingredients'].str.contains(search_term, case=False)]
    matching_products = matching_products.sample(frac=1)
    num_products = min(5, len(matching_products))
    random_products = matching_products.head(num_products)

    combined_output = {}  # Dictionary to store ingredient names and their totals

    for product in random_products['skincare_name']:
        recommended_products = recommend_products(product)
        top_ingredients = get_top_ingredients(recommended_products, product)

        for ingredient, count in top_ingredients.items():
            if ingredient in combined_output:
                combined_output[ingredient] += count
            else:
                combined_output[ingredient] = count
        
    sorted_output = sorted(combined_output.items(), key=lambda x: x[1], reverse=True)

    # Print the combined output
    x = 0
    for ingredient, count in sorted_output:
        # print(f"{ingredient}\t{count}")
        print(ingredient)
        x += 1
        if x == 5:
            break

In [16]:
recommend_products_by_ingredient('squalene')

sodium hyaluronate
phenoxyethanol
butylene glycol
caprylyl glycol
dimethicon


----

In [11]:
def get_top_product(input_type, product):
    x = 0
    brands = []
    output = []
    ingredients_list = []
    brand_search = df['brand'][df['skincare_name'] == product].iat[0]

    for m in range(len(input_type)):
        brand = input_type['brand'].iloc[x]
        if len(brands) == 0:
            if brand != brand_search:
                brands.append(brand)
                output.append(input_type.iloc[x])
                ingredients_list.append(input_type['ingredients'].iloc[x])
        elif brands.count(brand) < 2:
            if brand != brand_search:
                brands.append(brand)
                output.append(input_type.iloc[x])
                ingredients_list.append(input_type['ingredients'].iloc[x])
        x += 1

    return pd.DataFrame(output)[['skincare_name', 'product_url']].head(5)

In [12]:
def recommend_products_by_name(search_term):
    matching_products = df[df['ingredients'].str.contains(search_term, case=False)]
    matching_products = matching_products.sample(frac=1)
    num_products = min(5, len(matching_products))
    random_products = matching_products.head(num_products)

    for _, row in random_products.iterrows():
        product = row['skincare_name']
        recommended_products = recommend_products(product)
        top_ingredients = get_top_product(recommended_products, product)
        print(f"{product}\n{row['product_url']}")

In [13]:
recommend_products_by_name('salicylic acid')

Bioderma Hydrabio Gel Cream 40ml
https://www.lookfantastic.com/bioderma-hydrabio-gel-cream-40ml/11688484.html
Aveda Hand Relief Night Renewal Serum 30ml
https://www.lookfantastic.com/aveda-hand-relief-night-renewal-serum-30ml/11032875.html
GLAMGLOW Supermud Mask 50g
https://www.lookfantastic.com/glamglow-supermud-mask-50g/11422899.html
Garnier Pure Active 3in1 Charcoal Blackhead Mask Wash Scrub 150ml
https://www.lookfantastic.com/garnier-pure-active-3in1-charcoal-blackhead-mask-wash-scrub-150ml/11919612.html
Estée Lauder Perfectionist Pro Rapid Brightening Treatment with Ferment2+ Vitamin C 50ml
https://www.lookfantastic.com/estee-lauder-perfectionist-pro-rapid-brightening-treatment-with-ferment2-vitamin-c-50ml/12449351.html


----

In [23]:
import pickle

In [24]:
functions = {
    'calculate_cosine_similarity': calculate_cosine_similarity,
    'get_product_vector': get_product_vector,
    'calculate_similarity_scores': calculate_similarity_scores,
    'recommend_products': recommend_products,
    'get_top_ingredients': get_top_ingredients,
    'recommend_products_by_ingredient': recommend_products_by_ingredient
}

# Save the functions to a file using pickle
with open('functions.pkl', 'wb') as f:
    pickle.dump(functions, f)

----

In [25]:
with open('functions.pkl', 'rb') as f:
    functions = pickle.load(f)

In [26]:
input_user = 'benzoyl'

# Call the recommend_products_by_ingredient function
recommendations = functions['recommend_products_by_ingredient'](input_user)

# Print the recommendations
print(recommendations)


glycerin	20
parfum	19
cocamidopropyl betaine	15
phenoxyethanol	10
sodium chloride	10
None
