In [1]:
import pandas as pd
import re
import numpy as np

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel, cosine_similarity
from sklearn.manifold import TSNE

In [2]:
# load data from csv file (cleaned dataset)

df_4 = pd.read_csv("C:/Users/Amin/project-amin/data cleaning (eda)/sephoramy_webscrape_dataset_clean.csv")

In [3]:
df_4.head()

Unnamed: 0,Category,URL,brand,name,price,skin_concern,rating,Combination,Dry,Normal,Oily,Sensitive
0,cleanser-and-exfoliator,https://www.sephora.my/products/origins-go-go-...,ORIGINS,"Go Go Ginzing™ Skincare Trio To Cleanse, Hydra...",130,"Dark Circles, Dryness, Dullness, Puffiness",0.0,1,1,1,1,0
1,cleanser-and-exfoliator,https://www.sephora.my/products/gallinee-face-...,GALLINÉE,Face Mask & Scrub,45,"Dryness, Dullness, Uneven Skin Tone",4.5,1,1,1,1,1
2,cleanser-and-exfoliator,https://www.sephora.my/products/the-inkey-list...,THE INKEY LIST,Oat Cleansing Balm • 150ml,46,Balm,4.0,1,1,1,1,0
3,cleanser-and-exfoliator,https://www.sephora.my/products/fresh-soy-face...,FRESH,Soy Face Cleanser,177,"Dryness, Ageing, Dullness, Firmness & Elasticity",4.5,1,1,1,1,1
4,cleanser-and-exfoliator,https://www.sephora.my/products/origins-best-o...,ORIGINS,"Best Of The Season Skincare Set To Cleanse, Pr...",65,"Dryness, Dullness, Oiliness",3.5,1,1,1,1,0


In [4]:
# recommend product by user features
category = df_4.Category.unique().tolist()
skintype = df_4.columns[7:].tolist()

def recommend_products_by_user(category, skintype):
    df = df_4[df_4['Category'] == category][df_4[skintype] == 1]
    df = df.sort_values('rating', ascending=False).head()
    
    print('These are top products related to your features:')
    return df

In [5]:
recommend_products_by_user('toner', 'Dry')

These are top products related to your features:


  df = df_4[df_4['Category'] == category][df_4[skintype] == 1]


Unnamed: 0,Category,URL,brand,name,price,skin_concern,rating,Combination,Dry,Normal,Oily,Sensitive
157,toner,https://www.sephora.my/products/lab-series-oil...,LAB SERIES,Oil Control Clearing Solution,105,Oiliness,5.0,1,1,1,1,0
119,toner,https://www.sephora.my/products/origins-dr-and...,ORIGINS,Dr. Andrew Weil for Origins™ Mega-Mushroom Rel...,268,"Acne & Blemishes, Dryness",5.0,1,1,1,1,0
172,toner,https://www.sephora.my/products/la-mer-the-ton...,LA MER,The Tonic,385,"Dryness, Dullness, Uneven Skin Tone",5.0,1,1,1,1,0
146,toner,https://www.sephora.my/products/erno-laszlo-hy...,ERNO LASZLO,Hydraphel Skin Supplement • 200ml,440,"Ageing, Dryness, Dullness",4.5,0,1,1,0,1
133,toner,https://www.sephora.my/products/saturday-skin-...,SATURDAY SKIN,Pore Clarifying Toner,130,"Uneven Skin Texture, Uneven Skin Tone",4.5,1,1,1,1,0


In [6]:
tf = TfidfVectorizer(analyzer='word', ngram_range=(1, 2), min_df=0, stop_words='english')
tfidf_matrix = tf.fit_transform(df_4['skin_concern'])

In [7]:
test = pd.DataFrame(tfidf_matrix.toarray(), index=df_4.name, columns=tf.get_feature_names())
test.head()

Unnamed: 0_level_0,acne,acne blemishes,ageing,ageing dark,ageing dryness,ageing dullness,ageing fine,ageing pigmentation,ageing uneven,balm,...,uneven,uneven skin,visible,visible pores,wrinkles,wrinkles firmness,wrinkles oiliness,wrinkles pigmentation,wrinkles uneven,wrinkles visible
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"Go Go Ginzing™ Skincare Trio To Cleanse, Hydrate & Brighten (Limited Edition)",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Face Mask & Scrub,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.30555,0.30555,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Oat Cleansing Balm • 150ml,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Soy Face Cleanser,0.0,0.0,0.249186,0.0,0.0,0.391504,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"Best Of The Season Skincare Set To Cleanse, Prep & Hydrate (Limited Edition)",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [8]:
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

indices = pd.Series(df_4.index, index=df_4['name'])

In [9]:
# recommend products based on similarities of user past products

def recommend_products_by_content(product):
    idx = indices[product]
    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:5]
    product_indices = [i[0] for i in sim_scores]
    return (df_4.iloc[product_indices])

In [10]:
recommend_products_by_content('Moist Cream Cleanser')

Unnamed: 0,Category,URL,brand,name,price,skin_concern,rating,Combination,Dry,Normal,Oily,Sensitive
9,cleanser-and-exfoliator,https://www.sephora.my/products/tarte-micellar...,SEPHORA EXCLUSIVE\r\nTARTE,Micellar Magic Makeup Remover & Cleanser • 45ml,45,Dryness,3.5,1,1,1,1,0
17,cleanser-and-exfoliator,https://www.sephora.my/products/sephora-collec...,SEPHORA COLLECTION,All-Over Face & Body Solid Cleanser • 75g,29,Dryness,4.5,1,1,1,1,0
31,cleanser-and-exfoliator,https://www.sephora.my/products/farmacy-green-...,FARMACY,Green Clean Makeup Meltaway Cleansing Balm • 1...,158,Dryness,4.5,1,1,1,1,0
34,cleanser-and-exfoliator,https://www.sephora.my/products/clinique-liqui...,CLINIQUE,Liquid Facial Soap (Oily Skin),100,Dryness,4.0,1,0,0,1,0


In [38]:
model = TSNE(n_components = 2, learning_rate = 200)
tsne_features = model.fit_transform(tfidf_matrix)

In [42]:
df_4['X'] = tsne_features[:, 0]
df_4['Y'] = tsne_features[:, 1]

df_4.head()

Unnamed: 0,Category,URL,brand,name,price,skin_concern,rating,Combination,Dry,Normal,Oily,Sensitive,X,Y
0,cleanser-and-exfoliator,https://www.sephora.my/products/origins-go-go-...,ORIGINS,"Go Go Ginzing™ Skincare Trio To Cleanse, Hydra...",130,"Dark Circles, Dryness, Dullness, Puffiness",0.0,1,1,1,1,0,13.88266,-4.561907
1,cleanser-and-exfoliator,https://www.sephora.my/products/gallinee-face-...,GALLINÉE,Face Mask & Scrub,45,"Dryness, Dullness, Uneven Skin Tone",4.5,1,1,1,1,1,-4.212562,-9.239447
2,cleanser-and-exfoliator,https://www.sephora.my/products/the-inkey-list...,THE INKEY LIST,Oat Cleansing Balm • 150ml,46,Balm,4.0,1,1,1,1,0,1.934783,-9.215893
3,cleanser-and-exfoliator,https://www.sephora.my/products/fresh-soy-face...,FRESH,Soy Face Cleanser,177,"Dryness, Ageing, Dullness, Firmness & Elasticity",4.5,1,1,1,1,1,9.331879,-2.646266
4,cleanser-and-exfoliator,https://www.sephora.my/products/origins-best-o...,ORIGINS,"Best Of The Season Skincare Set To Cleanse, Pr...",65,"Dryness, Dullness, Oiliness",3.5,1,1,1,1,0,-0.654494,11.710971


In [40]:
df_4.to_csv('sephoramy_webscrape_dataset_TSNE.csv', encoding = 'utf-8-sig', index = False)