In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily build the full path of every file found anywhere below the input
# directory, then print the paths one per line in traversal order.
input_file_paths = (
    os.path.join(folder, file_name)
    for folder, _subdirs, file_names in os.walk('/kaggle/input')
    for file_name in file_names
)
for file_path in input_file_paths:
    print(file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Read the skincare CSV into `df`, using the file's first column as the row
# index, and preview the first rows as the cell output.
DATA_PATH = '/kaggle/input/sephoradata/skindataall.csv'
df = pd.read_csv(DATA_PATH, index_col=[0])
df.head()

## Content-based recommendations using product ingredients

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

# Build the ingredient-similarity model from `df`, the skincare DataFrame
# loaded earlier in the notebook.

# Keep only the columns needed for content-based recommendation, drop exact
# duplicate rows and renumber the rows 0..n-1 so that row positions in
# `df_cont` line up with the rows/columns of the similarity matrix.
# Chaining the calls (rather than `inplace=True` on a column subset of `df`)
# avoids pandas' SettingWithCopyWarning and keeps this cell safe to re-run.
CONTENT_COLUMNS = ['Product', 'Product_id', 'Ingredients', 'Product_Url', 'Ing_Tfidf', 'Rating']
df_cont = df[CONTENT_COLUMNS].drop_duplicates().reset_index(drop=True)

# TF-IDF (Term Frequency-Inverse Document Frequency) vectorizer over word
# unigrams and bigrams with English stop words removed.
# `min_df=1` keeps every term that occurs in at least one document (i.e. all
# of them); an integer `min_df` below 1 is rejected by recent scikit-learn
# versions.
tf = TfidfVectorizer(analyzer='word', ngram_range=(1, 2), min_df=1, stop_words='english')

# Fit and transform on the 'Ingredients' column. Missing ingredient lists are
# treated as empty text because the vectorizer cannot process NaN values.
tfidf_matrix = tf.fit_transform(df_cont['Ingredients'].fillna(''))

# Pairwise similarity between all products. TfidfVectorizer L2-normalises
# each row by default, so the plain dot product computed by `linear_kernel`
# is the cosine similarity.
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

# Lookup table: product name -> row position in `df_cont`.
# If a product name occurs on several rows, only its first position is kept
# so that `indices[name]` always yields a single integer.
indices = pd.Series(df_cont.index, index=df_cont['Product'])
indices = indices[~indices.index.duplicated(keep='first')]

# Defining a function for content-based recommendations
def content_recommendations(product, top_n=10):
    """Return the products whose ingredients are most similar to `product`.

    Relies on three notebook-level objects built in the setup cell:
    `indices` (product name -> row position), `cosine_sim` (square pairwise
    similarity matrix) and `df_cont` (one row per matrix position).

    Parameters
    ----------
    product : str
        Product name exactly as it appears in `df_cont['Product']`.
    top_n : int, default 10
        Maximum number of recommendations to return.

    Returns
    -------
    pandas.DataFrame
        Up to `top_n` rows of `df_cont`, ordered from most to least similar.
        The queried row itself is never included.

    Raises
    ------
    KeyError
        If `product` is not a known product name.
    ValueError
        If `top_n` is negative.
    """
    if top_n < 0:
        raise ValueError(f"top_n must be non-negative, got {top_n}")
    if product not in indices.index:
        raise KeyError(f"Unknown product: {product!r}")

    # A product name that occurs on several rows makes the lookup return a
    # Series of positions instead of a scalar; use the first occurrence.
    idx = indices[product]
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]
    idx = int(idx)

    # Similarity of the queried product to every product (including itself).
    scores = np.asarray(cosine_sim[idx]).ravel()

    # Descending order; the stable sort keeps ties in ascending row order.
    ranked = np.argsort(-scores, kind='stable')

    # Exclude the queried row by position rather than assuming it is ranked
    # first: a product with identical ingredients ties at the top score and
    # may sort ahead of it.
    ranked = ranked[ranked != idx][:top_n]

    return df_cont.iloc[ranked]

In [None]:
# Example query: display the recommendations for one product name.
query_product = 'The Rice Polish Foaming Enzyme Powder'
content_recommendations(query_product)