In [1]:
# Import necessary libraries
import openai
from openai.embeddings_utils import get_embedding, cosine_similarity
import pandas as pd
import numpy as np

In [2]:
# Load the shampoo product data from CSV into a DataFrame.
df = pd.read_csv('./data/shampoo_products_data.csv')

In [3]:
# Inspect column names, non-null counts and dtypes to sanity-check the load.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 21 entries, 0 to 20
Data columns (total 7 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   Product_ID        21 non-null     int64 
 1   Product_Category  21 non-null     object
 2   Brand_Name        21 non-null     object
 3   Product_Name      21 non-null     object
 4   Product_Details   21 non-null     object
 5   Origin            21 non-null     object
 6   Ingredients       21 non-null     object
dtypes: int64(1), object(6)
memory usage: 1.3+ KB


In [4]:
# Configure the OpenAI library to talk to an Azure OpenAI (AOAI) resource.
# Replace every PLEASE_ENTER_... placeholder with your own values before running,
# and avoid committing a real service key together with the notebook.
openai.api_type = "azure"
openai.api_base = "https://PLEASE_ENTER_YOUR_OWNED_AOAI_RESOURCE_NAME.openai.azure.com/"
openai.api_version = "2022-12-01"
openai.api_key = "PLEASE_ENTER_YOUR_OWNED_AOAI_SERVICE_KEY"
# Text-embedding model name; passed as `engine` to get_embedding below.
engine = "PLEASE_ENTER_YOUR_OWNED_AOAI_TEXT_EMBEDDING_MODEL_NAME"

In [5]:
# Embed each product's `Product_Details` text (not the product name) as a vector.
# One get_embedding call is made per row, in DataFrame row order.
embeddings = [get_embedding(text, engine=engine) for text in df['Product_Details'].values]

In [6]:
# Build the symmetric pairwise cosine-similarity matrix of the embeddings.
# The diagonal is fixed at 1.0; each off-diagonal pair is computed once and mirrored.
n_products = len(embeddings)
similarity_matrix = np.eye(n_products)
for row in range(n_products):
    for col in range(row + 1, n_products):
        pair_score = cosine_similarity(embeddings[row], embeddings[col])
        similarity_matrix[row, col] = pair_score
        similarity_matrix[col, row] = pair_score

In [9]:
# Save the similarity matrix to a CSV file.
# Only the numeric values are written (no product-name labels), so rows and
# columns stay in the same order as the rows of `df`.
np.savetxt('./data/similarity_matrix_sp_d.csv', similarity_matrix, delimiter=',')

In [8]:
# Reload the similarity matrix from the CSV file.
# similarity_matrix = np.loadtxt('./data/similarity_matrix_sp_d.csv', delimiter=',')

In [10]:
# Keep the product names as a pandas Series (same index as `df`);
# product_recommendations uses it to map a product name to its row index.
p_name = pd.Series(df['Product_Name'])
p_name

0                 DOVE BOTANIC BREAKAGE PROTECT SHAMPOO
1               DOVE BOTANIC SPLIT ENDS PROTECT SHAMPOO
2                         DOVE LIGHT MOIST PURE SHAMPOO
3                      DOVE JAPAN AIRY MOISTURE SHAMPOO
4                      DOVE JAPAN RICH MOISTURE SHAMPOO
5                           DOVE SHAMPOO STRAIGHT&SILKY
6             VIDAL SASSOON LIGHT & SOFT SMOOTH SHAMPOO
7                 VIDAL SASSOON MOISTURIZING AD SHAMPOO
8             VIDAL SASSOON TEXTURIZED STRAIGHT SHAMPOO
9           VIDAL SASSOON MOISTURING TRT REPAIR SHAMPOO
10                         REJOICE VOLUME FRESH SHAMPOO
11                                  REJOICE OIL REMOVAL
12                            REJOICE OLIVE OIL SHAMPOO
13                      REJOICE MINT REFRESHING SHAMPOO
14                       REJOICE LEMON BALANCED SHAMPOO
15                            REJOICE 3IN1 CARE SHAMPOO
16        ESSENTIAL NOURISHING BREAKAGE DEFENSE SHAMPOO
17            ESSENTIAL MOISTURIZING FRIZZ FREE 

In [11]:
# Create function for Product Recommendations.
def product_recommendations(Product_Name, cos_sim = similarity_matrix, top_n = 10):
    """Return the names of the products most similar to ``Product_Name``.

    Parameters
    ----------
    Product_Name : str
        Exact value from the ``Product_Name`` column; the first matching row
        is used.
    cos_sim : 2-D array-like, optional
        Pairwise similarity matrix whose row/column order matches ``df``.
        Defaults to the ``similarity_matrix`` object that existed when this
        function was defined, so re-run this cell after rebuilding the matrix.
    top_n : int, optional
        Maximum number of recommendations to return (non-negative, default 10).

    Returns
    -------
    list
        Up to ``top_n`` product names, most similar first. The queried
        product itself is never included.

    Raises
    ------
    IndexError
        If ``Product_Name`` does not occur in ``p_name``.
    """
    matches = p_name[p_name == Product_Name].index
    if len(matches) == 0:
        raise IndexError(f"Product_Name not found: {Product_Name!r}")
    # The label doubles as a row position; this relies on `p_name`/`df`
    # carrying the default 0..n-1 index.
    idx = matches[0]
    # Drop the queried product explicitly instead of assuming it sorts first:
    # products with identical details also score 1.0, so slicing off rank 0
    # could discard a genuine match and leave the query in its own results.
    scores = pd.Series(cos_sim[idx]).drop(idx)
    # mergesort is stable, so tied scores come back in a deterministic order.
    ranked = scores.sort_values(ascending = False, kind = 'mergesort')
    top_indexes = list(ranked.iloc[:top_n].index)
    return df['Product_Name'].iloc[top_indexes].tolist()

In [12]:
# Perform Product Recommendations. The product name is only the lookup key;
# similarity comes from the embeddings of each product's Product_Details text.
product_recommendations('DOVE BOTANIC BREAKAGE PROTECT SHAMPOO')

['DOVE BOTANIC SPLIT ENDS PROTECT SHAMPOO',
 'DOVE JAPAN RICH MOISTURE SHAMPOO',
 'DOVE LIGHT MOIST PURE SHAMPOO',
 'DOVE JAPAN AIRY MOISTURE SHAMPOO',
 'Essential Purify Anti Dandruff Shampoo',
 'Essential Purify Weightlessly Smooth Care Shampoo',
 'DOVE SHAMPOO STRAIGHT&SILKY',
 'VIDAL SASSOON LIGHT & SOFT SMOOTH SHAMPOO',
 'VIDAL SASSOON MOISTURIZING AD SHAMPOO',
 'VIDAL SASSOON MOISTURING TRT REPAIR SHAMPOO']

In [13]:
# Label the similarity matrix with product names on both axes for easier reading.
df_product_sim_matrix = pd.DataFrame(similarity_matrix, index = p_name, columns = p_name)
df_product_sim_matrix

Product_Name,DOVE BOTANIC BREAKAGE PROTECT SHAMPOO,DOVE BOTANIC SPLIT ENDS PROTECT SHAMPOO,DOVE LIGHT MOIST PURE SHAMPOO,DOVE JAPAN AIRY MOISTURE SHAMPOO,DOVE JAPAN RICH MOISTURE SHAMPOO,DOVE SHAMPOO STRAIGHT&SILKY,VIDAL SASSOON LIGHT & SOFT SMOOTH SHAMPOO,VIDAL SASSOON MOISTURIZING AD SHAMPOO,VIDAL SASSOON TEXTURIZED STRAIGHT SHAMPOO,VIDAL SASSOON MOISTURING TRT REPAIR SHAMPOO,...,REJOICE OIL REMOVAL,REJOICE OLIVE OIL SHAMPOO,REJOICE MINT REFRESHING SHAMPOO,REJOICE LEMON BALANCED SHAMPOO,REJOICE 3IN1 CARE SHAMPOO,ESSENTIAL NOURISHING BREAKAGE DEFENSE SHAMPOO,ESSENTIAL MOISTURIZING FRIZZ FREE SHAMPOO,Essential Purify Deep Cleansing Care Shampoo,Essential Purify Weightlessly Smooth Care Shampoo,Essential Purify Anti Dandruff Shampoo
Product_Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
DOVE BOTANIC BREAKAGE PROTECT SHAMPOO,1.0,0.928959,0.878041,0.876873,0.901449,0.833824,0.832889,0.832889,0.83159,0.832889,...,0.805911,0.815835,0.802616,0.812817,0.826754,0.825154,0.823127,0.825623,0.836162,0.836228
DOVE BOTANIC SPLIT ENDS PROTECT SHAMPOO,0.928959,1.0,0.869186,0.863585,0.892989,0.83728,0.833326,0.833326,0.840185,0.833326,...,0.792143,0.811582,0.797681,0.805669,0.82366,0.829436,0.836982,0.828807,0.848875,0.836704
DOVE LIGHT MOIST PURE SHAMPOO,0.878041,0.869186,1.0,0.891477,0.903847,0.843618,0.871747,0.871747,0.896378,0.871747,...,0.850374,0.855067,0.857645,0.848561,0.856082,0.845249,0.851966,0.857374,0.85223,0.859361
DOVE JAPAN AIRY MOISTURE SHAMPOO,0.876873,0.863585,0.891477,1.0,0.944244,0.840913,0.838934,0.838934,0.848145,0.838934,...,0.829437,0.828079,0.826689,0.812628,0.831174,0.815084,0.827711,0.851902,0.860925,0.856305
DOVE JAPAN RICH MOISTURE SHAMPOO,0.901449,0.892989,0.903847,0.944244,1.0,0.840201,0.845093,0.845093,0.857809,0.845093,...,0.828754,0.852902,0.829016,0.826561,0.834641,0.831375,0.842356,0.873601,0.866325,0.874442
DOVE SHAMPOO STRAIGHT&SILKY,0.833824,0.83728,0.843618,0.840913,0.840201,1.0,0.840713,0.840713,0.860568,0.840713,...,0.822143,0.807144,0.817127,0.824878,0.83383,0.837633,0.844276,0.853283,0.859906,0.853607
VIDAL SASSOON LIGHT & SOFT SMOOTH SHAMPOO,0.832889,0.833326,0.871747,0.838934,0.845093,0.840713,1.0,1.0,0.893384,1.0,...,0.851724,0.857776,0.858899,0.852775,0.86776,0.85346,0.863061,0.862609,0.863478,0.864596
VIDAL SASSOON MOISTURIZING AD SHAMPOO,0.832889,0.833326,0.871747,0.838934,0.845093,0.840713,1.0,1.0,0.893384,1.0,...,0.851724,0.857776,0.858899,0.852775,0.86776,0.85346,0.863061,0.862609,0.863478,0.864596
VIDAL SASSOON TEXTURIZED STRAIGHT SHAMPOO,0.83159,0.840185,0.896378,0.848145,0.857809,0.860568,0.893384,0.893384,1.0,0.893384,...,0.865634,0.875364,0.876321,0.875497,0.876349,0.864219,0.869896,0.865238,0.862635,0.85501
VIDAL SASSOON MOISTURING TRT REPAIR SHAMPOO,0.832889,0.833326,0.871747,0.838934,0.845093,0.840713,1.0,1.0,0.893384,1.0,...,0.851724,0.857776,0.858899,0.852775,0.86776,0.85346,0.863061,0.862609,0.863478,0.864596
