In [1]:
import numpy as np
import pandas as pd
# import matplotlib.pyplot as plt
# import seaborn as sns
import ast

# Loading and transforming the dataset to meet the desired specifications

In [2]:
metadf = pd.read_csv('https://raw.githubusercontent.com/ascottR/Recommendation_SYS/refs/heads/main/artifacts/meta_final.csv')

reviewdf = pd.read_csv('https://raw.githubusercontent.com/ascottR/Recommendation_SYS/refs/heads/main/artifacts/review_final.csv')

In [3]:
metadf.columns

Index(['Unnamed: 0', 'title', 'average_rating', 'rating_number', 'features',
       'description', 'price', 'images', 'store', 'categories', 'details',
       'parent_asin'],
      dtype='object')

In [4]:
reviewdf.columns

Index(['user_id', 'rating', 'title', 'asin', 'parent_asin', 'timestamp',
       'helpful_vote', 'verified_purchase'],
      dtype='object')

In [5]:
metadf.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 29356 entries, 0 to 29355
Data columns (total 12 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Unnamed: 0      29356 non-null  int64  
 1   title           29356 non-null  object 
 2   average_rating  29356 non-null  float64
 3   rating_number   29356 non-null  int64  
 4   features        29356 non-null  object 
 5   description     29356 non-null  object 
 6   price           29356 non-null  float64
 7   images          29356 non-null  object 
 8   store           29338 non-null  object 
 9   categories      29356 non-null  object 
 10  details         29356 non-null  object 
 11  parent_asin     29356 non-null  object 
dtypes: float64(2), int64(2), object(8)
memory usage: 2.7+ MB


In [6]:
metadf['title']

0        Portable Karaoke Machine for Adults, Unique Gi...
1        Musical Instrument Lyre Harp, 16 Metal Strings...
2        Set of 5 White Guitar Picks, 1.20mm Thick with...
3               Danelectro Stock '59 Electric Guitar - Red
4        HENGYEE 52.5mm Guitar Stratocaster Tremolo Bri...
                               ...                        
29351    American Dj Big Shot Led Variable Speed Led Po...
29352    Golden Age Tune-o-matic Bridge For Archtop Gui...
29353    Fender Vintera 60s Stratocaster Electric Guita...
29354                                     VocoPro (VHF4000
29355    Saxophone Reeds Case, PU Leather Saxophone Cla...
Name: title, Length: 29356, dtype: object

In [7]:
print(f"Mean global rating: {round(metadf['average_rating'].mean(),2)}.")

Mean global rating: 4.28.


In [8]:
# Dropping unnecessary columns from reviewdf
processed_df = reviewdf.drop(['timestamp', 'helpful_vote','asin','title'], axis=1)

# Function to convert string representations of lists to actual lists
def convert_strings_to_lists(df, columns):
    for column in columns:
        df[column] = df[column].apply(lambda x: ast.literal_eval(x) if isinstance(x, str) and x.startswith('[') else x)

# Apply the function to the rating and parent_asin columns
convert_strings_to_lists(processed_df, ['rating', 'parent_asin','verified_purchase'])

#explode the dataset
df_exploded = processed_df.explode(['rating', 'parent_asin','verified_purchase'])

df_exploded.shape


(250965, 4)

In [9]:
reviewdf.columns

Index(['user_id', 'rating', 'title', 'asin', 'parent_asin', 'timestamp',
       'helpful_vote', 'verified_purchase'],
      dtype='object')

In [10]:
df_exploded

Unnamed: 0,user_id,rating,parent_asin,verified_purchase
0,AE2226PENZTTCDKFGRTUCUX2NU2Q,5,B01DECWM0G,True
0,AE2226PENZTTCDKFGRTUCUX2NU2Q,5,B00HU25WSQ,False
1,AE2252DKW4XJIZP5QPFMQVJBVRTA,5,B07F2NZWY5,True
1,AE2252DKW4XJIZP5QPFMQVJBVRTA,4,B091R14NS3,True
2,AE225P7FIMCUQD3TPAOF5LSGCLYQ,4,B0757Q45NK,True
...,...,...,...,...
85035,AHZZX3DQ6IZHVV7GI77GL4PAQKCA,5,B07635X9VJ,True
85036,AHZZXAWBVWME2D72MMSB6HU7TCAA,5,B09M7CRWH3,True
85036,AHZZXAWBVWME2D72MMSB6HU7TCAA,5,B0BFKQ9QXD,True
85036,AHZZXAWBVWME2D72MMSB6HU7TCAA,4,B09V91H5XM,True


In [11]:
metadf.shape

(29356, 12)

In [12]:
# Step 1: Count the number of ratings per user
user_rating_counts = df_exploded.groupby('user_id').size()

# Step 2: Create a boxplot to visualize outliers
#plt.figure(figsize=(10, 6))
#sns.boxplot(x=user_rating_counts)

# Step 3: Add titles and labels
#plt.title('Boxplot of Number of Reviews per User', fontsize=16)
#plt.xlabel('Number of Reviews', fontsize=12)

# Step 4: Show the plot
#plt.show()

In [13]:
# Step 1: Count the number of ratings per user
user_rating_counts = df_exploded.groupby('user_id').size()

# Step 2: Calculate IQR to detect outliers
Q1 = np.percentile(user_rating_counts, 25)  # First quartile (25%)
Q3 = np.percentile(user_rating_counts, 75)  # Third quartile (75%)
IQR = Q3 - Q1  # Interquartile Range

# Step 3: Define the lower and upper bounds to detect outliers
lower_bound = Q1 - 1.5 * IQR
upper_bound = Q3 + 1.5 * IQR

# Step 4: Filter users within the bounds (i.e., non-outliers)
non_outlier_users = user_rating_counts[(user_rating_counts >= lower_bound) & (user_rating_counts <= upper_bound)].index

# Step 5: Keep only the rows of users that are not outliers
df_filtered_no_outliers = df_exploded[df_exploded['user_id'].isin(non_outlier_users)]

# Step 6: Show the shape of the filtered DataFrame
print("Filtered DataFrame (users without outliers):")
print(df_filtered_no_outliers.shape)

Filtered DataFrame (users without outliers):
(180825, 4)


In [14]:
df_filtered_no_outliers

Unnamed: 0,user_id,rating,parent_asin,verified_purchase
0,AE2226PENZTTCDKFGRTUCUX2NU2Q,5,B01DECWM0G,True
0,AE2226PENZTTCDKFGRTUCUX2NU2Q,5,B00HU25WSQ,False
1,AE2252DKW4XJIZP5QPFMQVJBVRTA,5,B07F2NZWY5,True
1,AE2252DKW4XJIZP5QPFMQVJBVRTA,4,B091R14NS3,True
2,AE225P7FIMCUQD3TPAOF5LSGCLYQ,4,B0757Q45NK,True
...,...,...,...,...
85035,AHZZX3DQ6IZHVV7GI77GL4PAQKCA,5,B07635X9VJ,True
85036,AHZZXAWBVWME2D72MMSB6HU7TCAA,5,B09M7CRWH3,True
85036,AHZZXAWBVWME2D72MMSB6HU7TCAA,5,B0BFKQ9QXD,True
85036,AHZZXAWBVWME2D72MMSB6HU7TCAA,4,B09V91H5XM,True


In [15]:
null_values = df_filtered_no_outliers.isnull().sum()

# Print the result
print(null_values)

user_id              0
rating               0
parent_asin          0
verified_purchase    0
dtype: int64


# Most Popular Product Reccomendation
Most Populer product reccomendation based on average_rating and rating_number.

In [16]:
#sns.histplot(metadf['rating_number'], bins=30, kde=False)
#plt.title("Distribution of the number of ratings per item")
#plt.xlabel("Number of Ratings")
#plt.ylabel("Count of Items")
#plt.show()

In [17]:
# Round the 'average_rating' to the nearest 0.5
metadf['rounded_rating'] = np.round(metadf['average_rating'] * 2) / 2

# Plot the distribution of the rounded ratings
#sns.countplot(x="rounded_rating", data=metadf, palette="viridis")
#plt.title("Distribution of product ratings (rounded to nearest 0.5)", fontsize=14)
#plt.show()

In [18]:
# Calculate the overall average rating
overall_average = metadf['average_rating'].mean()

# Set a minimum count of ratings to consider
C = metadf['rating_number'].quantile(0.75)
print(f"75th percentile of rating counts: {C}")

# Calculate Bayesian average for each item
metadf['bayesian_average'] = (metadf['rating_number'] * metadf['average_rating'] + C * overall_average) / (metadf['rating_number'] + C)

# Rank items based on the Bayesian average
ranked_items = metadf.nlargest(10, 'bayesian_average')

# Display the results
ranked_items.sort_values('bayesian_average', ascending=False).head(10)

75th percentile of rating counts: 57.0


Unnamed: 0.1,Unnamed: 0,title,average_rating,rating_number,features,description,price,images,store,categories,details,parent_asin,rounded_rating,bayesian_average
6602,4,Shure SM7B Vocal Dynamic Microphone for Broadc...,4.9,9512,['ONE MICROPHONE FOR EVERYTHING - Studio Recor...,"['The SM7B dynamic microphone has a smooth, fl...",399.0,{'hi_res': 'https://m.media-amazon.com/images/...,Shure,"['Microphones & Accessories', 'Microphones', '...","{'Item Weight': '2.7 pounds', 'Product Dimensi...",B0B89ZSYS7,5.0,4.896281
16936,13972,Jim Dunlop Tortex Standard 1.14mm Purple Guita...,4.9,4367,"['The Pick used by top musicians worldwide', '...",['The pick used by top musicians worldwide. To...,16.47,{'hi_res': 'https://m.media-amazon.com/images/...,JIM DUNLOP,"['Instrument Accessories', 'Guitar & Bass Acce...","{'Item Weight': '1.12 ounces', 'Product Dimens...",B07F5PX9Y3,5.0,4.891956
16377,2941,"Evans Genera HD Dry Snare Drum Head, 13” (Whit...",4.9,1729,['TWO PLIES OF FILM – The 13” Evans Genera HD ...,"['Product Description', 'Whether you play rock...",21.99,{'hi_res': 'https://m.media-amazon.com/images/...,Evans,"['Instrument Accessories', 'Drum & Percussion ...","{'Item Weight': '0.6 Pounds', 'Product Dimensi...",B0BK6S7QF4,5.0,4.880075
19791,6102,"Remo E1-0312-00 Buffalo Drum - Standard, 12""",4.9,1701,"['12"" diameter x 3.5"" thick pre-tuned buffalo ...","['E10312-00 Size: 12"" x 3.5"" Features: -Buffal...",71.95,{'hi_res': 'https://m.media-amazon.com/images/...,Remo,"['Drums & Percussion', 'Folk & World Hand Drum...","{'Item Weight': '0.8 ounces', 'Product Dimensi...",B09RCVDQ8M,5.0,4.879758
8066,166524,"Remo Buffalo Drum - Black Earth, 16""",4.9,1079,"['Size: 16"" Diameter, 3. 5"" Depth', 'Produces ...",['The Bahia bass buffalo drums produce a low f...,104.95,{'hi_res': 'https://m.media-amazon.com/images/...,Remo,"['Drums & Percussion', 'Drum Sets & Set Compon...","{'Item Weight': '16 Ounces', 'Product Dimensio...",B0BLVHR9MG,5.0,4.868675
85,21417,"Evans REMAD Resonant Bass Drum Head, 20 Inch",4.9,724,"['20"" resonant drum head made using a single p...","['With accentuated low-end response, the EMAD ...",52.99,{'hi_res': 'https://m.media-amazon.com/images/...,Evans,"['Instrument Accessories', 'Drum & Percussion ...","{'Item Weight': '1.63 pounds', 'Item model num...",B09HCVK9HL,5.0,4.854436
2648,108783,Sennheiser Professional e 609 Silver Super-Car...,4.9,722,['Supercardioid pickup pattern: Provides insul...,"['Product Description', ""Sennheiser e609 Silve...",99.95,{'hi_res': 'https://m.media-amazon.com/images/...,Sennheiser Pro Audio,"['Microphones & Accessories', 'Microphones', '...","{'Item Weight': '4.4 ounces', 'Product Dimensi...",B073WND433,5.0,4.854319
15475,139476,GraphTech PQL-7004-00 TUSQ Sleek String Trees,4.9,680,"['Two XL string trees', 'Two long chrome screw...",['TUSQ Sleek String Trees'],13.98,{'hi_res': 'https://m.media-amazon.com/images/...,Graph Tech,"['Instrument Accessories', 'Guitar & Bass Acce...","{'Item Weight': '0.81 ounces', 'Product Dimens...",B0B29LK8QD,5.0,4.851716
20820,97017,Jim Dunlop Hetfield's White Fang Custom 1.14mm...,4.9,678,['Combines the specially engineered shape of F...,"[""Jim Dunlop Hetfield's White Fang Custom 1.14...",12.59,{'hi_res': 'https://m.media-amazon.com/images/...,JIM DUNLOP,"['Instrument Accessories', 'Guitar & Bass Acce...","{'Item Weight': '0.634 ounces', 'Product Dimen...",B0B8Z1RBY4,5.0,4.851585
4910,88759,Ernie Ball 2221 Nickel Slinky Lime Guitar Stri...,4.9,604,['Nickel-plated steel wire wrapped around tin-...,['What a deal! Two free packs of Slinkys when ...,59.88,{'hi_res': 'https://m.media-amazon.com/images/...,Ernie Ball,"['Instrument Accessories', 'Guitar & Bass Acce...","{'Item Weight': '15.2 ounces', 'Product Dimens...",B004HNZZCY,5.0,4.846164


# Check Sparsity

In [19]:
from scipy.sparse import csr_matrix

def create_X_custom(df):
    """
    Generates a sparse matrix from a DataFrame containing user ratings for products.

    Args:
        df: pandas DataFrame containing user_id, parent_asin, and ratings (as lists)

    Returns:
        X: sparse matrix
        user_mapper: dict that maps user id's to user indices
        user_inv_mapper: dict that maps user indices to user id's
        item_mapper: dict that maps item id's to item indices
        item_inv_mapper: dict that maps item indices to item id's
    """
    # Explode the DataFrame to create individual rows for each rating
    #df_filtered_no_outliers = df.explode(['rating', 'parent_asin'])

    # Ensure the rating column contains numeric values
    df_exploded['rating'] = df_exploded['rating'].astype(float)

    # Check data types
    print("Data types after explosion:")
    print(df_exploded.dtypes)

    # Count unique users and items
    M =  df_exploded['user_id'].nunique()  # Unique users
    N =  df_exploded['parent_asin'].nunique()  # Unique items

    # Print M and N
    print("Number of unique users (M):", M)
    print("Number of unique items (N):", N)

    user_mapper = dict(zip(np.unique(df_exploded["user_id"]), list(range(M))))
    item_mapper = dict(zip(np.unique(df_exploded["parent_asin"]), list(range(N))))

    user_inv_mapper = dict(zip(list(range(M)), np.unique(df_exploded["user_id"])))
    item_inv_mapper = dict(zip(list(range(N)), np.unique(df_exploded["parent_asin"])))

    user_index = [user_mapper[i] for i in df_exploded['user_id']]
    item_index = [item_mapper[i] for i in df_exploded['parent_asin']]

    # Create the sparse matrix
    X = csr_matrix((df_exploded["rating"], (user_index, item_index)), shape=(M, N))

    return X, user_mapper, item_mapper, user_inv_mapper, item_inv_mapper

# Example usage
X, user_mapper, item_mapper, user_inv_mapper, item_inv_mapper = create_X_custom(df_filtered_no_outliers)

Data types after explosion:
user_id               object
rating               float64
parent_asin           object
verified_purchase     object
dtype: object
Number of unique users (M): 85037
Number of unique items (N): 22103


In [20]:
from scipy.io import mmwrite

# Save the sparse matrix X to a file in Matrix Market format
mmwrite('sparse_matrix.mtx', X)

In [21]:
n_total = X.shape[0]*X.shape[1]
n_ratings = X.nnz
sparsity = n_ratings/n_total
print(f"Matrix sparsity: {round(sparsity*100,2)}%")

Matrix sparsity: 0.01%


In [22]:
n_ratings_per_user = X.getnnz(axis=1)
len(n_ratings_per_user)

85037

In [23]:
print(f"Most active user rated {n_ratings_per_user.max()} items.")
print(f"Least active user rated {n_ratings_per_user.min()} items.")

Most active user rated 107 items.
Least active user rated 1 items.


In [24]:
n_ratings_per_item = X.getnnz(axis=0)
len(n_ratings_per_item)

print(f"Most rated item has {n_ratings_per_item.max()} ratings.")
print(f"Least rated item has {n_ratings_per_item.min()} ratings.")

Most rated item has 1759 ratings.
Least rated item has 1 ratings.


In [None]:
#plt.figure(figsize=(16,4))
#plt.subplot(1,2,1)
#sns.kdeplot(n_ratings_per_user, shade=True)
#plt.xlim(0)
# plt.title("Number of Ratings Per User", fontsize=14)
# plt.xlabel("number of ratings per user")
# plt.ylabel("density")
# plt.subplot(1,2,2)
# sns.kdeplot(n_ratings_per_item, shade=True)
# plt.xlim(0)
# plt.title("Number of Ratings Per Item", fontsize=14)
# plt.xlabel("number of ratings per Item")
# plt.ylabel("density")
# plt.show()

# Colaberative Filtering

In [25]:
from sklearn.neighbors import NearestNeighbors

def find_similar_products_by_title(product_title, X, item_mapper, item_inv_mapper, product_titles, k, metric='cosine'):
    """
    Finds k-nearest neighbours for a given product title.

    Args:
        product_title: Title of the product of interest
        X: user-item utility matrix (sparse matrix)
        k: number of similar products to retrieve
        metric: distance metric for kNN calculations

    Output: returns list of k similar product details based on product titles
    """
    # Map the product title to its corresponding product ID
    product_id = None
    for pid, title in product_titles.items():
        if title.lower() == product_title.lower():  # Case-insensitive match
            product_id = pid
            break

    if product_id is None:
        raise ValueError(f"Product with title '{product_title}' not found in the dataset.")

    # Transpose the user-item matrix so products are the rows
    X = X.T
    neighbour_ids = []

    # Get the index of the product using its ID
    product_ind = item_mapper[product_id]
    product_vec = X[product_ind]

    # Reshape the product vector to be compatible with kneighbors
    if isinstance(product_vec, np.ndarray):
        product_vec = product_vec.reshape(1, -1)

    # Use k+1 since kNN output includes the product ID of interest
    kNN = NearestNeighbors(n_neighbors=k + 1, algorithm="brute", metric=metric)
    kNN.fit(X)

    # Find the nearest neighbours
    neighbour = kNN.kneighbors(product_vec, return_distance=False)

    # Collect similar product IDs, skipping the first one (the product itself)
    for i in range(1, k + 1):  # Start from 1 to skip the input product
        n = neighbour.item(i)
        neighbour_ids.append(item_inv_mapper[n])

    return neighbour_ids


# Example product_titles mapping 'parent_asin' to 'title'
product_details = metadf.set_index('parent_asin')[['title', 'features', 'description', 'price', 'images', 'store', 'categories', 'details']]

# Map 'parent_asin' to 'title' for searching by product title
product_titles = dict(zip(metadf['parent_asin'], metadf['title']))

# Define the product_title you want to find similar items for
product_title = 'VocoPro (VHF4000'  # Replace with an actual product title from your dataset

# Find similar products based on your product title
similar_products = find_similar_products_by_title(product_title, X, item_mapper, item_inv_mapper, product_titles, metric='cosine', k=10)

# Get the product details for the input product title
input_product_id = None
for pid, title in product_titles.items():
    if title.lower() == product_title.lower():
        input_product_id = pid
        break

input_product_details = product_details.loc[input_product_id]

# Print the details of the input product
print(f"Because you viewed/bought '{input_product_details['title']}', here are similar products:")

# Display details of similar products
for similar_product_id in similar_products:
    product_info = product_details.loc[similar_product_id]
    print("\nSimilar Product:")
    print(f"Title: {product_info['title']}")
    print(f"Features: {product_info['features']}")
    print(f"Description: {product_info['description']}")
    print(f"Price: {product_info['price']}")
    print(f"Images: {product_info['images']}")
    print(f"Store: {product_info['store']}")
    print(f"Categories: {product_info['categories']}")
    print(f"Details: {product_info['details']}")


Because you viewed/bought 'VocoPro (VHF4000', here are similar products:

Similar Product:
Title: Seismic Audio SATXSW-8x10 4-Channel 10-Feet Insert Snake Cable 8 TRS to 4 XLR Male and 4 XLR Female
Features: ['Model #: SATXSW-8x10', '10 Foot Insert Snake Cable - 8 TRS to 4 XLR Male and 4 XLR Female', 'Insert Snake Cable', 'Channels: 4 Channel', 'Connectors: 8 TRS Male 1/4" on one end']
Description: ['10 Foot Insert Snake Cable - 8 TRS to 4 XLR Male and 4 XLR Female. Model #: SATXSW-8x10, 10 Foot Insert Snake Cable - 8 TRS to 4 XLR Male and 4 XLR Female, Insert Snake Cable, Channels: 4 Channel, Connectors: 8 TRS Male 1/4" on one end, Connectors: 4 XLR Male and 4 XLR Female on the other end, Length: 10 Feet, Conductor: 22 ga high purity copper, Color coded and numerically labeled, Serviceable Ends, Condition: NEW, One Year Warranty The SATXSW model of snake cables are designed to carry insert signals from a mixing console to other pro audio equipment with XLR ins and outs This snake cabl

In [26]:
def personalization(similar_products):
    total_overlap = 0
    num_comparisons = 0
    for i in range(len(similar_products)):
        for j in range(i + 1, len(similar_products)):
            overlap = len(set(similar_products[i]) & set(similar_products[j]))
            total_overlap += overlap / len(similar_products[i])  # Normalized overlap
            num_comparisons += 1
    return 1 - (total_overlap / num_comparisons)  # Less overlap means higher personalization


In [27]:
metric = personalization(similar_products)

In [28]:
print(metric)

0.6599999999999999


In [29]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [30]:
def content_based_recommendations(metadf, query, top_n=8, chunk_size=1000):
    # Filter products with titles that contain the query (case-insensitive)
    matching_products = metadf[metadf['title'].str.contains(query, case=False)]

    if matching_products.empty:
        print(f"No matching products found for query: '{query}'")
        return pd.DataFrame()

    # Create a TF-IDF vectorizer for the 'description' column
    tfidf_vectorizer = TfidfVectorizer(stop_words='english')

    # Apply TF-IDF vectorization to the descriptions
    tfidf_matrix = tfidf_vectorizer.fit_transform(matching_products['description'].fillna(''))

    # Compute cosine similarity between all products and the first matching product
    cosine_sim = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix).flatten()

    # Get the top N most similar items (excluding the query item itself)
    similar_indices = cosine_sim.argsort()[-top_n-1:-1][::-1]

    # Get the details of the recommended items
    recommended_items = matching_products.iloc[similar_indices][
        ['title', 'average_rating', 'rating_number', 'images', 'price', 'store', 'parent_asin']
    ]

    return recommended_items

In [49]:
query = "Guitar pick"

rec_i = content_based_recommendations(metadf, query)

In [51]:
print(rec_i)

                                                   title  average_rating  \
25249  Perris Leathers LP-TB2 The Beatles Guitar Pick...             4.8   
15690  Love And Peace Sign Hippie Psychedelic (2) Pre...             3.0   
24825         Om Guitar Pick, Ohm Guitar Pick, Two Picks             5.0   
20116  Funny Guitar Picks Novelty Gift Set - 10 Guita...             4.3   
1812   2 PCS Guitar Picks Case Guitar Picks Holder Le...             4.5   
15032  Mixed White Luxury Guitar Pick for Electric Ac...             3.4   
24950  Rclover Personalized Guitar Strap Leather, Nam...             4.7   
20478  Leather Guitar Pick Holder Keychain, Guitar Pi...             4.1   

       rating_number                                             images  \
25249            898  {'hi_res': 'https://m.media-amazon.com/images/...   
15690              1  {'hi_res': 'https://m.media-amazon.com/images/...   
24825              3  {'hi_res': 'https://m.media-amazon.com/images/...   
20116          

In [65]:
list_i = rec_i['parent_asin'].to_list()

In [66]:
print(list_i)

['B07G1WD6WT', 'B0BXGWK5NN', 'B00COZHTAS', 'B0BZK73M92', 'B09MLTP4XF', 'B09PQNJKQY', 'B09WMWSBLJ', 'B0BTJ92585']


In [67]:
print(similar_products)

['B09392HX81', 'B07MC4XF9G', 'B01KDK4UTI', 'B0BLM212RK', 'B07DX95KP2', 'B07XP6MHQT', 'B0BPLFP5P6', 'B01M8N618U', 'B0CBSSC46G', 'B0CBSSH4TN']


In [68]:
per = personalization(list_i)
print(per)

0.6642857142857141
