In [7]:
# import libraries
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import LabelEncoder
from scipy.sparse import csr_matrix
from sklearn.model_selection import train_test_split

# Load the cleaned retail dataset into `df`. The path is relative, so it is
# resolved against the kernel's current working directory.
# Later cells read the 'Customer_Name', 'Product' and 'Total_Items' columns.
file_path = 'Retail_Data_Cleaned.csv'
df = pd.read_csv(file_path)



In [2]:
## Collaborative filtering: customer x product matrix and customer-customer similarity

# Label encoders that map the raw identifiers to integer codes. `product_encoder`
# is kept around so product codes can be translated back to labels later.
customer_encoder = LabelEncoder()
product_encoder = LabelEncoder()

# Store the integer codes as new columns on the dataframe (df).
# NOTE(review): customers are keyed by name, so two different people sharing a
# name would be merged into one Customer_ID -- confirm names are unique.
# NOTE(review): the recommendations printed further down look like stringified
# lists (e.g. "['Tea', 'Pickles']"), which suggests each 'Product' value is a
# whole basket rather than a single product -- confirm against the CSV.
df['Customer_ID'] = customer_encoder.fit_transform(df['Customer_Name'])
df['Product_ID'] = product_encoder.fit_transform(df['Product'])

# User-item interaction matrix: one row per customer, one column per product,
# each cell holding the summed 'Total_Items' (0 where there is no purchase).
user_item_matrix = pd.pivot_table(
    df,
    index='Customer_ID',
    columns='Product_ID',
    values='Total_Items',
    aggfunc='sum',
    fill_value=0,
)

# Sparse (CSR) copy of the interaction matrix, used as input for the similarity step.
user_item_matrix_sparse = csr_matrix(user_item_matrix.to_numpy())

# Pairwise cosine similarity between customer rows.
# Despite the variable name, cosine_similarity returns a dense
# (n_customers x n_customers) array by default (dense_output=True).
user_similarity_sparse = cosine_similarity(user_item_matrix_sparse)

# Label rows and columns with Customer_ID so similarities can be looked up by ID.
user_similarity_df = pd.DataFrame(
    user_similarity_sparse,
    index=user_item_matrix.index,
    columns=user_item_matrix.index,
)

def recommend_products(customer_id, top_n=10):
    """Recommend products bought by the customers most similar to ``customer_id``.

    The ``top_n`` most similar customers (by the precomputed cosine similarity
    in ``user_similarity_df``) are selected, their purchase quantities from
    ``user_item_matrix`` are summed per product, products the target customer
    already bought are removed, and the remainder is ranked by summed quantity.

    Parameters
    ----------
    customer_id : int
        Encoded customer identifier (a label of ``user_similarity_df``).
    top_n : int, default 10
        Number of neighbours to consult and maximum number of products to
        return. Values <= 0 yield an empty result.

    Returns
    -------
    numpy.ndarray
        Original product labels (decoded with ``product_encoder``), best first.
        Empty when nothing can be recommended.

    Raises
    ------
    KeyError
        If ``customer_id`` is not present in the similarity matrix.
    """
    if customer_id not in user_similarity_df.columns:
        raise KeyError(f"Unknown customer_id: {customer_id!r}")

    limit = max(top_n, 0)

    # Remove the target customer by label rather than by position: when another
    # customer ties with the target at the top similarity (or the target's own
    # row is all zeros) the target is not guaranteed to be sorted first, so
    # slicing off position 0 could drop a real neighbour and keep the target.
    # A stable sort keeps the selection reproducible when scores tie.
    # NOTE(review): neighbours with similarity 0 are still eligible, as before;
    # consider filtering them out if unrelated customers should not contribute.
    similar_customers = (
        user_similarity_df[customer_id]
        .drop(labels=customer_id)
        .sort_values(ascending=False, kind='mergesort')
        .head(limit)
    )

    # Accumulate purchased quantities per product across the neighbours.
    recommended_products = {}
    for similar_customer in similar_customers.index:
        neighbour_purchases = user_item_matrix.loc[similar_customer]
        for product_id, count in neighbour_purchases[neighbour_purchases > 0].items():
            recommended_products[product_id] = recommended_products.get(product_id, 0) + count

    # Products the target customer has already purchased are not recommended.
    customer_products = user_item_matrix.loc[customer_id]
    purchased_products = set(customer_products[customer_products > 0].index)

    # Rank the remaining candidates by summed quantity, highest first.
    recommended_products_sorted = sorted(
        (
            (product_id, count)
            for product_id, count in recommended_products.items()
            if product_id not in purchased_products
        ),
        key=lambda item: item[1],
        reverse=True,
    )

    # Translate the encoded product IDs back to their original labels.
    top_product_ids = [product_id for product_id, _ in recommended_products_sorted[:limit]]
    return product_encoder.inverse_transform(top_product_ids)

In [3]:
# Print the first 20 rows of customer names next to their encoded Customer_ID,
# which gives a lookup for choosing an ID to pass to recommend_products().
print(df[['Customer_Name', 'Customer_ID']].head(20))

          Customer_Name  Customer_ID
0       Cheyenne Newman         4521
1      Emily Fitzgerald         7984
2          Michael Webb        17806
3          Kimberly Lin        14454
4       Cathy Hernandez         4190
5        Elizabeth Cook         7806
6          Kara Bradley        13183
7       Carla Hernandez         3788
8      Christopher Wang         5050
9         Alisha Hudson          611
10     Samantha Mcclure        21526
11         Shari Thomas        22333
12       David Randolph         6444
13          Maria Munoz        16262
14  Christopher Barnett         4797
15       Jonathan Roach        12236
16       Alexander Hall          385
17          Bryan Smith         3605
18        Kayla Sanchez        13739
19          Adam Foster          145


In [4]:
def check_recommendation(customer_id):
    """Smoke-check ``recommend_products`` for one customer and print the outcome.

    Prints a "Check Passed" line listing the recommendations when at least one
    is returned, otherwise a "Check Failed" line. Nothing is returned.
    """
    recommendations = recommend_products(customer_id)

    # Guard clause: report the empty case first and stop.
    if len(recommendations) == 0:
        print(f"Check Failed: No recommendations found for customer {customer_id}")
        return

    print(f"Check Passed: Recommendations for customer {customer_id} are {recommendations}")

# Example usage: 145 is the encoded Customer_ID listed for "Adam Foster"
# in the name/ID table printed earlier in this notebook.
check_recommendation(145)

Check Passed: Recommendations for customer 145 are ["['Shower Gel', 'Ketchup']" "['Water', 'Lawn Mower']"
 "['Tea', 'Pickles']" "['BBQ Sauce', 'Soap']"
 "['Carrots', 'Water', 'Baby Wipes']"
 "['Cereal', 'Insect Repellent', 'Spinach', 'Milk', 'Tuna']"
 "['Dish Soap', 'BBQ Sauce']" "['Bread', 'Paper Towels']"
 "['Cleaning Spray', 'Air Freshener', 'Pasta', 'Salmon', 'Rice']"
 "['Broom', 'Olive Oil', 'Razors', 'Bath Towels']"]
