Precondition: run Task2_clean.ipynb before running this code.
This creates the input file of the Customer-Product utility matrix

This code in based on Task2_collab_user_single.ipynb<br>
The code in this notebook allows you to find out the recommendations for any customer, not just one.


In [1]:
from product_data import ProductData
import numpy as np
import random

In [2]:
file_name = "recom_pivot.csv"
prod = ProductData(file_name)
prod.set_pivot_dataframe_data_types()

Need to recommend products for the customer

Do not recommend products that the customer has already bought (also ignore where correlation = 1)

Need to recommend products that similar customers have bought but the picked customer has not.

In [3]:
def get_recommendations(picked_customer_id, prod, np_customer_similarity):
    # Number of similar customers
    n = 10
    # Customer similarity threshold
    customer_similarity_threshold = 0.1
    # find the index of the picked customer id
    picked_customer_ind = prod.dict_customers_rvs[picked_customer_id]
    # Get top n similar users
    # Also exclude similar users where the correlation = 1.
    # Correlation = 1 means the customer record is precisely the same as the picked customer.
    # In which case no new products will be recommended from that customer
    customer_row = np_customer_similarity[picked_customer_ind]
    list_customer_results = []
    for customer_ind, corr_val in enumerate(customer_row):
        if corr_val < customer_similarity_threshold and corr_val < 1:
            continue

        customer = prod.dict_customers[customer_ind]
        if customer == picked_customer_id:
            continue
            
        correlation = round(corr_val, 2)
        list_customer_results.append((customer, correlation))

    list_customer_results.sort(key = lambda x: x[1], reverse=True)
    np_products = prod.df_products.to_numpy()
    product_names = prod.df_products.columns.values
    # convert product names list to dictionary
    product_dict = {key: i for i, key in enumerate(product_names)}
    product_dict_rvs = dict([(value, key) for key, value in product_dict.items()])
    # get product_ids for the picked customer
    picked_customer_products = prod.df_products.iloc[picked_customer_ind]
    dict_picked_customer_products = {}
    list_picked_customer_products = []
    for i, val in enumerate(picked_customer_products.values):
        if val > 0:
            product_id = product_dict_rvs[i]
            dict_picked_customer_products[product_id] = val
            list_picked_customer_products.append(product_id)
    np_selected_customers = np.empty((0, prod.df_products.shape[1]))
    for customer, correlation in list_customer_results:
        customer_ind = prod.dict_customers_rvs[customer]
        row = np_products[customer_ind]
        np_selected_customers = np.vstack([np_selected_customers, row])
    # Locate the best selling products for all the similar customers, these will be the recommended products for the selected customer. Exclude products that the customer has already bought.
    product_totals = np_selected_customers.sum(axis=0)
    dict_product_totals = dict((product_dict_rvs[key], value) for key, value in enumerate(product_totals, 0) if value > 3 and product_dict_rvs[key] not in list_picked_customer_products)
    dict_product_totals = sorted(dict_product_totals.items(), key=lambda x:x[1], reverse=True)
    return dict_product_totals

In [4]:
def get_recommendations_for_customer(picked_customer_id, prod, np_customer_similarity, N = 5):
    dict_product_totals = get_recommendations(picked_customer_id, prod, np_customer_similarity)
    print(f"Top {N} products recommended for the picked customer {picked_customer_id}")

    top_product_recommendations = [x for index, x in enumerate(dict_product_totals) if index < N]
    for product, amount in top_product_recommendations:
        print(f"product {product} was bought {amount} times by similar customers")
    print("-------------------")

In [5]:
# Generate the correlation (Pearson values) matrix for customers
x1 = prod.df_products.astype(float).to_numpy()
np_customer_similarity = np.corrcoef(x1)

# pick a customer and get the index for that customer
picked_customer_id = "0008e848"
get_recommendations_for_customer(picked_customer_id, prod, np_customer_similarity)

Top 5 products recommended for the picked customer 0008e848
product 5000.5 was bought 42 times by similar customers
product 49292.0 was bought 25 times by similar customers
product 45004.0 was bought 24 times by similar customers
product 49291.5 was bought 23 times by similar customers
product 35076.0 was bought 16 times by similar customers
-------------------


In [6]:
def gen_random_numbers_in_range(low, high, n):
    a_list = list(range(low, high))
    random.shuffle(a_list)
    return a_list[:n]

In [7]:
random_indexes = gen_random_numbers_in_range(0, len(prod.df_customer_ids) - 1, 5)
for i in random_indexes:
    picked_customer_id = prod.df_customer_ids.values[i][0]
    get_recommendations_for_customer(picked_customer_id, prod, np_customer_similarity)

Top 5 products recommended for the picked customer 94f442b2
product 49291.5 was bought 935 times by similar customers
product 5000.5 was bought 892 times by similar customers
product 45004.0 was bought 818 times by similar customers
product 10013.0 was bought 311 times by similar customers
product 25003.0 was bought 285 times by similar customers
-------------------
Top 5 products recommended for the picked customer db217e3b
product 49291.5 was bought 935 times by similar customers
product 5000.5 was bought 892 times by similar customers
product 45004.0 was bought 818 times by similar customers
product 10013.0 was bought 311 times by similar customers
product 25003.0 was bought 285 times by similar customers
-------------------
Top 5 products recommended for the picked customer 8aa428b8
product 49291.5 was bought 935 times by similar customers
product 5000.5 was bought 892 times by similar customers
product 45004.0 was bought 818 times by similar customers
product 10013.0 was bought 31