Precondition: run Task2_clean.ipynb before running this code.
This creates the input file of the Customer-Product utility matrix

This code in based on Task2_collab_user_single.ipynb<br>
The code in this notebook allows you to find out the recommendations for any customer, not just one.


In [84]:
from product_data import ProductData
import numpy as np
import pandas as pd
import random
from tqdm import tqdm
from save_file import save_file
import logging, importlib, sys

In [85]:
# ensure logging is output in the Jupyter Notebook
importlib.reload(logging)
logging.basicConfig(format='%(asctime)s | %(levelname)s : %(message)s',
                     level=logging.INFO, stream=sys.stdout)

In [86]:


file_name = "recom_pivot.csv"
prod = ProductData(file_name)
prod.set_pivot_dataframe_data_types()

Need to recommend products for the customer

Do not recommend products that the customer has already bought (also ignore where correlation = 1)

Need to recommend products that similar customers have bought but the picked customer has not.

In [87]:
def get_recommendations(picked_customer_id, prod, np_customer_similarity):
    # Number of similar customers
    n = 10
    # Customer similarity threshold
    customer_similarity_threshold = 0.1
    # find the index of the picked customer id
    picked_customer_ind = prod.dict_customers_rvs[picked_customer_id]
    # Get top n similar customers
    # Also exclude similar customers where the correlation = 1.
    # Correlation = 1 means the customer record is precisely the same as the picked customer.
    # In which case no new products will be recommended from that customer
    customer_row = np_customer_similarity[picked_customer_ind]
    
    list_customer_results = []
    #logging.info(f'get similar customers for {picked_customer_id}')
    for customer_ind, corr_val in enumerate(customer_row):
        if corr_val < customer_similarity_threshold or corr_val > 0.999:
            continue

        customer = prod.dict_customers[customer_ind]
        if customer == picked_customer_id:
            continue
            
        correlation = round(corr_val, 2)
        list_customer_results.append((customer, correlation))

    list_customer_results.sort(key = lambda x: x[1], reverse=True)
    np_products = prod.df_products.to_numpy()
    product_names = prod.df_products.columns.values
    # convert product names list to dictionary
    product_dict = {key: i for i, key in enumerate(product_names)}
    product_dict_rvs = dict([(value, key) for key, value in product_dict.items()])
    # get product_ids for the picked customer
    picked_customer_products = prod.df_products.iloc[picked_customer_ind]
    dict_picked_customer_products = {}
    list_picked_customer_products = []
    #logging.info(f'store customer {picked_customer_id} products')
    for i, val in enumerate(picked_customer_products.values):
        if val > 0:
            product_id = product_dict_rvs[i]
            dict_picked_customer_products[product_id] = val
            list_picked_customer_products.append(product_id)
    np_selected_customers = np.empty((0, prod.df_products.shape[1]))
    logging.info(f'store recommended products for customer {picked_customer_id}')
    for customer, correlation in tqdm(list_customer_results):
        customer_ind = prod.dict_customers_rvs[customer]
        row = np_products[customer_ind]
        np_selected_customers = np.vstack([np_selected_customers, row])
    # Locate the best selling products for all the similar customers, these will be the recommended products for the selected customer. Exclude products that the customer has already bought.
    product_totals = np_selected_customers.sum(axis=0)
    dict_product_totals = dict((product_dict_rvs[key], value) for key, value in enumerate(product_totals, 0) if value > 3 and product_dict_rvs[key] not in list_picked_customer_products)
    dict_product_totals = sorted(dict_product_totals.items(), key=lambda x:x[1], reverse=True)
    return dict_product_totals

In [88]:
def get_recommendations_for_customer(picked_customer_id, prod, np_customer_similarity, N = 5):
    dict_product_totals = get_recommendations(picked_customer_id, prod, np_customer_similarity)
    print(f"Top {N} products recommended for the picked customer {picked_customer_id}")

    top_product_recommendations = [x for index, x in enumerate(dict_product_totals) if index < N]
    for product, amount in top_product_recommendations:
        print(f"product {product} was bought {amount} times by similar customers")
    print("-------------------")

In [89]:
# Generate the correlation (Pearson values) matrix for customers
x1 = prod.df_products.astype(float).to_numpy()
np_customer_similarity = np.corrcoef(x1)

# pick a customer and get the index for that customer
picked_customer_id = "0008e848"
get_recommendations_for_customer(picked_customer_id, prod, np_customer_similarity)

2022-12-21 12:18:11,402 | INFO : store recommended products for customer 0008e848


100%|██████████| 72/72 [00:00<00:00, 9024.59it/s]

Top 5 products recommended for the picked customer 0008e848
product 5000.5 was bought 42 times by similar customers
product 49292.0 was bought 25 times by similar customers
product 45004.0 was bought 24 times by similar customers
product 49291.5 was bought 23 times by similar customers
product 35076.0 was bought 16 times by similar customers
-------------------





It takes a lot of processing time to generate recommenders for a customer, and there are over 28,000 customers. So here you can specify how many customers and how many recommendations you wish to make. And the data is output to a csv file where you can check the results.

In [90]:
number_of_recommendations = 5
number_of_customers = 10

df_columns = ["Customer_Id"]
for i in range(number_of_recommendations):
    df_columns.append(f"Code_Product{i + 1}")
    df_columns.append(f"Number_of_Similar_Customers_Buy{i + 1}")

df_output = pd.DataFrame(columns=df_columns)

for index, item in enumerate(prod.df_customer_ids.values):
    if index >= number_of_customers:
        break
    
    picked_customer_id = item[0]
    dict_product_totals = get_recommendations(picked_customer_id, prod, np_customer_similarity)
    row = [picked_customer_id]
    top_product_recommendations = [x for index, x in enumerate(dict_product_totals) if index < number_of_recommendations]
    for results in top_product_recommendations:
        product_id = results[0]
        amount = results[1]
        row.append(product_id)
        row.append(amount)
    df_output.loc[len(df_output)] = row
    
output_file = "customer_recommendations.csv"
save_file(output_file, df_output)
print(f"the following records are output to {output_file}")
df_output


2022-12-21 12:18:11,684 | INFO : store recommended products for customer 00024de6


100%|██████████| 51/51 [00:00<00:00, 10276.70it/s]


2022-12-21 12:18:11,934 | INFO : store recommended products for customer 00084856


100%|██████████| 7741/7741 [00:46<00:00, 167.18it/s]


2022-12-21 12:18:58,435 | INFO : store recommended products for customer 0008e848


100%|██████████| 72/72 [00:00<00:00, 9022.43it/s]


2022-12-21 12:18:58,611 | INFO : store recommended products for customer 00096930


100%|██████████| 1659/1659 [00:02<00:00, 778.39it/s] 


2022-12-21 12:19:00,917 | INFO : store recommended products for customer 000c66b7


100%|██████████| 1426/1426 [00:01<00:00, 865.21it/s] 


2022-12-21 12:19:02,806 | INFO : store recommended products for customer 000e98ee


100%|██████████| 7741/7741 [00:48<00:00, 158.20it/s] 


2022-12-21 12:19:51,977 | INFO : store recommended products for customer 00165e2e


100%|██████████| 35/35 [00:00<00:00, 7017.57it/s]


2022-12-21 12:19:52,193 | INFO : store recommended products for customer 00171ffd


100%|██████████| 317/317 [00:00<00:00, 4295.29it/s]


2022-12-21 12:19:52,485 | INFO : store recommended products for customer 0019e439


100%|██████████| 1161/1161 [00:01<00:00, 1018.46it/s]

2022-12-21 12:19:53,802 | INFO : store recommended products for customer 001a93a4



100%|██████████| 264/264 [00:00<00:00, 3529.12it/s]

the following records are output to customer_recommendations.csv





Unnamed: 0,Customer_Id,Code_Product1,Number_of_Similar_Customers_Buy1,Code_Product2,Number_of_Similar_Customers_Buy2,Code_Product3,Number_of_Similar_Customers_Buy3,Code_Product4,Number_of_Similar_Customers_Buy4,Code_Product5,Number_of_Similar_Customers_Buy5
0,00024de6,35096.5,34,5000.5,26,49291.5,19,49292.0,19,35076.0,16
1,00084856,5000.5,1298,45004.0,1202,10013.0,431,25003.0,399,48504.5,369
2,0008e848,5000.5,42,49292.0,25,45004.0,24,49291.5,23,35076.0,16
3,00096930,49292.0,993,5000.5,953,45004.0,764,10013.0,281,25003.0,261
4,000c66b7,5000.5,535,45004.0,433,49292.0,393,49291.5,341,10013.0,169
5,000e98ee,5000.5,1250,45004.0,1152,10013.0,408,25003.0,369,48504.5,349
6,00165e2e,49291.5,25,49292.0,23,5000.5,20,35077.0,9,10013.0,8
7,00171ffd,5000.5,262,49291.5,211,49292.0,205,45004.0,157,25003.0,94
8,0019e439,5000.5,449,49291.5,300,45004.0,296,49292.0,283,5009.0,117
9,001a93a4,5000.5,196,45004.0,163,49292.0,148,49291.5,130,45001.0,77


This time select random customers and display the results in a user friendly manner

In [91]:
def gen_random_numbers_in_range(low, high, n):
    a_list = list(range(low, high))
    random.shuffle(a_list)
    return a_list[:n]

In [92]:
random_indexes = gen_random_numbers_in_range(0, len(prod.df_customer_ids) - 1, 9)
for i in random_indexes[0:3]:
    picked_customer_id = prod.df_customer_ids.values[i][0]
    get_recommendations_for_customer(picked_customer_id, prod, np_customer_similarity)

2022-12-21 12:19:54,222 | INFO : store recommended products for customer 3dac300a


100%|██████████| 258/258 [00:00<00:00, 5747.91it/s]

Top 5 products recommended for the picked customer 3dac300a
product 5000.5 was bought 180 times by similar customers
product 45004.0 was bought 165 times by similar customers
product 49292.0 was bought 139 times by similar customers
product 49291.5 was bought 104 times by similar customers
product 10013.0 was bought 71 times by similar customers
-------------------





2022-12-21 12:19:54,471 | INFO : store recommended products for customer 2e32aab6


100%|██████████| 4225/4225 [00:14<00:00, 287.79it/s] 


Top 5 products recommended for the picked customer 2e32aab6
product 49292.0 was bought 817 times by similar customers
product 5000.5 was bought 739 times by similar customers
product 45004.0 was bought 592 times by similar customers
product 10013.0 was bought 232 times by similar customers
product 25003.0 was bought 215 times by similar customers
-------------------
2022-12-21 12:20:09,344 | INFO : store recommended products for customer e90f4f6e


100%|██████████| 148/148 [00:00<00:00, 7798.26it/s]

Top 5 products recommended for the picked customer e90f4f6e
product 5000.5 was bought 158 times by similar customers
product 49292.0 was bought 120 times by similar customers
product 45004.0 was bought 115 times by similar customers
product 49291.5 was bought 82 times by similar customers
product 5009.0 was bought 63 times by similar customers
-------------------





In [93]:
for i in random_indexes[3:6]:
    picked_customer_id = prod.df_customer_ids.values[i][0]
    get_recommendations_for_customer(picked_customer_id, prod, np_customer_similarity)

2022-12-21 12:20:09,569 | INFO : store recommended products for customer 4ca70adf


100%|██████████| 9/9 [00:00<?, ?it/s]

Top 5 products recommended for the picked customer 4ca70adf
product 35075.5 was bought 5 times by similar customers
product 49291.5 was bought 5 times by similar customers
product 5000.5 was bought 4 times by similar customers
-------------------
2022-12-21 12:20:09,729 | INFO : store recommended products for customer 29f960bd



100%|██████████| 222/222 [00:00<00:00, 6183.21it/s]

Top 5 products recommended for the picked customer 29f960bd
product 49291.5 was bought 142 times by similar customers
product 5000.5 was bought 133 times by similar customers
product 45004.0 was bought 125 times by similar customers
product 49292.0 was bought 116 times by similar customers
product 48513.0 was bought 51 times by similar customers
-------------------
2022-12-21 12:20:09,924 | INFO : store recommended products for customer 6cac3cf9



100%|██████████| 198/198 [00:00<00:00, 6845.98it/s]

Top 5 products recommended for the picked customer 6cac3cf9
product 5000.5 was bought 101 times by similar customers
product 49291.5 was bought 92 times by similar customers
product 49292.0 was bought 86 times by similar customers
product 45004.0 was bought 55 times by similar customers
product 45001.0 was bought 32 times by similar customers
-------------------





In [94]:
for i in random_indexes[6:9]:
    picked_customer_id = prod.df_customer_ids.values[i][0]
    get_recommendations_for_customer(picked_customer_id, prod, np_customer_similarity)

2022-12-21 12:20:10,177 | INFO : store recommended products for customer ba97baa6


100%|██████████| 255/255 [00:00<00:00, 6087.84it/s]

Top 5 products recommended for the picked customer ba97baa6
product 5000.5 was bought 172 times by similar customers
product 49292.0 was bought 167 times by similar customers
product 49291.5 was bought 157 times by similar customers
product 45004.0 was bought 127 times by similar customers
product 45001.0 was bought 59 times by similar customers
-------------------
2022-12-21 12:20:10,389 | INFO : store recommended products for customer 631f1962



100%|██████████| 121/121 [00:00<00:00, 8666.81it/s]

Top 5 products recommended for the picked customer 631f1962
product 5000.5 was bought 51 times by similar customers
product 49291.5 was bought 41 times by similar customers
product 49292.0 was bought 39 times by similar customers
product 45004.0 was bought 33 times by similar customers
product 10013.0 was bought 21 times by similar customers
-------------------
2022-12-21 12:20:10,597 | INFO : store recommended products for customer b32563ee



100%|██████████| 7213/7213 [00:41<00:00, 174.61it/s]

Top 5 products recommended for the picked customer b32563ee
product 49292.0 was bought 1089 times by similar customers
product 45004.0 was bought 1059 times by similar customers
product 49291.5 was bought 1059 times by similar customers
product 10013.0 was bought 459 times by similar customers
product 5009.0 was bought 428 times by similar customers
-------------------



