In [3]:
import pandas as pd
import numpy as np

In [4]:
# Load the transactions data
transactions = pd.read_csv("grocery_dataset.csv")

In [5]:
member_purchases = transactions.groupby(['Member_number', 'itemDescription'])['itemDescription'].count().unstack().fillna(0)
item_descriptions = member_purchases.columns

def simple_association(item_name):
    item_basket = member_purchases[member_purchases[item_name] > 0]
    co_purchase_counts = item_basket.sum().sort_values(ascending=False).drop(item_name)
    return co_purchase_counts.head(5)

ex_item = item_descriptions[20]
simple_association(ex_item)

itemDescription
whole milk          515.0
other vegetables    361.0
rolls/buns          344.0
soda                271.0
yogurt              242.0
dtype: float64

In [6]:
# Function to create a transaction matrix
def create_transaction_matrix(transactions):
    # Group the transactions by member number, date and item description
    # Count the number of each item bought by each member on each date
    # Unstack the item descriptions to create a matrix where rows are transactions and columns are items
    # Fill any missing values with 0
    # Set the index to be the member number and date
    basket = (transactions.groupby(['Member_number', 'Date', 'itemDescription'])['itemDescription']
              .count().unstack().reset_index().fillna(0)
              .set_index(['Member_number', 'Date']))
    
    # Convert the counts to True or False
    # True if the item was bought in the transaction, False otherwise
    return basket.applymap(lambda x: True if x > 0 else False)

# Function to calculate a lift matrix
def calculate_lift_matrix(basket_sets, min_joint_probability=0.001):
    # Calculate the joint probability of each pair of items
    probability_pair = pd.DataFrame(index=basket_sets.columns, columns=basket_sets.columns)
    for item1 in basket_sets.columns:
        for item2 in basket_sets.columns:
            joint_probability = (basket_sets[item1] & basket_sets[item2]).sum() / len(basket_sets)
            if joint_probability > min_joint_probability:
                probability_pair.loc[item1, item2] = joint_probability
            else:
                probability_pair.loc[item1, item2] = 0

    # Set the diagonal of the joint probability matrix to 0
    np.fill_diagonal(probability_pair.values, 0)

    # Calculate the individual probability of each item
    probability_item = basket_sets.mean()

    # Calculate the product of the individual probabilities
    probability_product = np.outer(probability_item, probability_item)

    # Calculate the lift of each pair of items
    lift_matrix = probability_pair.divide(probability_product, fill_value=0)
    
    return lift_matrix

# Function to recommend items
def recommend_items(lift_matrix, item, num_recommendations=10):
    # Sort the items in the lift matrix for the given item in descending order
    # Take the top num_recommendations items
    recommended_for_item = lift_matrix[item].sort_values(ascending=False).head(num_recommendations)
    
    # Print the recommended items
    print(f"Top {num_recommendations} recommendations for {item}:\n")
    print(recommended_for_item, "\n\n")

# Create transaction matrix
basket_sets = create_transaction_matrix(transactions)

# Calculate the lift matrix
lift_matrix = calculate_lift_matrix(basket_sets)

# Recommend items for 'meat'
recommend_items(lift_matrix, 'soda')

Top 10 recommendations for soda:

itemDescription
oil                    1.246844
beverages              1.162678
sausage                1.014975
grapes                 1.001195
shopping bags           0.95459
frozen meals           0.943642
specialty bar          0.936182
butter                 0.918418
candy                  0.910056
specialty chocolate    0.904846
Name: soda, dtype: object 


