**Product Recommendation**

The Most Related Products

In [11]:
import pandas as pd

from mlxtend.frequent_patterns import apriori, association_rules

# Load a reproducible 500k-row sample, discard incomplete rows and trim
# stray whitespace from product names so identical products group together.
df = (
    pd.read_csv('ECommerce_consumer behaviour.csv')
    .sample(n=500000, random_state=42)
    .dropna()
    .assign(product_name=lambda frame: frame['product_name'].str.strip())
)

# One row per user, one boolean column per product: True when the user
# bought that product at least once within the sample.
basket = (
    df.groupby(['user_id', 'product_name'])['product_name']
    .count()
    .unstack(fill_value=0)
    .gt(0)
)

# Itemsets present for at least 1% of users, then every rule with lift >= 1.
frequent_itemsets = apriori(basket, min_support=0.01, use_colnames=True)
rules = association_rules(frequent_itemsets, metric='lift', min_threshold=1)

# Strongest associations first; keep only the columns needed for display.
recommendations = rules.sort_values(by='lift', ascending=False)[
    ['antecedents', 'consequents', 'lift']
]
print(recommendations.head())


                                        antecedents  \
1267                     (fresh vegetables, yogurt)   
1270  (packaged vegetables fruits, packaged cheese)   
1252           (packaged vegetables fruits, yogurt)   
1257                (fresh fruits, packaged cheese)   
1131                (fresh fruits, packaged cheese)   

                                        consequents      lift  
1267  (packaged vegetables fruits, packaged cheese)  3.534240  
1270                     (fresh vegetables, yogurt)  3.534240  
1252                (fresh fruits, packaged cheese)  3.516301  
1257           (packaged vegetables fruits, yogurt)  3.516301  
1131                       (fresh vegetables, milk)  3.485093  


**Function For Related Products**

In [12]:
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules

def find_most_similar_products(product_name, df, min_support=0.01, min_lift=1, top_n=3):
    """Rank the itemsets most strongly associated with ``product_name``.

    A user x product boolean purchase matrix is built from ``df``, frequent
    itemsets are mined with ``apriori``, association rules are derived with
    lift as the metric, and only the rules whose consequents contain
    ``product_name`` are kept.

    Parameters
    ----------
    product_name : str
        Product to look up; matched exactly against the items found in the
        rule consequents (i.e. the values of ``df['product_name']``).
    df : pandas.DataFrame
        Purchase records; the ``user_id`` and ``product_name`` columns are read.
    min_support : float, default 0.01
        Forwarded to ``apriori`` as ``min_support``.
    min_lift : float, default 1
        Forwarded to ``association_rules`` as ``min_threshold`` (metric ``'lift'``).
    top_n : int, default 3
        Maximum number of rules to return.

    Returns
    -------
    pandas.DataFrame
        Columns ``antecedents`` and ``lift``, sorted by lift in descending
        order, with at most ``top_n`` rows. The frame is empty (same columns)
        when ``df`` has no usable rows, when no itemset reaches
        ``min_support``, or when no rule mentions ``product_name``.
    """
    result_columns = ['antecedents', 'lift']

    purchase_counts = df.groupby(['user_id', 'product_name'])['product_name'].count()
    if purchase_counts.empty:
        # Nothing to mine: no (user, product) pair survived the grouping.
        return pd.DataFrame(columns=result_columns)

    # One row per user, one boolean column per product (True = bought at least once).
    basket = purchase_counts.unstack(fill_value=0) > 0

    frequent_itemsets = apriori(basket, min_support=min_support, use_colnames=True)
    if frequent_itemsets.empty:
        # mlxtend's association_rules rejects an empty itemset table with a
        # ValueError; "no frequent itemsets" simply means "no recommendations".
        return pd.DataFrame(columns=result_columns)

    rules = association_rules(frequent_itemsets, metric='lift', min_threshold=min_lift)

    # Force a real boolean dtype: on an empty rule table the mapped Series is
    # object-typed, and pandas would treat it as a list of column labels
    # instead of a row mask (which later breaks the sort on 'lift').
    mentions_product = (
        rules['consequents']
        .map(lambda items: product_name in items)
        .astype(bool)
    )

    ranked_rules = rules[mentions_product].sort_values(by='lift', ascending=False)

    # Top N antecedent itemsets together with their lift values.
    return ranked_rules.head(top_n)[result_columns]

# Example usage: rebuild the cleaned 500k-row sample without mutating any
# frame in place, so re-running this cell always starts from the same data.
df = (
    pd.read_csv('ECommerce_consumer behaviour.csv')
    .sample(n=500000, random_state=42)
    .dropna()
    .assign(product_name=lambda frame: frame['product_name'].str.strip())
)

product_name = 'cereal'  # Specify the product for finding similar products
top_similar_products = find_most_similar_products(product_name, df, top_n=5)
print(f"Product: {product_name}")
print()
# Label spelling corrected ("Similiar" -> "Similar"); re-run the cell to
# refresh the recorded output.
print(f"Similar Products for {product_name}")
print(top_similar_products)


Product: cereal

Similiar Products for cereal
            antecedents      lift
541      (fresh fruits)  1.759057
78               (milk)  1.731882
85             (yogurt)  1.664150
539  (fresh vegetables)  1.570020
81    (packaged cheese)  1.517853


**The Most Related Sections**