**Product Recommendation**

The Most Related Products

In [137]:
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules

# Load a reproducible 500k-row sample, discard rows with missing values and
# trim stray whitespace from the product labels, all in one chain.
df = (
    pd.read_csv('ECommerce_consumer behaviour.csv')
    .sample(n=500000, random_state=42)
    .dropna()
    .assign(product_name=lambda frame: frame['product_name'].str.strip())
)

# One row per user, one column per product; each cell holds how many times
# that user bought that product (0 when never).
basket = (
    df.groupby(['user_id', 'product_name'])['product_name']
    .count()
    .unstack()
    .reset_index()
    .fillna(0)
    .set_index('user_id')
)

# Collapse the counts to presence flags: True when bought at least once.
basket = basket > 0

# Mine itemsets present in at least 1% of baskets, then derive rules whose
# lift is at least 1.
frequent_itemsets = apriori(basket, min_support=0.01, use_colnames=True)
rules = association_rules(frequent_itemsets, metric='lift', min_threshold=1)

# Keep only the columns of interest, strongest associations first.
rule_columns = ['antecedents', 'consequents', 'lift']
recommendations = rules[rule_columns].sort_values(by='lift', ascending=False)
print(recommendations.head())


                                        antecedents  \
1271                     (fresh vegetables, yogurt)   
1266  (packaged vegetables fruits, packaged cheese)   
1257           (packaged vegetables fruits, yogurt)   
1252                (packaged cheese, fresh fruits)   
1127                (packaged cheese, fresh fruits)   

                                        consequents      lift  
1271  (packaged vegetables fruits, packaged cheese)  3.534240  
1266                     (fresh vegetables, yogurt)  3.534240  
1257                (packaged cheese, fresh fruits)  3.516301  
1252           (packaged vegetables fruits, yogurt)  3.516301  
1127                       (fresh vegetables, milk)  3.485093  


**Function For Related Products**

In [138]:
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules

def find_most_similar_products(product_name, df, min_support=0.01, min_lift=1, top_n=3):
    """Rank the itemsets that most strongly lead to ``product_name``.

    A user-by-product presence matrix is built from ``df``, frequent itemsets
    are mined with ``apriori`` and turned into lift-based association rules,
    and only the rules whose consequent set contains ``product_name`` are kept.

    Parameters
    ----------
    product_name
        Product label searched for inside each rule's consequents.
    df
        DataFrame with at least ``user_id`` and ``product_name`` columns.
    min_support
        Forwarded to ``apriori`` as ``min_support``.
    min_lift
        Forwarded to ``association_rules`` as the lift ``min_threshold``.
    top_n
        Maximum number of rules to return.

    Returns
    -------
    DataFrame
        The ``antecedents`` and ``lift`` columns of the matching rules,
        ordered by descending lift and truncated to ``top_n`` rows.
    """
    # Purchase counts per (user, product) pair ...
    per_user_counts = df.groupby(['user_id', 'product_name'])['product_name'].count()

    # ... pivoted so every product becomes a column indexed by user.
    count_matrix = (
        per_user_counts.unstack()
        .reset_index()
        .fillna(0)
        .set_index('user_id')
    )

    # Presence flags: True where the user bought the product at least once.
    presence = count_matrix > 0

    itemsets = apriori(presence, min_support=min_support, use_colnames=True)
    all_rules = association_rules(itemsets, metric='lift', min_threshold=min_lift)

    def _predicts_product(consequent):
        # A rule qualifies when the requested product is among its consequents.
        return product_name in consequent

    matching = all_rules[all_rules['consequents'].apply(_predicts_product)]
    ranked = matching.sort_values(by='lift', ascending=False)

    return ranked.head(top_n)[['antecedents', 'lift']]

# Example usage:
# Draw the reproducible 500k-row sample, drop rows with missing values and
# trim whitespace from product labels without mutating a frame in place.
df = (
    pd.read_csv('ECommerce_consumer behaviour.csv')
    .sample(n=500000, random_state=42)
    .dropna()
    .assign(product_name=lambda frame: frame['product_name'].str.strip())
)

product_name = 'cereal'  # Specify the product for finding similar products
top_similar_products = find_most_similar_products(product_name, df, top_n=5)
print("Product: " + product_name)
print()
# The label needs its own trailing space; without it the product name was
# glued to the text ("...forcereal").
print("Similar Products for " + product_name)
print(top_similar_products)


Product: cereal

Similiar Products forcereal
            antecedents      lift
541      (fresh fruits)  1.759057
79               (milk)  1.731882
85             (yogurt)  1.664150
539  (fresh vegetables)  1.570020
80    (packaged cheese)  1.517853


**The Most Related Sections**

In [139]:
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules

# Sample cell data: each record gives one (x, y) coordinate and the id of the
# section it belongs to. Only three distinct section ids are used here
# (they correspond to "dairy eggs", "beverages" and "bakery" in section_data).
cell_data = [{"x":0,"y":0,"sectionId":"e5650acf-0597-461c-85b8-5679c76df7ac"},
             {"x":0,"y":1,"sectionId":"e5650acf-0597-461c-85b8-5679c76df7ac"},
             {"x":0,"y":2,"sectionId":"e5650acf-0597-461c-85b8-5679c76df7ac"},
             {"x":1,"y":0,"sectionId":"f3853589-d1be-4eb1-b4b1-18cdaa78c63e"},
             {"x":1,"y":1,"sectionId":"f3853589-d1be-4eb1-b4b1-18cdaa78c63e"},
             {"x":1,"y":2,"sectionId":"f3853589-d1be-4eb1-b4b1-18cdaa78c63e"},
             {"x":2,"y":0,"sectionId":"1c292782-1120-4498-aedf-873b45b29046"},
             {"x":2,"y":1,"sectionId":"1c292782-1120-4498-aedf-873b45b29046"},
]

# Sample position data as a list of JSON-style records: one entry per
# recorded position, carrying a uid, the person's email, the (x, y)
# coordinate and a date string.
# NOTE(review): the uids ending in ...e5d5 and ...e5d6 each appear under two
# different emails (johndoe and alexander) — confirm whether uid is meant to
# be unique per record.
position_data = [
    {"uid": "7b580377-ca5c-4326-a235-25898711e5d2", "personEmail": "helen@gmail.com", "x": 0, "y": 0, "time": "2024-04-23"},
    {"uid": "7b580377-ca5c-4326-a235-25898711e5d1", "personEmail": "helen@gmail.com", "x": 0, "y": 2, "time": "2024-04-23"},
    {"uid": "7b580377-ca5c-4326-a235-25898711e5d3", "personEmail": "helen@gmail.com", "x": 2, "y": 0, "time": "2024-04-23"},
    {"uid": "7b580377-ca5c-4326-a235-25898711e5d4", "personEmail": "helen@gmail.com", "x": 1, "y": 2, "time": "2024-04-23"},
    {"uid": "7b580377-ca5c-4326-a235-25898711e5d5", "personEmail": "johndoe@gmail.com", "x": 1, "y": 0, "time": "2024-04-23"},
    {"uid": "7b580377-ca5c-4326-a235-25898711e5d6", "personEmail": "johndoe@gmail.com", "x": 2, "y": 0, "time": "2024-04-23"},
    {"uid": "7b580377-ca5c-4326-a235-25898711e5d5", "personEmail": "alexander@gmail.com", "x": 0, "y": 1, "time": "2024-04-23"},
    {"uid": "7b580377-ca5c-4326-a235-25898711e5d6", "personEmail": "alexander@gmail.com", "x": 2, "y": 0, "time": "2024-04-23"},
    {"uid": "7b580377-ca5c-4326-a235-25898711e5d7", "personEmail": "micheal@gmail.com", "x": 0, "y": 0, "time": "2024-04-23"},
    {"uid": "7b580377-ca5c-4326-a235-25898711e5d8", "personEmail": "micheal@gmail.com", "x": 0, "y": 0, "time": "2024-04-23"},
    {"uid": "7b580377-ca5c-4326-a235-25898711e5d9", "personEmail": "jack@gmail.com", "x": 1, "y": 1, "time": "2024-04-23"},
    {"uid": "7b580377-ca5c-4326-a235-25898711e510", "personEmail": "jack@gmail.com", "x": 1, "y": 1, "time": "2024-04-23"},
]

# Sample section data: lookup records pairing each section id with its
# display name. The "sectionId" values in cell_data refer to these ids.
section_data = [{"id":"2e7e3c54-c5c9-4d99-9c6f-fb38117f3126","name":"canned goods"},
                {"id":"c31db0cc-e796-44b8-8239-6c9a3cc51e95","name":"babies"},
                {"id":"8932701b-9b18-4818-8e7d-d92667a3a59a","name":"dry goods pasta"},
                {"id":"dc41e23f-9919-498e-a894-883640033a05","name":"missing"},
                {"id":"9651a328-c842-4965-8f45-b592a5bd5ddb","name":"produce"},
                {"id":"d1fb5bbc-e358-4039-8ce0-f2f4ef8f328f","name":"household"},
                {"id":"e5650acf-0597-461c-85b8-5679c76df7ac","name":"dairy eggs"},
                {"id":"53cc11d0-4014-42c0-ac0f-42b7e84a9e7a","name":"international"},
                {"id":"dbd8bb0c-95ec-4794-9d14-3618b3698ca1","name":"bulk"},
                {"id":"408ce76e-c6c1-4819-a47a-456627dd7d56","name":"pantry"},
                {"id":"5134cede-8ca7-4e15-ab3d-64362b495d48","name":"deli"},
                {"id":"b516b990-8cd1-4152-9c3f-9049cb14086d","name":"meat seafood"},
                {"id":"63cd2dcd-e1b3-4b53-98c5-dc079db97fcc","name":"breakfast"},
                {"id":"1c292782-1120-4498-aedf-873b45b29046","name":"bakery"},
                {"id":"f3853589-d1be-4eb1-b4b1-18cdaa78c63e","name":"beverages"},
                {"id":"98e916e3-f6da-4e2c-8537-8fc63872512f","name":"frozen"},
                {"id":"2d69e9b6-f927-4732-8a97-93a27691ede8","name":"snacks"},
                {"id":"b312ddc6-7af5-4411-94d1-9ac74d4cefc6","name":"pets"},
                {"id":"4f12f98e-cb83-4231-957b-1ba2a492eedd","name":"alcohol"},
                {"id":"424f7521-4954-40f4-8091-3c1706d30b82","name":"other"},
                {"id":"4816bbae-6670-4253-a47f-e7eac1f32b2b","name":"personal care"}]

# Turn each list of records into its own DataFrame.
cell_df = pd.DataFrame(cell_data)
section_df = pd.DataFrame(section_data)
position_df = pd.DataFrame(position_data)

# Resolve every recorded position to a section: first attach the section id
# through the shared (x, y) coordinate, then attach the section name through
# that id, and finally trim stray whitespace from the name.
merged_df = (
    position_df
    .merge(cell_df, on=['x', 'y'])
    .merge(section_df, left_on='sectionId', right_on='id')
    .assign(name=lambda visits: visits['name'].str.strip())
)

# One row per person, one column per section; each cell counts how many
# positions that person has in that section (0 when none).
basket = (
    merged_df.groupby(['personEmail', 'name'])['name']
    .count()
    .unstack()
    .reset_index()
    .fillna(0)
    .set_index('personEmail')
)

# Collapse the counts to presence flags: True when visited at least once.
basket = basket > 0

# Mine frequent section sets, then derive rules whose lift is at least 1.
frequent_itemsets = apriori(basket, min_support=0.01, use_colnames=True)
rules = association_rules(frequent_itemsets, metric='lift', min_threshold=1)

# Keep only the columns of interest, strongest associations first.
rule_columns = ['antecedents', 'consequents', 'lift']
recommendations = rules[rule_columns].sort_values(by='lift', ascending=False)
print(recommendations.head(5))





               antecedents              consequents      lift
4  (dairy eggs, beverages)                 (bakery)  1.666667
5                 (bakery)  (dairy eggs, beverages)  1.666667
0              (beverages)                 (bakery)  1.111111
1                 (bakery)              (beverages)  1.111111
2             (dairy eggs)                 (bakery)  1.111111


In [140]:
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules


# Rebuild the visit table in one chain: join positions to cells on the shared
# (x, y) coordinate to obtain each position's section id, then join on that
# id to bring in the section name.
merged_df = (
    position_df
    .merge(cell_df, on=['x', 'y'])
    .merge(section_df, left_on='sectionId', right_on='id')
)

def find_most_similar_sections(section_name, df, min_support=0.01, min_lift=1, top_n=3):
    """Rank the section sets that most strongly lead to ``section_name``.

    A person-by-section presence matrix is built from ``df``, frequent
    itemsets are mined with ``apriori`` and turned into lift-based association
    rules, and only the rules whose consequent set contains ``section_name``
    are kept.

    Parameters
    ----------
    section_name
        Section label searched for inside each rule's consequents.
    df
        DataFrame with at least ``personEmail`` and ``name`` columns. This
        frame, not any module-level variable, is the data that gets analysed.
    min_support
        Forwarded to ``apriori`` as ``min_support``.
    min_lift
        Forwarded to ``association_rules`` as the lift ``min_threshold``.
    top_n
        Maximum number of rules to return.

    Returns
    -------
    DataFrame
        The ``antecedents`` and ``lift`` columns of the matching rules,
        ordered by descending lift and truncated to ``top_n`` rows.
    """
    # Group by personEmail and section name, then count occurrences.
    # Uses the ``df`` argument so callers can analyse any frame they pass in.
    basket = (df.groupby(['personEmail', 'name'])['name']
              .count().unstack().reset_index().fillna(0)
              .set_index('personEmail'))

    # Convert counts to presence flags (True when visited at least once)
    basket = basket.apply(lambda x: x > 0)

    # Find frequent itemsets using Apriori algorithm
    frequent_itemsets = apriori(basket, min_support=min_support, use_colnames=True)

    # Generate association rules based on lift metric
    rules = association_rules(frequent_itemsets, metric='lift', min_threshold=min_lift)

    # Keep only rules whose consequents contain the specified section name
    # (rules where it appears solely as an antecedent are excluded)
    filtered_rules = rules[rules['consequents'].apply(lambda x: section_name in x)]

    # Sort the filtered rules by lift in descending order
    sorted_rules = filtered_rules.sort_values(by='lift', ascending=False)

    # Get top N similar sections with their lift values
    top_similar_sections = sorted_rules.head(top_n)[['antecedents', 'lift']]

    return top_similar_sections


# Example usage:
# Trim stray whitespace from the section names before they are grouped on.
merged_df['name'] = merged_df['name'].str.strip()


section_name = 'bakery'  # Specify the section for finding related sections
top_similar_sections = find_most_similar_sections(section_name, merged_df, top_n=3)
print("Section: " + section_name)
print()
# Label spelling corrected ("Similiar" -> "Similar").
print("Similar Sections for " + section_name)
print(top_similar_sections)


Section: bakery

Similiar Sections for bakery
               antecedents      lift
4  (dairy eggs, beverages)  1.666667
0              (beverages)  1.111111
2             (dairy eggs)  1.111111
