In [None]:
# 📁 notebooks/6_next_product_prediction.ipynb

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import networkx as nx
from collections import defaultdict

# Load the data
# NOTE(review): machine-specific absolute path — point it at your local copy of the workbook.
path = r"E:\c drive\amazon\data\online_retail\online_retail_II.xlsx"
# NOTE(review): read_excel loads only the first sheet by default — confirm the
# workbook has no further sheets that should be included.
# The cleaning steps are chained (no inplace=True) so that nothing mutates a
# filtered slice; this avoids SettingWithCopyWarning and keeps the cell idempotent.
df = (
    pd.read_excel(path, parse_dates=['InvoiceDate'])
    # Keep only rows with a positive quantity and a positive price.
    .loc[lambda frame: (frame['Quantity'] > 0) & (frame['Price'] > 0)]
    # Rows without a customer cannot be attached to any purchase sequence.
    .dropna(subset=['Customer ID'])
    # Order every customer's rows chronologically.
    .sort_values(by=['Customer ID', 'InvoiceDate'])
)

# 🔁 Generate product transition sequences per customer
# Each entry is an ordered pair (product, product that follows it in the
# same customer's chronological purchase list).
transitions = []
for _, group in df.groupby('Customer ID'):
    sequence = (
        # A stable sort keeps rows that share a timestamp in their existing
        # order; the default algorithm may shuffle such ties arbitrarily.
        group.sort_values('InvoiceDate', kind='stable')['Description']
        # Missing descriptions would otherwise surface as NaN "products".
        .dropna()
        .tolist()
    )
    # Pair every item with its successor; sequences shorter than two items
    # contribute nothing.
    transitions.extend(zip(sequence, sequence[1:]))

# 🔢 Count transitions
# Tally how often each (from, to) pair occurs; unseen pairs start at 0.
transition_matrix = defaultdict(int)
for origin, destination in transitions:
    transition_matrix[origin, destination] += 1

# 📊 Convert to DataFrame
# One row per distinct (From, To) pair with its occurrence count. The columns
# are named explicitly so the frame keeps its From/To/Count schema even when
# no transitions were found (an empty record list would yield no columns).
trans_df = pd.DataFrame(
    [(origin, destination, count) for (origin, destination), count in transition_matrix.items()],
    columns=['From', 'To', 'Count'],
)

# 📈 Predict next item given current item
# Maps every product to the follower with the highest transition count.
# One stable descending sort plus drop_duplicates replaces re-filtering the
# whole table once per product; among equal counts the pair that appears
# first in trans_df wins, which makes the result deterministic.
ranked = trans_df.sort_values('Count', ascending=False, kind='stable')
best = (
    ranked.drop_duplicates(subset='From', keep='first')
    # A missing source product can never be looked up, so leave it out.
    .dropna(subset=['From'])
    # Restore first-appearance order of the source products.
    .sort_index()
)
next_item = dict(zip(best['From'], best['To']))

# 🔍 Function to recommend next likely item

def predict_next_item(current):
    """Return the most frequent follower of ``current`` from ``next_item``.

    An exact key match is tried first. If that fails and ``current`` is a
    string, the lookup is retried ignoring letter case and surrounding
    whitespace, so hand-typed names still resolve.

    Parameters
    ----------
    current : hashable
        Product description to look up.

    Returns
    -------
    The stored follower for the matched product, or the string
    "No strong transition found" when nothing matches.
    """
    if current in next_item:
        return next_item[current]
    if isinstance(current, str):
        wanted = current.strip().casefold()
        # Linear scan only happens on an exact-match miss.
        for known, follower in next_item.items():
            if isinstance(known, str) and known.strip().casefold() == wanted:
                return follower
    return "No strong transition found"

# 🧪 Test: Predict next likely item
# Smoke test using the description that occurs most often in the data.
description_counts = df['Description'].value_counts()
example_item = description_counts.idxmax()
print(f"If a customer buys: {example_item}")
print(f"Next likely product: {predict_next_item(example_item)}")

# 🔁 Save transition DataFrame
# Write the pair counts to the working directory, omitting the index column.
trans_df.to_csv(path_or_buf="product_transitions.csv", index=False)





If a customer buys: WHITE HANGING HEART T-LIGHT HOLDER
Next likely product: RED HANGING HEART T-LIGHT HOLDER


In [3]:
# 🔁 Interactive: Ask user to enter a product name
# Keeps prompting until the user types 'exit' or the input stream ends.
while True:
    try:
        current = input("\nEnter current product name (or type 'exit' to quit): ").strip()
    except (EOFError, KeyboardInterrupt):
        # No stdin available or the prompt was interrupted: leave the loop
        # quietly instead of ending the cell with a traceback.
        break
    if current.lower() == 'exit':
        break
    if not current:
        # Nothing was typed; ask again rather than looking up an empty name.
        continue
    result = predict_next_item(current)
    print(f"➡️ Next likely product: {result}")


➡️ Next likely product: HOT WATER BOTTLE TEA AND SYMPATHY
