In [4]:
import pandas as pd
import numpy as np
from mlxtend.frequent_patterns import apriori, association_rules
from datetime import datetime
import seaborn as sns
import matplotlib.pyplot as plt

# 1. Load the data
products_df = pd.read_csv('products.csv')
stores_df = pd.read_csv('stores.csv')
transactions_df = pd.read_csv('transactions.csv')

# 2. Data preprocessing
# Convert date column to datetime
transactions_df['date'] = pd.to_datetime(transactions_df['date'], dayfirst=True)

# Add time-based features
transactions_df['day_of_week'] = transactions_df['date'].dt.day_name()
transactions_df['hour'] = transactions_df['date'].dt.hour
transactions_df['is_weekend'] = transactions_df['date'].dt.dayofweek.isin([5, 6])

# Calculate transaction values
transactions_df['transaction_value'] = transactions_df['quantity'] * transactions_df['unit_price']

# 3. Create basket matrix for association rules
basket_matrix = pd.crosstab(
    transactions_df['transaction_id'],
    transactions_df['category']
)
basket_matrix = (basket_matrix > 0).astype(int)

# 4. Generate association rules
frequent_itemsets = apriori(
    basket_matrix,
    min_support=0.05,  # 5% minimum support as required
    use_colnames=True
)

rules = association_rules(
    frequent_itemsets,
    metric="lift",
    min_threshold=1
)
rules = rules.sort_values('lift', ascending=False)

# 5. Store performance analysis
store_metrics = transactions_df.groupby('store_id').agg({
    'transaction_id': 'nunique',  # Number of transactions
    'transaction_value': 'mean',   # Average transaction value
    'is_loyalty_member': 'mean',   # Loyalty membership rate
    'quantity': 'sum'              # Total items sold
}).reset_index()

# Merge with store information
store_metrics = store_metrics.merge(stores_df, on='store_id')

# 6. Time-based analysis
hourly_patterns = transactions_df.groupby('hour').agg({
    'transaction_id': 'nunique',
    'transaction_value': 'mean'
}).reset_index()

weekly_patterns = transactions_df.groupby('day_of_week').agg({
    'transaction_id': 'nunique',
    'transaction_value': 'mean'
}).reset_index()

# 7. Promotional recommendations
high_margin_categories = products_df[
    products_df['margin'] > products_df['margin'].mean()
]['category'].tolist()

promo_recommendations = rules[
    rules['consequents'].apply(lambda x: any(item in high_margin_categories for item in x))
].head(10)

# 8. ROI Analysis
current_avg_transaction = transactions_df.groupby('transaction_id')['transaction_value'].sum().mean()
target_avg_transaction = current_avg_transaction * 1.10  # 10% increase target
total_transactions = transactions_df['transaction_id'].nunique()
potential_revenue_increase = (target_avg_transaction - current_avg_transaction) * total_transactions

# 9. Store layout recommendations based on association rules
layout_recommendations = rules[rules['lift'] > 2].head(10)  # Get top 10 strongest associations

# 10. Print key insights
print("\n=== Market Basket Analysis Results ===")
print(f"\nCurrent Average Transaction Value: ${current_avg_transaction:.2f}")
print(f"Target Average Transaction Value: ${target_avg_transaction:.2f}")
print(f"Potential Annual Revenue Increase: ${potential_revenue_increase:.2f}")

print("\nTop 5 Product Category Associations:")
print(rules[['antecedents', 'consequents', 'lift']].head().to_string())

print("\nStore Performance Summary:")
print(store_metrics[['store_id', 'location_type', 'store_size', 'transaction_value']].head().to_string())

# 11. Save results to files
rules.to_csv('association_rules.csv', index=False)
store_metrics.to_csv('store_performance.csv', index=False)
promo_recommendations.to_csv('promotional_recommendations.csv', index=False)

# 12. Create visualizations
plt.figure(figsize=(10, 6))
sns.barplot(data=hourly_patterns, x='hour', y='transaction_value')
plt.title('Average Transaction Value by Hour')
plt.savefig('hourly_patterns.png')
plt.close()

plt.figure(figsize=(10, 6))
sns.barplot(data=weekly_patterns, x='day_of_week', y='transaction_value')
plt.title('Average Transaction Value by Day of Week')
plt.xticks(rotation=45)
plt.savefig('weekly_patterns.png')
plt.close()

  transactions_df['date'] = pd.to_datetime(transactions_df['date'], dayfirst=True)



=== Market Basket Analysis Results ===

Current Average Transaction Value: $52.43
Target Average Transaction Value: $57.68
Potential Annual Revenue Increase: $1048675.37

Top 5 Product Category Associations:
                                antecedents                            consequents      lift
6355                    (Snacks, Beverages)       (Meat, Household, Fresh Produce)  2.348230
6338       (Meat, Household, Fresh Produce)                    (Snacks, Beverages)  2.348230
4635  (Bakery, Canned Goods, Fresh Produce)                      (Meat, Household)  2.347511
4638                      (Meat, Household)  (Bakery, Canned Goods, Fresh Produce)  2.347511
4515  (Bakery, Canned Goods, Fresh Produce)              (Frozen Foods, Household)  2.347405

Store Performance Summary:
   store_id location_type store_size  transaction_value
0         1         Rural     Medium          12.332996
1         2         Urban      Small          12.246589
2         3         Rural     Medium 