# In [None]:
import polars as pl
import random
from efficient_apriori import apriori
from ast import literal_eval
from collections import Counter


# Data loading and preprocessing
def load_data(num_orders=1000, num_products_per_order=(1, 5), seed=None):
    """Generate a synthetic transactions table with random order/product pairs.

    Each order draws a basket size uniformly from ``num_products_per_order``
    (both ends inclusive) and then picks that many distinct products from a
    fixed ten-item catalogue.

    Args:
        num_orders: Number of orders to generate; order ids run from 1 to
            ``num_orders`` inclusive.
        num_products_per_order: ``(low, high)`` bounds for the basket size.
            ``high`` is capped at the catalogue size, because an order cannot
            contain more distinct products than exist.
        seed: Optional seed for a private random generator so the output is
            reproducible. When ``None`` the module-level ``random`` state is
            used.

    Returns:
        A Polars DataFrame in long format, one row per (order, product) pair,
        with the columns ``order_id`` and ``product_name``.

    Raises:
        ValueError: If ``low`` is negative or greater than the (capped)
            ``high`` bound; raised by the underlying ``random`` calls.
    """
    products = ["Bread", "Butter", "Milk", "Eggs", "Cheese", "Cereal", "Juice", "Apples", "Bananas", "Chicken"]

    # Fall back to the shared module-level generator when no seed is given,
    # so callers that seed ``random`` globally keep getting the same stream.
    rng = random if seed is None else random.Random(seed)

    low, high = num_products_per_order
    # Sampling without replacement cannot return more items than the catalogue
    # holds; without this cap ``sample`` raises ValueError for large draws.
    high = min(high, len(products))

    order_ids = []
    product_names = []
    for order_id in range(1, num_orders + 1):
        basket = rng.sample(products, rng.randint(low, high))
        order_ids.extend([order_id] * len(basket))
        product_names.extend(basket)

    return pl.DataFrame({"order_id": order_ids, "product_name": product_names})

def preprocess_transactions(data: pl.DataFrame):
    """Collapse the long-format table into one product list per order.

    Args:
        data: Frame with an ``order_id`` and a ``product_name`` column.

    Returns:
        A list of lists; each inner list holds the product names of a single
        order. No group ordering is requested from Polars, so the order of the
        outer list should not be relied upon.
    """
    baskets = data.group_by("order_id").agg(pl.col("product_name"))
    return baskets["product_name"].to_list()

def run_apriori(transactions, min_support=0.5, min_confidence=0.6):
    """Mine association rules and order them from highest to lowest lift.

    Args:
        transactions: Iterable of baskets handed straight to ``apriori``.
        min_support: Minimum support threshold forwarded to ``apriori``.
        min_confidence: Minimum confidence threshold forwarded to ``apriori``.

    Returns:
        A new list of the mined rules, sorted by their ``lift`` attribute in
        descending order. The frequent itemsets are discarded.
    """
    _, mined_rules = apriori(
        transactions,
        min_support=min_support,
        min_confidence=min_confidence,
    )

    def lift_of(rule):
        return rule.lift

    return sorted(mined_rules, key=lift_of, reverse=True)

def format_rules(rules):
    """Convert mined rules into a Polars DataFrame, one row per rule.

    Args:
        rules: Iterable of rule objects exposing ``lhs``, ``rhs``,
            ``confidence``, ``support`` and ``lift`` attributes.

    Returns:
        A DataFrame with the columns ``antecedent``, ``consequent``,
        ``confidence``, ``support`` and ``lift``. ``antecedent`` and
        ``consequent`` hold ``str(rule.lhs)`` / ``str(rule.rhs)``;
        ``query_rules`` parses the antecedent back with ``literal_eval``.
        An empty ``rules`` input yields an empty frame that still carries all
        five columns.
    """
    # Explicit schema: without it an empty rule list produces a frame with no
    # columns at all, and every later ``pl.col(...)`` lookup fails.
    schema = {
        "antecedent": pl.Utf8,
        "consequent": pl.Utf8,
        "confidence": pl.Float64,
        "support": pl.Float64,
        "lift": pl.Float64,
    }
    rows = [
        {
            "antecedent": str(rule.lhs),
            "consequent": str(rule.rhs),
            "confidence": rule.confidence,
            "support": rule.support,
            "lift": rule.lift,
        }
        for rule in rules
    ]
    return pl.DataFrame(rows, schema=schema)

def query_rules(rules_df, antecedents=None, min_confidence=None, top_n=None):
    """Filter a rules DataFrame by antecedent items, confidence and rank.

    Args:
        rules_df: Frame with a string ``antecedent`` column plus ``confidence``
            and ``lift`` columns (the layout produced by ``format_rules``).
        antecedents: Optional iterable of items. Only rules whose antecedent
            contains *every* one of these items are kept. ``None`` or an empty
            collection disables this filter.
        min_confidence: Optional lower bound (inclusive) on ``confidence``.
        top_n: Optional row limit; when given, rows are sorted by ``lift``
            descending and only the first ``top_n`` are returned. ``0`` yields
            an empty result.

    Returns:
        The filtered DataFrame. Its ``antecedent`` column is converted from the
        stored string form into a list of strings. A frame without any columns
        (built from zero rules) is returned unchanged.
    """
    # A frame created from an empty rule list has no columns, so there is
    # nothing to parse or filter and column lookups would raise.
    if rules_df.width == 0:
        return rules_df

    # Parse the stored string representation back into a real list column.
    query = rules_df.with_columns(
        pl.col("antecedent").map_elements(literal_eval, return_dtype=pl.List(pl.Utf8))
    )

    if antecedents:
        required_items = set(antecedents)
        # Keep a rule only when all requested items appear in its antecedent.
        query = query.filter(
            pl.col("antecedent").map_elements(
                lambda x: required_items.issubset(set(x)),
                return_dtype=pl.Boolean,
            )
        )

    # Compare against None explicitly so that 0 / 0.0 are honoured as real
    # arguments instead of being treated as "not provided".
    if min_confidence is not None:
        query = query.filter(pl.col("confidence") >= min_confidence)

    if top_n is not None:
        query = query.sort("lift", descending=True).head(top_n)

    return query

# Business Use Case Implementations
def predict_next_purchase(rules_df, products_in_basket):
    """Return the rules whose antecedent contains every product in the basket.

    Delegates to ``query_rules`` with ``products_in_basket`` as the antecedent
    filter; no confidence threshold or row limit is applied.
    """
    matching_rules = query_rules(rules_df, antecedents=products_in_basket)
    return matching_rules

def basket_closure_prediction(rules_df, products_in_basket, threshold=0.7):
    """Report whether any sufficiently confident rule still applies to a basket.

    Args:
        rules_df: Rules frame passed through to ``query_rules``.
        products_in_basket: Items used as the antecedent filter.
        threshold: Minimum confidence passed to ``query_rules``.

    Returns:
        A dict with the keys ``basket`` (the input items), ``can_be_closed``
        (True when no rule matched) and ``suggested_additions`` (the matching
        rules as a list of dicts, or None when there are none).
    """
    confident_rules = query_rules(
        rules_df,
        antecedents=products_in_basket,
        min_confidence=threshold,
    )
    nothing_to_suggest = confident_rules.is_empty()
    suggestions = None if nothing_to_suggest else confident_rules.to_dicts()
    return {
        "basket": products_in_basket,
        "can_be_closed": nothing_to_suggest,
        "suggested_additions": suggestions,
    }

def cross_sell_opportunities(rules_df, products_in_basket):
    """Return the rules whose antecedent contains every product in the basket.

    Delegates to ``query_rules`` with only the antecedent filter set.
    """
    cross_sell_rules = query_rules(rules_df, antecedents=products_in_basket)
    return cross_sell_rules

def abandoned_cart_recovery(rules_df, products_in_basket, threshold=0.8):
    """Return basket-matching rules whose confidence is at least ``threshold``.

    Delegates to ``query_rules`` with ``products_in_basket`` as the antecedent
    filter and ``threshold`` as the minimum confidence.
    """
    recovery_rules = query_rules(
        rules_df,
        antecedents=products_in_basket,
        min_confidence=threshold,
    )
    return recovery_rules

def frequent_product_combinations(rules_df):
    """Return the five top-ranked rules.

    Delegates to ``query_rules`` with ``top_n=5``, which ranks rules by lift
    in descending order.
    """
    top_rules = query_rules(rules_df, top_n=5)
    return top_rules

def personalized_shopping_lists(rules_df, user_purchase_history):
    """Build a shopping-list suggestion from a user's purchase history.

    Treats the purchase history as the current basket and delegates to
    ``predict_next_purchase``.
    """
    shopping_list_rules = predict_next_purchase(rules_df, user_purchase_history)
    return shopping_list_rules

def seasonal_and_promotional_recommendations(rules_df, seasonal_products):
    """Recommend products associated with seasonal or promotional items.

    Treats ``seasonal_products`` as the basket and delegates to
    ``cross_sell_opportunities``.
    """
    seasonal_rules = cross_sell_opportunities(rules_df, seasonal_products)
    return seasonal_rules

# Build the synthetic dataset and reshape it into one basket per order
data = load_data()
transactions = preprocess_transactions(data)
num_transactions = len(transactions)

# Tally how often each product occurs across all baskets
flat_transactions = [product for basket in transactions for product in basket]
item_counts = Counter(flat_transactions)

# Support threshold: 2% of the rarest product's share of transactions,
# floored at 0.01 so it never drops below 1% of all transactions
min_support = max(0.01, 0.02 * (min(item_counts.values()) / num_transactions))

# Confidence threshold: 50 / number of transactions, clamped to [0.05, 0.3]
min_confidence = min(0.3, max(0.05, 50 / num_transactions))

# Mine the association rules (returned sorted by lift, highest first)
rules = run_apriori(transactions, min_support, min_confidence)


# Tabulate the rules as a DataFrame for querying
rules_df = format_rules(rules)

# Example Use Cases
def run_use_cases():
    """Run every example scenario against the module-level ``rules_df``.

    Returns:
        A list of dicts, one per scenario, each with a ``use_case`` label and
        an ``output`` value: the matching rules as a list of dicts or, for the
        basket-closure scenario, the dict that helper returns.
    """
    return [
        {
            "use_case": "Predict Next Purchase for {'Bread', 'Butter'}",
            "output": predict_next_purchase(rules_df, ["Bread", "Butter"]).to_dicts(),
        },
        {
            "use_case": "Basket Closure Prediction for {'Bread', 'Milk'}",
            "output": basket_closure_prediction(rules_df, ["Bread", "Milk"]),
        },
        {
            "use_case": "Cross-Sell Opportunities for {'Bread'}",
            "output": cross_sell_opportunities(rules_df, ["Bread"]).to_dicts(),
        },
        {
            "use_case": "Abandoned Cart Recovery for {'Eggs'}",
            "output": abandoned_cart_recovery(rules_df, ["Eggs"]).to_dicts(),
        },
        {
            "use_case": "Frequent Product Combinations",
            "output": frequent_product_combinations(rules_df).to_dicts(),
        },
        {
            "use_case": "Personalized Shopping List for {'Bread'}",
            "output": personalized_shopping_lists(rules_df, ["Bread"]).to_dicts(),
        },
        {
            "use_case": "Seasonal/Promotional Recommendations for {'Eggs'}",
            "output": seasonal_and_promotional_recommendations(rules_df, ["Eggs"]).to_dicts(),
        },
    ]

# Run all example scenarios; the result is a list of dicts, each with a
# "use_case" label and an "output" value.
use_case_results = run_use_cases()

# Bare expression: displays the results when it is the last statement of a
# notebook cell; it has no visible effect when the file runs as a plain script.
use_case_results