In [None]:
# Install required packages (for Google Colab)
!pip install -q faker pandas numpy matplotlib seaborn scikit-learn scipy

# Import all required libraries
# Standard library
import ast
import warnings
from datetime import datetime, timedelta

# Data handling and visualisation
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from faker import Faker

# Machine Learning
from scipy.sparse import csr_matrix
from sklearn.decomposition import TruncatedSVD
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder

# Utilities
# Silence all warnings for the rest of the notebook (this also hides
# deprecation notices from pandas / scikit-learn).
warnings.filterwarnings('ignore')

# Set display options
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
plt.style.use('seaborn-v0_8-darkgrid')

print("✅ All libraries imported successfully!")
# Decide the environment name first, then format it. Previously the conditional
# expression wrapped the whole f-string, so the "Running on:" prefix was only
# printed on Colab and a local run printed just "Local Environment".
runtime_environment = "Google Colab" if 'google.colab' in str(get_ipython()) else "Local Environment"
print(f"Running on: {runtime_environment}")


In [None]:
# Load the datasets
# For Google Colab: Upload your CSV files using the file upload button
# Or mount Google Drive if files are stored there

# Uncomment below if using Google Drive:
# from google.colab import drive
# drive.mount('/content/drive')

# Load datasets (adjust paths as needed)
try:
    users_df = pd.read_csv('fake_users.csv')
    products_df = pd.read_csv('fake_products.csv')
    transactions_df = pd.read_csv('fake_transactions.csv')
    print("✅ Datasets loaded successfully!")
except FileNotFoundError:
    # Explain what is missing, then re-raise so "Run All" stops here instead of
    # failing later with a confusing NameError.
    print("❌ Please upload the CSV files: fake_users.csv, fake_products.csv, fake_transactions.csv")
    print("Use the file upload button in Colab or mount your Google Drive")
    raise

# Convert date columns to datetime
date_columns = {
    'users_df': ['last_purchase_date'],
    'products_df': ['packaging_date', 'expiry_date'],
    'transactions_df': ['purchase_date']
}

# Explicit name -> frame lookup; replaces eval(df_name), which executed a
# string as code just to fetch a variable.
frames_by_name = {
    'users_df': users_df,
    'products_df': products_df,
    'transactions_df': transactions_df
}

for df_name, cols in date_columns.items():
    df = frames_by_name[df_name]
    for col in cols:
        # Columns absent from a CSV are skipped rather than treated as errors.
        if col in df.columns:
            df[col] = pd.to_datetime(df[col])

print("\nDataset shapes:")
print(f"Users: {users_df.shape}")
print(f"Products: {products_df.shape}")
print(f"Transactions: {transactions_df.shape}")


In [None]:
# Add essential time-based features to products
current_date = pd.Timestamp.now()

# Calculate days until expiry.
# Measure from midnight today rather than from the current clock time:
# Timedelta.days floors, so (today 00:00 - now) is a negative fraction that
# becomes -1. That made every product expiring today look already expired
# (and get removed below), and made "== 0" actually mean "expires tomorrow".
products_df['days_until_expiry'] = (
    products_df['expiry_date'] - current_date.normalize()
).dt.days

# Calculate total shelf life
products_df['total_shelf_life'] = (products_df['expiry_date'] - products_df['packaging_date']).dt.days

# Add shelf_life_days if not present (some scripts expect this)
if 'shelf_life_days' not in products_df.columns:
    products_df['shelf_life_days'] = products_df['total_shelf_life']

# Display initial statistics
print("📊 Initial Product Statistics:")
print(f"Total products: {len(products_df)}")
print(f"Products already expired: {len(products_df[products_df['days_until_expiry'] < 0])}")
print(f"Products expiring today: {len(products_df[products_df['days_until_expiry'] == 0])}")
print(f"Products expiring within 30 days: {len(products_df[products_df['days_until_expiry'] <= 30])}")

# Remove expired products
expired_products = products_df[products_df['days_until_expiry'] < 0]
if len(expired_products) > 0:
    print(f"\n🗑️ Removing {len(expired_products)} expired products from the dataset")
    print("Sample of removed products:")
    print(expired_products[['product_id', 'name', 'category', 'expiry_date', 'days_until_expiry']].head())

# Keep only products that haven't expired yet
# NOTE: this rebinds products_df, so re-running the cell operates on the
# already-filtered frame (the counts above will then report 0 expired).
products_df = products_df[products_df['days_until_expiry'] >= 0].reset_index(drop=True)

# Also remove transactions for expired products to maintain consistency
valid_product_ids = products_df['product_id'].unique()
initial_transaction_count = len(transactions_df)
transactions_df = transactions_df[transactions_df['product_id'].isin(valid_product_ids)]
removed_transactions = initial_transaction_count - len(transactions_df)

print("\n✅ Data cleaned:")
print(f"   - Remaining products: {len(products_df)}")
print(f"   - Removed transactions for expired products: {removed_transactions}")
print(f"   - Remaining transactions: {len(transactions_df)}")

# Display sample of valid products
print("\n📦 Sample of valid products:")
print(products_df[['product_id', 'name', 'category', 'expiry_date', 'days_until_expiry']].head())


In [None]:
class DynamicThresholdCalculator:
    """
    Calculates dynamic thresholds (in days) for dead stock prediction.

    Thresholds are derived in two stages: a per-category baseline computed from
    the category's average shelf life and sales velocity, then a per-product
    value that scales that baseline by the product's own sales velocity, price
    and current discount and clamps it to bounds based on its shelf life.

    Parameters
    ----------
    products_df : pandas.DataFrame
        Must provide ``product_id``, ``category``, ``shelf_life_days`` and
        ``price_mrp``; ``current_discount_percent`` is read when present.
    transactions_df : pandas.DataFrame
        Must provide ``product_id``, ``quantity`` and a datetime
        ``purchase_date``.
    """

    # Baseline used for a product whose category has no computed threshold.
    DEFAULT_CATEGORY_THRESHOLD = 30
    # Smallest threshold ever returned; keeps the value usable as a divisor.
    MIN_THRESHOLD_DAYS = 1

    def __init__(self, products_df, transactions_df):
        self.products_df = products_df
        self.transactions_df = transactions_df
        self.category_thresholds = {}
        self.product_thresholds = {}

    def calculate_category_baseline_thresholds(self):
        """Calculate baseline thresholds for each product category.

        The baseline is 20% of the category's mean shelf life, scaled by 0.7
        when the category sells more than 10 units/day, 1.0 when it sells more
        than 5, and 1.3 otherwise (including categories with no sales).

        Returns
        -------
        dict
            ``self.category_thresholds``, mapping category -> threshold (int
            days). Categories whose mean shelf life is missing are left out.
        """
        # Group by category and calculate metrics
        category_metrics = self.products_df.groupby('category').agg({
            'shelf_life_days': 'mean',
            'product_id': 'count'
        }).rename(columns={'product_id': 'product_count'})

        # Calculate sales velocity by category
        sales_by_category = self.transactions_df.merge(
            self.products_df[['product_id', 'category']],
            on='product_id'
        )

        category_velocity = sales_by_category.groupby('category').agg({
            'quantity': 'sum',
            # Inclusive day span between first and last sale in the category.
            'purchase_date': lambda x: (x.max() - x.min()).days + 1
        }).rename(columns={'purchase_date': 'days_active'})

        category_velocity['avg_daily_sales'] = (
            category_velocity['quantity'] / category_velocity['days_active']
        )

        # Left merge: categories without any sales keep a NaN velocity.
        category_analysis = category_metrics.merge(
            category_velocity[['avg_daily_sales']],
            left_index=True,
            right_index=True,
            how='left'
        )

        # Calculate dynamic thresholds
        for category in category_analysis.index:
            avg_shelf_life = category_analysis.loc[category, 'shelf_life_days']
            avg_velocity = category_analysis.loc[category, 'avg_daily_sales']

            # int(NaN) raises ValueError; without shelf-life data there is no
            # meaningful baseline, so the category falls back to the default.
            if pd.isna(avg_shelf_life):
                continue

            # Base threshold is 20% of average shelf life
            base_threshold = avg_shelf_life * 0.2

            # Adjust based on velocity. A NaN velocity fails both comparisons
            # and therefore gets the slow-moving factor.
            if avg_velocity > 10:
                velocity_factor = 0.7
            elif avg_velocity > 5:
                velocity_factor = 1.0
            else:
                velocity_factor = 1.3

            self.category_thresholds[category] = int(base_threshold * velocity_factor)

        return self.category_thresholds

    def calculate_product_specific_threshold(self, product_id):
        """Calculate the threshold (in days) for a specific product.

        Parameters
        ----------
        product_id
            Identifier looked up in ``products_df['product_id']``.

        Returns
        -------
        int
            Threshold in days, never below ``MIN_THRESHOLD_DAYS``.

        Raises
        ------
        IndexError
            If ``product_id`` is not present in ``products_df``.
        """
        product = self.products_df[self.products_df['product_id'] == product_id].iloc[0]

        # Start with category baseline
        category = product['category']
        base_threshold = self.category_thresholds.get(
            category, self.DEFAULT_CATEGORY_THRESHOLD
        )

        # Get product's sales history
        product_sales = self.transactions_df[
            self.transactions_df['product_id'] == product_id
        ]

        # Factor 1: Sales velocity (transactions per day, not units per day)
        if len(product_sales) > 0:
            days_on_market = (product_sales['purchase_date'].max() -
                              product_sales['purchase_date'].min()).days + 1
            sales_velocity = len(product_sales) / days_on_market

            if sales_velocity > 2:
                velocity_multiplier = 0.5
            elif sales_velocity > 1:
                velocity_multiplier = 0.7
            elif sales_velocity > 0.5:
                velocity_multiplier = 1.0
            else:
                velocity_multiplier = 1.5
        else:
            # Never sold: widen the warning window the most.
            velocity_multiplier = 2.0

        # Factor 2: Price
        price = product['price_mrp']
        if price > 300:
            price_multiplier = 1.2
        elif price < 100:
            price_multiplier = 0.8
        else:
            price_multiplier = 1.0

        # Factor 3: Current discount
        current_discount = product.get('current_discount_percent', 0)
        if current_discount > 30:
            discount_multiplier = 0.7
        elif current_discount > 0:
            discount_multiplier = 0.9
        else:
            discount_multiplier = 1.0

        # Calculate final threshold
        dynamic_threshold = (base_threshold *
                             velocity_multiplier *
                             price_multiplier *
                             discount_multiplier)

        # Ensure reasonable bounds
        shelf_life = product['shelf_life_days']
        min_threshold = max(3, shelf_life * 0.05)
        max_threshold = min(60, shelf_life * 0.4)
        # For shelf lives under 7.5 days the upper bound drops below the lower
        # one. Make the "upper bound wins" outcome explicit instead of relying
        # on np.clip being called with inverted bounds.
        lower_bound = min(min_threshold, max_threshold)

        final_threshold = int(np.clip(dynamic_threshold, lower_bound, max_threshold))

        # Very short shelf lives used to truncate to 0 here, which breaks any
        # consumer that divides by the threshold.
        return max(self.MIN_THRESHOLD_DAYS, final_threshold)

# Initialize the threshold calculator
# Uses the products/transactions frames as left by the earlier cleaning cell.
threshold_calculator = DynamicThresholdCalculator(products_df, transactions_df)
# Fills and returns threshold_calculator.category_thresholds (category -> int days).
category_thresholds = threshold_calculator.calculate_category_baseline_thresholds()

# Print one line per category so the baselines can be checked by eye.
print("Dynamic Category Thresholds:")
for category, threshold in category_thresholds.items():
    print(f"  {category}: {threshold} days")


In [None]:
class DynamicRecommendationSystem:
    """
    Advanced recommendation system combining content-based and collaborative filtering.

    Content-based scores come from a cosine-similarity matrix over TF-IDF text
    features and scaled numerical product features. Collaborative scores come
    from a truncated SVD of the user x product interaction matrix. The two are
    blended with configurable weights, filtered for diet/allergen compatibility
    and optionally boosted for products close to expiry.

    Parameters
    ----------
    users_df : pandas.DataFrame
        Needs ``user_id``, ``diet_type`` and ``allergies``.
    products_df : pandas.DataFrame
        Needs ``product_id``, ``name``, ``category``, ``diet_type``, ``brand``,
        ``allergens``, ``price_mrp``, ``weight_grams``, ``shelf_life_days``,
        ``current_discount_percent`` and ``days_until_expiry``.
    transactions_df : pandas.DataFrame
        Needs ``user_id``, ``product_id``, ``quantity`` and
        ``user_engaged_with_deal``.
    """

    def __init__(self, users_df, products_df, transactions_df):
        self.users_df = users_df
        self.products_df = products_df
        self.transactions_df = transactions_df

        # Preprocessing
        self.le_diet = LabelEncoder()
        self.le_category = LabelEncoder()

        # Models (built lazily on the first recommendation request)
        self.content_similarity_matrix = None
        self.user_item_matrix = None
        self.user_factors = None
        self.item_factors = None
        self.product_features = None

    @staticmethod
    def _parse_list_field(value):
        """Turn a list-like cell (e.g. the string "['nuts', 'soy']") into a list.

        Strings are parsed with ``ast.literal_eval`` instead of ``eval`` so cell
        text can never execute code. Missing values (None / NaN) and blank
        strings yield an empty list; a lone scalar becomes a one-item list.

        Raises
        ------
        ValueError
            If a string cell is not a valid Python literal.
        """
        if isinstance(value, str):
            text = value.strip()
            if not text:
                return []
            try:
                parsed = ast.literal_eval(text)
            except (ValueError, SyntaxError) as exc:
                raise ValueError(f"Cannot parse list field: {value!r}") from exc
            if parsed is None:
                return []
            if isinstance(parsed, (list, tuple, set, frozenset)):
                return list(parsed)
            return [parsed]
        if isinstance(value, (list, tuple, set, frozenset, np.ndarray)):
            return list(value)
        if value is None or pd.isna(value):
            return []
        return list(value)

    def prepare_content_features(self):
        """Create rich product features for content-based filtering.

        Returns
        -------
        pandas.DataFrame
            Copy of ``products_df`` with ``content_text``, ``allergen_text``,
            ``diet_encoded`` and ``category_encoded`` added. Also stored on
            ``self.product_features``.
        """
        products = self.products_df.copy()

        # Price is bucketed into five equal-width bands so it can act as a token.
        price_band = pd.cut(
            products['price_mrp'],
            bins=5,
            labels=['very_low', 'low', 'medium', 'high', 'very_high']
        ).astype(str)

        # A missing text cell would turn the whole concatenation into NaN, which
        # TfidfVectorizer rejects; treat missing text as empty instead.
        name, category, diet, brand = (
            products[col].fillna('').astype(str)
            for col in ('name', 'category', 'diet_type', 'brand')
        )
        products['content_text'] = (
            name + ' ' + category + ' ' + diet + ' ' + brand + ' ' + 'price_' + price_band
        )

        # Add allergen information
        products['allergen_text'] = products['allergens'].apply(
            lambda cell: ' '.join(str(item) for item in self._parse_list_field(cell))
        )
        products['content_text'] += ' ' + products['allergen_text']

        # Encode categorical features
        products['diet_encoded'] = self.le_diet.fit_transform(products['diet_type'])
        products['category_encoded'] = self.le_category.fit_transform(products['category'])

        self.product_features = products
        return products

    def build_content_similarity_matrix(self):
        """Build product similarity matrix using content features.

        Returns
        -------
        numpy.ndarray
            Square cosine-similarity matrix whose rows/columns follow the row
            order of ``products_df``.
        """
        products = self.prepare_content_features()

        # TF-IDF for text features
        tfidf = TfidfVectorizer(max_features=100, stop_words='english')
        tfidf_matrix = tfidf.fit_transform(products['content_text'])

        # Numerical features
        numerical_features = ['price_mrp', 'weight_grams', 'shelf_life_days',
                              'current_discount_percent', 'diet_encoded', 'category_encoded']

        scaler = StandardScaler()
        numerical_matrix = scaler.fit_transform(products[numerical_features].fillna(0))

        # Combine features: text weighted 0.6, numerical weighted 0.4
        combined_features = np.hstack([
            tfidf_matrix.toarray() * 0.6,  # Text features
            numerical_matrix * 0.4          # Numerical features
        ])

        # Calculate similarity
        self.content_similarity_matrix = cosine_similarity(combined_features)
        return self.content_similarity_matrix

    def build_collaborative_filtering_model(self, n_factors=20):
        """Build collaborative filtering model using matrix factorization.

        Parameters
        ----------
        n_factors : int
            Requested number of latent factors; reduced to the number of
            products when the catalogue is smaller than that.

        Returns
        -------
        tuple
            ``(user_factors, item_factors)`` from the truncated SVD.
        """
        # Create user-item interaction matrix
        pivot_table = self.transactions_df.pivot_table(
            index='user_id',
            columns='product_id',
            values='quantity',
            aggfunc='sum',
            fill_value=0
        )

        # Add implicit feedback
        engagement_pivot = self.transactions_df.pivot_table(
            index='user_id',
            columns='product_id',
            values='user_engaged_with_deal',
            aggfunc='mean',
            fill_value=0
        )

        # Combine explicit and implicit feedback
        self.user_item_matrix = pivot_table + 0.5 * engagement_pivot

        # Convert to sparse matrix
        sparse_matrix = csr_matrix(self.user_item_matrix.values)

        # TruncatedSVD rejects more components than there are features
        # (products), so cap the request for small catalogues.
        n_components = max(1, min(n_factors, sparse_matrix.shape[1]))

        # Apply SVD
        svd = TruncatedSVD(n_components=n_components, random_state=42)
        self.user_factors = svd.fit_transform(sparse_matrix)
        self.item_factors = svd.components_.T

        return self.user_factors, self.item_factors

    def get_hybrid_recommendations(self, user_id, n_recommendations=5,
                                   content_weight=0.4, collaborative_weight=0.6,
                                   focus_on_expiring=True):
        """Get hybrid recommendations combining both approaches.

        Parameters
        ----------
        user_id
            User to recommend for.
        n_recommendations : int
            Maximum number of rows returned.
        content_weight, collaborative_weight : float
            Weights applied to the content-based and collaborative scores.
        focus_on_expiring : bool
            When True, products with 30 or fewer days left get their score
            multiplied by up to 1.5 (the closer to expiry, the larger).

        Returns
        -------
        pandas.DataFrame
            Columns ``product_id``, ``product_name``, ``category``,
            ``days_until_expiry``, ``price``, ``discount`` and ``score``, sorted
            by descending score. Empty when nothing qualifies. Users with no
            transactions get the cold-start result instead.

        Raises
        ------
        IndexError
            If the user has transactions but no row in ``users_df``.
        """
        recommendations = []

        # Get user's purchase history
        user_products = self.transactions_df[
            self.transactions_df['user_id'] == user_id
        ]['product_id'].unique()

        if len(user_products) == 0:
            # Cold start - return popular expiring products
            return self._get_popular_expiring_products(n_recommendations)

        # Build models if not already built
        if self.content_similarity_matrix is None:
            self.build_content_similarity_matrix()
        if self.user_factors is None:
            self.build_collaborative_filtering_model()

        # Get collaborative scores
        collab_scores = {}
        if user_id in self.user_item_matrix.index:
            user_idx = self.user_item_matrix.index.get_loc(user_id)
            user_vector = self.user_factors[user_idx]
            predicted_ratings = np.dot(user_vector, self.item_factors.T)
            collab_scores = dict(zip(self.user_item_matrix.columns, predicted_ratings))

        # Positional lookup into the catalogue, built once. The similarity
        # matrix is indexed by row position, so map each product_id to the
        # position of its first occurrence.
        catalog_ids = self.products_df['product_id'].to_numpy()
        position_of = {}
        for position, catalog_id in enumerate(catalog_ids):
            position_of.setdefault(catalog_id, position)
        n_similarity_rows = len(self.content_similarity_matrix)

        # Get content-based scores
        similarity_lists = {}
        # Last 3 distinct products in transaction-row order.
        for base_product_id in user_products[-3:]:
            base_position = position_of.get(base_product_id)
            if base_position is None or base_position >= n_similarity_rows:
                continue
            sim_scores = self.content_similarity_matrix[base_position]
            for idx in range(min(len(sim_scores), len(catalog_ids))):
                candidate_id = catalog_ids[idx]
                if candidate_id != base_product_id:
                    similarity_lists.setdefault(candidate_id, []).append(sim_scores[idx])

        # Average content scores
        content_scores = {
            candidate_id: np.mean(scores)
            for candidate_id, scores in similarity_lists.items()
        }

        # Combine scores
        all_products = set(collab_scores.keys()) | set(content_scores.keys())
        if not all_products:
            return pd.DataFrame(recommendations)

        # The user row does not change per product; look it up once.
        user = self.users_df[self.users_df['user_id'] == user_id].iloc[0]

        for product_id in all_products:
            position = position_of.get(product_id)
            if position is None:
                # A transaction references a product that is not in the
                # catalogue; it cannot be described or checked, so skip it.
                continue
            product = self.products_df.iloc[position]

            # Skip expired products
            if product['days_until_expiry'] <= 0:
                continue

            # Check compatibility
            if not self._is_compatible(user, product):
                continue

            # Calculate hybrid score
            score = 0
            if product_id in collab_scores:
                score += collaborative_weight * collab_scores[product_id]
            if product_id in content_scores:
                score += content_weight * content_scores[product_id]

            # Apply urgency boost
            if focus_on_expiring and product['days_until_expiry'] <= 30:
                urgency_factor = 1 + (30 - product['days_until_expiry']) / 30 * 0.5
                score *= urgency_factor

            recommendations.append({
                'product_id': product_id,
                'product_name': product['name'],
                'category': product['category'],
                'days_until_expiry': product['days_until_expiry'],
                'price': product['price_mrp'],
                'discount': product['current_discount_percent'],
                'score': score
            })

        # Sort and return top N
        recommendations = sorted(recommendations, key=lambda x: x['score'], reverse=True)
        return pd.DataFrame(recommendations[:n_recommendations])

    def _is_compatible(self, user, product):
        """Check dietary and allergen compatibility.

        A product is rejected when its diet rank is higher than the user's
        (unknown product diets rank 0, unknown user diets rank 3) or when it
        lists any allergen the user is allergic to.
        """
        # Diet compatibility
        diet_hierarchy = {
            "non-vegetarian": 3,
            "eggs": 2,
            "vegetarian": 1,
            "vegan": 0
        }

        if diet_hierarchy.get(product['diet_type'], 0) > diet_hierarchy.get(user['diet_type'], 3):
            return False

        # Allergen check. Missing cells count as "no allergies/allergens";
        # previously a NaN allergies cell raised TypeError when iterated.
        user_allergies = self._parse_list_field(user['allergies'])
        product_allergens = self._parse_list_field(product['allergens'])

        if any(allergen in product_allergens for allergen in user_allergies):
            return False

        return True

    def _get_popular_expiring_products(self, n_recommendations):
        """Fallback for cold start: most popular products expiring within 30 days.

        Popularity is 0.5 * units sold + 0.5 * distinct buyers.

        Returns
        -------
        pandas.DataFrame
            ``product_id``, ``product_name``, ``category``,
            ``days_until_expiry``, ``price_mrp``, ``current_discount_percent``,
            plus ``score``, ``price`` and ``discount`` so the frame can be
            consumed like the output of ``get_hybrid_recommendations``.
        """
        expiring_products = self.products_df[
            (self.products_df['days_until_expiry'] > 0) &
            (self.products_df['days_until_expiry'] <= 30)
        ].copy()

        # Calculate popularity
        product_popularity = self.transactions_df.groupby('product_id').agg({
            'quantity': 'sum',
            'user_id': 'nunique'
        }).rename(columns={'user_id': 'unique_buyers'})

        expiring_products = expiring_products.merge(
            product_popularity,
            left_on='product_id',
            right_index=True,
            how='left'
        )

        # Products that never sold have NaN popularity; score them as 0.
        expiring_products['score'] = (
            expiring_products['quantity'].fillna(0) * 0.5 +
            expiring_products['unique_buyers'].fillna(0) * 0.5
        )

        top_products = expiring_products.nlargest(n_recommendations, 'score')[[
            'product_id', 'name', 'category', 'days_until_expiry',
            'price_mrp', 'current_discount_percent', 'score'
        ]].rename(columns={'name': 'product_name'})

        # Aliases matching the hybrid output; the original columns are kept so
        # existing consumers of this frame continue to work.
        top_products['price'] = top_products['price_mrp']
        top_products['discount'] = top_products['current_discount_percent']
        return top_products

# Initialize the recommendation system
# Construction only stores the frames and creates the label encoders; the
# similarity matrix and SVD factors are built on the first call to
# get_hybrid_recommendations for a user with purchase history.
rec_system = DynamicRecommendationSystem(users_df, products_df, transactions_df)
print("✅ Dynamic Recommendation System initialized!")


In [None]:
class IntegratedWasteReductionSystem:
    """
    Complete waste reduction system with dynamic thresholds and ML-based recommendations.

    On construction it builds ``products_enhanced``: the product catalogue
    extended with sales metrics, a per-product dynamic threshold, a binary
    dead-stock flag and a continuous risk score.

    Parameters
    ----------
    users_df, products_df, transactions_df : pandas.DataFrame
        Passed through to the threshold calculator and the recommendation
        system. ``products_df`` needs a datetime ``expiry_date``;
        ``transactions_df`` needs ``quantity``, ``purchase_date`` and
        ``discount_percent``.
    """

    # Units that must still be sellable before expiry for a product inside its
    # threshold window not to be flagged as dead stock.
    MIN_PROJECTED_SALES = 50

    def __init__(self, users_df, products_df, transactions_df):
        self.users_df = users_df
        self.products_df = products_df
        self.transactions_df = transactions_df

        # Initialize components
        self.threshold_calculator = DynamicThresholdCalculator(products_df, transactions_df)
        self.rec_system = DynamicRecommendationSystem(users_df, products_df, transactions_df)

        # Enhanced products dataframe
        self.products_enhanced = None

        # Calculate initial features
        self.enhance_product_features()

    def enhance_product_features(self):
        """Add calculated features to products (stored in ``self.products_enhanced``)."""
        current_date = pd.Timestamp.now()
        # Expiry is measured from midnight today: Timedelta.days floors, so
        # measuring from the current clock time reports -1 (expired) for a
        # product that expires today.
        today = current_date.normalize()
        self.products_enhanced = self.products_df.copy()

        # Time-based features
        self.products_enhanced['days_until_expiry'] = (
            self.products_enhanced['expiry_date'] - today
        ).dt.days

        # Sales metrics
        sales_metrics = self.transactions_df.groupby('product_id').agg({
            'quantity': ['sum', 'mean', 'count'],
            'purchase_date': ['min', 'max'],
            'discount_percent': 'mean'
        }).reset_index()

        # Flatten the two-level column index produced by the multi-aggregation.
        sales_metrics.columns = ['product_id', 'total_quantity_sold', 'avg_quantity_per_sale',
                                 'number_of_sales', 'first_sale_date', 'last_sale_date',
                                 'avg_discount_given']

        # Calculate sales velocity
        sales_metrics['days_since_last_sale'] = (
            current_date - sales_metrics['last_sale_date']
        ).dt.days
        sales_metrics['days_on_market'] = (
            sales_metrics['last_sale_date'] - sales_metrics['first_sale_date']
        ).dt.days + 1
        sales_metrics['sales_velocity'] = (
            sales_metrics['total_quantity_sold'] / sales_metrics['days_on_market']
        )

        # Merge with products
        self.products_enhanced = self.products_enhanced.merge(
            sales_metrics, on='product_id', how='left'
        )

        # Fill NaN values for products that never sold. Assign the result back:
        # fillna(..., inplace=True) on a selected column is chained assignment
        # and leaves the frame unchanged under pandas Copy-on-Write.
        self.products_enhanced['sales_velocity'] = (
            self.products_enhanced['sales_velocity'].fillna(0)
        )
        self.products_enhanced['days_since_last_sale'] = (
            self.products_enhanced['days_since_last_sale'].fillna(999)
        )

        # Calculate dynamic thresholds
        self.threshold_calculator.calculate_category_baseline_thresholds()
        self.products_enhanced['dynamic_threshold'] = self.products_enhanced['product_id'].apply(
            self.threshold_calculator.calculate_product_specific_threshold
        )

        # Calculate dead stock risk with dynamic thresholds
        self.products_enhanced['is_dead_stock_risk'] = self.products_enhanced.apply(
            self._calculate_dead_stock_risk_dynamic, axis=1
        )

        # Risk score (0-1)
        self.products_enhanced['dead_stock_risk_score'] = self.products_enhanced.apply(
            self._calculate_risk_score, axis=1
        )

    def _calculate_dead_stock_risk_dynamic(self, row):
        """Calculate dead stock risk using dynamic thresholds.

        Returns 1 when the product has expired, or when it is inside its
        threshold window and either never sells or is projected to sell fewer
        than ``MIN_PROJECTED_SALES`` units before expiry; otherwise 0.
        """
        if row['days_until_expiry'] <= 0:
            return 1

        if row['days_until_expiry'] <= row['dynamic_threshold']:
            if row['sales_velocity'] == 0:
                return 1

            projected_sales = row['sales_velocity'] * row['days_until_expiry']
            if projected_sales < self.MIN_PROJECTED_SALES:
                return 1

        return 0

    def _calculate_risk_score(self, row):
        """Calculate continuous risk score in [0, 1].

        The score is the sum of an expiry-urgency part (0-0.5), a
        sales-velocity part (0-0.3) and a stagnation part (0-0.2).
        """
        threshold = row['dynamic_threshold']
        days_left = row['days_until_expiry']

        # Expiry urgency (0-0.5)
        if threshold > 0:
            expiry_score = max(0, min(0.5, (threshold - days_left) / threshold * 0.5))
        else:
            # A zero threshold would divide by zero; with no warning window the
            # product is urgent only once it has reached its expiry date.
            expiry_score = 0.5 if days_left <= 0 else 0.0

        # Sales velocity score (0-0.3)
        velocity_score = 0.3 * (1 - min(1, row['sales_velocity'] / 5))

        # Stagnation score (0-0.2)
        stagnation_score = 0.2 * min(1, row['days_since_last_sale'] / 30)

        return expiry_score + velocity_score + stagnation_score

    def get_waste_reduction_metrics(self):
        """Calculate comprehensive metrics.

        Returns
        -------
        dict
            Counts of products (total, at risk, expiring within 7 / 30 days,
            high risk), the mean risk score and ``percentage_at_risk``
            (0.0 for an empty catalogue).
        """
        enhanced = self.products_enhanced
        metrics = {
            'total_products': len(enhanced),
            'products_at_risk': len(enhanced[enhanced['is_dead_stock_risk'] == 1]),
            'products_expiring_7_days': len(enhanced[enhanced['days_until_expiry'] <= 7]),
            'products_expiring_30_days': len(enhanced[enhanced['days_until_expiry'] <= 30]),
            'avg_risk_score': enhanced['dead_stock_risk_score'].mean(),
            'high_risk_products': len(enhanced[enhanced['dead_stock_risk_score'] > 0.7])
        }

        # Guard the empty catalogue, which used to raise ZeroDivisionError.
        if metrics['total_products'] > 0:
            metrics['percentage_at_risk'] = (
                metrics['products_at_risk'] / metrics['total_products'] * 100
            )
        else:
            metrics['percentage_at_risk'] = 0.0

        return metrics

    def get_personalized_waste_reduction_plan(self, top_users=10, recommendations_per_user=5):
        """Generate personalized recommendations for top users.

        Parameters
        ----------
        top_users : int
            Number of users, ranked by transaction count, to generate for.
        recommendations_per_user : int
            Maximum recommendations requested per user.

        Returns
        -------
        pandas.DataFrame
            Recommendation rows with a ``user_id`` column added; empty when no
            user received any recommendation.
        """
        # Identify users with highest purchase frequency
        user_activity = self.transactions_df.groupby('user_id').agg({
            'purchase_date': 'count',
            'quantity': 'sum'
        }).rename(columns={'purchase_date': 'purchase_count'})

        top_user_ids = user_activity.nlargest(top_users, 'purchase_count').index

        waste_reduction_plan = []

        for user_id in top_user_ids:
            # Get recommendations
            user_recs = self.rec_system.get_hybrid_recommendations(
                user_id,
                n_recommendations=recommendations_per_user,
                focus_on_expiring=True
            )

            if not user_recs.empty:
                user_recs['user_id'] = user_id
                waste_reduction_plan.append(user_recs)

        if waste_reduction_plan:
            return pd.concat(waste_reduction_plan, ignore_index=True)
        else:
            return pd.DataFrame()

    def simulate_impact(self, conversion_rate=0.3):
        """Simulate the impact of recommendations.

        Parameters
        ----------
        conversion_rate : float
            Assumed share of covered at-risk products that end up being sold.

        Returns
        -------
        dict
            Impact figures, or ``{"error": "No recommendations generated"}``
            when the plan is empty.
        """
        plan = self.get_personalized_waste_reduction_plan()

        if plan.empty:
            return {"error": "No recommendations generated"}

        # Calculate potential waste saved
        recommended_products = plan['product_id'].unique()
        at_risk_products = self.products_enhanced[
            self.products_enhanced['is_dead_stock_risk'] == 1
        ]

        covered_products = at_risk_products[
            at_risk_products['product_id'].isin(recommended_products)
        ]

        impact = {
            'total_at_risk_products': len(at_risk_products),
            'products_covered_by_recommendations': len(covered_products),
            'coverage_percentage': len(covered_products) / len(at_risk_products) * 100 if len(at_risk_products) > 0 else 0,
            'estimated_products_saved': int(len(covered_products) * conversion_rate),
            'total_recommendations_generated': len(plan),
            'unique_users_targeted': plan['user_id'].nunique()
        }

        return impact

# Initialize the integrated system
# Construction runs enhance_product_features(), which computes a threshold,
# risk flag and risk score for every product row.
integrated_system = IntegratedWasteReductionSystem(users_df, products_df, transactions_df)

# Get metrics
metrics = integrated_system.get_waste_reduction_metrics()
print("📊 Waste Reduction Metrics:")
print("="*50)
for key, value in metrics.items():
    # Python floats get two decimals; everything else is printed as-is.
    if isinstance(value, float):
        print(f"{key}: {value:.2f}")
    else:
        print(f"{key}: {value}")


In [None]:
# Create comprehensive visualizations
# 2 x 3 dashboard; every panel reads from integrated_system.
fig, axes = plt.subplots(2, 3, figsize=(18, 12))
fig.suptitle('Integrated Waste Reduction System Analysis', fontsize=16)

# 1. Dynamic Thresholds by Category
ax1 = axes[0, 0]
categories = list(integrated_system.threshold_calculator.category_thresholds.keys())
thresholds = list(integrated_system.threshold_calculator.category_thresholds.values())
ax1.bar(categories, thresholds, color='skyblue', edgecolor='navy')
ax1.set_xlabel('Product Category')
ax1.set_ylabel('Dynamic Threshold (days)')
ax1.set_title('Dynamic Thresholds by Category')
ax1.tick_params(axis='x', rotation=45)

# 2. Risk Score Distribution
ax2 = axes[0, 1]
integrated_system.products_enhanced['dead_stock_risk_score'].hist(
    bins=30, ax=ax2, color='coral', edgecolor='darkred'
)
ax2.set_xlabel('Risk Score')
ax2.set_ylabel('Number of Products')
ax2.set_title('Dead Stock Risk Score Distribution')
# 0.7 is the same cut-off get_waste_reduction_metrics uses for "high risk".
ax2.axvline(x=0.7, color='red', linestyle='--', label='High Risk Threshold')
ax2.legend()

# 3. Days Until Expiry vs Sales Velocity
ax3 = axes[0, 2]
scatter = ax3.scatter(
    integrated_system.products_enhanced['days_until_expiry'],
    integrated_system.products_enhanced['sales_velocity'],
    c=integrated_system.products_enhanced['is_dead_stock_risk'],
    cmap='RdYlGn_r',
    alpha=0.6
)
ax3.set_xlabel('Days Until Expiry')
ax3.set_ylabel('Sales Velocity (units/day)')
ax3.set_title('Product Risk Analysis')
plt.colorbar(scatter, ax=ax3, label='Dead Stock Risk')

# 4. Category Risk Analysis
ax4 = axes[1, 0]
risk_by_category = integrated_system.products_enhanced.groupby('category')['is_dead_stock_risk'].agg(['sum', 'count'])
risk_by_category['risk_percentage'] = (risk_by_category['sum'] / risk_by_category['count'] * 100)
risk_by_category['risk_percentage'].plot(kind='bar', ax=ax4, color='orange')
ax4.set_xlabel('Category')
ax4.set_ylabel('% Products at Risk')
ax4.set_title('Dead Stock Risk by Category')
ax4.tick_params(axis='x', rotation=45)

# 5. Impact Simulation
ax5 = axes[1, 1]
impact = integrated_system.simulate_impact()
if 'error' not in impact:
    impact_data = pd.Series({
        'At Risk': impact['total_at_risk_products'],
        'Covered': impact['products_covered_by_recommendations'],
        'Saved': impact['estimated_products_saved']
    })
    impact_data.plot(kind='bar', ax=ax5, color=['red', 'yellow', 'green'])
    ax5.set_ylabel('Number of Products')
    ax5.set_title('Recommendation System Impact')
    ax5.tick_params(axis='x', rotation=0)
else:
    # No recommendations were generated: show why instead of leaving a blank,
    # untitled panel in the dashboard.
    ax5.text(0.5, 0.5, impact['error'], ha='center', va='center', transform=ax5.transAxes)
    ax5.set_title('Recommendation System Impact')
    ax5.set_axis_off()

# 6. User Engagement Potential
ax6 = axes[1, 2]
# Quartiles and maximum of the total quantity purchased per user.
user_engagement = integrated_system.transactions_df.groupby('user_id')['quantity'].sum().describe()
engagement_data = pd.Series({
    'Low (Q1)': user_engagement['25%'],
    'Medium (Q2)': user_engagement['50%'],
    'High (Q3)': user_engagement['75%'],
    'Very High (Max)': user_engagement['max']
})
engagement_data.plot(kind='bar', ax=ax6, color='purple', alpha=0.7)
ax6.set_ylabel('Total Quantity Purchased')
ax6.set_title('User Engagement Levels')
ax6.tick_params(axis='x', rotation=45)

plt.tight_layout()
plt.show()


In [None]:
# 1. Show products with highest risk scores
print("🚨 TOP 10 HIGHEST RISK PRODUCTS:")
print("="*100)
high_risk_products = integrated_system.products_enhanced.nlargest(10, 'dead_stock_risk_score')
print(high_risk_products[['product_id', 'name', 'category', 'days_until_expiry', 
                          'dynamic_threshold', 'sales_velocity', 'dead_stock_risk_score']].to_string())

# 2. Generate personalized recommendations
# Print, for each user in the generated plan, their profile (diet, allergies,
# discount preference) followed by their recommended products.
print("\n\n📋 PERSONALIZED WASTE REDUCTION PLAN:")
print("="*100)
waste_plan = integrated_system.get_personalized_waste_reduction_plan(top_users=5, recommendations_per_user=3)

if waste_plan.empty:
    # Make the empty case explicit instead of leaving a header with no body.
    print("No recommendations were generated.")
else:
    for user_id in waste_plan['user_id'].unique():
        user_profile_rows = users_df[users_df['user_id'] == user_id]
        user_recs = waste_plan[waste_plan['user_id'] == user_id]

        print(f"\n👤 User {user_id}:")
        if user_profile_rows.empty:
            # A plan entry may reference a user_id with no row in users_df;
            # calling .iloc[0] on the empty selection would raise IndexError.
            print("   (no profile found in users_df)")
        else:
            user_info = user_profile_rows.iloc[0]
            print(f"   Diet: {user_info['diet_type']}, Allergies: {user_info['allergies']}")
            print(f"   Prefers Discount: {user_info['prefers_discount']}")
        print("   Recommendations:")

        for _, rec in user_recs.iterrows():
            print(f"   → {rec['product_name']} ({rec['category']})")
            print(f"     Days to expiry: {rec['days_until_expiry']}, Price: ${rec['price']:.2f}, Discount: {rec['discount']}%")
            print(f"     Score: {rec['score']:.3f}")

# 3. Show impact analysis
# Run the impact simulation at a single, named conversion rate and print the
# resulting metrics. The rate is defined once so the printed label cannot
# drift from the value actually passed to the simulation.
IMPACT_CONVERSION_RATE = 0.3

print("\n\n📊 IMPACT ANALYSIS:")
print("="*100)
impact = integrated_system.simulate_impact(conversion_rate=IMPACT_CONVERSION_RATE)

if 'error' not in impact:
    print(f"Total at-risk products: {impact['total_at_risk_products']}")
    print(f"Products covered by recommendations: {impact['products_covered_by_recommendations']}")
    print(f"Coverage percentage: {impact['coverage_percentage']:.1f}%")
    # ':.0%' renders 0.3 as '30%', matching the previous hard-coded label.
    print(f"Estimated products saved ({IMPACT_CONVERSION_RATE:.0%} conversion): {impact['estimated_products_saved']}")
    print(f"Total recommendations generated: {impact['total_recommendations_generated']}")
    print(f"Unique users targeted: {impact['unique_users_targeted']}")

    # Calculate potential revenue saved.
    # The estimate multiplies the saved-product count by the mean price_mrp
    # over every row of products_enhanced, not only the at-risk products.
    avg_price = integrated_system.products_enhanced['price_mrp'].mean()
    revenue_saved = impact['estimated_products_saved'] * avg_price
    print(f"\n💰 Estimated revenue saved: ${revenue_saved:,.2f}")
else:
    # Surface the failure instead of silently printing nothing.
    print(f"Impact simulation unavailable: {impact['error']}")


In [None]:
### Step 2.3: Build Dead Stock Prediction Model

We'll use a Random Forest Classifier to predict dead stock risk based on product features. This model will help us proactively identify products that need intervention.


In [None]:
### Step 2.3: Build Dead Stock Prediction Model

We'll use a Random Forest Classifier to predict dead stock risk based on product features. This model will help us proactively identify products that need intervention.


In [None]:
### Step 2.3: Build Dead Stock Prediction Model

We'll use a Random Forest Classifier to predict dead stock risk based on product features. This model will help us proactively identify products that need intervention.


In [None]:
### Step 2.3: Build Dead Stock Prediction Model

We'll use a Random Forest Classifier to predict dead stock risk based on product features. This model will help us proactively identify products that need intervention.


In [None]:
### Step 2.3: Build Dead Stock Prediction Model

We'll use a Random Forest Classifier to predict dead stock risk based on product features. This model will help us proactively identify products that need intervention.


In [None]:
### Step 2.3: Build Dead Stock Prediction Model

We'll use a Random Forest Classifier to predict dead stock risk based on product features. This model will help us proactively identify products that need intervention.


In [None]:
### Step 2.3: Build Dead Stock Prediction Model

We'll use a Random Forest Classifier to predict dead stock risk based on product features. This model will help us proactively identify products that need intervention.


In [None]:
### Step 2.3: Build Dead Stock Prediction Model

We'll use a Random Forest Classifier to predict dead stock risk based on product features. This model will help us proactively identify products that need intervention.


In [None]:
### Step 2.3: Build Dead Stock Prediction Model

We'll use a Random Forest Classifier to predict dead stock risk based on product features. This model will help us proactively identify products that need intervention.


In [None]:
### Step 2.3: Build Dead Stock Prediction Model

We'll use a Random Forest Classifier to predict dead stock risk based on product features. This model will help us proactively identify products that need intervention.


In [None]:
### Step 2.3: Build Dead Stock Prediction Model

We'll use a Random Forest Classifier to predict dead stock risk based on product features. This model will help us proactively identify products that need intervention.


In [None]:
### Step 2.3: Build Dead Stock Prediction Model

We'll use a Random Forest Classifier to predict dead stock risk based on product features. This model will help us proactively identify products that need intervention.


In [None]:
### Step 2.3: Build Dead Stock Prediction Model

We'll use a Random Forest Classifier to predict dead stock risk based on product features. This model will help us proactively identify products that need intervention.
