In [None]:
# Get redistribution candidates
# The engine returns the candidate items as a DataFrame plus a summary mapping;
# only summary['by_warning_level'] is used here (presumably level -> item count;
# confirm against RedistributionEngine).
candidates_df, summary = engine.get_redistribution_candidates()

if not candidates_df.empty:
    # Warning level distribution
    warning_counts = pd.Series(summary['by_warning_level'])
    fig = px.pie(
        values=warning_counts.values,
        names=warning_counts.index,
        title='Items by Warning Level',
        # RdYlGn is a *diverging* ColorBrewer scale: it is exposed as
        # px.colors.diverging.RdYlGn(_r), not under px.colors.sequential
        # (looking it up there raises AttributeError).
        color_discrete_sequence=px.colors.diverging.RdYlGn_r
    )
    fig.show()

    # Category analysis of critical items
    critical_items = candidates_df[candidates_df['warning_level'] == 'critical']
    if not critical_items.empty:
        fig = px.histogram(
            critical_items,
            x='category',
            title='Categories of Critical Items',
            color='category'
        )
        fig.show()
    else:
        print("No critical items found in the current inventory.")

    # Risk analysis
    # analyze_item_risk is called once per candidate row; its result is read as
    # a mapping with an 'overall_risk' value and a 'risk_factors' mapping whose
    # entries are flattened into one column per factor.
    risk_scores = []
    for _, item in candidates_df.iterrows():
        risk_analysis = analyze_item_risk(item)
        risk_scores.append({
            'product_name': item['product_name'],
            'category': item['category'],
            'overall_risk': risk_analysis['overall_risk'],
            **risk_analysis['risk_factors']
        })

    risk_df = pd.DataFrame(risk_scores)

    # Risk factors correlation
    # NOTE(review): assumes analyze_item_risk always reports these four factors;
    # a missing key would surface here as a KeyError - confirm against ml.utils.
    risk_factors = ['temperature_risk', 'humidity_risk', 'time_risk', 'model_risk']
    fig = px.imshow(
        risk_df[risk_factors].corr(),
        title='Risk Factors Correlation',
        labels=dict(color='Correlation'),
        color_continuous_scale='RdBu_r'
    )
    fig.show()

    # Distribution of overall risk by category with violin plot
    # One violin trace per category, each with an inner box plot and mean line.
    fig = go.Figure()
    for category in risk_df['category'].unique():
        cat_data = risk_df[risk_df['category'] == category]['overall_risk']
        fig.add_trace(go.Violin(
            x=[category] * len(cat_data),
            y=cat_data,
            name=category,
            box_visible=True,
            meanline_visible=True
        ))

    fig.update_layout(
        title='Overall Risk Distribution by Category',
        xaxis_title='Category',
        yaxis_title='Risk Score',
        showlegend=False  # the x axis already names each category
    )
    fig.show()
else:
    print("No items currently need redistribution.")

# Redistribution Analysis

Let's analyze the effectiveness of our redistribution system:

In [None]:
# NGO locations map
# Pre-render one HTML hover label per NGO. The column is handed to the map via
# custom_data and displayed by the hovertemplate set below.
ngos_df['hover_text'] = ngos_df.apply(
    lambda x: f"Name: {x['ngo_name']}<br>Location: {x['location']}<br>Capacity: {x['capacity_kg']} kg",
    axis=1
)

fig = px.scatter_mapbox(
    ngos_df,
    lat='latitude',
    lon='longitude',
    hover_name='ngo_name',
    hover_data=['location', 'capacity_kg'],
    color='capacity_kg',
    size='capacity_kg',
    title='NGO Network Distribution',
    mapbox_style='open-street-map',
    custom_data=['hover_text']
)
# Replace the default hover box with the pre-rendered label; <extra></extra>
# hides the secondary trace-name box.
fig.update_traces(
    hovertemplate="%{customdata[0]}<extra></extra>"
)
fig.show()

# NGO capacity analysis
fig = px.bar(
    ngos_df.sort_values('capacity_kg', ascending=False),  # Sort by capacity
    x='ngo_name',
    y='capacity_kg',
    title='NGO Capacity Distribution',
    labels={'capacity_kg': 'Capacity (kg)', 'ngo_name': 'NGO Name'}
)
fig.update_layout(xaxis_tickangle=-45)  # Rotate labels for better readability
fig.show()

# Category acceptance analysis
# 'accepted_categories' holds a '|'-separated list per NGO. Expand it to one
# row per (NGO, category) pair. NGOs with no category list are skipped instead
# of crashing on a NaN value, which has no .split().
acceptance_df = (
    ngos_df[['ngo_name', 'accepted_categories', 'capacity_kg']]
    .dropna(subset=['accepted_categories'])
    .assign(category=lambda df: df['accepted_categories'].astype(str).str.split('|'))
    .explode('category')
    .loc[:, ['ngo_name', 'category', 'capacity_kg']]
    .reset_index(drop=True)
)

# Create a matrix of NGO categories and capacity
# Cell value = the NGO's capacity if it accepts the category, 0 otherwise.
matrix_df = acceptance_df.pivot_table(
    index='category',
    columns='ngo_name',
    values='capacity_kg',
    aggfunc='sum',
    fill_value=0
)

fig = px.imshow(
    matrix_df,
    title='Category Acceptance Heat Map',
    labels={'x': 'NGO Name', 'y': 'Category', 'color': 'Capacity (kg)'},
    aspect='auto'
)
fig.update_layout(xaxis_tickangle=-45)
fig.show()

# NGO Network Analysis

Now let's analyze our NGO network's distribution and capabilities:

In [None]:
# Share of inventory items that falls into each product category
fig = px.pie(
    data_frame=inventory_df,
    names='category',
    title='Distribution of Items by Category',
)
fig.show()

# Item counts per storage type; barmode='group' places the categories
# side by side within each storage type instead of stacking them
fig = px.histogram(
    data_frame=inventory_df,
    x='storage_type',
    color='category',
    barmode='group',
    title='Storage Types by Category',
)
fig.show()

# Storage conditions: temperature against humidity, coloured by category.
# trendline='ols' asks plotly express for fitted trend lines
# (plotly delegates the fit to statsmodels).
climate_labels = dict(
    temperature_c='Temperature (°C)',
    humidity_percent='Humidity (%)',
)
fig = px.scatter(
    data_frame=inventory_df,
    x='temperature_c',
    y='humidity_percent',
    color='category',
    trendline='ols',
    labels=climate_labels,
    title='Temperature vs Humidity by Category',
)
fig.show()

# Spread of remaining shelf life within each category
expiry_labels = dict(
    days_until_expiry='Days Until Expiry',
    category='Product Category',
)
fig = px.box(
    data_frame=inventory_df,
    x='category',
    y='days_until_expiry',
    labels=expiry_labels,
    title='Days Until Expiry by Category',
)
fig.update_layout(showlegend=True)
fig.show()

# Inventory Analysis

Let's analyze the distribution of items in our inventory by various factors:

In [4]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from pathlib import Path
import sys
from datetime import datetime

# Add project root to path
# The parent of the current working directory is treated as the project root,
# so that the `backend` and `ml` packages imported below can be resolved.
project_root = str(Path.cwd().parent)
if project_root not in sys.path:
    sys.path.append(project_root)

# Import custom modules
# NOTE(review): the saved output of this cell is
# "ModuleNotFoundError: No module named 'logistics'". Presumably one of the
# modules below imports `logistics` as a top-level package that is not
# reachable from project_root - confirm where it lives and fix that import
# (or add its directory to sys.path) so the notebook survives Restart & Run All.
from backend.engine import RedistributionEngine
from backend.utils import estimate_co2_savings
from ml.utils import analyze_item_risk

# Load data
# Resolve the data directory from project_root so the working-directory
# assumption lives in one place (same location as '../data').
data_dir = Path(project_root) / 'data'
inventory_df = pd.read_csv(data_dir / 'mock_inventory.csv')
ngos_df = pd.read_csv(data_dir / 'mock_ngos.csv')

# Initialize engine
engine = RedistributionEngine()

# Pre-process dates
inventory_df['expiry_date'] = pd.to_datetime(inventory_df['expiry_date'])
inventory_df['stock_date'] = pd.to_datetime(inventory_df['stock_date'])

# Count whole calendar days. Subtracting the raw current timestamp floors
# against the time of day, so an item expiring today would report -1 and one
# expiring tomorrow would report 0. Normalising both sides to midnight avoids
# that off-by-one.
today = pd.Timestamp.now().normalize()
inventory_df['days_until_expiry'] = (
    inventory_df['expiry_date'].dt.normalize() - today
).dt.days

ModuleNotFoundError: No module named 'logistics'

# Zero Waste AI: Data Analysis and Visualization

This notebook analyzes and visualizes the data used in our food waste reduction system, including:

1. Inventory distribution and patterns
2. NGO network analysis
3. Redistribution effectiveness
4. Environmental impact metrics

# Environmental Impact Analysis

Finally, let's analyze the environmental impact of our redistribution system:

In [None]:
# Calculate potential impact
# Relies on candidates_df and engine created by earlier cells.
if not candidates_df.empty:
    total_potential_savings = 0
    impact_data = []

    # Analyze each candidate item with its best NGO match
    for _, item in candidates_df.iterrows():
        # find_best_matches returns (matches, stats); stats is not needed here
        matches, _stats = engine.find_best_matches(item.to_dict())
        if matches:
            # The first match is used as the best one
            # (presumably the engine returns matches best-first - confirm).
            best_match = matches[0]
            distance = best_match['distance_km']
            co2_saved = estimate_co2_savings(distance)
            total_potential_savings += co2_saved
            impact_data.append({
                'product_name': item['product_name'],
                'category': item['category'],
                'distance_km': distance,
                'co2_savings_kg': co2_saved,
                'ngo_name': best_match['ngo_name']
            })

    if impact_data:
        impact_df = pd.DataFrame(impact_data)

        # Distance vs CO2 savings by category
        fig = px.scatter(
            impact_df,
            x='distance_km',
            y='co2_savings_kg',
            color='category',
            title='CO2 Savings vs Distance by Category',
            labels={
                'distance_km': 'Distance (km)',
                'co2_savings_kg': 'CO2 Savings (kg)'
            },
            trendline="ols"
        )
        fig.show()

        # CO2 savings by category
        fig = px.box(
            impact_df,
            x='category',
            y='co2_savings_kg',
            title='CO2 Savings Distribution by Category',
            labels={
                'co2_savings_kg': 'CO2 Savings (kg)',
                'category': 'Product Category'
            }
        )
        fig.show()

        # Top NGO contributions
        # Rename by column name rather than by position so the labels cannot
        # silently drift if the aggregation spec is reordered.
        ngo_impact = (
            impact_df.groupby('ngo_name').agg({
                'co2_savings_kg': 'sum',
                'distance_km': 'mean',
                'product_name': 'count'
            })
            .reset_index()
            .rename(columns={
                'ngo_name': 'NGO Name',
                'co2_savings_kg': 'Total CO2 Savings (kg)',
                'distance_km': 'Avg Distance (km)',
                'product_name': 'Items Count'
            })
            # Ascending order: a horizontal bar chart draws the first row at
            # the bottom, so the largest contributor ends up on top.
            .sort_values('Total CO2 Savings (kg)', ascending=True)
        )

        fig = go.Figure(data=[
            go.Bar(
                name='CO2 Savings',
                y=ngo_impact['NGO Name'],
                x=ngo_impact['Total CO2 Savings (kg)'],
                # go.Bar defaults to vertical bars when both x and y are
                # given; the NGO names are on y, so request horizontal bars.
                orientation='h'
            )
        ])
        fig.update_layout(
            title='Top NGO Contributors to CO2 Savings',
            xaxis_title='Total CO2 Savings (kg)',
            yaxis_title='NGO Name'
        )
        fig.show()

        # Print summary statistics
        print("\nEnvironmental Impact Summary:")
        print(f"Total potential CO2 savings: {total_potential_savings:.2f} kg")
        print(f"Average CO2 savings per item: {impact_df['co2_savings_kg'].mean():.2f} kg")
        print(f"Average distance per redistribution: {impact_df['distance_km'].mean():.2f} km")
        print(f"Number of successful matches: {len(impact_df)}")
    else:
        print("No successful NGO matches found for redistribution candidates.")
else:
    print("No items currently need redistribution.")