In [1]:
"""
END-TO-END PROCUREMENT-TO-REVENUE ANALYTICS & INSIGHTS STUDY
Part 1: Exploratory Data Analysis (EDA)

This notebook performs exploratory analysis across every business domain in the dataset: supply chain, production, quality control, B2B customers, B2C retail, and finance.
"""

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sqlalchemy import create_engine
import warnings
warnings.filterwarnings('ignore')

# Global plot styling: seaborn grid theme plus default figure size and font size
sns.set_style("whitegrid")
plt.rcParams.update({
    'figure.figsize': (12, 6),
    'font.size': 10,
})

# Title banner framed by two 80-character rules
print(
    "="*80,
    "PROCUREMENT-TO-REVENUE ANALYTICS - EXPLORATORY DATA ANALYSIS",
    "="*80,
    sep="\n",
)

PROCUREMENT-TO-REVENUE ANALYTICS - EXPLORATORY DATA ANALYSIS


In [4]:
# ============================================================================
# DATABASE CONNECTION
# ============================================================================
# Builds the SQLAlchemy engine used by every later cell and verifies that the
# database is actually reachable before reporting success.
import os
from urllib.parse import quote_plus

from sqlalchemy import text

# Connection settings. Each value may be overridden through an environment
# variable so real credentials never need to be saved in the notebook; the
# fallbacks are the original local-development defaults.
DB_CONFIG = {
    'host': os.environ.get('HYFUN_DB_HOST', 'localhost'),
    'user': os.environ.get('HYFUN_DB_USER', 'root'),
    'password': os.environ.get('HYFUN_DB_PASSWORD', ''),
    'database': os.environ.get('HYFUN_DB_NAME', 'hyfun_analytics')
}

# Create connection string. User and password are URL-escaped so characters
# such as '@', ':' or '/' in a password cannot corrupt the URL.
connection_string = (
    f"mysql+pymysql://{quote_plus(DB_CONFIG['user'])}:{quote_plus(DB_CONFIG['password'])}"
    f"@{DB_CONFIG['host']}/{DB_CONFIG['database']}"
)

# Create engine (lazy: no connection is opened here)
engine = create_engine(connection_string)

print("\n[1/10] Connecting to database...")
# Run a trivial query so that bad credentials or an unreachable host fail in
# this cell, instead of after a misleading "Connected" message.
with engine.connect() as conn:
    conn.execute(text("SELECT 1"))
print(f"âœ“ Connected to {DB_CONFIG['database']}")


[1/10] Connecting to database...
âœ“ Connected to hyfun_analytics


In [5]:
# ============================================================================
# LOAD DATA FROM DATABASE
# ============================================================================
# Reads each source table in full into its own DataFrame and reports the
# combined row count.

print("\n[2/10] Loading data from database...")


def _read_table(table_name):
    """Return all rows of `table_name` as a DataFrame, read through `engine`."""
    return pd.read_sql(f"SELECT * FROM {table_name}", engine)


# Master data
farmers = _read_table("farmers_master")
products = _read_table("product_master")

# Procurement and manufacturing
procurement = _read_table("potato_procurement")
production = _read_table("production_batches")
quality = _read_table("quality_control")
downtime = _read_table("machine_downtime")
wastage = _read_table("wastage_tracking")

# Sales and finance
b2b_customers = _read_table("b2b_customers")
b2b_orders = _read_table("b2b_orders")
export_shipments = _read_table("export_shipments")
b2c_sales = _read_table("b2c_sales")
revenue = _read_table("revenue_summary")

loaded_frames = (
    farmers, products, procurement, production, quality, downtime,
    wastage, b2b_customers, b2b_orders, export_shipments, b2c_sales, revenue,
)

print("âœ“ Loaded 12 tables successfully")
print(f"âœ“ Total records: {sum(len(frame) for frame in loaded_frames):,}")



[2/10] Loading data from database...
âœ“ Loaded 12 tables successfully
âœ“ Total records: 164,204


In [6]:
# ============================================================================
# DATA OVERVIEW
# ============================================================================
# Prints one line per loaded table: display name, row count, column count.

print("\n[3/10] Data Overview...")
print(f"\n{'=' * 80}")
print("DATASET SUMMARY")
print("=" * 80)

# Display label -> DataFrame, in the order the tables should be listed
datasets = dict([
    ('Farmers', farmers),
    ('Products', products),
    ('Procurement', procurement),
    ('Production', production),
    ('Quality Control', quality),
    ('Machine Downtime', downtime),
    ('Wastage', wastage),
    ('B2B Customers', b2b_customers),
    ('B2B Orders', b2b_orders),
    ('Export Shipments', export_shipments),
    ('B2C Sales', b2c_sales),
    ('Revenue', revenue),
])

# Fixed-width row: left-aligned label, right-aligned counts
overview_row = "{:<20} | {:>10,} records | {:>3} columns"
for name, df in datasets.items():
    print(overview_row.format(name, len(df), df.shape[1]))



[3/10] Data Overview...

DATASET SUMMARY
Farmers              |        500 records |   9 columns
Products             |         10 records |  10 columns
Procurement          |      3,262 records |  10 columns
Production           |     12,528 records |  11 columns
Quality Control      |      3,727 records |  11 columns
Machine Downtime     |      1,701 records |  10 columns
Wastage              |      1,927 records |   8 columns
B2B Customers        |        200 records |  13 columns
B2B Orders           |      3,605 records |  12 columns
Export Shipments     |      3,338 records |  11 columns
B2C Sales            |    127,943 records |  11 columns
Revenue              |      5,463 records |   8 columns


In [7]:
# ============================================================================
# SECTION 1: SUPPLY CHAIN ANALYTICS
# ============================================================================
# Summarises procurement by region, quality grade and calendar month, then
# saves a two-panel figure (regional quality mix, seasonal price curve).

print("\n[4/10] Analyzing Supply Chain...")
print("\n" + "="*80)
print("SUPPLY CHAIN INSIGHTS")
print("="*80)

# Convert date columns
procurement['procurement_date'] = pd.to_datetime(procurement['procurement_date'])

# 1.1 Regional Analysis
# merge() defaults to an inner join, so deliveries whose farmer_id is missing
# from `farmers` are excluded from the regional figures.
regional_summary = procurement.merge(farmers[['farmer_id', 'region']], on='farmer_id')
regional_stats = regional_summary.groupby('region').agg({
    'quantity_mt': 'sum',
    'price_per_mt': 'mean',
    'batch_id': 'count'
}).round(2)
regional_stats.columns = ['Total Volume (MT)', 'Avg Price (â‚¹/MT)', 'Deliveries']
print("\nðŸ“Š Regional Performance:")
print(regional_stats.sort_values('Total Volume (MT)', ascending=False))

# 1.2 Quality Distribution
quality_dist = procurement['quality_grade'].value_counts()
print("\nðŸ“Š Quality Grade Distribution:")
print(quality_dist)
# Share of deliveries graded 'Premium'. A boolean mask is used instead of
# quality_dist['Premium'] so a dataset without Premium deliveries reports 0%
# rather than raising KeyError; it also matches the executive-summary formula.
premium_rate = (procurement['quality_grade'] == 'Premium').mean() * 100
print(f"\nPremium Rate: {premium_rate:.2f}%")

# 1.3 Seasonal Pattern
# Groups by calendar month number, so the same month of different years is pooled.
procurement['month'] = procurement['procurement_date'].dt.month
seasonal = procurement.groupby('month').agg({
    'quantity_mt': 'sum',
    'price_per_mt': 'mean'
}).round(2)
print("\nðŸ“Š Seasonal Pattern (Top 3 Months by Volume):")
print(seasonal.sort_values('quantity_mt', ascending=False).head(3))

# Visualization 1: Regional Quality Mix
# Explicit figure/axes handles (as in the later cells) instead of pyplot's
# implicit "current axes".
fig, (ax_mix, ax_price) = plt.subplots(1, 2, figsize=(14, 6))

# Left panel: stacked volume per region, split by quality grade
regional_quality = regional_summary.groupby(['region', 'quality_grade'])['quantity_mt'].sum().unstack(fill_value=0)
regional_quality.plot(kind='bar', stacked=True, ax=ax_mix)
ax_mix.set_title('Regional Quality Mix by Volume', fontsize=14, fontweight='bold')
ax_mix.set_xlabel('Region')
ax_mix.set_ylabel('Volume (MT)')
ax_mix.legend(title='Quality Grade')
ax_mix.tick_params(axis='x', rotation=45)

# Right panel: unrounded mean price per calendar month
seasonal_price = procurement.groupby('month')['price_per_mt'].mean()
ax_price.plot(seasonal_price.index, seasonal_price.values, marker='o', linewidth=2, markersize=8)
ax_price.set_title('Seasonal Price Variation', fontsize=14, fontweight='bold')
ax_price.set_xlabel('Month')
ax_price.set_ylabel('Avg Price (â‚¹/MT)')
ax_price.grid(True, alpha=0.3)
ax_price.set_xticks(range(1, 13))

fig.tight_layout()
fig.savefig('01_supply_chain_analysis.png', dpi=300, bbox_inches='tight')
print("\nâœ“ Saved: 01_supply_chain_analysis.png")
plt.close(fig)


[4/10] Analyzing Supply Chain...

SUPPLY CHAIN INSIGHTS

ðŸ“Š Regional Performance:
                 Total Volume (MT)  Avg Price (â‚¹/MT)  Deliveries
region                                                          
North Gujarat              8600.38          21056.08         724
South Gujarat              8015.13          21194.38         679
Kutch                      7777.03          21308.89         675
Central Gujarat            7294.10          21413.77         621
Saurashtra                 6794.76          21025.46         563

ðŸ“Š Quality Grade Distribution:
quality_grade
Grade A    1304
Grade B     829
Premium     809
Grade C     320
Name: count, dtype: int64

Premium Rate: 24.80%

ðŸ“Š Seasonal Pattern (Top 3 Months by Volume):
       quantity_mt  price_per_mt
month                           
10         5019.68      18284.08
12         4916.14      18383.60
1          4889.69      18486.36

âœ“ Saved: 01_supply_chain_analysis.png


In [8]:
# ============================================================================
# SECTION 2: PRODUCTION ANALYTICS
# ============================================================================
# Derives a per-batch conversion rate, summarises it by plant, product and
# shift, and saves a 2x2 figure of the results.

print("\n[5/10] Analyzing Production...")
print("\n" + "="*80)
print("PRODUCTION INSIGHTS")
print("="*80)

# Convert dates
production['production_date'] = pd.to_datetime(production['production_date'])

# 2.1 Conversion Rate by Plant
# conversion_rate = finished goods as a percentage of raw material consumed.
# A zero raw-material figure is treated as missing: dividing by it would give
# inf, which would turn every mean below (and the red reference line) into inf.
raw_material_mt = production['raw_material_used_mt'].replace(0, np.nan)
production['conversion_rate'] = (production['finished_goods_mt'] / raw_material_mt * 100).round(2)
# 'mean' here is the plain average of per-batch rates (not weighted by batch size)
plant_efficiency = production.groupby('plant_location').agg({
    'conversion_rate': 'mean',
    'finished_goods_mt': 'sum',
    'batch_id': 'count'
}).round(2)
plant_efficiency.columns = ['Avg Conversion %', 'Total Output (MT)', 'Batches']
print("\nðŸ“Š Plant Efficiency:")
print(plant_efficiency.sort_values('Avg Conversion %', ascending=False))

# 2.2 Product Performance
# Inner join: batches whose product_sku is missing from `products` are dropped
product_prod = production.merge(products[['product_sku', 'product_name', 'category']], on='product_sku')
product_stats = product_prod.groupby(['category', 'product_name']).agg({
    'finished_goods_mt': 'sum',
    'conversion_rate': 'mean'
}).round(2)
print("\nðŸ“Š Top 5 Products by Volume:")
print(product_stats.sort_values('finished_goods_mt', ascending=False).head(5))

# 2.3 Shift Performance
shift_perf = production.groupby('shift')['conversion_rate'].mean().round(2)
print("\nðŸ“Š Shift Performance:")
print(shift_perf.sort_values(ascending=False))

# Visualization 2: Production Analysis
fig, axes = plt.subplots(2, 2, figsize=(16, 12))

# Plant efficiency (red dashed line = mean over all batches)
plant_efficiency.sort_values('Avg Conversion %')['Avg Conversion %'].plot(
    kind='barh', ax=axes[0, 0], color='steelblue'
)
axes[0, 0].set_title('Plant Conversion Efficiency', fontsize=14, fontweight='bold')
axes[0, 0].set_xlabel('Conversion Rate (%)')
axes[0, 0].axvline(x=production['conversion_rate'].mean(), color='red', linestyle='--', label='Average')
axes[0, 0].legend()

# Category production
category_prod = product_prod.groupby('category')['finished_goods_mt'].sum()
axes[0, 1].pie(category_prod, labels=category_prod.index, autopct='%1.1f%%', startangle=90)
axes[0, 1].set_title('Production by Category', fontsize=14, fontweight='bold')

# Monthly production trend (periods converted to timestamps so matplotlib can plot them)
monthly_prod = production.groupby(production['production_date'].dt.to_period('M'))['finished_goods_mt'].sum()
monthly_prod.index = monthly_prod.index.to_timestamp()
axes[1, 0].plot(monthly_prod.index, monthly_prod.values, marker='o', linewidth=2)
axes[1, 0].set_title('Monthly Production Trend', fontsize=14, fontweight='bold')
axes[1, 0].set_xlabel('Month')
axes[1, 0].set_ylabel('Output (MT)')
axes[1, 0].grid(True, alpha=0.3)
axes[1, 0].tick_params(axis='x', rotation=45)

# Shift comparison
# NOTE(review): three colours are supplied, which presumes three shifts — confirm
shift_perf.plot(kind='bar', ax=axes[1, 1], color=['#2ecc71', '#3498db', '#e74c3c'])
axes[1, 1].set_title('Shift Performance Comparison', fontsize=14, fontweight='bold')
axes[1, 1].set_ylabel('Avg Conversion Rate (%)')
axes[1, 1].set_xlabel('Shift')
axes[1, 1].tick_params(axis='x', rotation=0)

plt.tight_layout()
plt.savefig('02_production_analysis.png', dpi=300, bbox_inches='tight')
print("\nâœ“ Saved: 02_production_analysis.png")
plt.close()



[5/10] Analyzing Production...

PRODUCTION INSIGHTS

ðŸ“Š Plant Efficiency:
                   Avg Conversion %  Total Output (MT)  Batches
plant_location                                                 
Ahmedabad Plant 1             81.52           16738.85     4097
Ahmedabad Plant 2             81.46           17657.02     4328
Rajkot Plant                  81.41           16685.85     4103

ðŸ“Š Top 5 Products by Volume:
                                             finished_goods_mt  \
category     product_name                                        
French Fries French Fries Shoestring 2.5kg             5259.35   
Patties      Burger Patty 1kg                          5211.68   
French Fries French Fries Wedges 2.5kg                 5200.81   
Specialty    Hash Browns 1kg                           5199.71   
French Fries French Fries Crinkle Cut 2.5kg            5150.12   

                                             conversion_rate  
category     product_name                    

In [9]:
# ============================================================================
# SECTION 3: QUALITY CONTROL ANALYTICS
# ============================================================================
# Reports BRC compliance statistics, the inspection approval rate and
# per-plant quality, then saves a 2x2 figure.

print("\n[6/10] Analyzing Quality Control...")
print("\n" + "="*80)
print("QUALITY CONTROL INSIGHTS")
print("="*80)

# Convert dates
quality['inspection_date'] = pd.to_datetime(quality['inspection_date'])

# 3.1 BRC Compliance
brc_stats = quality['brc_compliance_score'].describe().round(2)
print("\nðŸ“Š BRC Compliance Score Statistics:")
print(brc_stats)
print(f"\nExcellent Quality Rate (90+): {(quality['brc_compliance_score'] >= 90).sum() / len(quality) * 100:.2f}%")

# 3.2 Approval Rate
approval_rate = quality['status'].value_counts()
print("\nðŸ“Š Inspection Status:")
print(approval_rate)
# Boolean mask instead of approval_rate['Approved']: if no inspection is
# 'Approved' this reports 0% rather than raising KeyError, and it is the same
# formula the executive summary uses.
approved_pct = (quality['status'] == 'Approved').mean() * 100
print(f"\nApproval Rate: {approved_pct:.2f}%")

# 3.3 Quality by Plant
# NOTE(review): inner join on batch_id. If batch_id is not unique in
# `production`, inspections are duplicated here and the plant means become
# weighted by that duplication — confirm key uniqueness.
quality_prod = quality.merge(production[['batch_id', 'plant_location']], on='batch_id')
plant_quality = quality_prod.groupby('plant_location').agg({
    'brc_compliance_score': 'mean',
    'defect_rate': 'mean'
}).round(2)
print("\nðŸ“Š Quality by Plant:")
print(plant_quality.sort_values('brc_compliance_score', ascending=False))

# Visualization 3: Quality Analysis
fig, axes = plt.subplots(2, 2, figsize=(16, 12))

# BRC Score Distribution
axes[0, 0].hist(quality['brc_compliance_score'], bins=20, color='skyblue', edgecolor='black')
axes[0, 0].axvline(x=quality['brc_compliance_score'].mean(), color='red', linestyle='--', linewidth=2, label='Mean')
axes[0, 0].axvline(x=90, color='green', linestyle='--', linewidth=2, label='Target (90)')
axes[0, 0].set_title('BRC Compliance Score Distribution', fontsize=14, fontweight='bold')
axes[0, 0].set_xlabel('BRC Score')
axes[0, 0].set_ylabel('Frequency')
axes[0, 0].legend()

# Monthly BRC Trend
monthly_brc = quality.groupby(quality['inspection_date'].dt.to_period('M'))['brc_compliance_score'].mean()
monthly_brc.index = monthly_brc.index.to_timestamp()
axes[0, 1].plot(monthly_brc.index, monthly_brc.values, marker='o', linewidth=2, color='green')
axes[0, 1].axhline(y=90, color='red', linestyle='--', label='Target')
axes[0, 1].set_title('Monthly BRC Compliance Trend', fontsize=14, fontweight='bold')
axes[0, 1].set_xlabel('Month')
axes[0, 1].set_ylabel('Avg BRC Score')
axes[0, 1].legend()
axes[0, 1].grid(True, alpha=0.3)
axes[0, 1].tick_params(axis='x', rotation=45)

# Status breakdown
# Colours are looked up by status label. value_counts() orders by frequency,
# so a positional colour list would paint 'Rejected' green if rejections ever
# outnumbered approvals. Unknown statuses fall back to grey.
status_counts = quality['status'].value_counts()
status_palette = {'Approved': '#2ecc71', 'Rejected': '#e74c3c'}
status_colors = [status_palette.get(status, '#95a5a6') for status in status_counts.index]
axes[1, 0].pie(status_counts, labels=status_counts.index, autopct='%1.1f%%',
               colors=status_colors, startangle=90)
axes[1, 0].set_title('Inspection Status Distribution', fontsize=14, fontweight='bold')

# Plant quality comparison
plant_quality['brc_compliance_score'].sort_values().plot(kind='barh', ax=axes[1, 1], color='coral')
axes[1, 1].set_title('Average BRC Score by Plant', fontsize=14, fontweight='bold')
axes[1, 1].set_xlabel('Avg BRC Score')
axes[1, 1].axvline(x=90, color='green', linestyle='--', linewidth=2, label='Target')
axes[1, 1].legend()

plt.tight_layout()
plt.savefig('03_quality_analysis.png', dpi=300, bbox_inches='tight')
print("\nâœ“ Saved: 03_quality_analysis.png")
plt.close()


[6/10] Analyzing Quality Control...

QUALITY CONTROL INSIGHTS

ðŸ“Š BRC Compliance Score Statistics:
count    3727.00
mean       92.62
std         4.65
min        85.00
25%        89.00
50%        93.00
75%        97.00
max       100.00
Name: brc_compliance_score, dtype: float64

Excellent Quality Rate (90+): 68.90%

ðŸ“Š Inspection Status:
status
Approved    3549
Rejected     178
Name: count, dtype: int64

Approval Rate: 95.22%

ðŸ“Š Quality by Plant:
                   brc_compliance_score  defect_rate
plant_location                                      
Ahmedabad Plant 1                 92.77         2.77
Rajkot Plant                      92.63         2.79
Ahmedabad Plant 2                 92.46         2.74

âœ“ Saved: 03_quality_analysis.png


In [10]:
# ============================================================================
# SECTION 4: B2B CUSTOMER ANALYTICS
# ============================================================================
# Breaks B2B order value down by customer type, country and customer, and
# saves a 2x2 figure including a monthly revenue/order-count trend.

print("\n[7/10] Analyzing B2B Customers...")
print("\n" + "="*80)
print("B2B CUSTOMER INSIGHTS")
print("="*80)

# Convert dates
b2b_orders['order_date'] = pd.to_datetime(b2b_orders['order_date'])

# 4.1 Customer Type Analysis
# Inner join: orders whose customer_id is missing from `b2b_customers` are dropped.
# Dividing by 10,000,000 converts rupees to crores.
customer_revenue = b2b_orders.merge(b2b_customers[['customer_id', 'customer_type', 'country']], on='customer_id')
type_revenue = customer_revenue.groupby('customer_type')['total_value_inr'].sum() / 10000000
print("\nðŸ“Š Revenue by Customer Type (Crores):")
print(type_revenue.sort_values(ascending=False).round(2))

# 4.2 Geographic Analysis
geo_revenue = customer_revenue.groupby('country')['total_value_inr'].sum() / 10000000
print("\nðŸ“Š Top 10 Countries by Revenue (Crores):")
print(geo_revenue.sort_values(ascending=False).head(10).round(2))

# 4.3 Top Customers
# Dividing by 100,000 converts rupees to lakhs.
top_customers = customer_revenue.groupby('customer_id').agg({
    'total_value_inr': 'sum',
    'order_id': 'count'
}).sort_values('total_value_inr', ascending=False).head(10)
top_customers.columns = ['Revenue (â‚¹)', 'Orders']
top_customers['Revenue (Lakhs)'] = (top_customers['Revenue (â‚¹)'] / 100000).round(2)
print("\nðŸ“Š Top 10 Customers:")
print(top_customers[['Revenue (Lakhs)', 'Orders']])

# Visualization 4: B2B Analysis
fig, axes = plt.subplots(2, 2, figsize=(16, 12))

# Customer type revenue
type_revenue.sort_values().plot(kind='barh', ax=axes[0, 0], color='teal')
axes[0, 0].set_title('Revenue by Customer Type', fontsize=14, fontweight='bold')
axes[0, 0].set_xlabel('Revenue (Crores)')

# Geographic distribution
geo_revenue.sort_values(ascending=False).head(10).plot(kind='bar', ax=axes[0, 1], color='purple')
axes[0, 1].set_title('Top 10 Markets by Revenue', fontsize=14, fontweight='bold')
axes[0, 1].set_ylabel('Revenue (Crores)')
axes[0, 1].tick_params(axis='x', rotation=45)

# Monthly order trend: revenue on the left axis, order count on a twin right axis
monthly_orders = customer_revenue.groupby(customer_revenue['order_date'].dt.to_period('M')).agg({
    'total_value_inr': 'sum',
    'order_id': 'count'
})
monthly_orders.index = monthly_orders.index.to_timestamp()
ax1 = axes[1, 0]
ax2 = ax1.twinx()
ax1.plot(monthly_orders.index, monthly_orders['total_value_inr'] / 10000000,
         marker='o', color='blue', label='Revenue (Cr)', linewidth=2)
ax2.plot(monthly_orders.index, monthly_orders['order_id'],
         marker='s', color='red', label='Orders', linewidth=2)
ax1.set_title('Monthly Revenue & Order Trend', fontsize=14, fontweight='bold')
ax1.set_xlabel('Month')
ax1.set_ylabel('Revenue (Crores)', color='blue')
ax2.set_ylabel('Number of Orders', color='red')
ax1.tick_params(axis='x', rotation=45)
ax1.grid(True, alpha=0.3)
# Both lines carry labels but live on different axes, so a plain legend() call
# would show only one of them; gather the handles from both and draw one legend.
trend_lines = ax1.get_lines() + ax2.get_lines()
ax1.legend(trend_lines, [line.get_label() for line in trend_lines], loc='upper left')

# Payment status
payment_dist = customer_revenue['payment_status'].value_counts()
axes[1, 1].pie(payment_dist, labels=payment_dist.index, autopct='%1.1f%%', startangle=90)
axes[1, 1].set_title('Payment Status Distribution', fontsize=14, fontweight='bold')

plt.tight_layout()
plt.savefig('04_b2b_customer_analysis.png', dpi=300, bbox_inches='tight')
print("\nâœ“ Saved: 04_b2b_customer_analysis.png")
plt.close()


[7/10] Analyzing B2B Customers...

B2B CUSTOMER INSIGHTS

ðŸ“Š Revenue by Customer Type (Crores):
customer_type
Retail Chain     33.56
QSR Chain        30.73
Export Client    27.82
Food Service     26.91
Distributor      23.10
Name: total_value_inr, dtype: float64

ðŸ“Š Top 10 Countries by Revenue (Crores):
country
Nepal           16.36
Sri Lanka       11.73
Bangladesh      11.68
Kuwait          10.99
Malaysia        10.61
UAE             10.07
Saudi Arabia     9.86
Singapore        9.65
Kenya            9.45
India            8.29
Name: total_value_inr, dtype: float64

ðŸ“Š Top 10 Customers:
             Revenue (Lakhs)  Orders
customer_id                         
C00122                141.19      30
C00091                132.48      28
C00024                130.66      30
C00154                130.05      26
C00039                129.22      29
C00133                128.42      30
C00149                128.31      29
C00064                124.45      30
C00016                123.83  

In [11]:
# ============================================================================
# SECTION 5: B2C RETAIL ANALYTICS
# ============================================================================
# Summarises retail sales by city, channel and customer type and saves a
# 2x2 figure. Dividing rupee amounts by 100,000 expresses them in lakhs.

print("\n[8/10] Analyzing B2C Sales...")
print(f"\n{'=' * 80}")
print("B2C RETAIL INSIGHTS")
print("=" * 80)

# Convert dates
b2c_sales['sale_date'] = pd.to_datetime(b2c_sales['sale_date'])

# 5.1 City Performance
city_perf = (
    b2c_sales
    .groupby('city')
    .agg(**{
        'Revenue (â‚¹)': ('final_price', 'sum'),
        'Transactions': ('transaction_id', 'count'),
        'Avg Discount %': ('discount_percent', 'mean'),
    })
    .round(2)
)
city_perf['Revenue (Lakhs)'] = city_perf['Revenue (â‚¹)'].div(100000).round(2)
city_report = city_perf.loc[:, ['Revenue (Lakhs)', 'Transactions', 'Avg Discount %']]
print("\nðŸ“Š City Performance:")
print(city_report.sort_values('Revenue (Lakhs)', ascending=False))

# 5.2 Channel Analysis
channel_perf = b2c_sales.groupby('channel')['final_price'].sum().div(100000)
print("\nðŸ“Š Revenue by Channel (Lakhs):")
print(channel_perf.sort_values(ascending=False).round(2))

# 5.3 Customer Type
customer_type_dist = b2c_sales['customer_type'].value_counts()
print("\nðŸ“Š Customer Type Distribution:")
print(customer_type_dist)

# Visualization 5: B2C Analysis
fig, axes = plt.subplots(2, 2, figsize=(16, 12))
title_style = dict(fontsize=14, fontweight='bold')

# Top-left: city revenue, smallest at the bottom
city_perf['Revenue (Lakhs)'].sort_values().plot(kind='barh', ax=axes[0, 0], color='orange')
axes[0, 0].set_title('Revenue by City', **title_style)
axes[0, 0].set_xlabel('Revenue (Lakhs)')

# Top-right: channel mix (y label cleared so the series name is not drawn)
channel_perf.plot(kind='pie', ax=axes[0, 1], autopct='%1.1f%%', startangle=90)
axes[0, 1].set_title('Revenue by Channel', **title_style)
axes[0, 1].set_ylabel('')

# Bottom-left: monthly B2C revenue trend
sale_month = b2c_sales['sale_date'].dt.to_period('M')
monthly_b2c = b2c_sales.groupby(sale_month)['final_price'].sum() / 100000
monthly_b2c.index = monthly_b2c.index.to_timestamp()
axes[1, 0].plot(monthly_b2c.index, monthly_b2c.values, marker='o', linewidth=2, color='green')
axes[1, 0].set_title('Monthly B2C Revenue Trend', **title_style)
axes[1, 0].set_xlabel('Month')
axes[1, 0].set_ylabel('Revenue (Lakhs)')
axes[1, 0].grid(True, alpha=0.3)
axes[1, 0].tick_params(axis='x', rotation=45)

# Bottom-right: transactions per customer type
customer_type_dist.plot(kind='bar', ax=axes[1, 1], color=['#3498db', '#e74c3c', '#2ecc71'])
axes[1, 1].set_title('Customer Type Distribution', **title_style)
axes[1, 1].set_ylabel('Number of Transactions')
axes[1, 1].tick_params(axis='x', rotation=45)

plt.tight_layout()
plt.savefig('05_b2c_analysis.png', dpi=300, bbox_inches='tight')
print("\nâœ“ Saved: 05_b2c_analysis.png")
plt.close()


[8/10] Analyzing B2C Sales...

B2C RETAIL INSIGHTS

ðŸ“Š City Performance:
             Revenue (Lakhs)  Transactions  Avg Discount %
city                                                      
Gandhinagar            48.55         18496           12.45
Ahmedabad              48.16         18258           12.52
Surat                  48.01         18337           12.52
Anand                  47.98         18255           12.49
Vadodara               47.96         18253           12.50
Bhavnagar              47.73         18206           12.50
Rajkot                 47.65         18138           12.58

ðŸ“Š Revenue by Channel (Lakhs):
channel
Online Platform    84.60
Kirana Stores      83.96
Modern Trade       83.87
Own Stores         83.61
Name: final_price, dtype: float64

ðŸ“Š Customer Type Distribution:
customer_type
Loyalty Member    42673
Regular           42655
New               42615
Name: count, dtype: int64

âœ“ Saved: 05_b2c_analysis.png


In [12]:
# ============================================================================
# SECTION 6: FINANCIAL OVERVIEW
# ============================================================================

print("\n[9/10] Financial Analysis...")
print("\n" + "="*80)
print("FINANCIAL INSIGHTS")
print("="*80)

# Convert dates
revenue['date'] = pd.to_datetime(revenue['date'])

# 6.1 Overall Performance
total_revenue = revenue['revenue_inr'].sum() / 10000000
total_margin = revenue['gross_margin_inr'].sum() / 10000000
margin_pct = (revenue['gross_margin_inr'].sum() / revenue['revenue_inr'].sum() * 100)

print(f"\nðŸ“Š Overall Performance:")
print(f"Total Revenue: â‚¹{total_revenue:.2f} Crores")
print(f"Total Gross Margin: â‚¹{total_margin:.2f} Crores")
print(f"Margin %: {margin_pct:.2f}%")

# 6.2 B2B vs B2C
source_revenue = revenue.groupby('revenue_source').agg({
    'revenue_inr': 'sum',
    'gross_margin_inr': 'sum'
}) / 10000000
source_revenue.columns = ['Revenue (Cr)', 'Margin (Cr)']
source_revenue['Margin %'] = (revenue.groupby('revenue_source')['gross_margin_inr'].sum() / 
                                revenue.groupby('revenue_source')['revenue_inr'].sum() * 100).round(2)
print("\nðŸ“Š Revenue by Source:")
print(source_revenue.round(2))

# 6.3 Category Performance
category_revenue = revenue.groupby('product_category')['revenue_inr'].sum() / 10000000
print("\nðŸ“Š Revenue by Category (Crores):")
print(category_revenue.sort_values(ascending=False).round(2))

# Visualization 6: Financial Dashboard
fig = plt.figure(figsize=(16, 10))
gs = fig.add_gridspec(3, 3, hspace=0.3, wspace=0.3)

# Revenue trend
ax1 = fig.add_subplot(gs[0, :])
monthly_rev = revenue.groupby(revenue['date'].dt.to_period('M')).agg({
    'revenue_inr': 'sum',
    'gross_margin_inr': 'sum'
}) / 10000000
monthly_rev.index = monthly_rev.index.to_timestamp()
ax1.plot(monthly_rev.index, monthly_rev['revenue_inr'], marker='o', linewidth=2, label='Revenue', color='blue')
ax1.plot(monthly_rev.index, monthly_rev['gross_margin_inr'], marker='s', linewidth=2, label='Gross Margin', color='green')
ax1.set_title('Monthly Revenue & Margin Trend', fontsize=16, fontweight='bold')
ax1.set_xlabel('Month')
ax1.set_ylabel('Amount (Crores)')
ax1.legend()
ax1.grid(True, alpha=0.3)
ax1.tick_params(axis='x', rotation=45)

# B2B vs B2C
ax2 = fig.add_subplot(gs[1, 0])
source_revenue['Revenue (Cr)'].plot(kind='bar', ax=ax2, color=['#3498db', '#e74c3c'])
ax2.set_title('Revenue by Source', fontsize=14, fontweight='bold')
ax2.set_ylabel('Revenue (Crores)')
ax2.tick_params(axis='x', rotation=0)

# Category mix
ax3 = fig.add_subplot(gs[1, 1])
category_revenue.plot(kind='pie', ax=ax3, autopct='%1.1f%%', startangle=90)
ax3.set_title('Revenue by Category', fontsize=14, fontweight='bold')
ax3.set_ylabel('')

# Margin comparison
ax4 = fig.add_subplot(gs[1, 2])
source_revenue['Margin %'].plot(kind='bar', ax=ax4, color=['#2ecc71', '#f39c12'])
ax4.set_title('Margin % by Source', fontsize=14, fontweight='bold')
ax4.set_ylabel('Margin %')
ax4.tick_params(axis='x', rotation=0)
ax4.axhline(y=margin_pct, color='red', linestyle='--', label='Overall')
ax4.legend()

# Category-Source Matrix
ax5 = fig.add_subplot(gs[2, :])
cat_source = revenue.groupby(['product_category', 'revenue_source'])['revenue_inr'].sum().unstack() / 10000000
cat_source.plot(kind='bar', ax=ax5, stacked=False)
ax5.set_title('Revenue by Category & Source', fontsize=14, fontweight='bold')
ax5.set_ylabel('Revenue (Crores)')
ax5.set_xlabel('Product Category')
ax5.legend(title='Source')
ax5.tick_params(axis='x', rotation=45)

plt.savefig('06_financial_overview.png', dpi=300, bbox_inches='tight')
print("\nâœ“ Saved: 06_financial_overview.png")
plt.close()


[9/10] Financial Analysis...

FINANCIAL INSIGHTS

ðŸ“Š Overall Performance:
Total Revenue: â‚¹150.33 Crores
Total Gross Margin: â‚¹57.70 Crores
Margin %: 38.38%

ðŸ“Š Revenue by Source:
                Revenue (Cr)  Margin (Cr)  Margin %
revenue_source                                     
B2B                   142.12        54.01      38.0
B2C                     8.21         3.70      45.0

ðŸ“Š Revenue by Category (Crores):
product_category
Specialty       59.41
French Fries    45.68
Patties         45.24
Name: revenue_inr, dtype: float64

âœ“ Saved: 06_financial_overview.png


In [15]:
# ============================================================================
# SUMMARY REPORT
# ============================================================================
# Prints headline figures for every domain. Besides the loaded tables it needs
# `production['conversion_rate']` (added in Section 2) and `total_revenue`,
# `total_margin`, `margin_pct`, `source_revenue` (computed in Section 6).

print("\n[10/10] Generating Summary Report...")
print("\n" + "="*80)
print("EXECUTIVE SUMMARY")
print("="*80)

# B2B revenue is summed from `b2b_orders`, the same frame the order count
# beside it uses. Summing the customer-joined frame instead would silently
# omit any order whose customer_id has no match, so the two figures could
# describe different sets of orders.
#
# NOTE(review): in the recorded run, B2C revenue from `b2c_sales` prints as
# 3.36 Cr here, while `revenue_summary` reports 8.21 Cr for B2C in Section 6.
# The two tables do not reconcile — confirm which is authoritative before
# quoting either number.
print(f"""
SUPPLY CHAIN:
â€¢ {len(farmers)} farmers across {farmers['region'].nunique()} regions
â€¢ {procurement['quantity_mt'].sum():,.0f} MT total procurement
â€¢ Premium quality rate: {(procurement['quality_grade'] == 'Premium').sum() / len(procurement) * 100:.1f}%

PRODUCTION:
â€¢ {production['finished_goods_mt'].sum():,.0f} MT total production
â€¢ Average conversion rate: {production['conversion_rate'].mean():.2f}%
â€¢ {production['plant_location'].nunique()} production plants

QUALITY:
â€¢ {len(quality)} quality inspections performed
â€¢ Average BRC score: {quality['brc_compliance_score'].mean():.1f}/100
â€¢ Approval rate: {(quality['status'] == 'Approved').sum() / len(quality) * 100:.1f}%

B2B BUSINESS:
â€¢ {len(b2b_customers)} B2B customers across {b2b_customers['country'].nunique()} countries
â€¢ â‚¹{b2b_orders['total_value_inr'].sum() / 10000000:.2f} Cr total B2B revenue
â€¢ {len(b2b_orders)} orders processed

B2C BUSINESS:
â€¢ {b2c_sales['city'].nunique()} cities operational
â€¢ {len(b2c_sales):,} retail transactions
â€¢ â‚¹{b2c_sales['final_price'].sum() / 10000000:.2f} Cr total B2C revenue

FINANCIAL:
â€¢ Total Revenue: â‚¹{total_revenue:.2f} Crores
â€¢ Gross Margin: â‚¹{total_margin:.2f} Crores ({margin_pct:.2f}%)
â€¢ B2B contributes {(source_revenue.loc['B2B', 'Revenue (Cr)'] / total_revenue * 100):.1f}% of revenue
""")




[10/10] Generating Summary Report...

EXECUTIVE SUMMARY

SUPPLY CHAIN:
â€¢ 500 farmers across 5 regions
â€¢ 38,481 MT total procurement
â€¢ Premium quality rate: 24.8%

PRODUCTION:
â€¢ 51,082 MT total production
â€¢ Average conversion rate: 81.46%
â€¢ 3 production plants

QUALITY:
â€¢ 3727 quality inspections performed
â€¢ Average BRC score: 92.6/100
â€¢ Approval rate: 95.2%

B2B BUSINESS:
â€¢ 200 B2B customers across 15 countries
â€¢ â‚¹142.12 Cr total B2B revenue
â€¢ 3605 orders processed

B2C BUSINESS:
â€¢ 7 cities operational
â€¢ 127,943 retail transactions
â€¢ â‚¹3.36 Cr total B2C revenue

FINANCIAL:
â€¢ Total Revenue: â‚¹150.33 Crores
â€¢ Gross Margin: â‚¹57.70 Crores (38.38%)
â€¢ B2B contributes 94.5% of revenue

