# PackagingCo ESG & Sustainability Analysis

## Business Question
**How can PackagingCo drive ESG (Environmental, Social, Governance) goals without compromising financial health?**

This notebook focuses on the **Sustainability & Impact Tracking** module, analyzing:
- Carbon footprint trends across product lines and facilities
- Materials mix analysis (recycled vs. virgin materials)
- Environmental impact metrics and their relationship to costs
- Scenario modeling for sustainability initiatives

## Key Objectives
1. **Measure current sustainability performance** across different product lines and facilities
2. **Identify improvement opportunities** in emissions, material usage, and waste reduction
3. **Analyze trade-offs** between environmental impact and operational costs
4. **Provide data-driven recommendations** for ESG initiatives

---


In [None]:
# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import warnings
# Suppress all warnings for the rest of the session.
# NOTE(review): this blanket filter also hides warnings raised by the analysis
# code itself — consider narrowing it to specific categories.
warnings.filterwarnings('ignore')

# Plot appearance: stock matplotlib style combined with seaborn's "husl" palette
plt.style.use('default')
sns.set_palette("husl")

# Set both pandas display options to None so wide frames are not truncated
for display_option in ('display.max_columns', 'display.width'):
    pd.set_option(display_option, None)

# Confirmation banner (two lines)
print("Libraries imported successfully!", "Ready for ESG data analysis 🌱", sep="\n")


## 1. Data Loading and Initial Exploration


In [None]:
# Read the raw ESG extract and convert its 'date' column to datetimes in one chain
esg_data = (
    pd.read_csv('../data/raw/sample_esg_data.csv')
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
)

# Overview: dimensions, covered period, and the distinct category values
overview_lines = [
    "ESG Dataset Overview",
    "=" * 50,
    f"Shape: {esg_data.shape}",
    f"Date range: {esg_data['date'].min()} to {esg_data['date'].max()}",
    f"Product lines: {esg_data['product_line'].unique()}",
    f"Facilities: {esg_data['facility'].unique()}",
    "\nFirst few rows:",
]
print("\n".join(overview_lines))

# Last expression: rendered by the notebook as a rich table
esg_data.head()


In [None]:
# Data quality check: per-column null counts, column dtypes, then summary statistics
print("Data Quality Assessment")
print("=" * 50)

quality_sections = [
    ("Missing values:", esg_data.isna().sum()),
    ("\nData types:", esg_data.dtypes),
]
for heading, report in quality_sections:
    print(heading)
    print(report)

print("\nBasic statistics:")
# Last expression: describe() output is displayed as the cell result
esg_data.describe()


## 2. Carbon Footprint Analysis

Understanding our carbon emissions across different product lines and facilities is crucial for identifying where we can make the biggest environmental impact.


In [None]:
# Carbon emissions trends over time.
# A line chart joins points in row order, so the rows are sorted chronologically
# first; otherwise an unsorted CSV would draw lines that double back on themselves.
# The stable sort keeps the original relative order of rows sharing a date.
emissions_timeline = esg_data.sort_values('date', kind='stable')

fig = px.line(emissions_timeline,
              x='date',
              y='emissions_kg_co2',
              color='product_line',
              facet_col='facility',
              title='Carbon Emissions Trends by Product Line and Facility',
              labels={'emissions_kg_co2': 'CO2 Emissions (kg)', 'date': 'Date'})

fig.update_layout(height=500)
fig.show()

# Summary statistics by product line (mean, spread, and range), rounded to 2 decimals
emissions_by_product = (
    esg_data.groupby('product_line')['emissions_kg_co2']
    .agg(['mean', 'std', 'min', 'max'])
    .round(2)
)
print("Carbon Emissions by Product Line (kg CO2)")
print("=" * 50)
print(emissions_by_product)


In [None]:
# Box plot of emissions per facility, with one box per product line
fig = px.box(
    esg_data,
    x='facility',
    y='emissions_kg_co2',
    color='product_line',
    title='Carbon Emissions Distribution by Facility and Product Line',
)
fig.show()

# Sum emissions over all rows belonging to each facility
facility_emissions = esg_data['emissions_kg_co2'].groupby(esg_data['facility']).sum()
print("Total Carbon Emissions by Facility:", facility_emissions, sep="\n")


## 3. Materials Mix Analysis

Analyzing the balance between recycled and virgin materials is key to understanding our sustainability progress and identifying opportunities for improvement.


In [None]:
# Materials mix trends over time: one line per product line in each panel
fig = make_subplots(rows=1, cols=2,
                    subplot_titles=('Recycled Material %', 'Virgin Material %'))

# (source column, legend label, subplot column) for the two panels
material_panels = [
    ('recycled_material_pct', 'Recycled', 1),
    ('virgin_material_pct', 'Virgin', 2),
]
material_columns = [column for column, _, _ in material_panels]

for product in esg_data['product_line'].unique():
    # A product line can have several rows per date (one per facility). Plotting
    # them raw would chain all of them into a single zig-zag line, so average
    # per date first. This is an unweighted mean across rows; groupby also
    # returns the dates in chronological order.
    product_trend = (
        esg_data.loc[esg_data['product_line'] == product]
        .groupby('date', as_index=False)[material_columns]
        .mean()
    )

    for column, label, panel_col in material_panels:
        fig.add_trace(go.Scatter(x=product_trend['date'],
                                 y=product_trend[column],
                                 name=f'{product} ({label})',
                                 mode='lines+markers'), row=1, col=panel_col)

# Label the axes so each panel can be read on its own
fig.update_xaxes(title_text='Date')
fig.update_yaxes(title_text='Percentage (%)')
fig.update_layout(title='Materials Mix Trends by Product Line', height=400)
fig.show()


In [None]:
# Mean recycled / virgin share per product line, rounded to one decimal
material_share_columns = ['recycled_material_pct', 'virgin_material_pct']
materials_summary = (
    esg_data.groupby('product_line')[material_share_columns]
    .mean()
    .round(1)
)

print("Average Materials Mix by Product Line:")
print("=" * 50)
print(materials_summary)

# Bar chart of the two shares side by side for each product line
summary_for_plot = materials_summary.reset_index()
fig = px.bar(
    summary_for_plot,
    x='product_line',
    y=material_share_columns,
    title='Average Materials Mix by Product Line',
    labels={'value': 'Percentage (%)', 'variable': 'Material Type'},
)
fig.show()


## 4. ESG Impact vs Cost Analysis

This section joins the ESG data with sales data on date and product line, then compares recycled-material share and emissions against cost of goods and revenue for each product line.

In [None]:
# Load sales data to analyze cost relationships
sales_data = pd.read_csv('../data/raw/sample_sales_data.csv')
sales_data['date'] = pd.to_datetime(sales_data['date'])

# Merge ESG and sales data for combined analysis (inner join on date + product line).
# NOTE(review): if either table has several rows per (date, product_line) — e.g. one
# ESG row per facility — the join repeats rows and weights the averages below
# accordingly. Confirm the key granularity of both files.
combined_data = pd.merge(esg_data, sales_data, on=['date', 'product_line'], how='inner')

# An inner join silently yields nothing when the keys do not line up; say so explicitly
if combined_data.empty:
    print("Warning: no ESG rows matched sales rows on date and product_line; "
          "the tables and chart below will be empty.")

# Average ESG and financial metrics per product line
correlation_analysis = combined_data.groupby('product_line').agg({
    'recycled_material_pct': 'mean',
    'emissions_kg_co2': 'mean',
    'cost_of_goods': 'mean',
    'revenue': 'mean'
}).round(2)

print("ESG Metrics vs Financial Performance by Product Line:")
print("=" * 60)
print(correlation_analysis)

# Pearson correlation between recycled share and cost of goods within each product
# line. The averages above only compare levels; this measures how the two move together.
if not combined_data.empty:
    recycled_cost_corr = (
        combined_data.groupby('product_line')[['recycled_material_pct', 'cost_of_goods']]
        .apply(lambda grp: grp['recycled_material_pct'].corr(grp['cost_of_goods']))
        .round(2)
    )
    print("\nCorrelation: Recycled Materials % vs Cost of Goods (per product line):")
    print("=" * 60)
    print(recycled_cost_corr)

# Scatter plot: Recycled materials vs Cost of goods, marker size scaled by revenue
fig = px.scatter(combined_data,
                x='recycled_material_pct',
                y='cost_of_goods',
                color='product_line',
                size='revenue',
                title='Recycled Materials % vs Cost of Goods',
                labels={'recycled_material_pct': 'Recycled Materials (%)',
                       'cost_of_goods': 'Cost of Goods ($)'})
fig.show()


## 5. Summary

In [None]:
# Summary insights. Every product line named below is looked up from the data,
# so the text stays correct when the underlying CSVs change.
print("🌱 SUSTAINABILITY INSIGHTS & RECOMMENDATIONS")
print("=" * 60)

# 1. Emissions analysis — compute the per-product mean once and reuse it
mean_emissions = esg_data.groupby('product_line')['emissions_kg_co2'].mean()
highest_emitter = mean_emissions.idxmax()
lowest_emitter = mean_emissions.idxmin()

print(f"1. CARBON FOOTPRINT:")
print(f"   • Highest emitter: {highest_emitter}")
print(f"   • Lowest emitter: {lowest_emitter}")
print(f"   • Recommendation: Focus emission reduction efforts on {highest_emitter}")

# 2. Materials analysis (uses materials_summary from the materials mix section)
best_recycling = materials_summary['recycled_material_pct'].idxmax()
worst_recycling = materials_summary['recycled_material_pct'].idxmin()

print(f"\n2. MATERIALS SUSTAINABILITY:")
print(f"   • Best recycling rate: {best_recycling} ({materials_summary.loc[best_recycling, 'recycled_material_pct']:.1f}%)")
print(f"   • Needs improvement: {worst_recycling} ({materials_summary.loc[worst_recycling, 'recycled_material_pct']:.1f}%)")
print(f"   • Recommendation: Increase recycled content in {worst_recycling} production")

# 3. Cost-benefit analysis — average recycled share and cost per product line,
#    taken from the merged ESG + sales frame
avg_cost_by_recycling = combined_data.groupby('product_line').agg({
    'recycled_material_pct': 'mean',
    'cost_of_goods': 'mean'
}).round(2)

print(f"\n3. COST-SUSTAINABILITY TRADE-OFF:")
# Drop product lines lacking either metric so idxmax/idxmin never see all-NaN input
tradeoff = avg_cost_by_recycling.dropna()
if tradeoff.empty:
    print(f"   • No overlapping ESG and sales records; trade-off could not be assessed")
else:
    recycled_share = tradeoff['recycled_material_pct']
    avg_cost = tradeoff['cost_of_goods']
    top_recycler = recycled_share.idxmax()
    bottom_recycler = recycled_share.idxmin()
    costliest = avg_cost.idxmax()
    print(f"   • Highest recycling: {top_recycler} "
          f"({recycled_share[top_recycler]:.1f}% recycled, avg cost of goods ${avg_cost[top_recycler]:,.2f})")
    print(f"   • Lowest recycling: {bottom_recycler} "
          f"({recycled_share[bottom_recycler]:.1f}% recycled, avg cost of goods ${avg_cost[bottom_recycler]:,.2f})")
    print(f"   • Highest avg cost of goods: {costliest} (${avg_cost[costliest]:,.2f})")
print(f"   • Recommendation: Gradual increase in recycled materials with cost monitoring")

print(f"\n4. STRATEGIC PRIORITIES:")
print(f"   • Short-term: Improve {worst_recycling} recycling rate by 10-15%")
print(f"   • Medium-term: Reduce {highest_emitter} emissions through process optimization")
print(f"   • Long-term: Achieve 50%+ recycled content across all product lines")
