<a href="https://colab.research.google.com/github/iamjustkay/Environment_Impact_of_Food_Production_Analysis/blob/main/Environment_Impact_of_Food_Production_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
pip install -U kaleido



In [2]:
# Notebook: Environment Impact of Food Production — Analysis & Plotly Visualizations
# Save as: env_food_analysis.ipynb or run in a Jupyter cell-by-cell
# Requirements:
# pip install pandas numpy plotly kaleido python-pptx

# %%
# 0) Imports & settings
import os
import math
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio

In [3]:
# Upgrade Plotly to a version compatible with Kaleido
!pip install --upgrade plotly>=6.1.1

In [4]:
# Ensure plotly can export static images (kaleido)
pio.kaleido.scope.default_format = "png"
pio.kaleido.scope.default_width = 1200
pio.kaleido.scope.default_height = 600

# Directory for outputs
OUT_DIR = "Charts"
os.makedirs(OUT_DIR, exist_ok=True)

# Pretty printing pandas
pd.set_option("display.max_columns", 200)
pd.set_option("display.width", 120)



Use of plotly.io.kaleido.scope.default_format is deprecated and support will be removed after September 2025.
Please use plotly.io.defaults.default_format instead.




Use of plotly.io.kaleido.scope.default_width is deprecated and support will be removed after September 2025.
Please use plotly.io.defaults.default_width instead.




Use of plotly.io.kaleido.scope.default_height is deprecated and support will be removed after September 2025.
Please use plotly.io.defaults.default_height instead.




## Business Questions
* Q1. Which foods have the highest greenhouse gas emissions per kilogram?
* Q2. On average, which lifecycle stages drive emissions across foods?
* Q3. For the top 5 emitting foods, how do lifecycle stages break down?
* Q4. Which foods are both water-intensive and high-emissions?
* Q5. Which foods use the most land per kilogram?
* Q6. How closely are scarcity-weighted water use and emissions related?
* Q7. What’s the impact if we cut Transport & Packaging emissions by 50%?

In [5]:
df = pd.read_csv("Food_Production.csv")

In [6]:
# Quick preview
print("Rows, columns:", df.shape)
display(df.head())

Rows, columns: (43, 23)


Unnamed: 0,Food product,Land use change,Animal Feed,Farm,Processing,Transport,Packging,Retail,Total_emissions,Eutrophying emissions per 1000kcal (gPO₄eq per 1000kcal),Eutrophying emissions per kilogram (gPO₄eq per kilogram),Eutrophying emissions per 100g protein (gPO₄eq per 100 grams protein),Freshwater withdrawals per 1000kcal (liters per 1000kcal),Freshwater withdrawals per 100g protein (liters per 100g protein),Freshwater withdrawals per kilogram (liters per kilogram),Greenhouse gas emissions per 1000kcal (kgCO₂eq per 1000kcal),Greenhouse gas emissions per 100g protein (kgCO₂eq per 100g protein),Land use per 1000kcal (m² per 1000kcal),Land use per kilogram (m² per kilogram),Land use per 100g protein (m² per 100g protein),Scarcity-weighted water use per kilogram (liters per kilogram),Scarcity-weighted water use per 100g protein (liters per 100g protein),Scarcity-weighted water use per 1000kcal (liters per 1000 kilocalories)
0,Wheat & Rye (Bread),0.1,0.0,0.8,0.2,0.1,0.1,0.1,1.4,,,,,,,,,,,,,,
1,Maize (Meal),0.3,0.0,0.5,0.1,0.1,0.1,0.0,1.1,,,,,,,,,,,,,,
2,Barley (Beer),0.0,0.0,0.2,0.1,0.0,0.5,0.3,1.1,,,,,,,,,,,,,,
3,Oatmeal,0.0,0.0,1.4,0.0,0.1,0.1,0.0,1.6,4.281357,11.23,8.638462,183.911552,371.076923,482.4,0.945482,1.907692,2.897446,7.6,5.846154,18786.2,14450.92308,7162.104461
4,Rice,0.0,0.0,3.6,0.1,0.1,0.1,0.1,4.0,9.514379,35.07,49.394366,609.983722,3166.760563,2248.4,1.207271,6.267606,0.759631,2.8,3.943662,49576.3,69825.77465,13449.89148


In [7]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 43 entries, 0 to 42
Data columns (total 23 columns):
 #   Column                                                                   Non-Null Count  Dtype  
---  ------                                                                   --------------  -----  
 0   Food product                                                             43 non-null     object 
 1   Land use change                                                          43 non-null     float64
 2   Animal Feed                                                              43 non-null     float64
 3   Farm                                                                     43 non-null     float64
 4   Processing                                                               43 non-null     float64
 5   Transport                                                                43 non-null     float64
 6   Packging                                                                 43 

In [8]:
# Fix typo in column
df = df.rename(columns={"Packging": "Packaging"})

# Ensure numeric columns are parsed
for c in df.columns:
    if c != "Food product":
        df[c] = pd.to_numeric(df[c], errors="coerce")

# Lifecycle stage columns
stage_cols = ["Land use change", "Animal Feed", "Farm", "Processing", "Transport", "Packaging", "Retail"]
stage_cols = [c for c in stage_cols if c in df.columns]

In [9]:
# Compute clean total emissions (sum of lifecycle stages if needed)
if "Total_emissions" in df.columns:
    stage_sum = df[stage_cols].sum(axis=1, skipna=True)
    discrepancy = (df["Total_emissions"] - stage_sum).abs()
    df["Total_emissions_clean"] = np.where(discrepancy > 1e-6, stage_sum, df["Total_emissions"].fillna(stage_sum))
else:
    df["Total_emissions_clean"] = df[stage_cols].sum(axis=1, skipna=True)

In [10]:
# Helpers
def top_n(col, n=10, ascending=False):
    return df[["Food product", col]].dropna().sort_values(col, ascending=ascending).head(n)

In [12]:
def save_fig(fig, name):
    """Save interactive PNG snapshot in OUT_DIR with given name (no ext)."""
    png_path = os.path.join(OUT_DIR, f"{name}.png")
    # save a static PNG using kaleido
    try:
        fig.write_image(png_path)
    except Exception as e:
        print("Warning: could not write PNG (kaleido issue):", e)
    print("Saved:", png_path)
    return png_path

# Format helper for label text template
def format_text_template(fmt="{:.2f}"):
    # Plotly uses %{text:.2f} format but we'll pass numbers formatted
    return fmt

In [13]:
# Q1. Which foods have the highest greenhouse gas emissions per kilogram?
q1 = top_n("Total_emissions_clean", 10, ascending=False).sort_values("Total_emissions_clean", ascending=True)
fig1 = px.bar(
    q1, x="Total_emissions_clean", y="Food product", orientation="h",
    text="Total_emissions_clean",
    title="Q1. Highest greenhouse gas emissions per kg (Top 10 foods)",
    labels={"Total_emissions_clean": "kgCO₂e per kg"}
)
fig1.update_traces(texttemplate="%{text:.2f}", textposition="outside")
fig1.show()


In [14]:
# Q2. On average, which lifecycle stages drive emissions across foods?
stage_means = df[stage_cols].mean().reset_index()
stage_means.columns = ["Stage", "kgCO2e_per_kg"]
fig2 = px.bar(
    stage_means.sort_values("kgCO2e_per_kg", ascending=False),
    x="Stage", y="kgCO2e_per_kg", text="kgCO2e_per_kg",
    title="Q2. Average emissions by lifecycle stage",
    labels={"kgCO2e_per_kg": "Avg. kgCO₂e per kg"}
)
fig2.update_traces(texttemplate="%{text:.2f}", textposition="outside")
fig2.show()

In [15]:
# Q3. Lifecycle stage breakdown for top 5 emitting foods
top5 = top_n("Total_emissions_clean", 5, ascending=False)["Food product"]
breakdown = df[df["Food product"].isin(top5)][["Food product"] + stage_cols]
long = breakdown.melt(id_vars="Food product", var_name="Stage", value_name="kgCO2e_per_kg")
fig3 = px.bar(
    long, x="kgCO2e_per_kg", y="Food product", color="Stage", orientation="h", barmode="stack",
    text="kgCO2e_per_kg",
    title="Q3. Lifecycle breakdown for top 5 emitting foods",
    labels={"kgCO2e_per_kg": "kgCO₂e per kg"}
)
fig3.update_traces(texttemplate="%{text:.2f}", textposition="inside")
fig3.show()

In [24]:
# Q4. Freshwater withdrawals vs GHG emissions
water_col = "Freshwater withdrawals per kilogram (liters per kilogram)"
if water_col in df.columns:
    scatter_df = df[["Food product", "Total_emissions_clean", water_col]].dropna()
    fig4 = px.scatter(
        scatter_df, x=water_col, y="Total_emissions_clean", text="Food product",
        title="Q4. Freshwater withdrawals vs. GHG emissions",
        labels={"Total_emissions_clean": "kgCO₂e per kg", water_col: "Freshwater per kg (L)"}

    )
    fig4.update_traces(textposition="top center")
    fig4.show()
#text="Food product",

In [19]:
# Q5. Which foods use the most land per kilogram?
land_col = "Land use per kilogram (m² per kilogram)"
if land_col in df.columns:
    q5 = top_n(land_col, 10, ascending=False).sort_values(land_col, ascending=True)
    fig5 = px.bar(
        q5, x=land_col, y="Food product", orientation="h", text=land_col,
        title="Q5. Highest land use per kg (Top 10 foods)",
        labels={land_col: "m² land per kg"}
    )
    fig5.update_traces(texttemplate="%{text:.2f}", textposition="outside")
    fig5.show()

In [20]:
# Q6. Emissions vs scarcity-weighted water use
scarcity_col = "Scarcity-weighted water use per kilogram (liters per kilogram)"
if scarcity_col in df.columns:
    corr_df = df[["Total_emissions_clean", scarcity_col]].dropna()
    corr = corr_df["Total_emissions_clean"].corr(corr_df[scarcity_col])
    fig6 = px.scatter(
        corr_df, x=scarcity_col, y="Total_emissions_clean",
        title=f"Q6. Emissions vs. scarcity-weighted water use (r = {corr:.2f})",
        labels={"Total_emissions_clean": "kgCO₂e per kg", scarcity_col: "Scarcity-weighted water (L/kg)"}
    )
    fig6.show()

In [23]:
# Q7. Policy scenario: cut Transport & Packaging by 50%
scenario_cols = [c for c in ["Transport", "Packaging"] if c in df.columns]
if scenario_cols:
    scenario = df.copy()
    scenario["Scenario_emissions"] = df["Total_emissions_clean"]
    for c in scenario_cols:
        scenario["Scenario_emissions"] = scenario["Scenario_emissions"] - 0.5 * scenario[c].fillna(0)
    scenario["Reduction"] = df["Total_emissions_clean"] - scenario["Scenario_emissions"]
    q7 = scenario[["Food product", "Reduction"]].dropna().sort_values("Reduction", ascending=False).head(10)
    q7 = q7.sort_values("Reduction", ascending=True)
    fig7 = px.bar(
        q7, x="Reduction", y="Food product", orientation="h", text="Reduction",
        title="Q7. Reduction if Transport & Packaging cut by 50% (Top 10 foods)",
        labels={"Reduction": "kgCO₂e saved per kg"}
    )
    fig7.update_traces(texttemplate="%{text:.2f}", textposition="outside")
    fig7.show()