In [3]:
# ============================================
# DataCharts
# From: education_conflict_merged.csv
# Keeps all countries and years (2010–2024)
# ============================================
from google.colab import files
import pandas as pd
from pathlib import Path

# ----------------------------
# 1. Load the cleaned dataset
# ----------------------------
# Ask the user for the merged CSV through the Colab upload dialog.
print("Please upload 'education_conflict_merged.csv'")
uploaded = files.upload()

# Fail with a readable message if nothing came back from the dialog,
# instead of the opaque IndexError that indexing an empty list would raise.
if not uploaded:
    raise ValueError(
        "No file was uploaded; expected 'education_conflict_merged.csv'."
    )

# Use the first uploaded file name (any additional uploads are ignored).
file_name = next(iter(uploaded))
df = pd.read_csv(file_name)

# Make sure year is numeric; values that cannot be parsed become NaN and
# therefore drop out of the year-based filters and groupings below.
df["year"] = pd.to_numeric(df["year"], errors="coerce")

# Optional: filter to a smaller set of countries for clear charts
countries = [
    "Afghanistan", "Syria", "Yemen", "Sudan", "Ukraine", "Palestine",
    "Germany", "Canada", "Japan", "Brazil", "Kenya",
]
# .copy() keeps filtered_df independent of df for any later modification.
filtered_df = df[df["country"].isin(countries)].copy()

# ----------------------------
# 2. Create output folder
# ----------------------------
# All chart CSVs below are written into ./data_charts; reuse it if present.
Path("data_charts").mkdir(parents=False, exist_ok=True)

# ----------------------------
# 3. BAR CHART – Average (2018–2023) per country
# ----------------------------
# Rows whose year lies in 2018..2023 (both ends included); NaN years are excluded.
in_window = filtered_df["year"].between(2018, 2023)

# Mean out-of-school percentage per (country, conflict_status) pair.
bar_means = filtered_df.loc[in_window].groupby(
    ["country", "conflict_status"], as_index=False
)["out_of_school_pct"].mean()

# Highest averages first.
bar = bar_means.sort_values("out_of_school_pct", ascending=False)
bar.to_csv("data_charts/bar_out_of_school.csv", index=False)
print("bar_out_of_school.csv created")

# ----------------------------
# 4. GROUPED BAR – Yearly values by country (all years)
# ----------------------------
# Export the selected countries' yearly rows, restricted to the chart columns.
grouped_columns = ["country", "year", "conflict_status", "out_of_school_pct"]
grouped = filtered_df.loc[:, grouped_columns].copy()
grouped.to_csv("data_charts/grouped_country_year.csv", index=False)
print("grouped_country_year.csv created")

# ----------------------------
# 5. HEATMAP – Country vs Year (full time range)
# ----------------------------
# One row per country, one column per year; pivot_table's default aggregation
# (mean) is applied when a country/year pair has more than one row.
country_by_year = pd.pivot_table(
    filtered_df,
    index="country",
    columns="year",
    values="out_of_school_pct",
)

# NOTE(review): missing country/year cells are written as 0, which a chart
# cannot tell apart from a genuine 0 % value — confirm this is intended.
heatmap = country_by_year.fillna(0)
heatmap.to_csv("data_charts/heatmap_out_of_school.csv")
print("heatmap_out_of_school.csv created")

# ----------------------------
# 6. 100% STACKED BAR – Share of conflict vs stable countries per year
# ----------------------------
# Number of distinct countries for every (year, conflict_status) pair,
# computed over the full dataset rather than the filtered country list.
country_counts = df.groupby(["year", "conflict_status"])["country"].nunique()
country_counts = country_counts.reset_index()

# Wide layout: one row per year, one column per status; absent pairs count as 0.
stacked = country_counts.pivot(
    index="year", columns="conflict_status", values="country"
)
stacked = stacked.fillna(0)

# Row total across all status columns, then each status as a percentage of it.
# .get(..., 0) yields a 0 share when a status column is absent from the data.
stacked["total"] = stacked.sum(axis=1)
stacked["conflict_share"] = (stacked.get("Conflict", 0) / stacked["total"]) * 100
stacked["stable_share"] = (stacked.get("Stable", 0) / stacked["total"]) * 100

# Keep only the two share columns and turn the year index back into a column.
share_columns = ["conflict_share", "stable_share"]
stacked100 = stacked[share_columns].reset_index()
stacked100.to_csv("data_charts/stacked100_conflict_share.csv", index=False)
print("stacked100_conflict_share.csv created")

# ----------------------------
# 7. WAFFLE CHART – Global averages for all years
# ----------------------------
# Mean out-of-school percentage per conflict status over the whole dataset
# (every country and year present in df).
status_means = df.groupby("conflict_status")["out_of_school_pct"].mean()
waffle = status_means.rename("avg_out_of_school_pct").reset_index()
waffle.to_csv("data_charts/waffle_global_avg.csv", index=False)
print("waffle_global_avg.csv created")

# Final status line once every CSV has been written.
print("\n All visualization datasets successfully created in 'data_charts/' folder.")

Please upload 'education_conflict_merged.csv'


Saving education_conflict_merged.csv to education_conflict_merged.csv
bar_out_of_school.csv created
grouped_country_year.csv created
heatmap_out_of_school.csv created
stacked100_conflict_share.csv created
waffle_global_avg.csv created

 All visualization datasets successfully created in 'data_charts/' folder.
