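Notebook for testing the idea: draw the ten most populous countries for each year as a bar chart, save one PNG per year, and stitch the PNGs into a video.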

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.ticker as ticker
import ffmpeg
from pathlib import Path

In [2]:
# Load the population table; the cells below read its "Entity", "Year" and "all years" columns.
df_population = pd.read_csv("./population.csv")
# Show a few random rows as a sanity check (no seed is set, so the rows differ between runs).
df_population.sample(4)

Unnamed: 0,Entity,Year,all years
8484,Kenya,1998,28888012
12716,Northern America (UN),2012,352164242
11986,Nepal,2022,29715383
12120,New Caledonia,2008,253784


In [3]:
# Every distinct year in the table; one chart frame is rendered per entry.
years = list(df_population["Year"].unique())

In [4]:
# all_regions = list(df_population.Entity.unique())
# all_regions

In [5]:
# "Entity" values that are aggregates (continents, income groups, the world total)
# rather than individual countries. Rows with these names are excluded from the
# top-ten ranking; the "World" row is still used as the population total.
non_countries = [
    "Africa (UN)",
    "Americas (UN)",
    "Asia (UN)",
    "Europe (UN)",
    "High-income countries",
    "Land-locked developing countries (LLDC)",
    "Latin America and the Caribbean (UN)",
    "Least developed countries",
    "Less developed regions",
    "Less developed regions, excluding China",
    "Less developed regions, excluding least developed countries",
    "Low-income countries",
    "Lower-middle-income countries",
    "More developed regions",
    "Northern America (UN)",
    "Oceania (UN)",
    "Small island developing states (SIDS)",
    "Upper-middle-income countries",
    "World",
]

In [6]:
def create_df_top_ten_countries(df, year, non_countries):
    """Build the ranking of the ten most populous countries for one year.

    Args:
        df: Population table with "Entity", "Year" and "all years" columns.
        year: Value of the "Year" column to select.
        non_countries: Entity names to leave out of the ranking.

    Returns:
        A ``(top_ten, total_population)`` tuple. ``top_ten`` has a fresh
        0-based index, is sorted by population in descending order, and
        contains "Country", "Population" and "Percent of Total" (share of
        the "World" row, rounded to two decimals). The "Year" column is
        dropped; any other column of ``df`` is carried through unchanged.
        ``total_population`` is the "all years" value of the "World" row.

    Raises:
        IndexError: If ``df`` has no "World" row for ``year``.
    """
    rows_for_year = df[df["Year"] == year].copy()

    # The "World" row provides the denominator for the percentage column.
    is_world = rows_for_year["Entity"] == "World"
    total_population = rows_for_year.loc[is_world, "all years"].iloc[0]

    is_country = ~rows_for_year["Entity"].isin(non_countries)
    ranked = rows_for_year[is_country].sort_values(by="all years", ascending=False)

    top_ten_df = (
        ranked.head(10)
        .rename(columns={"Entity": "Country", "all years": "Population"})
        .drop(columns=["Year"])
        .reset_index(drop=True)
    )
    share = (top_ten_df["Population"] * 100 / total_population).round(2)
    top_ten_df["Percent of Total"] = share

    return top_ten_df, total_population

In [7]:
def assign_colors(df, base_palette="tab20"):
    """Give every distinct entity in ``df`` a colour from a seaborn palette.

    Args:
        df: Table with an "Entity" column.
        base_palette: Palette name passed to ``sns.color_palette``.

    Returns:
        dict mapping each distinct "Entity" value to one palette colour.
    """
    # NOTE(review): one colour is requested per entity; if that exceeds the
    # size of the base palette, colours presumably repeat - confirm in the
    # seaborn documentation.
    entity_names = df["Entity"].unique()
    palette = sns.color_palette(base_palette, len(entity_names))
    return {name: colour for name, colour in zip(entity_names, palette)}

In [8]:
def _format_big_number_for_axis(x, pos):
    """argument pos is needed to feed to `ticker.FuncFormatter`"""
    if x >= 1e9:
        return f"{x/1e9:.1f}B"
    return f"{int(x/1e6)}M"


def plot_top_ten(df_year, year, historical_max, color_map, output_dir="./results"):
    """Draw the top-ten bar chart for one year and save it as a PNG frame.

    Args:
        df_year: DataFrame with one row per bar; its "Country" and
            "Population" columns are plotted.
        year: Year shown in the title and embedded in the output file name.
        historical_max: Upper limit of the x-axis. Passing the same value for
            every year keeps the axis from rescaling between frames.
        color_map: Palette handed to seaborn; because hue is "Country" it is
            looked up by country name.
        output_dir: Directory that receives ``world_population_{year}.png``.
            It is created when it does not exist.
    """
    # --- Figure & axes ---
    fig = plt.figure(figsize=(9, 16))
    try:
        # Lock the axes rectangle: [left, bottom, width, height]
        ax = fig.add_axes([0.3, 0.08, 0.65, 0.84])

        with plt.xkcd():
            sns.barplot(
                data=df_year,
                y="Country",
                x="Population",
                hue="Country",
                palette=color_map,
                dodge=False,
                legend=False,
                ax=ax,
            )

            # --- Format X-axis ---
            ax.xaxis.set_major_formatter(
                ticker.FuncFormatter(_format_big_number_for_axis)
            )
            ax.set_xlim(0, historical_max)

            # --- Pin y-axis spine ---
            ax.spines["left"].set_position(("axes", 0))  # exact left of axes
            ax.tick_params(
                axis="y", which="major", pad=20
            )  # space between labels and ticks

            # --- Labels & title ---
            ax.set_xlabel("Population", fontsize=20)
            ax.set_ylabel("")
            ax.set_title(f"Top 10 countries - {year}", fontsize=24, weight="bold")

            # --- Tick label size ---
            ax.tick_params(axis="both", which="major", labelsize=18)

            # --- Save figure ---
            # Create the target directory first: savefig does not create
            # missing folders, so a fresh checkout would otherwise fail here.
            target_dir = Path(output_dir)
            target_dir.mkdir(parents=True, exist_ok=True)
            # Save this figure explicitly instead of relying on pyplot's
            # "current figure".
            fig.savefig(str(target_dir / f"world_population_{year}.png"), dpi=300)
    finally:
        # Close the figure even when plotting or saving fails, so a loop over
        # many years does not accumulate open figures.
        plt.close(fig)

In [9]:
# Build the colour lookup once from the full table; the same mapping is passed to every plot call below.
color_map = assign_colors(df_population)

In [10]:
# Render a single frame first to check the chart before generating the whole series.
year = 1966
df_year, world_population = create_df_top_ten_countries(df_population, year, non_countries)
plot_top_ten(df_year, year, historical_max=1_500_000_000, color_map=color_map)

findfont: Font family 'xkcd' not found.
findfont: Font family 'xkcd Script' not found.
findfont: Font family 'Comic Neue' not found.
findfont: Font family 'Comic Sans MS' not found.
findfont: Font family 'xkcd' not found.
findfont: Font family 'xkcd Script' not found.
findfont: Font family 'Comic Neue' not found.
findfont: Font family 'Comic Sans MS' not found.
findfont: Font family 'xkcd' not found.
findfont: Font family 'xkcd Script' not found.
findfont: Font family 'Comic Neue' not found.
findfont: Font family 'Comic Sans MS' not found.
findfont: Font family 'xkcd' not found.
findfont: Font family 'xkcd Script' not found.
findfont: Font family 'Comic Neue' not found.
findfont: Font family 'Comic Sans MS' not found.
findfont: Font family 'xkcd' not found.
findfont: Font family 'xkcd Script' not found.
findfont: Font family 'Comic Neue' not found.
findfont: Font family 'Comic Sans MS' not found.
findfont: Font family 'xkcd' not found.
findfont: Font family 'xkcd Script' not found.
fin

In [11]:
# Render one PNG frame per year, all with the same fixed x-axis limit.
for year in years:
    df_year, world_population = create_df_top_ten_countries(df_population, year, non_countries)
    plot_top_ten(df_year, year, historical_max=1_500_000_000, color_map=color_map)

findfont: Font family 'xkcd' not found.
findfont: Font family 'xkcd Script' not found.
findfont: Font family 'Comic Neue' not found.
findfont: Font family 'Comic Sans MS' not found.
findfont: Font family 'xkcd' not found.
findfont: Font family 'xkcd Script' not found.
findfont: Font family 'Comic Neue' not found.
findfont: Font family 'Comic Sans MS' not found.
findfont: Font family 'xkcd' not found.
findfont: Font family 'xkcd Script' not found.
findfont: Font family 'Comic Neue' not found.
findfont: Font family 'Comic Sans MS' not found.
findfont: Font family 'xkcd' not found.
findfont: Font family 'xkcd Script' not found.
findfont: Font family 'Comic Neue' not found.
findfont: Font family 'Comic Sans MS' not found.
findfont: Font family 'xkcd' not found.
findfont: Font family 'xkcd Script' not found.
findfont: Font family 'Comic Neue' not found.
findfont: Font family 'Comic Sans MS' not found.
findfont: Font family 'xkcd' not found.
findfont: Font family 'xkcd Script' not found.
fin

In [12]:
def images_to_video(
    folder="./results", output="./videos/world_population.mp4", duration=0.2
):
    """
    Turns all PNG images in a folder into a video using ffmpeg-python.

    The frames are listed in a temporary ``images.txt`` inside ``folder`` and
    passed to ffmpeg with the "concat" input format. The list file is removed
    afterwards, including when ffmpeg fails.

    Frame order is the sorted order of the file paths, so file names must sort
    in the intended playback order.

    Args:
        folder (str): Folder with PNG images.
        output (str): Output video filename. Its parent directory is created
            when it does not exist.
        duration (int or float): Seconds per image.

    Raises:
        ValueError: If ``folder`` contains no PNG files.
    """
    folder = Path(folder).resolve()
    output = Path(output).resolve()

    files = sorted(f for f in folder.iterdir() if f.suffix.lower() == ".png")

    if not files:
        raise ValueError(f"No PNG files found in {folder}")

    # Make sure the destination directory exists before ffmpeg writes to it.
    output.parent.mkdir(parents=True, exist_ok=True)

    def _concat_entry(path):
        # Inside a single-quoted entry a literal quote has to be written as
        # '\'' (close the quote, escaped quote, reopen the quote).
        escaped = path.resolve().as_posix().replace("'", "'\\''")
        return f"file '{escaped}'\n"

    list_file = folder / "images.txt"
    try:
        with list_file.open("w") as f:
            for img in files:
                f.write(_concat_entry(img))
                f.write(f"duration {duration}\n")

            # Ensure last frame is shown (ffmpeg quirk: needs last file repeated)
            f.write(_concat_entry(files[-1]))

        (
            ffmpeg.input(str(list_file), format="concat", safe=0)
            .output(
                str(output),
                vcodec="libx264",
                pix_fmt="yuv420p",
                # Round width and height down to even numbers.
                vf="scale=trunc(iw/2)*2:trunc(ih/2)*2",
            )
            .run(overwrite_output=True)
        )
    finally:
        # Do not leave the temporary list behind when ffmpeg raises.
        if list_file.exists():
            list_file.unlink()

In [13]:
# Stitch the PNG frames into a video using the function's default folder, output path and frame duration.
images_to_video()

ffmpeg version 6.1.1-3ubuntu5 Copyright (c) 2000-2023 the FFmpeg developers
  built with gcc 13 (Ubuntu 13.2.0-23ubuntu3)
  configuration: --prefix=/usr --extra-version=3ubuntu5 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --arch=amd64 --enable-gpl --disable-stripping --disable-omx --enable-gnutls --enable-libaom --enable-libass --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libdav1d --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libglslang --enable-libgme --enable-libgsm --enable-libharfbuzz --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-librubberband --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libtheora --enable-libtwolame --enable-libvidstab --enable-libvorbis --enable-libvpx --enable-libwebp --enable-libx265 --enable-libxml2 --enable-libxvid --enable-libzimg --ena