# Project One

### Author:

In [None]:
!pip install wbdata

## 1A Population Statistics

In [None]:
import functools

import pandas as pd
import wbdata

# WDI indicator code -> column name used in the fetched DataFrame.
_WDI_INDICATORS = {
    "SP.POP.TOTL": "total",
    "SP.POP.TOTL.MA.ZS": "male_pct",
    "SP.POP.TOTL.FE.ZS": "female_pct",
    "SP.POP.0014.TO.ZS": "age_0_14_pct",
    "SP.POP.1564.TO.ZS": "age_15_64_pct",
    "SP.POP.65UP.TO.ZS": "age_65_up_pct",
}

# Accepted (lower-cased) sex labels -> percentage column; None means "no sex filter".
_SEX_COLUMNS = {
    "people": None,
    "all": None,
    "total": None,
    "male": "male_pct",
    "males": "male_pct",
    "man": "male_pct",
    "men": "male_pct",
    "female": "female_pct",
    "females": "female_pct",
    "woman": "female_pct",
    "women": "female_pct",
}

# Supported (low, high) age ranges -> percentage column; None means "all ages".
_AGE_COLUMNS = {
    (0, 14): "age_0_14_pct",
    (15, 64): "age_15_64_pct",
    (65, 100): "age_65_up_pct",
    (0, 100): None,
}


@functools.lru_cache(maxsize=32)
def _wdi_population_frame(place):
    """Fetch the population indicators for *place*, indexed by integer year.

    The result is cached per place so that repeated ``population`` calls for
    the same place do not rebuild the frame every time. Callers must treat
    the returned DataFrame as read-only because it is shared between calls.
    """
    frame = wbdata.get_dataframe(dict(_WDI_INDICATORS), country=place)
    frame.index = pd.to_datetime(frame.index).year
    return frame.sort_index()


def _share(row, column):
    """Return the fraction stored as a percentage in ``row[column]`` (1.0 if no column)."""
    if column is None:
        return 1.0
    return float(row[column]) / 100.0


def population(year, sex, age_range, place):
    """
    Return population count for a given year, sex, age_range, and place (WDI via wbdata).

    Parameters
    ----------
    year : int
        Calendar year to look up.
    sex : str
        'people'/'male'/'female' (case-insensitive; 'all', 'total', 'males',
        'man', 'men', 'females', 'woman', 'women' are also accepted).
    age_range : tuple
        Must be one of (0,14), (15,64), (65,100), or (0,100).
    place : str
        World Bank code, e.g. 'CUB','USA','WLD','HIC', etc.

    Returns
    -------
    float
        total population * sex share * age share. Because the sex and age
        shares are multiplied, the figure assumes the age distribution is the
        same for both sexes; it is an approximation, not an exact count.
        A missing (NaN) indicator value propagates to the result.

    Raises
    ------
    ValueError
        If ``sex`` or ``age_range`` is not supported, or if no data exists
        for the requested place and year.

    Uses coarse WDI age bins (0-14, 15-64, 65+), so arbitrary ranges are not supported.
    """
    # Validate the arguments first so a typo fails immediately instead of
    # after a round trip to the World Bank API.
    sex_key = str(sex).lower()
    if sex_key not in _SEX_COLUMNS:
        raise ValueError("sex must be people/male/female")

    low, high = age_range
    if (low, high) not in _AGE_COLUMNS:
        raise ValueError("age_range must be (0,14), (15,64), (65,100), or (0,100)")

    df = _wdi_population_frame(place)
    if df.empty or year not in df.index:
        raise ValueError(f"No data for place={place}, year={year}")

    row = df.loc[year]
    total = float(row["total"])
    sex_share = _share(row, _SEX_COLUMNS[sex_key])
    age_share = _share(row, _AGE_COLUMNS[(low, high)])

    return total * sex_share * age_share

## 2A Unit Test

In [None]:
# Check 1: the world total for 2000 must be on the order of billions.
t_wld = population(2000, "people", (0, 100), "WLD")
assert t_wld > 5e9, "World population in 2000 should be > 5 billion"

# Check 2: men plus women should reproduce the total to within 3%.
m_wld = population(2000, "male", (0, 100), "WLD")
f_wld = population(2000, "female", (0, 100), "WLD")
relative_gap = abs((m_wld + f_wld) - t_wld) / t_wld
assert relative_gap < 0.03, "Male+Female should be close to total"

# Check 3: the male fraction must fall inside a deliberately wide 45-55% band.
male_fraction = m_wld / t_wld
assert 0.45 < male_fraction < 0.55, "Male share should be around 50%"

## 3A Population Dataframe

In [None]:
import pandas as pd

def population_dataframe(places, years):
    """
    Return a DataFrame indexed by (place, year).

    Columns are population counts for age-sex groups (3 age bins x 2 sexes):
    Male_0_14, Male_15_64, Male_65_100, Female_0_14, Female_15_64,
    Female_65_100. Each value comes from ``population(year, sex, age_range, place)``.

    Parameters
    ----------
    places : iterable of str
        Place codes passed through to ``population``.
    years : iterable of int
        Years to include; may be a one-shot iterator.

    Returns
    -------
    pandas.DataFrame
        One row per (place, year), sorted by index. If ``places`` or ``years``
        is empty the result is an empty frame that still carries the
        (place, year) index and the six value columns.
    """
    sexes = ("Male", "Female")
    age_bins = ((0, 14), (15, 64), (65, 100))
    value_columns = [
        f"{sex}_{low}_{high}" for sex in sexes for low, high in age_bins
    ]

    # Materialise once: a generator would otherwise be exhausted after the
    # first place and silently yield no rows for the remaining places.
    years = list(years)

    rows = []
    for place in places:
        for year in years:
            record = {"place": place, "year": year}
            for sex in sexes:
                for low, high in age_bins:
                    record[f"{sex}_{low}_{high}"] = population(
                        year, sex.lower(), (low, high), place
                    )
            rows.append(record)

    # Explicit columns keep set_index from raising KeyError when no rows exist.
    frame = pd.DataFrame(rows, columns=["place", "year", *value_columns])
    return frame.set_index(["place", "year"]).sort_index()

In [None]:
# Build the age-sex table for the two places over 1960-2023 (range end is exclusive).
study_places = ["CUB", "USA"]
study_years = range(1960, 2024)
df = population_dataframe(places=study_places, years=study_years)
df.head()

## B Population Pyramids

In [None]:
import plotly.offline as py
import plotly.graph_objs as go

# Initialise Plotly's offline notebook mode before any figure is drawn with py.iplot.
py.init_notebook_mode(connected=True)

def plot_population_pyramid(df, place, year):
    """
    Draw a horizontal-bar population pyramid for one place and year.

    ``df`` must be indexed by (place, year) and provide the columns
    Male_0_14, Male_15_64, Male_65_100, Female_0_14, Female_15_64 and
    Female_65_100. The figure is displayed with ``py.iplot``; nothing is
    returned.
    """
    snapshot = df.loc[(place, year)]

    # (axis label, male column, female column) for each age band, youngest first.
    bands = [
        ("0–14", "Male_0_14", "Female_0_14"),
        ("15–64", "Male_15_64", "Female_15_64"),
        ("65+", "Male_65_100", "Female_65_100"),
    ]
    labels = [label for label, _, _ in bands]

    # Male values are negated so their bars extend to the left of zero.
    men = [-snapshot[male_col] for _, male_col, _ in bands]
    women = [snapshot[female_col] for _, _, female_col in bands]

    def _bar(values, name, color):
        # One horizontal bar trace per sex; hover tooltips are disabled.
        return go.Bar(
            x=values,
            y=labels,
            orientation="h",
            name=name,
            marker=dict(color=color),
            hoverinfo="skip",
        )

    figure = dict(
        data=[_bar(men, "Men", "purple"), _bar(women, "Women", "pink")],
        layout=go.Layout(
            barmode="overlay",
            title=f"Population Pyramid for {place} ({year})",
            yaxis=go.layout.YAxis(title="Age Group"),
            xaxis=go.layout.XAxis(title="Number"),
            bargap=0.15,
        ),
    )
    py.iplot(figure)


# Render Cuba's pyramid for 1960 and then 1962.
for pyramid_year in (1960, 1962):
    plot_population_pyramid(df, "CUB", pyramid_year)