# Project One

### Author:

In [2]:
!pip install wbdata

Collecting wbdata
  Using cached wbdata-1.1.0-py3-none-any.whl.metadata (2.1 kB)
Collecting appdirs<2,>=1.4 (from wbdata)
  Using cached appdirs-1.4.4-py2.py3-none-any.whl.metadata (9.0 kB)
Collecting cachetools<6,>=5.3.2 (from wbdata)
  Using cached cachetools-5.5.2-py3-none-any.whl.metadata (5.4 kB)
Collecting dateparser<2,>=1.2.0 (from wbdata)
  Using cached dateparser-1.2.2-py3-none-any.whl.metadata (29 kB)
Collecting shelved-cache<0.4,>=0.3.1 (from wbdata)
  Using cached shelved_cache-0.3.1-py3-none-any.whl.metadata (4.7 kB)
Collecting tabulate<1,>=0.8.5 (from wbdata)
  Using cached tabulate-0.9.0-py3-none-any.whl.metadata (34 kB)
Collecting tzlocal>=0.2 (from dateparser<2,>=1.2.0->wbdata)
  Using cached tzlocal-5.3.1-py3-none-any.whl.metadata (7.6 kB)
Using cached wbdata-1.1.0-py3-none-any.whl (18 kB)
Using cached appdirs-1.4.4-py2.py3-none-any.whl (9.6 kB)
Using cached cachetools-5.5.2-py3-none-any.whl (10 kB)
Using cached dateparser-1.2.2-py3-none-any.whl (315 kB)
Using cached 

## 1A Population Statistics

In [5]:
import wbdata
import pandas as pd

def population(year, sex, age_range, place):
    """
    Return the population count for one year, sex, age bracket and place.

    Every supported (sex, age_range) combination is read directly from the
    matching World Bank WDI head-count series (for example
    ``SP.POP.65UP.FE.IN`` for women aged 65 and over).  It is deliberately NOT
    estimated as ``total * sex share * age share``: that product assumes both
    sexes have the same age structure, which misstates every sex-by-age cell.

    Parameters
    ----------
    year : int
        Calendar year, e.g. 2000.
    sex : str
        'people', 'male' or 'female'.  Case-insensitive and tolerant of
        surrounding whitespace; the aliases 'all', 'total', 'males', 'man',
        'men', 'females', 'woman' and 'women' are accepted too.
    age_range : tuple
        One of (0, 14), (15, 64), (65, 100) or (0, 100).  Only these coarse
        WDI brackets are supported; (65, 100) means "65 and above".
    place : str
        World Bank code, e.g. 'CUB', 'USA', 'WLD', 'HIC'.

    Returns
    -------
    float
        The head count.  If the series has a row for ``year`` but no
        observation, the missing value is passed through as NaN.

    Raises
    ------
    ValueError
        If ``sex`` or ``age_range`` is not supported, or if the series has no
        row for the requested place and year.
    """
    # Map every accepted spelling onto the WDI sex suffix ("TO" = both sexes).
    sex_codes = {
        "people": "TO", "all": "TO", "total": "TO",
        "male": "MA", "males": "MA", "man": "MA", "men": "MA",
        "female": "FE", "females": "FE", "woman": "FE", "women": "FE",
    }
    # Map each supported bracket onto the age token used in WDI indicator codes.
    age_codes = {
        (0, 14): "0014",
        (15, 64): "1564",
        (65, 100): "65UP",
        (0, 100): "TOTL",
    }

    # Validate before touching the network so bad arguments fail fast.
    sex_code = sex_codes.get(str(sex).strip().lower())
    if sex_code is None:
        raise ValueError("sex must be people/male/female")

    low, high = age_range
    age_code = age_codes.get((low, high))
    if age_code is None:
        raise ValueError("age_range must be (0,14), (15,64), (65,100), or (0,100)")

    # WDI naming: both-sexes series end in ".TO" (the grand total is plain
    # "SP.POP.TOTL"); sex-specific head counts end in ".MA.IN" / ".FE.IN".
    if sex_code == "TO":
        indicator = "SP.POP.TOTL" if age_code == "TOTL" else f"SP.POP.{age_code}.TO"
    else:
        indicator = f"SP.POP.{age_code}.{sex_code}.IN"

    df = wbdata.get_dataframe({indicator: "value"}, country=place)
    # The index holds date labels; reduce them to integer years for lookup.
    df.index = pd.to_datetime(df.index).year

    if df.empty or year not in df.index:
        raise ValueError(f"No data for place={place}, year={year}")

    return float(df.loc[year, "value"])

## 2A Unit Test

In [8]:
# A2-1 -- sanity check: the world held well over five billion people in 2000.
t_wld = population(2000, "people", (0, 100), "WLD")
assert t_wld > 5e9, "World population in 2000 should be > 5 billion"

# A2-2 -- the male and female counts together should reproduce the total (within 3%).
m_wld, f_wld = (
    population(2000, sex, (0, 100), "WLD") for sex in ("male", "female")
)
assert (
    abs((m_wld + f_wld) - t_wld) / t_wld < 0.03
), "Male+Female should be close to total"

# A2-3 -- men should make up roughly half of the total (a deliberately wide 45-55% band).
assert (
    0.45 < m_wld / t_wld < 0.55
), "Male share should be around 50%"

## 3A Population Dataframe

In [11]:
import pandas as pd

def population_dataframe(places, years):
    """
    Build a table of population counts by sex and age bracket.

    Parameters
    ----------
    places : iterable of str
        World Bank place codes, e.g. ["CUB", "USA"].
    years : iterable of int
        Calendar years.  Any iterable works, including a one-shot generator:
        it is materialised once and reused for every place.

    Returns
    -------
    pandas.DataFrame
        Indexed by (place, year) and sorted on that index, with one column per
        sex/age group (3 age bins x 2 sexes): Male_0_14, Male_15_64,
        Male_65_100, Female_0_14, Female_15_64, Female_65_100.  Each cell is
        the value returned by ``population`` for that group.  When ``places``
        or ``years`` is empty the result is an empty frame with the same
        columns and index names.

    Any exception raised by ``population`` propagates to the caller.
    """
    # `years` is walked once per place, so a generator would be exhausted after
    # the first place and silently drop all the others -- snapshot it up front.
    years = list(years)

    # (column name, sex, age bracket) for each of the six cells, in column order.
    groups = (
        ("Male_0_14", "male", (0, 14)),
        ("Male_15_64", "male", (15, 64)),
        ("Male_65_100", "male", (65, 100)),
        ("Female_0_14", "female", (0, 14)),
        ("Female_15_64", "female", (15, 64)),
        ("Female_65_100", "female", (65, 100)),
    )
    columns = ["place", "year"] + [column for column, _, _ in groups]

    rows = [
        {
            "place": p,
            "year": y,
            **{column: population(y, sex, ages, p) for column, sex, ages in groups},
        }
        for p in places
        for y in years
    ]

    # Passing the columns explicitly keeps set_index working when there are no rows.
    return pd.DataFrame(rows, columns=columns).set_index(["place", "year"]).sort_index()

In [10]:
# Age-sex population table for Cuba and the United States, 1960 through 2023.
df = population_dataframe(places=["CUB", "USA"], years=range(1960, 2024))
df.head()



Unnamed: 0_level_0,Unnamed: 1_level_0,Male_0_14,Male_15_64,Male_65_100,Female_0_14,Female_15_64,Female_65_100
place,year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
CUB,1960,1332030.0,2206805.0,158503.309123,1275614.0,2113339.0,151790.190877
CUB,1961,1364712.0,2243471.0,161776.775692,1309398.0,2152540.0,155219.724308
CUB,1962,1401601.0,2277814.0,165609.21636,1347305.0,2189575.0,159193.78364
CUB,1963,1442509.0,2310876.0,170201.356426,1389011.0,2225173.0,163889.143574
CUB,1964,1483939.0,2344502.0,175813.644195,1431134.0,2261074.0,169557.377758
