# Essential Functions

### 0. Imports

In [4]:
# %pip install wbdata
# $%pip install plotly
!pip install plotly
import plotly.express as px

import wbdata
import datetime as dt
import pandas as pd
import matplotlib.pyplot as plt
import plotly.graph_objects as gp



### 1. Population DataFrame Function

In [None]:
## This function is based on the colab file shared by the prof last week.
## And therefore similar methods are used in function below.

def population_df(countries, start_yr, end_yr):
  """
      @param: countries (list) : list of country codes
      @param: start_yr (int) : starting year
      @param: end_yr (str) : end year
      return: population data frame indexed by country, year, and columns
              giving counts of people in different age-sex groups
  """
  ages = {
        "0-14": {
            "male": "SP.POP.0014.MA.IN",
            "female": "SP.POP.0014.FE.IN",
        },
        "15-64": {
            "male": "SP.POP.1564.MA.IN",
            "female": "SP.POP.1564.FE.IN",
        },
        "65+": {
            "male": "SP.POP.65UP.MA.IN",
            "female": "SP.POP.65UP.FE.IN",
        },
    }

  df = []
  for age_range in ages:
    for sex in ages[age_range]:
      indicator = ages[age_range][sex]
      name = f":{age_range}_{sex}"
      data = wbdata.get_data(indicator,
                             country = countries,
                             date = (dt.datetime(start_yr, 1, 1), dt.datetime(end_yr, 1, 1)))

      new_df = pd.DataFrame(data)
      new_df.rename(columns = {"value" : name}, inplace = True)

      # suggestion from Gemini (to fix errors/modify code)
      new_df['Year'] = new_df['date'].astype(int)
      new_df.drop(columns = ['date'], inplace = True)
      new_df.set_index('Year', inplace = True)

      ## suggestion from Gemini (to fix errors/modify code)
      new_df['Country'] = new_df['country'].apply(lambda x: x['value'] if isinstance(x, dict) else x)
      new_df.drop(columns = ['country'], inplace = True)
      new_df.set_index('Country', inplace = True)

      new_df = new_df[[name]]
      df.append(new_df)

  if df:
    main_df = pd.concat(df, axis = 1)
    main_df.sort_index(inplace = True)
  else:
    main_df = pd.DataFrame()

  return main_df

df = population_df(["USA", "IND", "JPN"], 2010, 2020)
display(df)

Unnamed: 0_level_0,:0-14_male,:0-14_female,:15-64_male,:15-64_female,:65+_male,:65+_female
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
India,193049643,176475236,488513474,454575427,42612350,47391566
India,194822123,177886985,481642958,448232319,40885983,45559944
India,196462625,179200843,474541800,441709500,39045735,43698562
India,198093854,180541801,467043895,434781814,37281347,41914690
India,199649050,181831596,459164278,427477653,35603764,40217956
India,201020492,182951247,451259170,420161099,34019075,38613415
India,202296224,183993964,443434941,412907483,32534945,37109634
India,203236901,184739320,435329609,405365662,31310167,35847852
India,203778931,185136050,426997208,397634456,30340529,34787328
India,204097696,185368138,418644475,389896933,29448014,33769698


### 2. Population (Counts) Function

In [None]:
## This function is based on the colab file shared by the prof last week.
## And therefore similar methods are used in function below.

def population(year, sex, age_range, place):
  """
      @param: year (int)
      @param: sex (str) : input as "male" or "female"
      @param: age_range (str) : inputs as "0-14", "15-64", "65+"
      @param: place (str)
      return: population count (int)
  """
  ages = {
        "0-14": {
            "male": "SP.POP.0014.MA.IN",
            "female": "SP.POP.0014.FE.IN",
        },
        "15-64": {
            "male": "SP.POP.1564.MA.IN",
            "female": "SP.POP.1564.FE.IN",
        },
        "65+": {
            "male": "SP.POP.65UP.MA.IN",
            "female": "SP.POP.65UP.FE.IN",
        },
    }

  if age_range not in ages or sex not in ["male", "female"]:
        raise ValueError("Invalid input.")

  indicator = ages[age_range][sex]

  data = wbdata.get_data(indicator, country = place, date = (dt.datetime(year, 1, 1), dt.datetime(year, 1, 1)))

  if not data:
      return None
  # suggestion from Google/Gemini to fix error
  elif data and isinstance(data, list):
    for item in data:
      if item.get("value") is not None:
        return item["value"]
      print("population count not available")
      return None

print(population(2020, "male", "0-14", "USA"))
print(population(2015, "female", "15-64", "IND"))
print(population(2010, "male", "65+", "JPN"))

31318046
420161099
12726650


### 3. Population Pyramid Function

In [None]:
def population_pyramid(df):
  """
      @param: df (DataFrame)
      return: None

      Assume df has columns {"age_range", "male_immigrants", "female_immigrants"}
  """
  """
  age_segments = df["age_range"]
  male_immigrant_pop = df["male_immigrants"]
  female_immigrant_pop = df["female_immigrants"]

  fig, axis = plt.subplots(ncols = 2, sharey = True, figsize=(10, 5))
  axis[0].barh(age_segments, male_immigrant_pop, color = "blue")
  axis[1].barh(age_segments, female_immigrant_pop, color = "pink")

  axis[0].set_title("Male Immigrants")
  axis[1].set_title("Female Immigrants")

  plt.tight_layout()
  plt.show()
  """
  # adapted code using idea from this source:
  # https://www.geeksforgeeks.org/how-to-create-a-population-pyramid-using-plotly-in-python/

  fig = gp.Figure()

  # add male pop
  fig.add_trace(gp.Bar(y = df["age_range"], x = -df["male_immigrants"],
                       name = "Male", orientation = "h", marker_color = "blue"))

  # add female pop
  fig.add_trace(gp.Bar(y = df["age_range"], x = df["female_immigrants"],
                       name = "Female", orientation = "h", marker_color = "green"))

  # update graph layout
  fig.update_layout(title ="Population Pyramid of Immigrants", xaxis = dict(
    title="Population",
    tickvals = [-max(df["male_immigrants"]), 0, max(df["female_immigrants"])],
    ticktext = [f"{max(df['male_immigrants']):,}", "0", f"{max(df['female_immigrants']):,}"]),
    yaxis = dict(title="Age Range", categoryorder='category ascending'),
    barmode = "relative")


  fig.show()

df = pd.DataFrame({
    "age_range": ["0-10", "11-20", "21-30", "31-40", "41-50"],
    "male_immigrants": [500, 600, 700, 800, 900],
    "female_immigrants": [550, 650, 750, 850, 950]
})

population_pyramid(df)

### 4. World Map

In [5]:
def plot_net_migration_map(year):
    """
    Creates a choropleth map of net migration by country for the given year,
    using the country's ISO-3 code for geometry (so 'TUR' is recognized),
    while displaying the WB name 'Turkiye' in hover labels.
    """
    # 1. Fetch the net migration data (all countries, parse dates)
    indicator = {"SM.POP.NETM": "Net Migration"}
    df = wbdata.get_dataframe(indicator, country="all", parse_dates=True)
    df.dropna(inplace=True)
    df.reset_index(inplace=True)  # columns: ['country', 'date', 'Net Migration']
    # 2. Extract the year from the date (datetime)
    df["Year"] = df["date"].dt.year
    df_year = df[df["Year"] == year]
    if df_year.empty:
        raise ValueError(f"No net migration data found for {year}. Try another year.")
    # 3. Get the list of WB countries to retrieve their ISO-3 'id'
    wb_countries = wbdata.get_countries()
    # Convert that to a DataFrame
    country_df = pd.DataFrame(wb_countries)
    # 'country_df' has columns like: ['id','iso2Code','name','region',...]
    # 4. Merge the net migration data (df_year) with the country info (country_df) on "name" vs. "country"
    merged = pd.merge(
        df_year,
        country_df[['id','name']],
        how='left',
        left_on='country',
        right_on='name'
    )
    # Now 'merged' has columns: ['country','date','Net Migration','Year','id','name'].
    #  - merged['id'] is the ISO-3 code (e.g., "TUR").
    #  - merged['country'] is "Turkiye".
    # 5. Create the choropleth
    #    Use locationmode="ISO-3" and pass 'id' (the ISO-3 code) to 'locations',
    fig = px.choropleth(
        merged,
        locations="id",                 # This is the ISO-3 code
        locationmode="ISO-3",
        color="Net Migration",
        hover_name="country",
        color_continuous_scale=[
    (0.0, "lightblue"),
    (0.5, "white"),
    (1.0, "lightcoral")
],
        range_color=(-10000, 10000),
        title=f"Net Migration by Country in {year}",
        labels={"Net Migration": "Number of Migrants"},
        projection="natural earth"
    )
    # 6. Improve layout
    fig.update_layout(
        geo=dict(showframe=False, showcoastlines=True),
        height=600
    )
    fig.show()
plot_net_migration_map(2018)