**Note: We can write a rough sketch of our function. However, it may be subject to change. This depends on which database we use for the project. Once everything is finalized, I'll create a Git repo.**

## **EEP 153 Project 1: Net Migration Analysis**

In [1]:
## If import fails with "ModuleNotFoundError"
## uncomment below & try again
## %pip install wbdata
## %pip install plotly

import wbdata
import datetime
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.graph_objects as gp

In [None]:
# Return list of all country/region codes:
wbdata.get_countries()

# Return list matching a query term:
#wbdata.get_countries(query="World")
#wbdata.get_countries(query="United")

## Try your own search!
# wbdata.get_countries(query="")

id    name
----  --------------------------------------------------------------------------------
ABW   Aruba
AFE   Africa Eastern and Southern
AFG   Afghanistan
AFR   Africa
AFW   Africa Western and Central
AGO   Angola
ALB   Albania
AND   Andorra
ARB   Arab World
ARE   United Arab Emirates
ARG   Argentina
ARM   Armenia
ASM   American Samoa
ATG   Antigua and Barbuda
AUS   Australia
AUT   Austria
AZE   Azerbaijan
BDI   Burundi
BEA   East Asia & Pacific (IBRD-only countries)
BEC   Europe & Central Asia (IBRD-only countries)
BEL   Belgium
BEN   Benin
BFA   Burkina Faso
BGD   Bangladesh
BGR   Bulgaria
BHI   IBRD countries classified as high income
BHR   Bahrain
BHS   Bahamas, The
BIH   Bosnia and Herzegovina
BLA   Latin America & the Caribbean (IBRD-only countries)
BLR   Belarus
BLZ   Belize
BMN   Middle East & North Africa (IBRD-only countries)
BMU   Bermuda
BOL   Bolivia
BRA   Brazil
BRB   Barbados
BRN   Brunei Darussalam
BSS   Sub-Saharan Africa (IBRD-only countries)
BTN   Bhutan
BWA  

In [None]:
wbdata.get_sources()

  id  name
----  --------------------------------------------------------------------
   1  Doing Business
   2  World Development Indicators
   3  Worldwide Governance Indicators
   5  Subnational Malnutrition Database
   6  International Debt Statistics
  11  Africa Development Indicators
  12  Education Statistics
  13  Enterprise Surveys
  14  Gender Statistics
  15  Global Economic Monitor
  16  Health Nutrition and Population Statistics
  18  IDA Results Measurement System
  19  Millennium Development Goals
  20  Quarterly Public Sector Debt
  22  Quarterly External Debt Statistics SDDS
  23  Quarterly External Debt Statistics GDDS
  25  Jobs
  27  Global Economic Prospects
  28  Global Financial Inclusion
  29  The Atlas of Social Protection: Indicators of Resilience and Equity
  30  Exporter Dynamics Database – Indicators at Country-Year Level
  31  Country Policy and Institutional Assessment
  32  Global Financial Development
  33  G20 Financial Inclusion Indicators
  34  Glob

In [None]:
SOURCE = 40 # CHANGE THIS BASED ON OUR PROJ

indicators = wbdata.get_indicators(source=SOURCE)
indicators

id                 name
-----------------  -------------------------------------------------------------------
SH.DTH.0509        Number of deaths ages 5-9 years
SH.DTH.0514        Number of deaths ages 5-14 years
SH.DTH.1014        Number of deaths ages 10-14 years
SH.DTH.1019        Number of deaths ages 10-19 years
SH.DTH.1519        Number of deaths ages 15-19 years
SH.DTH.2024        Number of deaths ages 20-24 years
SH.DTH.IMRT        Number of infant deaths
SH.DTH.IMRT.FE     Number of infant deaths, female
SH.DTH.IMRT.MA     Number of infant deaths, male
SH.DTH.MORT        Number of under-five deaths
SH.DTH.MORT.FE     Number of under-five deaths, female
SH.DTH.MORT.MA     Number of under-five deaths, male
SH.DTH.NMRT        Number of neonatal deaths
SH.DYN.0509        Probability of dying among children ages 5-9 years (per 1,000)
SH.DYN.0514        Probability of dying at age 5-14 years (per 1,000 children age 5)
SH.DYN.1014        Probability of dying among adolescents ages 1

In [None]:
import plotly.express as px

# Define the Net Migration indicator
indicator = {"SM.POP.NETM": "Net Migration"}

# Fetch net migration data for all countries
migration_data = wbdata.get_dataframe(indicator, country="all", parse_dates=True)

# Drop missing values
migration_data = migration_data.dropna()

# Reset index so 'date' becomes a column
migration_data = migration_data.reset_index()

# Ensure 'date' is formatted correctly
migration_data['date'] = pd.to_datetime(migration_data['date'])

# Identify Asia-Pacific countries using World Bank region classification
asia_pacific_countries = wbdata.get_countries()
asia_pacific_countries = [country['id'] for country in asia_pacific_countries if country['region']['value'] == "East Asia & Pacific"]

# Filter for Asia-Pacific countries
migration_data = migration_data[migration_data['country'].isin(asia_pacific_countries)]

# Create an interactive line chart
fig = px.line(migration_data, x="date", y="Net Migration", color="country",
              title="Net Migration Trends in the Asia-Pacific Region",
              labels={"date": "Year", "Net Migration": "Number of Migrants", "country": "Country"})

# Improve visualization: Add hover interaction and better layout
fig.update_traces(mode="lines", hoverinfo="x+y+name")
fig.update_layout(
    plot_bgcolor="white",
    paper_bgcolor="white",
    hovermode="x unified",
    legend_title="Country",
    height=700,
    title_font=dict(size=20),
    font=dict(color="black")
)

# Show the interactive plot
fig.show()


AttributeError: module 'wbdata' has no attribute 'get_country'

In [None]:
import wbdata
import plotly.express as px

# Fetch and process Net Migration data
indicator = {"SM.POP.NETM": "Net Migration"}
asia_pacific_countries = [
    "Australia", "Bangladesh", "Brunei Darussalam", "Cambodia", "China", "Fiji", "Indonesia", "Japan",
    "Kiribati", "Korea, Rep.", "Lao PDR", "Malaysia", "Maldives", "Mongolia", "Myanmar", "Nepal", "New Zealand",
    "Pakistan", "Papua New Guinea", "Philippines", "Singapore", "Solomon Islands", "Sri Lanka", "Thailand",
    "Timor-Leste", "Tonga", "Tuvalu", "Vanuatu", "Vietnam"
]

# Fetch and filter data
migration_data = (
    wbdata.get_dataframe(indicator, country="all", parse_dates=True)
    .dropna()
    .reset_index()
    .query("country in @asia_pacific_countries")
)

# Create and customize the interactive line chart
fig = px.line(migration_data, x="date", y="Net Migration", color="country",
              title="Net Migration Trends in the Asia-Pacific Region",
              labels={"date": "Year", "Net Migration": "Number of Migrants", "country": "Country"}
)

fig.update_layout(
    plot_bgcolor="white", paper_bgcolor="white",
    hovermode="x unified", legend_title="Country",
    height=700, title_font_size=20, font_color="black"
)

fig.show()

In [None]:
import wbdata
import pandas as pd
import plotly.express as px

# Define the Net Migration indicator
indicator = {"SM.POP.NETM": "Net Migration"}

# Fetch net migration data for all countries
migration_data = wbdata.get_dataframe(indicator, country="all", parse_dates=True)

# Drop missing values
migration_data = migration_data.dropna()

# Reset index so 'date' becomes a column
migration_data = migration_data.reset_index()

# Ensure 'date' is formatted correctly
migration_data['date'] = pd.to_datetime(migration_data['date'])

# List of Middle East countries by their common names
middle_east_countries = [
    "Bahrain", "Iran, Islamic Rep.", "Iraq", "Israel", "Jordan", "Kuwait",
    "Lebanon", "Oman", "Qatar", "Saudi Arabia", "Syrian Arab Republic",
    "United Arab Emirates", "West Bank and Gaza", "Yemen, Rep."
]

# Filter the DataFrame for Middle East countries
migration_data = migration_data[migration_data['country'].isin(middle_east_countries)]

# Create an interactive line chart
fig = px.line(migration_data, x="date", y="Net Migration", color="country",
              title="Net Migration Trends in the Middle East",
              labels={"date": "Year", "Net Migration": "Number of Migrants", "country": "Country"})

# Improve visualization: Add hover interaction and better layout
fig.update_traces(mode="lines", hoverinfo="x+y+name")
fig.update_layout(
    plot_bgcolor="white",
    paper_bgcolor="white",
    hovermode="x unified",
    legend_title="Country",
    height=700,
    title_font=dict(size=20),
    font=dict(color="black")
)

# Show the interactive plot
fig.show()


In [None]:
import wbdata
import pandas as pd
import plotly.express as px

# Define the Net Migration indicator
indicator = {"SM.POP.NETM": "Net Migration"}

# Fetch net migration data for all countries
migration_data = wbdata.get_dataframe(indicator, country="all", parse_dates=True)

# Drop missing values
migration_data = migration_data.dropna()

# Reset index so 'date' becomes a column
migration_data = migration_data.reset_index()

# Ensure 'date' is formatted correctly
migration_data['date'] = pd.to_datetime(migration_data['date'])

# List of European countries by their common names
europe_countries = [
    "Albania", "Andorra", "Austria", "Belarus", "Belgium", "Bosnia and Herzegovina",
    "Bulgaria", "Croatia", "Cyprus", "Czechia", "Denmark", "Estonia", "Finland", "France",
    "Germany", "Greece", "Hungary", "Iceland", "Ireland", "Italy", "Kosovo", "Latvia",
    "Liechtenstein", "Lithuania", "Luxembourg", "Malta", "Moldova", "Monaco", "Montenegro",
    "Netherlands", "North Macedonia", "Norway", "Poland", "Portugal", "Romania",
    "San Marino", "Serbia", "Slovak Republic", "Slovenia", "Spain", "Sweden", "Switzerland",
    "Ukraine", "United Kingdom", "Turkiye"
]

# Filter the DataFrame for European countries
migration_data = migration_data[migration_data['country'].isin(europe_countries)]

# Create an interactive line chart
fig = px.line(migration_data, x="date", y="Net Migration", color="country",
              title="Net Migration Trends in Europe",
              labels={"date": "Year", "Net Migration": "Number of Migrants", "country": "Country"})

# Improve visualization: Add hover interaction and better layout
fig.update_traces(mode="lines", hoverinfo="x+y+name")
fig.update_layout(
    plot_bgcolor="white",
    paper_bgcolor="white",
    hovermode="x unified",
    legend_title="Country",
    height=700,
    title_font=dict(size=20),
    font=dict(color="black")
)

# Show the interactive plot
fig.show()

In [None]:
import wbdata
import pandas as pd
import plotly.express as px

# Define the Net Migration indicator
indicator = {"SM.POP.NETM": "Net Migration"}

# Fetch net migration data for all countries
migration_data = wbdata.get_dataframe(indicator, country="all", parse_dates=True)

# Drop missing values
migration_data = migration_data.dropna()

# Reset index so 'date' becomes a column
migration_data = migration_data.reset_index()

# Ensure 'date' is formatted correctly
migration_data['date'] = pd.to_datetime(migration_data['date'])

# Define regions and countries within them
regions = {
    "Asia-Pacific": [
        "Australia", "Bangladesh", "Brunei Darussalam", "Cambodia", "China", "Fiji", "Indonesia", "Japan",
        "Kiribati", "Korea, Rep.", "Lao PDR", "Malaysia", "Maldives", "Mongolia", "Myanmar", "Nepal", "New Zealand",
        "Pakistan", "Papua New Guinea", "Philippines", "Singapore", "Solomon Islands", "Sri Lanka", "Thailand",
        "Timor-Leste", "Tonga", "Tuvalu", "Vanuatu", "Vietnam"
    ],
    "Europe": [
        "Albania", "Andorra", "Austria", "Belarus", "Belgium", "Bosnia and Herzegovina", "Bulgaria", "Croatia",
        "Cyprus", "Czechia", "Denmark", "Estonia", "Finland", "France", "Germany", "Greece", "Hungary", "Iceland",
        "Ireland", "Italy", "Kosovo", "Latvia", "Liechtenstein", "Lithuania", "Luxembourg", "Malta", "Moldova",
        "Monaco", "Montenegro", "Netherlands", "North Macedonia", "Norway", "Poland", "Portugal", "Romania",
        "San Marino", "Serbia", "Slovak Republic", "Slovenia", "Spain", "Sweden", "Switzerland", "Ukraine",
        "United Kingdom"
    ],
    "Africa": [
        "Algeria", "Angola", "Benin", "Botswana", "Burkina Faso", "Burundi", "Cabo Verde", "Cameroon",
        "Central African Republic", "Chad", "Comoros", "Congo, Dem. Rep.", "Congo, Rep.", "Cote d'Ivoire",
        "Djibouti", "Egypt, Arab Rep.", "Equatorial Guinea", "Eritrea", "Eswatini", "Ethiopia", "Gabon",
        "Gambia, The", "Ghana", "Guinea", "Guinea-Bissau", "Kenya", "Lesotho", "Liberia", "Libya", "Madagascar",
        "Malawi", "Mali", "Mauritania", "Mauritius", "Morocco", "Mozambique", "Namibia", "Niger", "Nigeria",
        "Rwanda", "Sao Tome and Principe", "Senegal", "Seychelles", "Sierra Leone", "Somalia", "South Africa",
        "South Sudan", "Sudan", "Tanzania", "Togo", "Tunisia", "Uganda", "Zambia", "Zimbabwe"
    ],
    "Middle East": [
        "Bahrain", "Iran, Islamic Rep.", "Iraq", "Israel", "Jordan", "Kuwait", "Lebanon", "Oman", "Qatar",
        "Saudi Arabia", "Syrian Arab Republic", "United Arab Emirates", "West Bank and Gaza", "Yemen, Rep."
    ],
    "North America": [
        "Canada", "United States", "Mexico"
    ],
    "South America": [
        "Argentina", "Bolivia", "Brazil", "Chile", "Colombia", "Ecuador", "Guyana", "Paraguay", "Peru",
        "Suriname", "Uruguay", "Venezuela, RB"
    ]
}

# Create a region column based on country membership
def assign_region(country):
    for region, countries in regions.items():
        if country in countries:
            return region
    return None

migration_data['region'] = migration_data['country'].apply(assign_region)

# Filter out countries that don't belong to any region (if any)
migration_data = migration_data.dropna(subset=['region'])

# Group by date and region, then sum the Net Migration
region_data = migration_data.groupby(['date', 'region'])['Net Migration'].sum().reset_index()

# Create an interactive line chart
fig = px.line(region_data, x="date", y="Net Migration", color="region",
              title="Net Migration Trends by Region",
              labels={"date": "Year", "Net Migration": "Number of Migrants", "region": "Region"})

# Improve visualization: Add hover interaction and better layout
fig.update_traces(mode="lines", hoverinfo="x+y+name")
fig.update_layout(
    plot_bgcolor="white",
    paper_bgcolor="white",
    hovermode="x unified",
    legend_title="Region",
    height=700,
    title_font=dict(size=20),
    font=dict(color="black")
)

# Show the interactive plot
fig.show()


In [None]:
import wbdata
import pandas as pd
import plotly.express as px

# Define the Net Migration indicator
indicator = {"SM.POP.NETM": "Net Migration"}

# Fetch net migration data for all countries
migration_data = wbdata.get_dataframe(indicator, country="all", parse_dates=True)

# Drop missing values
migration_data = migration_data.dropna()

# Reset index so 'date' becomes a column
migration_data = migration_data.reset_index()

# Convert 'date' to datetime format
migration_data['date'] = pd.to_datetime(migration_data['date'])

# Define country groups
north_america = ["United States", "Canada", "Mexico"]
south_america = ["Argentina", "Bolivia", "Brazil", "Chile", "Colombia", "Ecuador", "Guyana",
                 "Paraguay", "Peru", "Suriname", "Uruguay", "Venezuela"]

# Create a new column for region categorization
migration_data["Region"] = migration_data["country"].apply(
    lambda x: "North America" if x in north_america else "South America" if x in south_america else None
)

# Filter only North and South American countries
migration_data = migration_data.dropna(subset=["Region"])

# Apply smoothing (7-year rolling average) for each country
migration_data["Smoothed Net Migration"] = migration_data.groupby("country")["Net Migration"].transform(lambda x: x.rolling(window=7, min_periods=1).mean())

# Create an interactive line chart with all countries in one graph, color-coded by region
fig = px.line(migration_data, x="date", y="Smoothed Net Migration", color="country",
              title="Smoothed Net Migration Trends in North & South America",
              labels={"date": "Year", "Smoothed Net Migration": "Number of Migrants", "country": "Country"},
              line_group="country",
              hover_name="country"
)

# Improve visualization: Add hover effects and better layout
fig.update_traces(mode="lines", hoverinfo="x+y+name")

# Update layout for clarity and set background color to white
fig.update_layout(
    hovermode="x unified",
    legend_title="Country",
    height=800,
    xaxis=dict(title="Year", showgrid=False),
    yaxis=dict(title="Smoothed Net Migration", showgrid=True),
    font=dict(size=14),
    plot_bgcolor="white",  # Set plot background to white
    paper_bgcolor="white"  # Set outer background to white
)

# Show the interactive plot
fig.show()

In [None]:
import wbdata
import pandas as pd
import plotly.express as px

# Fetch Net Migration data
indicator = {"SM.POP.NETM": "Net Migration"}
data = wbdata.get_dataframe(indicator, country="all").dropna().reset_index()

# Define country groups
north_america = ["United States", "Canada", "Mexico"]
central_america = ["Belize", "Costa Rica", "El Salvador", "Guatemala", "Honduras", "Nicaragua", "Panama"]
south_america = ["Argentina", "Bolivia", "Brazil", "Chile", "Colombia", "Ecuador", "Guyana",
                 "Paraguay", "Peru", "Suriname", "Uruguay", "Venezuela"]

# Assign regions
data["Region"] = data["country"].apply(lambda x: "North America" if x in north_america else
                                       "Central America" if x in central_america else
                                       "South America" if x in south_america else None)

# Filter for relevant countries
data = data.dropna(subset=["Region"])

# Create an interactive plot
fig = px.line(data, x="date", y="Net Migration", color="country",
              title="Net Migration Trends in The Americas",
              labels={"date": "Year", "Net Migration": "Number of Migrants", "country": "Country"})

fig.update_layout(hovermode="x unified", height=500, plot_bgcolor="white", paper_bgcolor="white")

# Show the graph
fig.show()

In [None]:
# Ensure necessary packages are installed
!pip install --upgrade pip
!pip install wbdata pandas_datareader plotly

import wbdata
import pandas as pd
import plotly.express as px

# Define the Net Migration indicator
indicator = {"SM.POP.NETM": "Net Migration"}

# Fetch net migration data for all countries
migration_data = wbdata.get_dataframe(indicator, country="all", parse_dates=True)

# Drop missing values
migration_data = migration_data.dropna()

# Reset index so 'date' becomes a column
migration_data = migration_data.reset_index()

# Convert 'date' to datetime format
migration_data['date'] = pd.to_datetime(migration_data['date'])

# List of African countries based on World Bank classification
african_countries = [
    "Algeria", "Angola", "Benin", "Botswana", "Burkina Faso", "Burundi", "Cabo Verde", "Cameroon",
    "Central African Republic", "Chad", "Comoros", "Congo, Dem. Rep.", "Congo, Rep.", "Cote d'Ivoire",
    "Djibouti", "Egypt, Arab Rep.", "Equatorial Guinea", "Eritrea", "Eswatini", "Ethiopia", "Gabon",
    "Gambia, The", "Ghana", "Guinea", "Guinea-Bissau", "Kenya", "Lesotho", "Liberia", "Libya", "Madagascar",
    "Malawi", "Mali", "Mauritania", "Mauritius", "Morocco", "Mozambique", "Namibia", "Niger", "Nigeria",
    "Rwanda", "Sao Tome and Principe", "Senegal", "Seychelles", "Sierra Leone", "Somalia", "South Africa",
    "South Sudan", "Sudan", "Tanzania", "Togo", "Tunisia", "Uganda", "Zambia", "Zimbabwe"
]

# Filter data for African countries
migration_data = migration_data[migration_data['country'].isin(african_countries)]

# Apply smoothing (7-year rolling average) for better visualization
migration_data["Smoothed Net Migration"] = migration_data.groupby("country")["Net Migration"].transform(lambda x: x.rolling(window=7, min_periods=1).mean())

# Create an interactive line chart
fig = px.line(migration_data, x="date", y="Smoothed Net Migration", color="country",
              title="Smoothed Net Migration Trends in Africa",
              labels={"date": "Year", "Smoothed Net Migration": "Number of Migrants", "country": "Country"},
              line_group="country",
              hover_name="country"
)

# Improve visualization: Add hover effects and better layout
fig.update_traces(mode="lines", hoverinfo="x+y+name")

# Update layout for a clean white background
fig.update_layout(
    hovermode="x unified",
    legend_title="Country",
    height=800,
    xaxis=dict(title="Year", showgrid=False),
    yaxis=dict(title="Smoothed Net Migration", showgrid=True),
    font=dict(size=14),
    plot_bgcolor="white",  # Set the plot background to white
    paper_bgcolor="white"  # Set the surrounding background to white
)

# Show the interactive plot
fig.show()

Collecting pip
  Downloading pip-25.0-py3-none-any.whl.metadata (3.7 kB)
Downloading pip-25.0-py3-none-any.whl (1.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m21.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 24.1.2
    Uninstalling pip-24.1.2:
      Successfully uninstalled pip-24.1.2
Successfully installed pip-25.0




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [None]:
from re import A
### create data frame based for migration soruce
migration = {"SM.POP.NETM" : "Net Migration"}
migration_df = wbdata.get_dataframe(migration, country = ["USA"])
# print(migration_df)

### create data frame for age groups
ages_groups = {
    "SP.POP.0014.MA" : "Male 0-14",
    "SP.POP.0014.FE" : "Female 0-14",
    "SP.POP.1564.MA" : "Male 15-64",
    "SP.POP.1564.FE" : "Female 15-64",
    "SP.POP.65UP.MA" : "Male 65+",
    "SP.POP.65UP.FE" : "Female 65+",
}

age_group_df = wbdata.get_dataframe(ages_groups)

print(age_group_df)

RuntimeError: Got error 120 (Invalid value): The provided parameter value is not valid

In [10]:
## This function is based on the colab file shared by the prof last week.
## And therefore similar methods are used in function below.
import wbdata
import datetime as dt

def population(year, sex, age_range, place):
  """
      @param: year (int)
      @param: sex (str) : input as "male" or "female"
      @param: age_range (str) : inputs as "0-14", "15-64", "65+"
      @param: place (str)
      return: population count (int)
  """
  ages = {
        "0-14": {
            "male": "SP.POP.0014.MA.IN",
            "female": "SP.POP.0014.FE.IN",
        },
        "15-64": {
            "male": "SP.POP.1564.MA.IN",
            "female": "SP.POP.1564.FE.IN",
        },
        "65+": {
            "male": "SP.POP.65UP.MA.IN",
            "female": "SP.POP.65UP.FE.IN",
        },
    }

  if age_range not in ages or sex not in ["male", "female"]:
        raise ValueError("Invalid input.")

  indicator = ages[age_range][sex]

  data = wbdata.get_data(indicator, country = place, date = (dt.datetime(year, 1, 1), dt.datetime(year, 1, 1)))

  if not data:
      return None
  # suggestion from Google/Gemini to fix error
  elif data and isinstance(data, list):
    for item in data:
      if item.get("value") is not None:
        return item["value"]
      print("population count not available")
      return None

print(population(2020, "male", "0-14", "USA"))
print(population(2015, "female", "15-64", "IND"))
print(population(2010, "male", "65+", "JPN"))

31318046
420161099
12726650


In [19]:
## This function is based on the colab file shared by the prof last week.
## And therefore similar methods are used in function below.
import wbdata
import datetime as dt

def population_df(countries, start_yr, end_yr):
  """
      @param: countries (list) : list of country codes
      @param: start_yr (int) : starting year
      @param: end_yr (str) : end year
      return: population data frame indexed by country, year, and columns
              giving counts of people in different age-sex groups
  """
  ages = {
        "0-14": {
            "male": "SP.POP.0014.MA.IN",
            "female": "SP.POP.0014.FE.IN",
        },
        "15-64": {
            "male": "SP.POP.1564.MA.IN",
            "female": "SP.POP.1564.FE.IN",
        },
        "65+": {
            "male": "SP.POP.65UP.MA.IN",
            "female": "SP.POP.65UP.FE.IN",
        },
    }

  df = []
  for age_range in ages:
    for sex in ages[age_range]:
      indicator = ages[age_range][sex]
      name = f":{age_range}_{sex}"
      data = wbdata.get_data(indicator,
                             country = countries,
                             date = (dt.datetime(start_yr, 1, 1), dt.datetime(end_yr, 1, 1)))

      new_df = pd.DataFrame(data)
      new_df.rename(columns = {"value" : name}, inplace = True)

      new_df['Year'] = new_df['date'].astype(int)
      new_df.drop(columns = ['date'], inplace = True)
      new_df.set_index('Year', inplace = True)

      new_df['Country'] = new_df['country'].apply(lambda x: x['value'] if isinstance(x, dict) else x)
      new_df.drop(columns = ['country'], inplace = True)
      new_df.set_index('Country', inplace = True)

      new_df = new_df[[name]]
      df.append(new_df)

  if df:
    main_df = pd.concat(df, axis = 1)
    main_df.sort_index(inplace = True)
  else:
    main_df = pd.DataFrame()

  return main_df

df = population_df(["USA", "IND", "JPN"], 2010, 2020)
display(df)

Unnamed: 0_level_0,:0-14_male,:0-14_female,:15-64_male,:15-64_female,:65+_male,:65+_female
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
India,193049643,176475236,488513474,454575427,42612350,47391566
India,194822123,177886985,481642958,448232319,40885983,45559944
India,196462625,179200843,474541800,441709500,39045735,43698562
India,198093854,180541801,467043895,434781814,37281347,41914690
India,199649050,181831596,459164278,427477653,35603764,40217956
India,201020492,182951247,451259170,420161099,34019075,38613415
India,202296224,183993964,443434941,412907483,32534945,37109634
India,203236901,184739320,435329609,405365662,31310167,35847852
India,203778931,185136050,426997208,397634456,30340529,34787328
India,204097696,185368138,418644475,389896933,29448014,33769698


In [11]:
def population_pyramid(df):
  """
      @param: df (DataFrame)
      return: None

      Assume df has columns {"age_range", "male_immigrants", "female_immigrants"}
  """
  """
  age_segments = df["age_range"]
  male_immigrant_pop = df["male_immigrants"]
  female_immigrant_pop = df["female_immigrants"]

  fig, axis = plt.subplots(ncols = 2, sharey = True, figsize=(10, 5))
  axis[0].barh(age_segments, male_immigrant_pop, color = "blue")
  axis[1].barh(age_segments, female_immigrant_pop, color = "pink")

  axis[0].set_title("Male Immigrants")
  axis[1].set_title("Female Immigrants")

  plt.tight_layout()
  plt.show()
  """
  # adapted code using idea from this source:
  # https://www.geeksforgeeks.org/how-to-create-a-population-pyramid-using-plotly-in-python/

  fig = gp.Figure()

  # add male pop
  fig.add_trace(gp.Bar(y = df["age_range"], x = -df["male_immigrants"],
                       name = "Male", orientation = "h", marker_color = "blue"))

  # add female pop
  fig.add_trace(gp.Bar(y = df["age_range"], x = df["female_immigrants"],
                       name = "Female", orientation = "h", marker_color = "green"))

  # update graph layout
  fig.update_layout(title ="Population Pyramid of Immigrants", xaxis = dict(
    title="Population",
    tickvals = [-max(df["male_immigrants"]), 0, max(df["female_immigrants"])],
    ticktext = [f"{max(df['male_immigrants']):,}", "0", f"{max(df['female_immigrants']):,}"]),
    yaxis = dict(title="Age Range", categoryorder='category ascending'),
    barmode = "relative")


  fig.show()

df = pd.DataFrame({
    "age_range": ["0-10", "11-20", "21-30", "31-40", "41-50"],
    "male_immigrants": [500, 600, 700, 800, 900],
    "female_immigrants": [550, 650, 750, 850, 950]
})

population_pyramid(df)

In [None]:
import plotly.express as px

# Define the Net Migration indicator
indicator = {"SM.POP.NETM": "Net Migration"}

# Fetch net migration data for all countries
migration_data = wbdata.get_dataframe(indicator, country="all", parse_dates=True)

# Drop missing values
migration_data = migration_data.dropna()

# Reset index so 'date' becomes a column
migration_data = migration_data.reset_index()

# Print structure for debugging
print("DataFrame structure after fetching:")
print(migration_data.info())
print(migration_data.head())

# Ensure 'date' is formatted correctly
migration_data['date'] = pd.to_datetime(migration_data['date'])

# Create an interactive line chart (NO NEED TO MELT)
fig = px.line(migration_data, x="date", y="Net Migration", color="country",
              title="Net Migration Trends by Country",
              labels={"date": "Year", "Net Migration": "Number of Migrants", "country": "Country"},
              template="plotly_dark")

# Improve visualization: Add hover interaction and better layout
fig.update_traces(mode="lines", hoverinfo="x+y+name")
fig.update_layout(
    hovermode="x unified",
    legend_title="Country",
    height=700
)

# Show the interactive plot
fig.show()


In [None]:
import plotly.express as px

# Define the Net Migration indicator
indicator = {"SM.POP.NETM": "Net Migration"}

# Fetch net migration data for all countries
migration_data = wbdata.get_dataframe(indicator, country="all", parse_dates=True)

# Drop missing values
migration_data = migration_data.dropna()

# Reset index so 'date' becomes a column
migration_data = migration_data.reset_index()

# Convert 'date' to datetime format
migration_data['date'] = pd.to_datetime(migration_data['date'])

# Define country groups
north_america = ["United States", "Canada", "Mexico"]
south_america = ["Argentina", "Bolivia", "Brazil", "Chile", "Colombia", "Ecuador", "Guyana",
                 "Paraguay", "Peru", "Suriname", "Uruguay", "Venezuela"]

# Create a new column for region categorization
migration_data["Region"] = migration_data["country"].apply(
    lambda x: "North America" if x in north_america else "South America" if x in south_america else None
)

# Filter only North and South America
migration_data = migration_data.dropna(subset=["Region"])

# Apply smoothing (7-year rolling average) for each country
migration_data["Smoothed Net Migration"] = migration_data.groupby("country")["Net Migration"].transform(lambda x: x.rolling(window=7, min_periods=1).mean())

# Create an interactive line chart with countries individually categorized by region
fig = px.line(migration_data, x="date", y="Smoothed Net Migration", color="country",
              title="Smoothed Net Migration for North & South American Countries",
              labels={"date": "Year", "Smoothed Net Migration": "Number of Migrants", "country": "Country"},
              template="plotly_dark",
              facet_row="Region")  # Separates regions visually

# Improve visualization with hover effects and layout
fig.update_traces(mode="lines", hoverinfo="x+y+name")
fig.update_layout(
    hovermode="x unified",
    legend_title="Country",
    height=800,
    xaxis=dict(title="Year", showgrid=False),
    yaxis=dict(title="Smoothed Net Migration", showgrid=True),
    font=dict(size=14)
)

# Show the interactive plot
fig.show()


NameError: name 'migration_data' is not defined

In [None]:
import wbdata
import plotly.graph_objects as go

# Define indicators and fetch data
indicators = {"SP.URB.GROW": "Urban Population Growth", "SM.POP.NETM": "Net Migration"}
data = wbdata.get_dataframe(indicators, country="all", parse_dates=True).dropna().reset_index()

# Regional country mapping
regions = {
    "Asia-Pacific": ["Australia", "Bangladesh", "Brunei Darussalam", "Cambodia", "China", "Fiji", "Indonesia", "Japan",
                     "Kiribati", "Korea, Rep.", "Lao PDR", "Malaysia", "Maldives", "Mongolia", "Myanmar", "Nepal",
                     "New Zealand", "Pakistan", "Papua New Guinea", "Philippines", "Singapore", "Solomon Islands",
                     "Sri Lanka", "Thailand", "Timor-Leste", "Tonga", "Tuvalu", "Vanuatu", "Vietnam"],
    "Middle East": ["Bahrain", "Iran, Islamic Rep.", "Iraq", "Israel", "Jordan", "Kuwait", "Lebanon", "Oman", "Qatar",
                    "Saudi Arabia", "Syrian Arab Republic", "United Arab Emirates", "West Bank and Gaza", "Yemen, Rep."],
    "North America": ["United States", "Canada", "Mexico"],
    "South America": ["Argentina", "Bolivia", "Brazil", "Chile", "Colombia", "Ecuador", "Guyana", "Paraguay", "Peru",
                      "Suriname", "Uruguay", "Venezuela"],
    "Europe": ["Albania", "Austria", "Belgium", "France", "Germany", "Italy", "Spain", "Sweden", "Switzerland",
               "United Kingdom"],
    "Africa": ["Algeria", "Angola", "Benin", "Botswana", "Burkina Faso", "Cabo Verde", "Cameroon", "Congo, Dem. Rep.",
               "Ethiopia", "Kenya", "Morocco", "Nigeria", "South Africa", "Tanzania", "Uganda", "Zambia", "Zimbabwe"]
}

# Assign regions based on country
data['Region'] = data['country'].apply(lambda x: next((region for region, countries in regions.items() if x in countries), None))

# Filter and aggregate data by region and year
region_summary = (
    data.dropna(subset=["Region"])
    .assign(Year=lambda df: df['date'].dt.year)
    .groupby(["Region", "Year"])[["Urban Population Growth", "Net Migration"]].mean().reset_index()
)

# Create the line chart with dual y-axes
fig = go.Figure()

# Add lines for each region
for region in region_summary["Region"].unique():
    fig.add_trace(go.Scatter(
        x=region_summary.query("Region == @region")["Year"],
        y=region_summary.query("Region == @region")["Net Migration"],
        mode='lines',
        name=f'{region} - Net Migration',
        yaxis='y1'
    ))
    fig.add_trace(go.Scatter(
        x=region_summary.query("Region == @region")["Year"],
        y=region_summary.query("Region == @region")["Urban Population Growth"],
        mode='lines',
        name=f'{region} - Urban Growth',
        yaxis='y2'
    ))

# Configure layout
fig.update_layout(
    title="Net Migration and Urban Population Growth by Region",
    xaxis_title="Year",
    yaxis=dict(title="Net Migration", side="left"),
    yaxis2=dict(title="Urban Population Growth (%)", overlaying='y', side="right"),
    legend_title="Indicators",
    plot_bgcolor="white",
    paper_bgcolor="white",
    height=700
)

fig.show()


In [None]:
#################################################################. SIMPLIFIED CODE. ##################################################################

In [None]:
import wbdata
import pandas as pd
import plotly.express as px

# Fetch Net Migration data
indicator = {"SM.POP.NETM": "Net Migration"}
data = wbdata.get_dataframe(indicator, country="all").dropna().reset_index()

# Define regions and their countries
regions = {
    "Asia-Pacific": ["Australia", "Bangladesh", "Brunei Darussalam", "Cambodia", "China", "Fiji", "Indonesia", "Japan",
                     "Kiribati", "Korea, Rep.", "Lao PDR", "Malaysia", "Maldives", "Mongolia", "Myanmar", "Nepal",
                     "New Zealand", "Pakistan", "Papua New Guinea", "Philippines", "Singapore", "Solomon Islands",
                     "Sri Lanka", "Thailand", "Timor-Leste", "Tonga", "Tuvalu", "Vanuatu", "Vietnam"],
    "Europe": ["Albania", "Andorra", "Austria", "Belarus", "Belgium", "Bosnia and Herzegovina", "Bulgaria", "Croatia",
               "Cyprus", "Czechia", "Denmark", "Estonia", "Finland", "France", "Germany", "Greece", "Hungary",
               "Iceland", "Ireland", "Italy", "Kosovo", "Latvia", "Liechtenstein", "Lithuania", "Luxembourg", "Malta",
               "Moldova", "Monaco", "Montenegro", "Netherlands", "North Macedonia", "Norway", "Poland", "Portugal",
               "Romania", "San Marino", "Serbia", "Slovak Republic", "Slovenia", "Spain", "Sweden", "Switzerland",
               "Ukraine", "United Kingdom"],
    "Africa": ["Algeria", "Angola", "Benin", "Botswana", "Burkina Faso", "Burundi", "Cabo Verde", "Cameroon",
               "Central African Republic", "Chad", "Comoros", "Congo, Dem. Rep.", "Congo, Rep.", "Cote d'Ivoire",
               "Djibouti", "Egypt, Arab Rep.", "Equatorial Guinea", "Eritrea", "Eswatini", "Ethiopia", "Gabon",
               "Gambia, The", "Ghana", "Guinea", "Guinea-Bissau", "Kenya", "Lesotho", "Liberia", "Libya", "Madagascar",
               "Malawi", "Mali", "Mauritania", "Mauritius", "Morocco", "Mozambique", "Namibia", "Niger", "Nigeria",
               "Rwanda", "Sao Tome and Principe", "Senegal", "Seychelles", "Sierra Leone", "Somalia", "South Africa",
               "South Sudan", "Sudan", "Tanzania", "Togo", "Tunisia", "Uganda", "Zambia", "Zimbabwe"],
    "Middle East": ["Bahrain", "Iran, Islamic Rep.", "Iraq", "Israel", "Jordan", "Kuwait", "Lebanon", "Oman", "Qatar",
                    "Saudi Arabia", "Syrian Arab Republic", "United Arab Emirates", "West Bank and Gaza", "Yemen, Rep."],
    "North America": ["Canada", "United States", "Mexico"],
    "South America": ["Argentina", "Bolivia", "Brazil", "Chile", "Colombia", "Ecuador", "Guyana", "Paraguay", "Peru",
                      "Suriname", "Uruguay", "Venezuela, RB"]
}

# Assign regions to countries
region_map = {country: region for region, countries in regions.items() for country in countries}
data["region"] = data["country"].map(region_map)

# Filter out unassigned countries
data = data.dropna(subset=["region"])

# Aggregate data by region and year
region_data = data.groupby(["date", "region"])["Net Migration"].sum().reset_index()

# Create interactive plot
fig = px.line(region_data, x="date", y="Net Migration", color="region",
              title="Net Migration Trends by Region",
              labels={"date": "Year", "Net Migration": "Number of Migrants", "region": "Region"})

fig.update_layout(hovermode="x unified", height=700, plot_bgcolor="white", paper_bgcolor="white")

fig.show()

In [None]:
import wbdata
import pandas as pd
import plotly.express as px

# Fetch Net Migration data
indicator = {"SM.POP.NETM": "Net Migration"}
data = wbdata.get_dataframe(indicator, country="all").dropna().reset_index()

# Define African countries
africa = [
    "Algeria", "Angola", "Benin", "Botswana", "Burkina Faso", "Burundi", "Cabo Verde", "Cameroon",
    "Central African Republic", "Chad", "Comoros", "Congo, Dem. Rep.", "Congo, Rep.", "Cote d'Ivoire",
    "Djibouti", "Egypt, Arab Rep.", "Equatorial Guinea", "Eritrea", "Eswatini", "Ethiopia", "Gabon",
    "Gambia, The", "Ghana", "Guinea", "Guinea-Bissau", "Kenya", "Lesotho", "Liberia", "Libya", "Madagascar",
    "Malawi", "Mali", "Mauritania", "Mauritius", "Morocco", "Mozambique", "Namibia", "Niger", "Nigeria",
    "Rwanda", "Sao Tome and Principe", "Senegal", "Seychelles", "Sierra Leone", "Somalia", "South Africa",
    "South Sudan", "Sudan", "Tanzania", "Togo", "Tunisia", "Uganda", "Zambia", "Zimbabwe"
]

# Filter for African countries
data = data[data["country"].isin(africa)]

# Create an interactive plot
fig = px.line(data, x="date", y="Net Migration", color="country",
              title="Net Migration Trends in Africa",
              labels={"date": "Year", "Net Migration": "Number of Migrants", "country": "Country"})

fig.update_layout(hovermode="x unified", height=500, plot_bgcolor="white", paper_bgcolor="white")

# Show the graph
fig.show()

In [None]:
import wbdata
import pandas as pd
import plotly.express as px

# Fetch Net Migration data
indicator = {"SM.POP.NETM": "Net Migration"}
data = wbdata.get_dataframe(indicator, country="all").dropna().reset_index()

# Define Asia-Pacific countries
asia_pacific = [
    "Australia", "Bangladesh", "Brunei Darussalam", "Cambodia", "China", "Fiji", "Indonesia", "Japan",
    "Kiribati", "Korea, Rep.", "Lao PDR", "Malaysia", "Maldives", "Mongolia", "Myanmar", "Nepal", "New Zealand",
    "Pakistan", "Papua New Guinea", "Philippines", "Singapore", "Solomon Islands", "Sri Lanka", "Thailand",
    "Timor-Leste", "Tonga", "Tuvalu", "Vanuatu", "Vietnam"
]

# Filter for Asia-Pacific countries
data = data[data["country"].isin(asia_pacific)]

# Create interactive plot
fig = px.line(data, x="date", y="Net Migration", color="country",
              title="Net Migration Trends in Asia-Pacific",
              labels={"date": "Year", "Net Migration": "Number of Migrants", "country": "Country"})

fig.update_layout(hovermode="x unified", height=500, plot_bgcolor="white", paper_bgcolor="white")

# Show the graph
fig.show()

In [None]:
import wbdata
import pandas as pd
import plotly.express as px

# Fetch Net Migration data
indicator = {"SM.POP.NETM": "Net Migration"}
data = wbdata.get_dataframe(indicator, country="all").dropna().reset_index()

# Define European countries
europe_countries = [
    "Albania", "Andorra", "Austria", "Belarus", "Belgium", "Bosnia and Herzegovina",
    "Bulgaria", "Croatia", "Cyprus", "Czechia", "Denmark", "Estonia", "Finland", "France",
    "Germany", "Greece", "Hungary", "Iceland", "Ireland", "Italy", "Kosovo", "Latvia",
    "Liechtenstein", "Lithuania", "Luxembourg", "Malta", "Moldova", "Monaco", "Montenegro",
    "Netherlands", "North Macedonia", "Norway", "Poland", "Portugal", "Romania",
    "San Marino", "Serbia", "Slovak Republic", "Slovenia", "Spain", "Sweden", "Switzerland",
    "Ukraine", "United Kingdom", "Turkiye"
]

# Filter for European countries
data = data[data["country"].isin(europe_countries)]

# Create an interactive plot
fig = px.line(data, x="date", y="Net Migration", color="country",
              title="Net Migration Trends in Europe",
              labels={"date": "Year", "Net Migration": "Number of Migrants", "country": "Country"})

fig.update_layout(hovermode="x unified", height=500, plot_bgcolor="white", paper_bgcolor="white")

# Show the graph
fig.show()

In [None]:
import wbdata
import pandas as pd
import plotly.express as px

# Fetch Net Migration data
indicator = {"SM.POP.NETM": "Net Migration"}
data = wbdata.get_dataframe(indicator, country="all").dropna().reset_index()

# Define Middle East countries
middle_east_countries = [
    "Bahrain", "Iran, Islamic Rep.", "Iraq", "Israel", "Jordan", "Kuwait",
    "Lebanon", "Oman", "Qatar", "Saudi Arabia", "Syrian Arab Republic",
    "United Arab Emirates", "West Bank and Gaza", "Yemen, Rep."
]

# Filter for Middle East countries
data = data[data["country"].isin(middle_east_countries)]

# Create an interactive plot
fig = px.line(data, x="date", y="Net Migration", color="country",
              title="Net Migration Trends in the Middle East",
              labels={"date": "Year", "Net Migration": "Number of Migrants", "country": "Country"})

fig.update_layout(hovermode="x unified", height=500, plot_bgcolor="white", paper_bgcolor="white")

# Show the graph
fig.show()

In [None]:
import wbdata
import pandas as pd
import plotly.express as px

# Fetch Net Migration data
indicator = {"SM.POP.NETM": "Net Migration"}
data = wbdata.get_dataframe(indicator, country="all").dropna().reset_index()

# Define country groups
north_america = ["United States", "Canada", "Mexico"]
central_america = ["Belize", "Costa Rica", "El Salvador", "Guatemala", "Honduras", "Nicaragua", "Panama"]
south_america = ["Argentina", "Bolivia", "Brazil", "Chile", "Colombia", "Ecuador", "Guyana",
                 "Paraguay", "Peru", "Suriname", "Uruguay", "Venezuela"]

# Assign regions
data["Region"] = data["country"].apply(lambda x: "North America" if x in north_america else
                                       "Central America" if x in central_america else
                                       "South America" if x in south_america else None)

# Filter for relevant countries
data = data.dropna(subset=["Region"])

# Create an interactive plot
fig = px.line(data, x="date", y="Net Migration", color="country",
              title="Net Migration Trends in The Americas",
              labels={"date": "Year", "Net Migration": "Number of Migrants", "country": "Country"})

fig.update_layout(hovermode="x unified", height=500, plot_bgcolor="white", paper_bgcolor="white")

# Show the graph
fig.show()