# Exploratory Data Analysis (EDA)

### 0. Imports

In [1]:
# %pip install wbdata
# %pip install plotly

import wbdata
import datetime as dt
import pandas as pd
import matplotlib.pyplot as plt
import plotly.graph_objects as gp
import plotly.express as px

### 1. Line Graph For All Regions

In [2]:
# Line graph for all regions: net migration summed per region and year.

import wbdata
import pandas as pd
import plotly.express as px

# Fetch Net Migration data (indicator SM.POP.NETM) for every available country
indicator = "SM.POP.NETM"
data = wbdata.get_data(indicator, country="all")

# Convert to DataFrame, skipping observations that have no reported value
df = pd.DataFrame.from_records(
    [(entry['country']['value'], entry['date'], entry['value']) for entry in data if entry['value'] is not None],
    columns=["country", "date", "Net Migration"]
)

# Convert 'date' to integer so years sort numerically
df["date"] = df["date"].astype(int)

# Define regions.
# Names are matched exactly against the 'country' column; a name that does not
# match is silently left out of its region's total.
# "Turkiye" is listed under Europe so the regional aggregate covers the same
# countries as the per-country European chart later in this notebook.
regions = {
    "Asia-Pacific": ["Australia", "Bangladesh", "Brunei Darussalam", "Cambodia", "China", "Fiji", "Indonesia", "Japan",
                     "Kiribati", "Korea, Rep.", "Lao PDR", "Malaysia", "Maldives", "Mongolia", "Myanmar", "Nepal",
                     "New Zealand", "Pakistan", "Papua New Guinea", "Philippines", "Singapore", "Solomon Islands",
                     "Sri Lanka", "Thailand", "Timor-Leste", "Tonga", "Tuvalu", "Vanuatu", "Vietnam"],
    "Europe": ["Albania", "Andorra", "Austria", "Belarus", "Belgium", "Bosnia and Herzegovina", "Bulgaria", "Croatia",
               "Cyprus", "Czechia", "Denmark", "Estonia", "Finland", "France", "Germany", "Greece", "Hungary",
               "Iceland", "Ireland", "Italy", "Kosovo", "Latvia", "Liechtenstein", "Lithuania", "Luxembourg", "Malta",
               "Moldova", "Monaco", "Montenegro", "Netherlands", "North Macedonia", "Norway", "Poland", "Portugal",
               "Romania", "San Marino", "Serbia", "Slovak Republic", "Slovenia", "Spain", "Sweden", "Switzerland",
               "Ukraine", "United Kingdom", "Turkiye"],
    "Africa": ["Algeria", "Angola", "Benin", "Botswana", "Burkina Faso", "Burundi", "Cabo Verde", "Cameroon",
               "Central African Republic", "Chad", "Comoros", "Congo, Dem. Rep.", "Congo, Rep.", "Cote d'Ivoire",
               "Djibouti", "Egypt, Arab Rep.", "Equatorial Guinea", "Eritrea", "Eswatini", "Ethiopia", "Gabon",
               "Gambia, The", "Ghana", "Guinea", "Guinea-Bissau", "Kenya", "Lesotho", "Liberia", "Libya", "Madagascar",
               "Malawi", "Mali", "Mauritania", "Mauritius", "Morocco", "Mozambique", "Namibia", "Niger", "Nigeria",
               "Rwanda", "Sao Tome and Principe", "Senegal", "Seychelles", "Sierra Leone", "Somalia", "South Africa",
               "South Sudan", "Sudan", "Tanzania", "Togo", "Tunisia", "Uganda", "Zambia", "Zimbabwe"],
    "Middle East": ["Bahrain", "Iran, Islamic Rep.", "Iraq", "Israel", "Jordan", "Kuwait", "Lebanon", "Oman", "Qatar",
                    "Saudi Arabia", "Syrian Arab Republic", "United Arab Emirates", "West Bank and Gaza", "Yemen, Rep."],
    "The Americas": ["United States", "Canada", "Mexico", "Belize", "Costa Rica", "El Salvador", "Guatemala",
                     "Honduras", "Nicaragua", "Panama", "Argentina", "Bolivia", "Brazil", "Chile", "Colombia",
                     "Ecuador", "Guyana", "Paraguay", "Peru", "Suriname", "Uruguay", "Venezuela, RB"]
}

# Assign regions to countries: invert {region: [countries]} into {country: region}
country_to_region = {country: region for region, countries in regions.items() for country in countries}
df["region"] = df["country"].map(country_to_region)

# Filter to only assigned regions (rows whose country is in no list map to NaN)
df = df.dropna(subset=["region"])

# Aggregate migration data by region and year.
# groupby sorts by its keys, so each region's points come out in year order.
region_data = df.groupby(["date", "region"])["Net Migration"].sum().reset_index()

# Interactive plot: one line per region
fig = px.line(region_data, x="date", y="Net Migration", color="region",
              title="Net Migration Trends by Region",
              labels={"date": "Year", "Net Migration": "Number of Migrants", "region": "Region"},
              line_group="region")

fig.update_layout(hovermode="x unified", height=700, plot_bgcolor="white", paper_bgcolor="white",
                  legend_title="Region")

# Show the graph
fig.show()

### 2. Line Graph For African Region

In [3]:
# Net migration over time for the African region, one line per country

import wbdata
import pandas as pd
import plotly.express as px

# Download every observation of the net-migration indicator
indicator = "SM.POP.NETM"
data = wbdata.get_data(indicator, country="all")

# Keep only observations that carry a value, as (country, year, value) rows
reported = [obs for obs in data if obs["value"] is not None]
df = pd.DataFrame.from_records(
    [(obs["country"]["value"], obs["date"], obs["value"]) for obs in reported],
    columns=["country", "date", "Net Migration"],
)

# Cast the year column to int
df = df.astype({"date": int})

# African countries to keep (matched exactly against the 'country' column)
africa = [
    "Algeria", "Angola", "Benin", "Botswana", "Burkina Faso", "Burundi",
    "Cabo Verde", "Cameroon", "Central African Republic", "Chad", "Comoros",
    "Congo, Dem. Rep.", "Congo, Rep.", "Cote d'Ivoire", "Djibouti",
    "Egypt, Arab Rep.", "Equatorial Guinea", "Eritrea", "Eswatini", "Ethiopia",
    "Gabon", "Gambia, The", "Ghana", "Guinea", "Guinea-Bissau", "Kenya",
    "Lesotho", "Liberia", "Libya", "Madagascar", "Malawi", "Mali",
    "Mauritania", "Mauritius", "Morocco", "Mozambique", "Namibia", "Niger",
    "Nigeria", "Rwanda", "Sao Tome and Principe", "Senegal", "Seychelles",
    "Sierra Leone", "Somalia", "South Africa", "South Sudan", "Sudan",
    "Tanzania", "Togo", "Tunisia", "Uganda", "Zambia", "Zimbabwe",
]

# Restrict the frame to those countries
in_africa = df["country"].isin(africa)
df = df.loc[in_africa]

# Build the interactive figure
axis_labels = {"date": "Year", "Net Migration": "Number of Migrants", "country": "Country"}
fig = px.line(
    df,
    x="date",
    y="Net Migration",
    color="country",
    title="Net Migration Trends in Africa",
    labels=axis_labels,
)

layout = dict(hovermode="x unified", height=500, plot_bgcolor="white", paper_bgcolor="white")
fig.update_layout(**layout)

# Render the figure
fig.show()

### 3. Line Graph For Asia-Pacific Region

In [4]:
# Net migration over time for the Asia-Pacific region, one line per country

import wbdata
import pandas as pd
import plotly.express as px

# Download every observation of the net-migration indicator
indicator = "SM.POP.NETM"
data = wbdata.get_data(indicator, country="all")

# Keep only observations that carry a value, as (country, year, value) rows
reported = [obs for obs in data if obs["value"] is not None]
df = pd.DataFrame.from_records(
    [(obs["country"]["value"], obs["date"], obs["value"]) for obs in reported],
    columns=["country", "date", "Net Migration"],
)

# Cast the year column to int
df = df.astype({"date": int})

# Asia-Pacific countries to keep.
# NOTE(review): isin() below matches names exactly, so a name spelled
# differently from the fetched data is dropped without any warning --
# confirm these spellings against df["country"].unique().
asia_pacific = [
    "Australia", "Bangladesh", "Brunei Darussalam", "Cambodia", "China",
    "Fiji", "Indonesia", "Japan", "Kiribati", "Korea, Rep.", "Lao PDR",
    "Malaysia", "Maldives", "Mongolia", "Myanmar", "Nepal", "New Zealand",
    "Pakistan", "Papua New Guinea", "Philippines", "Singapore",
    "Solomon Islands", "Sri Lanka", "Thailand", "Timor-Leste", "Tonga",
    "Tuvalu", "Vanuatu", "Vietnam",
]

# Restrict the frame to those countries
in_asia_pacific = df["country"].isin(asia_pacific)
df = df.loc[in_asia_pacific]

# Build the interactive figure
axis_labels = {"date": "Year", "Net Migration": "Number of Migrants", "country": "Country"}
fig = px.line(
    df,
    x="date",
    y="Net Migration",
    color="country",
    title="Net Migration Trends in Asia-Pacific",
    labels=axis_labels,
)

layout = dict(hovermode="x unified", height=500, plot_bgcolor="white", paper_bgcolor="white")
fig.update_layout(**layout)

# Render the figure
fig.show()

### 4. Line Graph For European Region

In [5]:
# Net migration over time for the European region, one line per country

import wbdata
import pandas as pd
import plotly.express as px

# Download every observation of the net-migration indicator
indicator = "SM.POP.NETM"
data = wbdata.get_data(indicator, country="all")

# Keep only observations that carry a value, as (country, year, value) rows
reported = [obs for obs in data if obs["value"] is not None]
df = pd.DataFrame.from_records(
    [(obs["country"]["value"], obs["date"], obs["value"]) for obs in reported],
    columns=["country", "date", "Net Migration"],
)

# Cast the year column to int
df = df.astype({"date": int})

# European countries to keep (matched exactly against the 'country' column)
europe_countries = [
    "Albania", "Andorra", "Austria", "Belarus", "Belgium",
    "Bosnia and Herzegovina", "Bulgaria", "Croatia", "Cyprus", "Czechia",
    "Denmark", "Estonia", "Finland", "France", "Germany", "Greece", "Hungary",
    "Iceland", "Ireland", "Italy", "Kosovo", "Latvia", "Liechtenstein",
    "Lithuania", "Luxembourg", "Malta", "Moldova", "Monaco", "Montenegro",
    "Netherlands", "North Macedonia", "Norway", "Poland", "Portugal",
    "Romania", "San Marino", "Serbia", "Slovak Republic", "Slovenia", "Spain",
    "Sweden", "Switzerland", "Ukraine", "United Kingdom", "Turkiye",
]

# Restrict the frame to those countries
in_europe = df["country"].isin(europe_countries)
df = df.loc[in_europe]

# Build the interactive figure
axis_labels = {"date": "Year", "Net Migration": "Number of Migrants", "country": "Country"}
fig = px.line(
    df,
    x="date",
    y="Net Migration",
    color="country",
    title="Net Migration Trends in Europe",
    labels=axis_labels,
)

layout = dict(hovermode="x unified", height=500, plot_bgcolor="white", paper_bgcolor="white")
fig.update_layout(**layout)

# Render the figure
fig.show()

### 5. Line Graph For Middle Eastern Region

In [6]:
# Net migration over time for the Middle Eastern region, one line per country

import wbdata
import pandas as pd
import plotly.express as px

# Download every observation of the net-migration indicator
indicator = "SM.POP.NETM"
data = wbdata.get_data(indicator, country="all")

# Keep only observations that carry a value, as (country, year, value) rows
reported = [obs for obs in data if obs["value"] is not None]
df = pd.DataFrame.from_records(
    [(obs["country"]["value"], obs["date"], obs["value"]) for obs in reported],
    columns=["country", "date", "Net Migration"],
)

# Cast the year column to int
df = df.astype({"date": int})

# Middle East countries to keep (matched exactly against the 'country' column)
middle_east_countries = [
    "Bahrain",
    "Iran, Islamic Rep.",
    "Iraq",
    "Israel",
    "Jordan",
    "Kuwait",
    "Lebanon",
    "Oman",
    "Qatar",
    "Saudi Arabia",
    "Syrian Arab Republic",
    "United Arab Emirates",
    "West Bank and Gaza",
    "Yemen, Rep.",
]

# Restrict the frame to those countries
in_middle_east = df["country"].isin(middle_east_countries)
df = df.loc[in_middle_east]

# Build the interactive figure
axis_labels = {"date": "Year", "Net Migration": "Number of Migrants", "country": "Country"}
fig = px.line(
    df,
    x="date",
    y="Net Migration",
    color="country",
    title="Net Migration Trends in the Middle East",
    labels=axis_labels,
)

layout = dict(hovermode="x unified", height=500, plot_bgcolor="white", paper_bgcolor="white")
fig.update_layout(**layout)

# Render the figure
fig.show()

### 6. Line Graph For The Americas Region

In [7]:
# Line graph for The Americas region: one line per country.

import wbdata
import pandas as pd
import plotly.express as px

# Fetch Net Migration data (indicator SM.POP.NETM) for every available country
indicator = "SM.POP.NETM"
data = wbdata.get_data(indicator, country="all")

# Convert to DataFrame, skipping observations that have no reported value
df = pd.DataFrame.from_records(
    [(entry['country']['value'], entry['date'], entry['value']) for entry in data if entry['value'] is not None],
    columns=["country", "date", "Net Migration"]
)

# Convert 'date' to integer so years sort numerically
df["date"] = df["date"].astype(int)

# Define country groups.
# Names are matched exactly against the 'country' column, so Venezuela must be
# spelled "Venezuela, RB" (the form used by the all-regions list in this
# notebook); plain "Venezuela" never matched and the country was dropped.
north_america = ["United States", "Canada", "Mexico"]
central_america = ["Belize", "Costa Rica", "El Salvador", "Guatemala", "Honduras", "Nicaragua", "Panama"]
south_america = ["Argentina", "Bolivia", "Brazil", "Chile", "Colombia", "Ecuador", "Guyana",
                 "Paraguay", "Peru", "Suriname", "Uruguay", "Venezuela, RB"]

# Assign regions: invert {sub-region: [countries]} into {country: sub-region};
# countries outside all three groups map to NaN.
americas = {
    "North America": north_america,
    "Central America": central_america,
    "South America": south_america,
}
country_to_region = {country: region for region, countries in americas.items() for country in countries}
df["Region"] = df["country"].map(country_to_region)

# Filter for relevant countries (drops every row without a sub-region)
df = df.dropna(subset=["Region"])

# Create an interactive plot
fig = px.line(df, x="date", y="Net Migration", color="country",
              title="Net Migration Trends in The Americas",
              labels={"date": "Year", "Net Migration": "Number of Migrants", "country": "Country"})

fig.update_layout(hovermode="x unified", height=500, plot_bgcolor="white", paper_bgcolor="white")

# Show the graph
fig.show()