<a href="https://colab.research.google.com/github/fidelis2025/stankelly-website/blob/main/Python%20Code%20to%20Downloads%20the%20correct%20indicators%20directly%20from%20the%20World%20Bank%20API.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

##### Copyright 2025 Google LLC.

In [None]:
pip install pandas_datareader



In [None]:
import pandas as pd
import numpy as np
from pandas_datareader import wb

# -----------------------------
# STEP 1: DOWNLOAD DATA FROM WORLD BANK
# -----------------------------

# Maps each World Bank indicator code to the short column name used in the
# rest of the script.
INDICATORS = {
    "SP.POP.TOTL.MA.IN": "male_pop",
    "SP.POP.TOTL.FE.IN": "female_pop",
}

# Replace "NGA" with another ISO country code if needed
country_code = "NGA"

data = wb.download(
    indicator=list(INDICATORS),
    country=country_code,
    start=1960,
    end=2025,
)

# Give the indicator columns readable names and turn the index levels
# (including "year") into ordinary columns.
data = data.rename(columns=INDICATORS).reset_index()

# Build the analysis frame in a single non-mutating chain. The previous
# version selected columns and then modified that selection in place
# (rename(inplace=True) and column assignment), which made pandas emit
# SettingWithCopyWarning. Every step here returns a fresh DataFrame instead.
df = (
    data[["year", "male_pop", "female_pop"]]
    .rename(columns={"year": "Year"})
    # Convert 'Year' to a numeric dtype so it sorts and regresses numerically.
    .assign(Year=lambda frame: pd.to_numeric(frame["Year"]))
    .sort_values("Year")
    .reset_index(drop=True)
)

# -----------------------------
# STEP 2: COMPUTE GROWTH RATES AND GENDER RATIO
# -----------------------------

# Year-over-year fractional change for each gender; the first row is NaN
# because pct_change has no previous observation to compare against.
for sex in ("male", "female"):
    df[f"{sex}_growth"] = df[f"{sex}_pop"].pct_change()

# Male-to-female population ratio for every year.
df["gender_ratio_MF"] = df["male_pop"].div(df["female_pop"])

# -----------------------------
# STEP 3: JOINT TREND REGRESSION WITH GENDER FACTOR
# -----------------------------

import statsmodels.formula.api as smf

# Reshape to long format: one (Year, Population, Gender) row per observation,
# with all male rows first followed by all female rows.
long_df = pd.concat(
    [
        pd.DataFrame(
            {
                "Year": df["Year"],
                "Population": df[f"{sex}_pop"],
                "Gender": sex,
            }
        )
        for sex in ("male", "female")
    ],
    ignore_index=True,
)

# Dummy variable: 1 for male rows, 0 for female rows.
long_df["Gender_binary"] = long_df["Gender"].eq("male").astype(int)

# Linear trend in Year with a gender-specific intercept shift
# (Gender_binary) and slope shift (Year:Gender_binary).
trend_formula = "Population ~ Year + Gender_binary + Year:Gender_binary"
trend_model = smf.ols(trend_formula, data=long_df).fit()

print("\n=== TREND REGRESSION RESULTS ===\n")
print(trend_model.summary())

# -----------------------------
# STEP 4: LOGISTIC NON LINEAR MODEL PER GENDER
# -----------------------------

from scipy.optimize import curve_fit
from scipy.special import expit


def logistic(t, K, r, t0):
    """Evaluate the logistic growth curve ``K / (1 + exp(-r * (t - t0)))``.

    Args:
        t: Time value(s); a scalar or a NumPy array (e.g. calendar years).
        K: Upper asymptote of the curve (carrying capacity).
        r: Growth-rate parameter controlling how steep the curve is.
        t0: Midpoint, i.e. the time at which the curve equals ``K / 2``.

    Returns:
        The curve value(s) at ``t``, with the same shape as ``t``.
    """
    # expit(x) == 1 / (1 + exp(-x)), but it is evaluated without overflowing.
    # The literal np.exp form emits "overflow encountered in exp" warnings
    # whenever -r * (t - t0) is large, which happens while an optimizer is
    # still probing parameters far from the solution.
    return K * expit(r * (t - t0))

def _logistic_start(t, y):
    """Return a data-driven starting guess ``[K, r, t0]`` for the logistic fit.

    Without an explicit ``p0`` curve_fit starts every parameter at 1, which
    is many orders of magnitude away from populations and calendar years and
    leaves the optimizer on a flat region of the curve.

    Args:
        t: 1-D float array of observation times, in ascending order.
        y: 1-D float array of observed values aligned with ``t``.

    Returns:
        ``[K0, r0, t00]``: K0 is twice the last observation, which places the
        curve midpoint at the last observation time (t00); r0 is twice the
        mean observed relative growth, because a logistic curve grows at
        relative rate r/2 at its midpoint.
    """
    rel_growth = np.diff(y) / y[:-1]
    r0 = 2.0 * float(np.nanmean(rel_growth)) if rel_growth.size else np.nan
    if not np.isfinite(r0) or r0 <= 0:
        # Fall back to a mild positive rate when the data give no usable one.
        r0 = 0.03
    return [2.0 * float(y[-1]), r0, float(t[-1])]


# Plain float arrays keep the optimizer away from integer arithmetic.
years = df["Year"].to_numpy(dtype=float)
male_obs = df["male_pop"].to_numpy(dtype=float)
female_obs = df["female_pop"].to_numpy(dtype=float)

# Male fit
popt_male, pcov_male = curve_fit(
    logistic,
    years,
    male_obs,
    p0=_logistic_start(years, male_obs),
    maxfev=20000,
)

# Female fit
popt_female, pcov_female = curve_fit(
    logistic,
    years,
    female_obs,
    p0=_logistic_start(years, female_obs),
    maxfev=20000,
)

# Standard errors of the fitted parameters: square roots of the diagonal of
# each covariance matrix (these are not confidence intervals).
se_male = np.sqrt(np.diag(pcov_male))
se_female = np.sqrt(np.diag(pcov_female))

# Fitted curve values and residuals (observed minus fitted) for every year.
df["male_logistic_fit"] = logistic(years, *popt_male)
df["female_logistic_fit"] = logistic(years, *popt_female)

df["male_resid"] = df["male_pop"] - df["male_logistic_fit"]
df["female_resid"] = df["female_pop"] - df["female_logistic_fit"]

print("\n=== LOGISTIC PARAMETERS (Male) ===")
print("K, r, t0 =", popt_male)
print("Std errors =", se_male)

print("\n=== LOGISTIC PARAMETERS (Female) ===")
print("K, r, t0 =", popt_female)
print("Std errors =", se_female)

# -----------------------------
# STEP 5: ARIMA FORECASTING
# -----------------------------

from statsmodels.tsa.arima.model import ARIMA

def _fit_arima(series):
    """Fit an ARIMA model of order (1, 1, 1) to *series* and return the result."""
    return ARIMA(series, order=(1, 1, 1)).fit()


male_series = df["male_pop"]
female_series = df["female_pop"]

# Fit both models first, then forecast 12 steps ahead from each.
male_model = _fit_arima(male_series)
female_model = _fit_arima(female_series)

male_forecast, female_forecast = (
    fitted.get_forecast(steps=12) for fitted in (male_model, female_model)
)

# Interval bounds for each forecast, as returned by conf_int().
male_ci, female_ci = male_forecast.conf_int(), female_forecast.conf_int()

print("\n=== ARIMA FORECAST (Male) ===\n")
print(male_forecast.summary_frame())

print("\n=== ARIMA FORECAST (Female) ===\n")
print(female_forecast.summary_frame())

  data = wb.download(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={"year": "Year"}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["Year"] = pd.to_numeric(df["Year"])
  popt_male, pcov_male = curve_fit(
  popt_female, pcov_female = curve_fit(



=== TREND REGRESSION RESULTS ===

                            OLS Regression Results                            
Dep. Variable:             Population   R-squared:                       0.957
Model:                            OLS   Adj. R-squared:                  0.956
Method:                 Least Squares   F-statistic:                     928.1
Date:                Mon, 29 Dec 2025   Prob (F-statistic):           1.10e-85
Time:                        02:13:33   Log-Likelihood:                -2209.9
No. Observations:                 130   AIC:                             4428.
Df Residuals:                     126   BIC:                             4439.
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                         coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------
I

  warn('Non-stationary starting autoregressive parameters'
  warn('Non-stationary starting autoregressive parameters'



=== ARIMA FORECAST (Male) ===

male_pop          mean        mean_se  mean_ci_lower  mean_ci_upper
65        1.191612e+08   38491.330717   1.190857e+08   1.192366e+08
66        1.206485e+08   54773.981279   1.205412e+08   1.207559e+08
67        1.221359e+08   67496.819588   1.220036e+08   1.222682e+08
68        1.236233e+08   78412.235193   1.234696e+08   1.237770e+08
69        1.251106e+08   88193.900843   1.249378e+08   1.252835e+08
70        1.265980e+08   97184.764279   1.264075e+08   1.267885e+08
71        1.280853e+08  105587.038097   1.278784e+08   1.282923e+08
72        1.295727e+08  113531.484461   1.293502e+08   1.297952e+08
73        1.310600e+08  121108.239430   1.308227e+08   1.312974e+08
74        1.325474e+08  128382.424401   1.322957e+08   1.327990e+08
75        1.340347e+08  135402.817090   1.337693e+08   1.343001e+08
76        1.355220e+08  142207.013892   1.352433e+08   1.358007e+08

=== ARIMA FORECAST (Female) ===

female_pop          mean       mean_se  mean_ci_lo