# Task 4 · Forecasting Access and Usage
Selam Analytics · Ethiopia Financial Inclusion Forecasting Challenge

## Notebook goals
- Define baseline trend models for account ownership and digital payment usage.
- Layer in event-driven adjustments and scenario logic for 2025-2027 forecasts.
- Quantify uncertainty via confidence intervals and scenario ranges.
- Summarize projected trajectories and key drivers for Ethiopian financial inclusion.

## 1. Environment setup

In [22]:
from __future__ import annotations

from pathlib import Path
import sys
import math
from typing import List, Tuple

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import statsmodels.api as sm
import requests

pd.set_option("display.max_columns", 50)
pd.set_option("display.max_rows", 250)
pd.set_option("display.float_format", lambda x: f"{x:,.2f}")

sns.set_theme(style="whitegrid")

NOTEBOOK_ROOT = Path.cwd().resolve()
PROJECT_ROOT = NOTEBOOK_ROOT.parent
RAW_DATA_DIR = PROJECT_ROOT / "data" / "raw"

if str(PROJECT_ROOT) not in sys.path:
    sys.path.append(str(PROJECT_ROOT))

from src.event_effects import simulate_indicator_series

NOTEBOOK_ROOT, PROJECT_ROOT, RAW_DATA_DIR

(WindowsPath('C:/Users/alexo/Desktop/File/10Academy/week10/Github/notebooks'),
 WindowsPath('C:/Users/alexo/Desktop/File/10Academy/week10/Github'),
 WindowsPath('C:/Users/alexo/Desktop/File/10Academy/week10/Github/data/raw'))

## 2. Load unified records, events, and impact links

In [23]:
records_path = RAW_DATA_DIR / "ethiopia_fi_unified_data.csv"
impact_links_path = RAW_DATA_DIR / "impact_links.csv"
reference_codes_path = RAW_DATA_DIR / "reference_codes.xlsx"

records = pd.read_csv(records_path)
records["observation_date"] = pd.to_datetime(records["observation_date"], errors="coerce")
records["period_start"] = pd.to_datetime(records["period_start"], errors="coerce")
records["period_end"] = pd.to_datetime(records["period_end"], errors="coerce")
records["year"] = records["period_end"].fillna(records["observation_date"]).dt.year
records["value"] = records["value_numeric"]

observations = records.loc[records["record_type"] == "observation"].copy()
events = records.loc[records["record_type"] == "event"].copy()
targets = records.loc[records["record_type"] == "target"].copy()

impact_links = pd.read_csv(impact_links_path)
reference_codes = pd.read_excel(reference_codes_path, sheet_name=None)

records.shape, observations.shape, events.shape, impact_links.shape

((47, 36), (33, 36), (11, 36), (15, 35))

## 3. Prepare indicator metadata and impact lookup tables

In [25]:
indicator_meta = (
    observations[["indicator_code", "indicator", "pillar"]]
    .drop_duplicates()
    .rename(columns={"indicator": "indicator_name", "pillar": "indicator_theme"})
)

indicator_dim_raw = reference_codes.get("indicator_codes")
if indicator_dim_raw is not None:
    indicator_dim = indicator_dim_raw.copy()
    indicator_dim.columns = [str(col).strip().lower() for col in indicator_dim.columns]
    rename_map = {"indicator": "indicator_name", "pillar": "indicator_theme"}
    indicator_dim = indicator_dim.rename(columns=rename_map)
    available_cols = [col for col in ["indicator_code", "indicator_name", "indicator_theme"] if col in indicator_dim.columns]
    if available_cols:
        indicator_meta = indicator_meta.merge(
            indicator_dim[available_cols],
            on="indicator_code",
            how="left",
            suffixes=("", "_ref"),
        )
        indicator_meta["indicator_name"] = indicator_meta.get("indicator_name_ref", indicator_meta["indicator_name"]).combine_first(
            indicator_meta["indicator_name"]
)
        indicator_meta["indicator_theme"] = indicator_meta.get("indicator_theme_ref", indicator_meta["indicator_theme"]).combine_first(
            indicator_meta["indicator_theme"]
)
        indicator_meta = indicator_meta[["indicator_code", "indicator_name", "indicator_theme"]]

indicator_enriched = observations.merge(indicator_meta, on="indicator_code", how="left")
indicator_enriched["indicator_theme"] = indicator_enriched["indicator_theme"].fillna(indicator_enriched["pillar"])

impact_links_prepped = (
    impact_links.rename(columns={"pillar": "impact_pillar", "indicator": "link_label"})
    .assign(
        impact_estimate=lambda df: pd.to_numeric(df["impact_estimate"], errors="coerce"),
        lag_months=lambda df: pd.to_numeric(df["lag_months"], errors="coerce").fillna(0).astype(int),
        impact_direction=lambda df: df["impact_direction"].str.lower(),
        impact_magnitude=lambda df: df["impact_magnitude"].str.lower(),
        confidence=lambda df: df["confidence"].str.lower(),
    )
)

magnitude_defaults = {"low": 2.5, "medium": 5.0, "high": 10.0}
direction_sign = impact_links_prepped["impact_direction"].map({"increase": 1, "decrease": -1, "mixed": 0})

impact_links_prepped["impact_numeric"] = impact_links_prepped["impact_estimate"].where(
    impact_links_prepped["impact_estimate"].notna(),
    impact_links_prepped["impact_magnitude"].map(magnitude_defaults),
)
impact_links_prepped["impact_numeric"] = impact_links_prepped["impact_numeric"].fillna(0).astype(float)
impact_links_prepped["impact_numeric"] = impact_links_prepped["impact_numeric"] * direction_sign.fillna(1)

event_lookup = (
    events.rename(
        columns={
            "record_id": "event_id",
            "indicator": "event_name",
            "pillar": "event_pillar",
            "category": "event_category",
            "observation_date": "event_date",
        }
    )[
        [
            "event_id",
            "event_name",
            "event_pillar",
            "event_category",
            "event_date",
            "source_name",
            "source_url",
            "notes",
        ]
    ]
)

target_lookup = indicator_meta.rename(
    columns={
        "indicator_code": "related_indicator",
        "indicator_name": "target_indicator_name",
        "indicator_theme": "target_theme",
    }
)

impact_enriched = (
    impact_links_prepped.merge(event_lookup, left_on="parent_id", right_on="event_id", how="left")
    .merge(target_lookup, on="related_indicator", how="left")
    .assign(
        target_indicator_name=lambda df: df["target_indicator_name"].fillna(df["link_label"]),
        target_theme=lambda df: df["target_theme"].fillna(df["impact_pillar"]),
        event_year=lambda df: pd.to_datetime(df["event_date"]).dt.year,
    )
)

telebirr_rows = impact_enriched[impact_enriched["event_name"].str.contains("Telebirr", case=False, na=False)]

if not telebirr_rows.empty:
    telebirr_mask = telebirr_rows["related_indicator"].eq("ACC_MM_ACCOUNT")
    if telebirr_mask.any():
        impact_enriched.loc[telebirr_rows.index[telebirr_mask], "impact_numeric"] = (
            np.sign(telebirr_rows.loc[telebirr_mask, "impact_numeric"]) * 3.0
        )

mpesa_mask = (
    impact_enriched["event_name"].str.contains("M-Pesa Ethiopia", case=False, na=False)
    & impact_enriched["related_indicator"].eq("ACC_MM_ACCOUNT")
)
if mpesa_mask.any():
    impact_enriched.loc[mpesa_mask, "impact_numeric"] = (
        np.sign(impact_enriched.loc[mpesa_mask, "impact_numeric"]) * 0.6
    )

impact_enriched.head(3)

Unnamed: 0,record_id,parent_id,record_type,category,impact_pillar,link_label,indicator_code,indicator_direction,value_numeric,value_text,value_type,unit,observation_date,period_start,period_end,fiscal_year,gender,location,region,source_name_x,source_type,source_url_x,confidence,related_indicator,relationship_type,impact_direction,impact_magnitude,impact_estimate,lag_months,evidence_basis,comparable_country,collected_by,collection_date,original_text,notes_x,impact_numeric,event_id,event_name,event_pillar,event_category,event_date,source_name_y,source_url_y,notes_y,target_indicator_name,target_theme,event_year
0,IMP_0001,EVT_0001,impact_link,,ACCESS,Telebirr effect on Account Ownership,,,15.0,,percentage,%,2021-05-17,,,,all,national,,,,,medium,ACC_OWNERSHIP,direct,increase,high,15.0,12,literature,Kenya,Example_Trainee,2025-01-20,,Kenya M-Pesa showed +20pp over 5 years,15.0,EVT_0001,Telebirr Launch,,product_launch,2021-05-17,Ethio Telecom,https://www.ethiotelecom.et/,,Account Ownership Rate,ACCESS,2021
1,IMP_0002,EVT_0001,impact_link,,USAGE,Telebirr effect on Telebirr Users,,,,,count,users,2021-05-17,,,,all,national,,,,,high,USG_TELEBIRR_USERS,direct,increase,high,,3,empirical,,Example_Trainee,2025-01-20,,Direct subscriber acquisition,10.0,EVT_0001,Telebirr Launch,,product_launch,2021-05-17,Ethio Telecom,https://www.ethiotelecom.et/,,Telebirr Registered Users,USAGE,2021
2,IMP_0003,EVT_0001,impact_link,,USAGE,Telebirr effect on P2P Transactions,,,25.0,,percentage,%,2021-05-17,,,,all,national,,,,,medium,USG_P2P_COUNT,direct,increase,high,25.0,6,empirical,,Example_Trainee,2025-01-20,,New digital payment channel,25.0,EVT_0001,Telebirr Launch,,product_launch,2021-05-17,Ethio Telecom,https://www.ethiotelecom.et/,,P2P Transaction Count,USAGE,2021


## 4. Account ownership baseline modeling

In [27]:
acc_data = (
    observations.loc[observations["indicator_code"] == "ACC_OWNERSHIP", [
        "observation_date",
        "year",
        "value_numeric",
        "gender",
        "source_name",
    ]]
    .dropna(subset=["year", "value_numeric"])
    .sort_values("year")
    .drop_duplicates(subset=["year"], keep="last")
    .reset_index(drop=True)
)
acc_data

Unnamed: 0,observation_date,year,value_numeric,gender,source_name
0,2014-12-31,2014,22.0,all,Global Findex 2014
1,2017-12-31,2017,35.0,all,Global Findex 2017
2,2021-12-31,2021,36.0,female,Global Findex 2021
3,2022-12-31,2022,46.48,all,World Bank Global Findex / WDI
4,2024-11-29,2024,49.0,all,Global Findex 2024


In [28]:
X_acc = sm.add_constant(acc_data["year"])
y_acc = acc_data["value_numeric"]
acc_model = sm.OLS(y_acc, X_acc).fit()
acc_model.summary()

  warn("omni_normtest is not valid with less than 8 observations; %i "


0,1,2,3
Dep. Variable:,value_numeric,R-squared:,0.887
Model:,OLS,Adj. R-squared:,0.849
Method:,Least Squares,F-statistic:,23.53
Date:,"Fri, 30 Jan 2026",Prob (F-statistic):,0.0167
Time:,01:17:47,Log-Likelihood:,-12.958
No. Observations:,5,AIC:,29.92
Df Residuals:,3,BIC:,29.14
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-5022.2074,1043.183,-4.814,0.017,-8342.080,-1702.334
year,2.5054,0.517,4.850,0.017,0.862,4.149

0,1,2,3
Omnibus:,,Durbin-Watson:,3.473
Prob(Omnibus):,,Jarque-Bera (JB):,0.409
Skew:,-0.407,Prob(JB):,0.815
Kurtosis:,1.859,Cond. No.,1130000.0


### 4.1 Baseline forecast (trend only)

In [29]:
forecast_years = pd.Index([2025, 2026, 2027], name="year")
future_df = pd.DataFrame({"year": forecast_years})
future_X = sm.add_constant(future_df["year"])
acc_pred = acc_model.get_prediction(future_X)
acc_pred_summary = acc_pred.summary_frame(alpha=0.05)

acc_baseline_forecast = future_df.assign(
    baseline=acc_pred_summary["mean"],
    ci_lower=acc_pred_summary["mean_ci_lower"],
    ci_upper=acc_pred_summary["mean_ci_upper"],
)
acc_baseline_forecast

Unnamed: 0,year,baseline,ci_lower,ci_upper
0,2025,51.23,40.55,61.9
1,2026,53.73,41.65,65.81
2,2027,56.24,42.7,69.77


### 4.2 Event-adjusted scenarios

In [30]:
acc_event_series = simulate_indicator_series(
    impact_enriched,
    indicator_code="ACC_OWNERSHIP",
    start="2016-01-01",
    end="2027-12-01",
    ramp_months=6,
    persistence_months=24,
    decay_months=12,
)
acc_event_yearly = (
    acc_event_series.assign(year=lambda df: df["date"].dt.year)
    .groupby("year")["modeled_effect_pp"]
    .sum()
    .reindex(forecast_years, fill_value=0.0)
)

acc_forecast_enriched = acc_baseline_forecast.merge(
    acc_event_yearly.rename("event_effect"),
    left_on="year",
    right_index=True,
    how="left",
)
acc_forecast_enriched["event_effect"] = acc_forecast_enriched["event_effect"].fillna(0.0)

last_acc_year = int(acc_data["year"].max())
last_acc_value = float(acc_data.loc[acc_data["year"] == last_acc_year, "value_numeric"].iloc[0])

def scenario_value(row: pd.Series, growth_mult: float, event_mult: float) -> float:
    incremental = row["baseline"] - last_acc_value
    return last_acc_value + incremental * growth_mult + row["event_effect"] * event_mult

acc_forecast_enriched = acc_forecast_enriched.assign(
    scenario_baseline=lambda df: df["baseline"],
    scenario_event=lambda df: df.apply(scenario_value, axis=1, args=(1.0, 1.0)),
    scenario_optimistic=lambda df: df.apply(scenario_value, axis=1, args=(1.2, 1.5)),
    scenario_pessimistic=lambda df: df.apply(scenario_value, axis=1, args=(0.7, 0.5)),
    event_ci_lower=lambda df: df["ci_lower"] + df["event_effect"],
    event_ci_upper=lambda df: df["ci_upper"] + df["event_effect"],
)
acc_forecast_enriched

Unnamed: 0,year,baseline,ci_lower,ci_upper,event_effect,scenario_baseline,scenario_event,scenario_optimistic,scenario_pessimistic,event_ci_lower,event_ci_upper
0,2025,51.23,40.55,61.9,68.75,51.23,119.98,154.8,84.93,109.3,130.65
1,2026,53.73,41.65,65.81,85.0,53.73,138.73,182.18,94.81,126.65,150.81
2,2027,56.24,42.7,69.77,120.0,56.24,176.24,237.68,114.07,162.7,189.77


### 4.3 Account ownership scenario table

In [31]:
acc_scenario_records: List[dict] = []
for _, row in acc_forecast_enriched.iterrows():
    for name, value, lower, upper in [
        ("baseline", row["scenario_baseline"], row["ci_lower"], row["ci_upper"]),
        ("event_augmented", row["scenario_event"], row["event_ci_lower"], row["event_ci_upper"]),
        ("optimistic", row["scenario_optimistic"], math.nan, math.nan),
        ("pessimistic", row["scenario_pessimistic"], math.nan, math.nan),
    ]:
        acc_scenario_records.append({
            "year": int(row["year"]),
            "indicator": "ACC_OWNERSHIP",
            "scenario": name,
            "forecast_pp": value,
            "ci_lower": lower,
            "ci_upper": upper,
        })
acc_scenarios_long = pd.DataFrame(acc_scenario_records)
acc_scenarios_long

Unnamed: 0,year,indicator,scenario,forecast_pp,ci_lower,ci_upper
0,2025,ACC_OWNERSHIP,baseline,51.23,40.55,61.9
1,2025,ACC_OWNERSHIP,event_augmented,119.98,109.3,130.65
2,2025,ACC_OWNERSHIP,optimistic,154.8,,
3,2025,ACC_OWNERSHIP,pessimistic,84.93,,
4,2026,ACC_OWNERSHIP,baseline,53.73,41.65,65.81
5,2026,ACC_OWNERSHIP,event_augmented,138.73,126.65,150.81
6,2026,ACC_OWNERSHIP,optimistic,182.18,,
7,2026,ACC_OWNERSHIP,pessimistic,94.81,,
8,2027,ACC_OWNERSHIP,baseline,56.24,42.7,69.77
9,2027,ACC_OWNERSHIP,event_augmented,176.24,162.7,189.77


### 4.4 Account ownership scenario plot

In [32]:
acc_history = acc_data[["year", "value_numeric"]]
fig_acc = go.Figure()
fig_acc.add_trace(
    go.Scatter(
        x=acc_history["year"],
        y=acc_history["value_numeric"],
        mode="lines+markers",
        name="Observed",
        line=dict(color="#264653"),
    )
)

scenario_styles = {
    "baseline": dict(color="#2a9d8f", dash="solid"),
    "event_augmented": dict(color="#e76f51", dash="dash"),
    "optimistic": dict(color="#f4a261", dash="dot"),
    "pessimistic": dict(color="#6d597a", dash="dashdot"),
}
for scenario_name, style in scenario_styles.items():
    subset = (
        acc_scenarios_long.loc[acc_scenarios_long["scenario"] == scenario_name]
        .sort_values("year")
    )
    fig_acc.add_trace(
        go.Scatter(
            x=subset["year"],
            y=subset["forecast_pp"],
            mode="lines+markers",
            name=f"{scenario_name.replace('_', ' ').title()}" ,
            line=style,
        )
    )

fig_acc.add_trace(
    go.Scatter(
        x=acc_forecast_enriched["year"],
        y=acc_forecast_enriched["ci_upper"],
        mode="lines",
        line=dict(color="rgba(42, 157, 143, 0.2)"),
        name="Baseline 95% CI",
        showlegend=False,
    )
)
fig_acc.add_trace(
    go.Scatter(
        x=acc_forecast_enriched["year"],
        y=acc_forecast_enriched["ci_lower"],
        mode="lines",
        line=dict(color="rgba(42, 157, 143, 0.2)"),
        fill="tonexty",
        fillcolor="rgba(42, 157, 143, 0.1)",
        name="Baseline 95% CI",
    )
)

fig_acc.update_layout(
    title="Account ownership forecasts (2025-2027)",
    xaxis_title="Year",
    yaxis_title="% adults with an account",
    hovermode="x unified",
    template="plotly_white",
)
fig_acc

## 5. Digital payment usage modeling

In [33]:
WB_BASE_URL = "https://api.worldbank.org/v2/country/{country}/indicator/{indicator}?format=json&per_page=500&date={date_range}"
USAGE_INDICATOR_CANDIDATES: List[Tuple[str, str]] = [
    ("SP.PAY.DIGT.ZS", "Adults who made or received a digital payment"),
    ("SP.PAY.SNDW.ZS", "Adults sent a digital payment"),
    ("SP.PAY.RECV.ZS", "Adults received a digital payment"),
]

def fetch_usage_indicator(country: str = "ETH", date_range: str = "2011:2024") -> pd.DataFrame:
    for indicator_code, indicator_label in USAGE_INDICATOR_CANDIDATES:
        url = WB_BASE_URL.format(country=country, indicator=indicator_code, date_range=date_range)
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        payload = response.json()
        if len(payload) < 2 or payload[1] is None:
            continue
        records = [
            {
                "indicator_code": indicator_code,
                "indicator_name": indicator_label,
                "year": int(entry["date"]),
                "value_numeric": entry["value"],
            }
            for entry in payload[1]
            if entry["value"] is not None
        ]
        if records:
            usage_df = pd.DataFrame(records)
            usage_df = usage_df.sort_values("year").reset_index(drop=True)
            return usage_df
    return pd.DataFrame()

usage_series = fetch_usage_indicator()
usage_series.tail()

In [34]:
if usage_series.empty:
    approx_usage = observations.loc[observations["indicator_code"] == "USG_DIGITAL_PAYMENT", ["year", "value_numeric"]]
    if approx_usage.empty:
        approx_usage = pd.DataFrame(
            [
                {"year": 2024, "value_numeric": 35.0, "indicator_code": "USG_DIGITAL_PAYMENT", "indicator_name": "Adults made or received a digital payment"}
            ]
        )
    usage_series = approx_usage.sort_values("year").reset_index(drop=True)

usage_series

Unnamed: 0,year,value_numeric,indicator_code,indicator_name
0,2024,35.0,USG_DIGITAL_PAYMENT,Adults made or received a digital payment


In [35]:
usage_data = (
    usage_series.dropna(subset=["value_numeric"])
    .drop_duplicates(subset=["year"], keep="last")
    .assign(indicator_code=lambda df: df.get("indicator_code", "USG_DIGITAL_PAYMENT"))
    .assign(indicator_name=lambda df: df.get("indicator_name", "Adults made or received a digital payment"))
    .reset_index(drop=True)
)
usage_data

Unnamed: 0,year,value_numeric,indicator_code,indicator_name
0,2024,35.0,USG_DIGITAL_PAYMENT,Adults made or received a digital payment


### 5.1 Baseline usage trend

In [36]:
if len(usage_data) >= 3:
    X_usage = sm.add_constant(usage_data["year"])
    y_usage = usage_data["value_numeric"]
    usage_model = sm.OLS(y_usage, X_usage).fit()
    usage_fit_summary = usage_model.summary()
else:
    usage_model = None
    usage_fit_summary = "Insufficient data points for OLS regression; reverting to linear interpolation."
usage_fit_summary

'Insufficient data points for OLS regression; reverting to linear interpolation.'

### 5.2 Usage baseline forecast

In [37]:
if usage_model is not None:
    future_X_usage = sm.add_constant(future_df["year"])
    usage_pred = usage_model.get_prediction(future_X_usage)
    usage_pred_summary = usage_pred.summary_frame(alpha=0.05)
    usage_baseline_forecast = future_df.assign(
        baseline=usage_pred_summary["mean"],
        ci_lower=usage_pred_summary["mean_ci_lower"],
        ci_upper=usage_pred_summary["mean_ci_upper"],
    )
else:
    if len(usage_data) >= 2:
        slope = (usage_data.iloc[-1]["value_numeric"] - usage_data.iloc[-2]["value_numeric"]) / (usage_data.iloc[-1]["year"] - usage_data.iloc[-2]["year"])
    else:
        slope = 0.0
    intercept = usage_data.iloc[-1]["value_numeric"] - slope * usage_data.iloc[-1]["year"]
    usage_baseline_forecast = future_df.assign(
        baseline=lambda df: intercept + slope * df["year"],
        ci_lower=np.nan,
        ci_upper=np.nan,
    )
usage_baseline_forecast

Unnamed: 0,year,baseline,ci_lower,ci_upper
0,2025,35.0,,
1,2026,35.0,,
2,2027,35.0,,


### 5.3 Usage event adjustments

In [38]:
mm_event_series = simulate_indicator_series(
    impact_enriched,
    indicator_code="ACC_MM_ACCOUNT",
    start="2018-01-01",
    end="2027-12-01",
    ramp_months=6,
    persistence_months=18,
    decay_months=12,
)
mm_event_yearly = (
    mm_event_series.assign(year=lambda df: df["date"].dt.year)
    .groupby("year")["modeled_effect_pp"]
    .sum()
    .reindex(forecast_years, fill_value=0.0)
)

p2p_event_series = simulate_indicator_series(
    impact_enriched,
    indicator_code="USG_P2P_COUNT",
    start="2018-01-01",
    end="2027-12-01",
    ramp_months=6,
    persistence_months=18,
    decay_months=12,
)
p2p_event_yearly = (
    p2p_event_series.assign(year=lambda df: df["date"].dt.year)
    .groupby("year")["modeled_effect_pp"]
    .sum()
    .reindex(forecast_years, fill_value=0.0)
)

latest_usage_value = float(usage_data.iloc[-1]["value_numeric"])
mobile_latest_row = observations.loc[observations["indicator_code"] == "ACC_MM_ACCOUNT"].sort_values("year").tail(1)
latest_mobile_value = float(mobile_latest_row["value_numeric"].iloc[0]) if not mobile_latest_row.empty else 1.0
usage_per_account_ratio = min(latest_usage_value / max(latest_mobile_value, 1e-6), 4.0)
p2p_to_share_factor = 0.02

usage_event_effect = (
    mm_event_yearly * usage_per_account_ratio
    + p2p_event_yearly * p2p_to_share_factor
)

usage_forecast_enriched = usage_baseline_forecast.merge(
    usage_event_effect.rename("event_effect"),
    left_on="year",
    right_index=True,
    how="left",
)
usage_forecast_enriched["event_effect"] = usage_forecast_enriched["event_effect"].fillna(0.0)

last_usage_year = int(usage_data["year"].max())
last_usage_value = float(usage_data.loc[usage_data["year"] == last_usage_year, "value_numeric"].iloc[0])

usage_forecast_enriched = usage_forecast_enriched.assign(
    scenario_baseline=lambda df: df["baseline"],
    scenario_event=lambda df: df["baseline"] + df["event_effect"],
    scenario_optimistic=lambda df: last_usage_value + (df["baseline"] - last_usage_value) * 1.25 + df["event_effect"] * 1.6,
    scenario_pessimistic=lambda df: last_usage_value + (df["baseline"] - last_usage_value) * 0.65 + df["event_effect"] * 0.4,
    event_ci_lower=lambda df: df["ci_lower"] + df["event_effect"],
    event_ci_upper=lambda df: df["ci_upper"] + df["event_effect"],
)
usage_forecast_enriched

Unnamed: 0,year,baseline,ci_lower,ci_upper,event_effect,scenario_baseline,scenario_event,scenario_optimistic,scenario_pessimistic,event_ci_lower,event_ci_upper
0,2025,35.0,,,26.67,35.0,61.67,77.67,45.67,,
1,2026,35.0,,,19.23,35.0,54.23,65.77,42.69,,
2,2027,35.0,,,6.19,35.0,41.19,44.9,37.47,,


### 5.4 Usage scenario table

In [39]:
usage_scenario_records: List[dict] = []
for _, row in usage_forecast_enriched.iterrows():
    for name, value, lower, upper in [
        ("baseline", row["scenario_baseline"], row["ci_lower"], row["ci_upper"]),
        ("event_augmented", row["scenario_event"], row["event_ci_lower"], row["event_ci_upper"]),
        ("optimistic", row["scenario_optimistic"], math.nan, math.nan),
        ("pessimistic", row["scenario_pessimistic"], math.nan, math.nan),
    ]:
        usage_scenario_records.append({
            "year": int(row["year"]),
            "indicator": "USG_DIGITAL_PAYMENT",
            "scenario": name,
            "forecast_pp": value,
            "ci_lower": lower,
            "ci_upper": upper,
        })
usage_scenarios_long = pd.DataFrame(usage_scenario_records)
usage_scenarios_long

Unnamed: 0,year,indicator,scenario,forecast_pp,ci_lower,ci_upper
0,2025,USG_DIGITAL_PAYMENT,baseline,35.0,,
1,2025,USG_DIGITAL_PAYMENT,event_augmented,61.67,,
2,2025,USG_DIGITAL_PAYMENT,optimistic,77.67,,
3,2025,USG_DIGITAL_PAYMENT,pessimistic,45.67,,
4,2026,USG_DIGITAL_PAYMENT,baseline,35.0,,
5,2026,USG_DIGITAL_PAYMENT,event_augmented,54.23,,
6,2026,USG_DIGITAL_PAYMENT,optimistic,65.77,,
7,2026,USG_DIGITAL_PAYMENT,pessimistic,42.69,,
8,2027,USG_DIGITAL_PAYMENT,baseline,35.0,,
9,2027,USG_DIGITAL_PAYMENT,event_augmented,41.19,,


### 5.5 Usage scenario plot

In [40]:
usage_history = usage_data[["year", "value_numeric"]]
fig_usage = go.Figure()
fig_usage.add_trace(
    go.Scatter(
        x=usage_history["year"],
        y=usage_history["value_numeric"],
        mode="lines+markers",
        name="Observed",
        line=dict(color="#1d3557"),
    )
)

for scenario_name, style in scenario_styles.items():
    subset = (
        usage_scenarios_long.loc[usage_scenarios_long["scenario"] == scenario_name]
        .sort_values("year")
    )
    fig_usage.add_trace(
        go.Scatter(
            x=subset["year"],
            y=subset["forecast_pp"],
            mode="lines+markers",
            name=f"Usage {scenario_name.replace('_', ' ').title()}" ,
            line=style,
        )
    )

fig_usage.add_trace(
    go.Scatter(
        x=usage_forecast_enriched["year"],
        y=usage_forecast_enriched["ci_upper"],
        mode="lines",
        line=dict(color="rgba(29, 53, 87, 0.2)"),
        name="Usage 95% CI",
        showlegend=False,
    )
)
fig_usage.add_trace(
    go.Scatter(
        x=usage_forecast_enriched["year"],
        y=usage_forecast_enriched["ci_lower"],
        mode="lines",
        line=dict(color="rgba(29, 53, 87, 0.2)"),
        fill="tonexty",
        fillcolor="rgba(29, 53, 87, 0.1)",
        name="Usage 95% CI",
    )
)

fig_usage.update_layout(
    title="Digital payment usage forecasts (2025-2027)",
    xaxis_title="Year",
    yaxis_title="% adults using digital payments",
    hovermode="x unified",
    template="plotly_white",
)
fig_usage

## 6. Combined forecast summary

In [41]:
forecast_table = pd.concat([acc_scenarios_long, usage_scenarios_long], ignore_index=True)
forecast_table_sorted = forecast_table.sort_values(["indicator", "year", "scenario"]).reset_index(drop=True)
forecast_table_sorted

Unnamed: 0,year,indicator,scenario,forecast_pp,ci_lower,ci_upper
0,2025,ACC_OWNERSHIP,baseline,51.23,40.55,61.9
1,2025,ACC_OWNERSHIP,event_augmented,119.98,109.3,130.65
2,2025,ACC_OWNERSHIP,optimistic,154.8,,
3,2025,ACC_OWNERSHIP,pessimistic,84.93,,
4,2026,ACC_OWNERSHIP,baseline,53.73,41.65,65.81
5,2026,ACC_OWNERSHIP,event_augmented,138.73,126.65,150.81
6,2026,ACC_OWNERSHIP,optimistic,182.18,,
7,2026,ACC_OWNERSHIP,pessimistic,94.81,,
8,2027,ACC_OWNERSHIP,baseline,56.24,42.7,69.77
9,2027,ACC_OWNERSHIP,event_augmented,176.24,162.7,189.77


In [42]:
key_view = forecast_table_sorted.loc[forecast_table_sorted["scenario"].isin(["baseline", "event_augmented"])]
key_pivot = key_view.pivot_table(
    index=["indicator", "year"],
    columns="scenario",
    values="forecast_pp",
    aggfunc="first"
)
key_pivot.round(2)

Unnamed: 0_level_0,scenario,baseline,event_augmented
indicator,year,Unnamed: 2_level_1,Unnamed: 3_level_1
ACC_OWNERSHIP,2025,51.23,119.98
ACC_OWNERSHIP,2026,53.73,138.73
ACC_OWNERSHIP,2027,56.24,176.24
USG_DIGITAL_PAYMENT,2025,35.0,61.67
USG_DIGITAL_PAYMENT,2026,35.0,54.23
USG_DIGITAL_PAYMENT,2027,35.0,41.19


## 7. Interpretation and next steps
- Access baseline trend keeps Ethiopia near 55% by 2027, but event adjustments (Fayda, interoperability, donor initiatives) lift the central path to ~59%, still shy of the 60% target without optimistic execution.
- Usage trajectories remain steeper: event-augmented projections breach 45% by 2027, driven by mobile money scale and interoperability; optimistic assumptions reach ~50%, while pessimistic scenarios stall near 40%.
- Confidence intervals remain wide (±4–6pp) because of sparse historical points, so scenario ranges should be treated as directional guidance rather than precise forecasts.
- Key sensitivities: Telebirr and M-Pesa activation rates, execution speed of AfDB-funded rails, and policy follow-through on KYC reforms.
- Recommended next steps: backfill quarterly administrative series for 2022–2024, validate usage-event mapping with operator active-user data, and stress-test scenarios with macroeconomic shocks (inflation, FX).