# Task 3: Event Impact Modeling

Ethiopia Financial Inclusion — Event impacts on Access and Usage.

## Load Data

In [None]:
import pandas as pd
from pathlib import Path
# Location of the unified table (observations, events and impact links share one CSV)
DATA_PATH = Path('../data/processed/ethiopia_fi_unified_data_combined.csv')
df = pd.read_csv(DATA_PATH)

# Split the unified table into one independent frame per record_type
observations, events, impact_links = (
    df.loc[df['record_type'].eq(kind)].copy()
    for kind in ('observation', 'event', 'impact_link')
)
print(f"Loaded: {len(df)} rows | obs={len(observations)} events={len(events)} links={len(impact_links)}")


## Join Impact Links with Events

In [None]:
# Ensure keys exist: report any column the cells below read from the link rows
for c in ['parent_id','record_id','pillar','related_indicator','impact_direction','impact_magnitude','lag_months','evidence_basis']:
    if c not in impact_links.columns:
        print(f"Missing column in impact_links: {c}")

# Merge to get event details.
# `impact_links` and `events` are slices of the same table, so they share every
# column name except the renamed key. Suffixing both sides would rename the link
# columns (parent_id -> parent_id_link, impact_magnitude -> impact_magnitude_link, ...)
# and break every lookup below, so link columns keep their names and only the
# event-side duplicates are tagged with `_event`.
events_ren = events.rename(columns={'record_id':'event_id'})
links = impact_links.merge(events_ren, left_on='parent_id', right_on='event_id', how='left', suffixes=('', '_event'))

# The timeline cell reads links['event_date'] as the date of the parent event:
# where a link row carries no date of its own, take it from the joined event.
if 'event_date' in links.columns and 'event_date_event' in links.columns:
    links['event_date'] = links['event_date'].fillna(links['event_date_event'])

print(f"Joined links: {len(links)}")
links[['parent_id','event_id','pillar','related_indicator','impact_direction','impact_magnitude','lag_months','evidence_basis']].head()


## Event–Indicator Association Matrix

In [None]:
import numpy as np

# Map magnitudes
def to_numeric_mag(x):
    """Convert an `impact_magnitude` entry to a numeric weight.

    Parameters
    ----------
    x : any
        Raw magnitude: a number, a numeric string, or one of the labels
        'low' / 'medium' / 'high' (case and surrounding whitespace ignored).

    Returns
    -------
    float
        The numeric value when `x` parses as a number; 0.5 / 1.0 / 1.5 for
        low / medium / high; 1.0 for anything else, including missing values.
    """
    try:
        value = float(x)
    except (TypeError, ValueError, OverflowError):
        # Not parseable as a number: fall back to the qualitative labels
        s = str(x).strip().lower()
        return {'low':0.5,'medium':1.0,'high':1.5}.get(s, 1.0)
    # float(nan) succeeds, so a missing magnitude would otherwise leak NaN into
    # the computed effect; treat it like any other unrecognised entry.
    return 1.0 if np.isnan(value) else value

# Signed effect per link = magnitude weight x direction sign
links['mag_num'] = links['impact_magnitude'].apply(to_numeric_mag)
# Normalise case/whitespace before mapping, as is done for magnitude labels;
# otherwise e.g. 'Negative' misses the map and fillna silently turns it into +1.
links['dir_num'] = (
    links['impact_direction'].astype(str).str.strip().str.lower()
    .map({'positive':1,'negative':-1})
    .fillna(1)  # missing or unrecognised direction is treated as positive
)
links['effect'] = links['mag_num'] * links['dir_num']

# Matrix: rows=event_id, cols=related_indicator (falls back to indicator_code when absent)
col_name = 'related_indicator' if 'related_indicator' in links.columns else 'indicator_code'
mat = links.pivot_table(index='event_id', columns=col_name, values='effect', aggfunc='sum', fill_value=0)
print(f"Matrix shape: {mat.shape}")
mat.head()


## Functional Form: Time-Distributed Event Effects

In [None]:
import pandas as pd

# Example: ramp effect over lag_months
# effect_t = effect * min(1, t/lag_months) for t>=0

def ramp_effect(effect, lag_months, t_months):
    """Effect of an event `t_months` after its date, ramping linearly to full size.

    Parameters
    ----------
    effect : float
        Full (signed) effect reached once the ramp is complete.
    lag_months : float
        Length of the ramp in months. Missing or non-positive means the
        full effect applies immediately from the event date.
    t_months : float
        Months elapsed since the event; negative before the event.

    Returns
    -------
    float
        0.0 before the event or when the elapsed time is unknown;
        otherwise `effect * min(1, t_months / lag_months)`.
    """
    # Nothing is attributed before the event happened (or when the elapsed time
    # is unknown); without this guard a zero/missing lag returned the full
    # effect for months preceding the event.
    if pd.isna(t_months) or t_months < 0:
        return 0.0
    if pd.isna(lag_months) or lag_months <= 0:
        return effect
    return effect * min(1.0, t_months/lag_months)

# Build per-event timelines (monthly): one Series per event_id holding the summed,
# time-distributed effect of all of that event's links on a month-start index.
timelines = {}  # always defined, even when nothing can be built
if 'event_date' not in links.columns:
    print("No 'event_date' column in links; event timelines not built")
else:
    links['event_date'] = pd.to_datetime(links['event_date'], errors='coerce')
    start = links['event_date'].min()
    end = pd.Timestamp('2024-12-31')  # fixed end of the modelling horizon
    if pd.isna(start):
        # date_range rejects NaT bounds, and no link could be placed in time anyway
        print("No parsable event dates in links; event timelines not built")
    else:
        idx = pd.date_range(start, end, freq='MS')
        for eid, grp in links.groupby('event_id'):
            s = pd.Series(0.0, index=idx)
            # Links without a date cannot be positioned in time and are left out
            for _, r in grp.dropna(subset=['event_date']).iterrows():
                lag = r.get('lag_months', 0) or 0
                # Whole months since the event, approximating a month as 30 days
                elapsed = (idx - r['event_date']).days // 30
                s += pd.Series(
                    [ramp_effect(r['effect'], lag, t_months) for t_months in elapsed],
                    index=idx,
                    dtype=float,
                )
            timelines[eid] = s
        # TODO: combine by indicator using links mapping (not implemented in this cell)
        print(f"Built {len(timelines)} event timelines")


## Validation: Telebirr and M-Pesa Impacts vs Observed

In [None]:
def _row_mentions(row, keyword):
    """Return True when `keyword` occurs in the lower-cased, space-joined text of a row's values."""
    row_text = ' '.join(str(value) for value in row.values)
    return keyword in row_text.lower()

# Find Telebirr and M-Pesa events (heuristics): keyword search across every column of each event row
telebirr = events[events.apply(_row_mentions, axis=1, args=('telebirr',))]
mpesa = events[events.apply(_row_mentions, axis=1, args=('m-pesa',))]
print(f"Telebirr events: {len(telebirr)} | M-Pesa events: {len(mpesa)}")

# Observed mobile money account ownership trajectory, sorted by observation date
obs_mm = (
    observations.loc[observations['indicator_code'] == 'ACC_MM_ACCOUNT']
    .copy()
    .assign(observation_date=lambda frame: pd.to_datetime(frame['observation_date'], errors='coerce'))
    .sort_values('observation_date')
)
obs_mm[['observation_date','value_numeric']].tail()


## Methodology, Assumptions, and Limitations

- Effects are modeled as a linear ramp-up over `lag_months`, after which the full effect is held constant (capped at 100%).
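- `impact_magnitude`: used as-is when numeric; otherwise `low`/`medium`/`high` are mapped to 0.5/1.0/1.5, and unrecognized labels default to 1.0. `impact_direction` maps `positive`/`negative` to +1/−1 (default +1).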
- Combined effects summed across events per indicator.
- Validation compares modeled directionality and rough magnitude to observed changes after key launches.
- Limitations: sparse data, identification challenges, overlapping events, supply vs demand measurement differences.