# Task 3: Event Impact Modeling

Ethiopia Financial Inclusion Forecasting — Event-Indicator association and impact modeling.

## Setup & Data Load

In [None]:
import sys, os
from pathlib import Path
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# Apply the whitegrid theme to every figure produced in this notebook.
plt.style.use('seaborn-v0_8-whitegrid')

# Combined dataset; the relative path is resolved against the current working directory.
DATA_PATH = Path('../data/processed/ethiopia_fi_unified_data_combined.csv')
df = pd.read_csv(DATA_PATH)

# Parse whichever date-like columns are present; unparseable values become NaT.
DATE_COLUMNS = ('observation_date', 'event_date', 'date', 'record_date')
present_date_columns = [name for name in DATE_COLUMNS if name in df.columns]
for name in present_date_columns:
    df[name] = pd.to_datetime(df[name], errors='coerce')

# Quick sanity output: row count and the alphabetised column list.
print('Loaded rows:', df.shape[0])
print('Columns:', sorted(df.columns))


## Separate Records by Type

In [None]:
# Split the unified table into one independent frame per record_type. Each
# frame is a copy, so later column assignments do not write back into df.
type_column = df['record_type']
records = {
    label: df[type_column == kind].copy()
    for label, kind in [
        ('observations', 'observation'),
        ('events', 'event'),
        ('impact_links', 'impact_link'),
    ]
}
# 'targets' is optional: use an empty, column-less frame when no such rows exist.
if 'target' in type_column.unique():
    records['targets'] = df[type_column == 'target'].copy()
else:
    records['targets'] = pd.DataFrame()

# Report how many rows landed in each bucket.
for label, frame in records.items():
    print(label, len(frame))

# Ensure key columns: expose indicator_code under the name related_indicator
# when the impact links do not already have that column.
imp = records['impact_links']
lacks_related_indicator = 'related_indicator' not in imp.columns
if lacks_related_indicator and 'indicator_code' in imp.columns:
    imp['related_indicator'] = imp['indicator_code']


## Event-Indicator Association Matrix

In [None]:
# Work on the per-type frames stored in the `records` dict.
events = records['events']
imp = records['impact_links']

# Robust effect selection
import pandas as pd

def _select_effect_column(df: pd.DataFrame):
    """Return the name of the first usable numeric effect column, or None.

    The candidates are tried in order: ``impact_magnitude`` first, then
    ``impact_estimate``. A candidate is usable when, after coercing it to
    numeric (non-numeric entries become NaN), it holds at least one non-NaN
    value and at least one value whose absolute value is greater than zero.
    """
    candidates = ('impact_magnitude', 'impact_estimate')
    for name in candidates:
        if name not in df.columns:
            continue
        numeric = pd.to_numeric(df[name], errors='coerce')
        has_values = numeric.notna().any()
        has_nonzero = (numeric.abs() > 0).any()
        if has_values and has_nonzero:
            return name
    return None

# Use the numeric effect column when one is usable; otherwise fall back to the
# sign of `impact_direction` (+1 positive, -1 negative, anything else 0).
col = _select_effect_column(imp)
if col is None:
    imp['effect_value'] = imp['impact_direction'].map({'positive':1,'negative':-1}).fillna(0)
else:
    imp['effect_value'] = pd.to_numeric(imp[col], errors='coerce').fillna(0)

# Join event details with robust event date
key_cols = [c for c in ['record_id','event_name','category','event_date','observation_date','date','period_start'] if c in events.columns]
ev = events[key_cols].rename(columns={'record_id':'event_id'})
if 'event_date' not in ev.columns:
    # No explicit event_date: take the first available alternative date column.
    edc = next((c for c in ['observation_date','date','period_start'] if c in ev.columns), None)
    ev['event_date'] = pd.to_datetime(ev[edc], errors='coerce') if edc else pd.NaT
else:
    ev['event_date'] = pd.to_datetime(ev['event_date'], errors='coerce')

# Event attributes to attach to each link. Only columns that exist on `ev` are
# requested, so a missing `event_name` or `category` does not raise KeyError.
event_attrs = [c for c in ['event_name','event_date','category'] if c in ev.columns]

link = imp.rename(columns={'parent_id':'event_id'})
# The link rows come from the same unified table as the events, so they can
# carry their own copies of these columns. Drop them before merging: otherwise
# pandas suffixes both sides (`event_name_x` / `event_name_y`), the plain
# `event_name` column disappears and the heatmap falls back to raw event ids.
link = link.drop(columns=[c for c in event_attrs if c in link.columns])
merged = pd.merge(link, ev[['event_id'] + event_attrs], on='event_id', how='left')

if 'related_indicator' not in merged.columns and 'indicator_code' in merged.columns:
    merged['related_indicator'] = merged['indicator_code']
# Prefer readable event names on the rows; use ids when names are unavailable.
row_label = 'event_name' if 'event_name' in merged.columns else 'event_id'
col_label = 'related_indicator'
# Mean effect per (event, indicator) pair; pairs without a link are shown as 0.
assoc = merged.pivot_table(index=row_label, columns=col_label, values='effect_value', aggfunc='mean', fill_value=0)

plt.figure(figsize=(12,6))
sns.heatmap(assoc, cmap='RdBu_r', center=0, annot=False)
plt.title('Event-Indicator Association (mean effect)')
plt.tight_layout()

# Create the output directory on first use so savefig cannot fail on a fresh checkout.
fig_path = Path('../reports/figures/event_indicator_heatmap.png')
os.makedirs(fig_path.parent, exist_ok=True)
plt.savefig(fig_path)
print('Saved heatmap to', fig_path)

assoc.head()


## Modeling Event Impacts Over Time

In [None]:
def build_event_effects(imp: pd.DataFrame, events: pd.DataFrame) -> pd.DataFrame:
    """Build one effect row per impact link, dated by its parent event.

    Args:
        imp: Impact-link rows. ``parent_id`` is treated as the id of the event
            the link belongs to.
        events: Event rows. ``record_id`` is treated as the event id, and the
            first available of ``event_date``, ``observation_date``, ``date``
            and ``period_start`` supplies the event date.

    Returns:
        A frame with exactly the columns ``event_id``, ``event_date``,
        ``related_indicator``, ``effect_value`` and ``lag_months``.
        ``effect_value`` comes from the column chosen by
        ``_select_effect_column`` (non-numeric entries become 0), or from the
        sign of ``impact_direction`` (+1 / -1, anything else 0) when no numeric
        column is usable. ``lag_months`` is an integer, 0 where missing.

    Raises:
        KeyError: if ``events`` has no ``record_id`` column, or if no numeric
            effect column is usable and ``impact_direction`` is absent.
    """
    link = imp.rename(columns={'parent_id':'event_id'}).copy()
    ev = events.rename(columns={'record_id':'event_id'}).copy()
    edc = next((c for c in ['event_date','observation_date','date','period_start'] if c in ev.columns), None)
    ev['event_date'] = pd.to_datetime(ev[edc], errors='coerce') if edc else pd.NaT
    # The link rows may carry their own `event_date` column (they are split out
    # of the same unified table as the events). Left in place, the merge would
    # emit `event_date_x` / `event_date_y` and no plain `event_date`, and the
    # required-column loop below would then blank every event date with pd.NA.
    link = link.drop(columns=['event_date'], errors='ignore')
    merged = pd.merge(link, ev[['event_id','event_date']], on='event_id', how='left')
    col = _select_effect_column(merged)
    if col is None:
        # No usable magnitude: fall back to a unit effect signed by direction.
        merged['effect_value'] = merged['impact_direction'].map({'positive':1,'negative':-1}).fillna(0)
    else:
        merged['effect_value'] = pd.to_numeric(merged[col], errors='coerce').fillna(0)
    merged['lag_months'] = pd.to_numeric(merged['lag_months'], errors='coerce').fillna(0).astype(int) if 'lag_months' in merged.columns else 0
    if 'related_indicator' not in merged.columns and 'indicator_code' in merged.columns:
        merged['related_indicator'] = merged['indicator_code']
    # Guarantee the output schema even when an optional input column is absent.
    for req in ['event_id','event_date','related_indicator','effect_value','lag_months']:
        if req not in merged.columns:
            merged[req] = pd.NA
    return merged[['event_id','event_date','related_indicator','effect_value','lag_months']]


## Validation: Telebirr and M-Pesa effects on Mobile Money Accounts

In [None]:
events = records['events']
imp = records['impact_links']
obs = records['observations']

# Build effects
fx = build_event_effects(imp, events)

# Choose indicator for mobile money account ownership. Prefer ACC_MM_ACCOUNT;
# otherwise use the first indicator referenced by any impact link. `fx` always
# has a `related_indicator` column, so this lookup cannot fail on a missing
# column, and an empty result yields None instead of an IndexError.
if (obs['indicator_code']=='ACC_MM_ACCOUNT').any():
    indicator_mm = 'ACC_MM_ACCOUNT'
else:
    linked_indicators = fx['related_indicator'].dropna().unique()
    indicator_mm = linked_indicators[0] if len(linked_indicators) else None

# NOTE(review): apply_event_effects_series is not defined in any cell visible
# here - confirm it is defined or imported before this cell runs.
series = apply_event_effects_series(obs, fx, indicator_mm) if indicator_mm is not None else pd.DataFrame()
if not series.empty:
    fig, ax = plt.subplots(figsize=(10,4))
    series['base'].plot(ax=ax, label='Observed (ffill)', color='gray')
    series['predicted'].plot(ax=ax, label='Event-adjusted prediction', color='blue')
    ax.set_title(f'Mobile Money Account Ownership — Event-adjusted prediction ({indicator_mm})')
    ax.legend()
    plt.tight_layout()
    outp = Path('../reports/figures/mm_event_prediction.png')
    # Create the figures directory if needed so this cell does not depend on
    # the heatmap cell having run first.
    os.makedirs(outp.parent, exist_ok=True)
    plt.savefig(outp)
    print('Saved validation figure to', outp)
else:
    print('No observation series available for indicator:', indicator_mm)


## Methodology, Assumptions, and Sources

- Effects modeled as step changes at event_date + lag_months.
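- When neither `impact_magnitude` nor `impact_estimate` holds a usable (non-zero numeric) value, the impact direction is used with a unit effect (+1 for positive, −1 for negative, 0 otherwise).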
- Percentage indicators treated additively in pp; counts additively in units.
- Validation compares event-adjusted predictions to observed trajectories (e.g., 2021→2024 ACC_MM_ACCOUNT).
- Limitations: sparse observation frequency, potential mismatch between supply-side events and demand-side responses, effect heterogeneity not captured.

## Patch: Robust Effect Selection and Event Date Handling

In [None]:
# Work on the per-type frames stored in the `records` dict.
events = records['events']
imp = records['impact_links']

# Robust effect selection
import pandas as pd

def _select_effect_column(df: pd.DataFrame):
    """Return the name of the first usable numeric effect column, or None.

    The candidates are tried in order: ``impact_magnitude`` first, then
    ``impact_estimate``. A candidate is usable when, after coercing it to
    numeric (non-numeric entries become NaN), it holds at least one non-NaN
    value and at least one value whose absolute value is greater than zero.
    """
    candidates = ('impact_magnitude', 'impact_estimate')
    for name in candidates:
        if name not in df.columns:
            continue
        numeric = pd.to_numeric(df[name], errors='coerce')
        has_values = numeric.notna().any()
        has_nonzero = (numeric.abs() > 0).any()
        if has_values and has_nonzero:
            return name
    return None

# Use the numeric effect column when one is usable; otherwise fall back to the
# sign of `impact_direction` (+1 positive, -1 negative, anything else 0).
col = _select_effect_column(imp)
if col is None:
    imp['effect_value'] = imp['impact_direction'].map({'positive':1,'negative':-1}).fillna(0)
else:
    imp['effect_value'] = pd.to_numeric(imp[col], errors='coerce').fillna(0)

# Join event details with robust event date
key_cols = [c for c in ['record_id','event_name','category','event_date','observation_date','date','period_start'] if c in events.columns]
ev = events[key_cols].rename(columns={'record_id':'event_id'})
if 'event_date' not in ev.columns:
    # No explicit event_date: take the first available alternative date column.
    edc = next((c for c in ['observation_date','date','period_start'] if c in ev.columns), None)
    ev['event_date'] = pd.to_datetime(ev[edc], errors='coerce') if edc else pd.NaT
else:
    ev['event_date'] = pd.to_datetime(ev['event_date'], errors='coerce')

# Event attributes to attach to each link. Only columns that exist on `ev` are
# requested, so a missing `event_name` or `category` does not raise KeyError.
event_attrs = [c for c in ['event_name','event_date','category'] if c in ev.columns]

link = imp.rename(columns={'parent_id':'event_id'})
# The link rows come from the same unified table as the events, so they can
# carry their own copies of these columns. Drop them before merging: otherwise
# pandas suffixes both sides (`event_name_x` / `event_name_y`), the plain
# `event_name` column disappears and the heatmap falls back to raw event ids.
link = link.drop(columns=[c for c in event_attrs if c in link.columns])
merged = pd.merge(link, ev[['event_id'] + event_attrs], on='event_id', how='left')

if 'related_indicator' not in merged.columns and 'indicator_code' in merged.columns:
    merged['related_indicator'] = merged['indicator_code']
# Prefer readable event names on the rows; use ids when names are unavailable.
row_label = 'event_name' if 'event_name' in merged.columns else 'event_id'
col_label = 'related_indicator'
# Mean effect per (event, indicator) pair; pairs without a link are shown as 0.
assoc = merged.pivot_table(index=row_label, columns=col_label, values='effect_value', aggfunc='mean', fill_value=0)

plt.figure(figsize=(12,6))
sns.heatmap(assoc, cmap='RdBu_r', center=0, annot=False)
plt.title('Event-Indicator Association (mean effect)')
plt.tight_layout()

# Create the output directory on first use so savefig cannot fail on a fresh checkout.
fig_path = Path('../reports/figures/event_indicator_heatmap.png')
os.makedirs(fig_path.parent, exist_ok=True)
plt.savefig(fig_path)
print('Saved heatmap to', fig_path)

assoc.head()


## Patch: Robust build_event_effects()

In [None]:
def build_event_effects(imp: pd.DataFrame, events: pd.DataFrame) -> pd.DataFrame:
    """Build one effect row per impact link, dated by its parent event.

    Args:
        imp: Impact-link rows. ``parent_id`` is treated as the id of the event
            the link belongs to.
        events: Event rows. ``record_id`` is treated as the event id, and the
            first available of ``event_date``, ``observation_date``, ``date``
            and ``period_start`` supplies the event date.

    Returns:
        A frame with exactly the columns ``event_id``, ``event_date``,
        ``related_indicator``, ``effect_value`` and ``lag_months``.
        ``effect_value`` comes from the column chosen by
        ``_select_effect_column`` (non-numeric entries become 0), or from the
        sign of ``impact_direction`` (+1 / -1, anything else 0) when no numeric
        column is usable. ``lag_months`` is an integer, 0 where missing.

    Raises:
        KeyError: if ``events`` has no ``record_id`` column, or if no numeric
            effect column is usable and ``impact_direction`` is absent.
    """
    link = imp.rename(columns={'parent_id':'event_id'}).copy()
    ev = events.rename(columns={'record_id':'event_id'}).copy()
    edc = next((c for c in ['event_date','observation_date','date','period_start'] if c in ev.columns), None)
    ev['event_date'] = pd.to_datetime(ev[edc], errors='coerce') if edc else pd.NaT
    # The link rows may carry their own `event_date` column (they are split out
    # of the same unified table as the events). Left in place, the merge would
    # emit `event_date_x` / `event_date_y` and no plain `event_date`, and the
    # required-column loop below would then blank every event date with pd.NA.
    link = link.drop(columns=['event_date'], errors='ignore')
    merged = pd.merge(link, ev[['event_id','event_date']], on='event_id', how='left')
    col = _select_effect_column(merged)
    if col is None:
        # No usable magnitude: fall back to a unit effect signed by direction.
        merged['effect_value'] = merged['impact_direction'].map({'positive':1,'negative':-1}).fillna(0)
    else:
        merged['effect_value'] = pd.to_numeric(merged[col], errors='coerce').fillna(0)
    merged['lag_months'] = pd.to_numeric(merged['lag_months'], errors='coerce').fillna(0).astype(int) if 'lag_months' in merged.columns else 0
    if 'related_indicator' not in merged.columns and 'indicator_code' in merged.columns:
        merged['related_indicator'] = merged['indicator_code']
    # Guarantee the output schema even when an optional input column is absent.
    for req in ['event_id','event_date','related_indicator','effect_value','lag_months']:
        if req not in merged.columns:
            merged[req] = pd.NA
    return merged[['event_id','event_date','related_indicator','effect_value','lag_months']]


## Re-run Validation with Robust Effects

In [None]:
fx = build_event_effects(records['impact_links'], records['events'])

# Prefer ACC_MM_ACCOUNT; otherwise use the first indicator referenced by any
# impact link. An empty candidate list yields None instead of an IndexError.
if (records['observations']['indicator_code']=='ACC_MM_ACCOUNT').any():
    indicator_mm = 'ACC_MM_ACCOUNT'
else:
    linked_indicators = fx['related_indicator'].dropna().unique()
    indicator_mm = linked_indicators[0] if len(linked_indicators) else None

# NOTE(review): apply_event_effects_series is not defined in any cell visible
# here - confirm it is defined or imported before this cell runs.
series = apply_event_effects_series(records['observations'], fx, indicator_mm) if indicator_mm is not None else pd.DataFrame()
if not series.empty:
    fig, ax = plt.subplots(figsize=(10,4))
    series['base'].plot(ax=ax, label='Observed (ffill)', color='gray')
    series['predicted'].plot(ax=ax, label='Event-adjusted prediction', color='blue')
    ax.set_title(f'Mobile Money Account Ownership — Event-adjusted prediction ({indicator_mm})')
    ax.legend()
    plt.tight_layout()
    outp = Path('../reports/figures/mm_event_prediction.png')
    # Create the figures directory if needed so this cell does not depend on
    # the heatmap cell having run first.
    os.makedirs(outp.parent, exist_ok=True)
    plt.savefig(outp)
    print('Saved validation figure to', outp)
else:
    print('No observation series available or event dates missing; cannot validate.')
