In [21]:
# Standard library
import os
import sys

# Third-party
import numpy as np
import pandas as pd

# Put the parent of the current working directory on sys.path so the local
# `src` package can be imported. The membership check keeps this cell
# idempotent: re-running it no longer appends a duplicate entry each time.
# NOTE(review): presumably the notebook is launched from a folder one level
# below the project root (e.g. notebooks/) — confirm.
project_root = os.path.abspath(os.path.join(os.getcwd(), ".."))
if project_root not in sys.path:
    sys.path.append(project_root)

# Local import: must stay below the sys.path adjustment above.
from src.enrich_impact import DataEnrichment

In [22]:
# Load the raw impact sheet up front. The existence check mirrors the guarded
# load performed in the following cell: without it a missing file raises
# FileNotFoundError here, and a top-to-bottom run never reaches that fallback.
raw_path = "../data/raw/impact_sheet.csv"
initial_df = pd.read_csv(raw_path) if os.path.exists(raw_path) else pd.DataFrame()

In [23]:

# 1. File locations: the raw sheet is only read; the enriched result is
#    written to a separate file under processed/.
raw_path = "../data/raw/impact_sheet.csv"
processed_path = "../data/processed/impact_links.csv"

# 2. Read the raw sheet, or fall back to an empty frame (with a warning)
#    when the file is not there.
if not os.path.exists(raw_path):
    initial_df = pd.DataFrame()
    print("⚠️ Warning: raw/impact_sheet.csv not found. Starting with empty data.")
else:
    initial_df = pd.read_csv(raw_path)
    print(f"✅ Loaded {len(initial_df)} records from raw impact_sheet.")

# 3. Hand the loaded sheet to the enrichment helper.
impact_manager = DataEnrichment(initial_df)

# 4. New impact_link records to add (IMP_0015, IMP_0016).
# NOTE(review): in the saved preview these two rows come out with empty
# impact_estimate, collected_by and collection_date, while the existing rows
# have them filled in — confirm whether they should be set here too.
new_impact_entries = [
    dict(
        record_id='IMP_0015',
        parent_id='EVT_0001',
        record_type='impact_link',
        pillar='USAGE',
        indicator='Telebirr effect on Active MM Accounts',
        related_indicator='USG_MM_ACTIVE',
        impact_direction='increase',
        impact_magnitude='high',
        lag_months=1,
        value_numeric=15.0,
        evidence_basis='empirical',
        notes='Telebirr mass-onboarding leap from 12M to 60M baseline.',
    ),
    dict(
        record_id='IMP_0016',
        parent_id='REC_0024',
        record_type='impact_link',
        pillar='ACCESS',
        indicator='Fayda effect on Bank Linkage',
        related_indicator='ACC_FAYDA_BANK_LINK',
        impact_direction='increase',
        impact_magnitude='high',
        lag_months=3,
        value_numeric=100.0,
        evidence_basis='regulatory',
        notes='NBE mandate for 100% linkage by March 2026.',
    ),
]

# 5. Enrich: add the new records to the ones already loaded.
impact_manager.enrich_data(new_impact_entries)

# 6. Save to the PROCESSED folder
# This keeps the raw sheet untouched and puts the enriched data in processed/.
# Create the target directory first so the save also works where
# data/processed/ does not exist yet (e.g. a fresh checkout).
# NOTE(review): save_to_csv may already create the folder; exist_ok=True makes
# this call a no-op in that case.
os.makedirs(os.path.dirname(processed_path) or ".", exist_ok=True)
impact_manager.save_to_csv(processed_path)

# 7. Verification display: show the last five rows of the combined sheet,
#    which is where the newly added records should appear.
print("\n--- Combined Impact Sheet Preview ---")
display(impact_manager.df.tail(5))

✅ Loaded 14 records from raw impact_sheet.
--- Enrichment Success ---
Total Records: 16
✅ File saved as: ../data/processed/impact_links.csv

--- Combined Impact Sheet Preview ---


Unnamed: 0,record_id,parent_id,record_type,category,pillar,indicator,indicator_code,indicator_direction,value_numeric,value_text,...,impact_direction,impact_magnitude,impact_estimate,lag_months,evidence_basis,comparable_country,collected_by,collection_date,original_text,notes
11,IMP_0012,EVT_0007,impact_link,,USAGE,M-Pesa Interop effect on P2P Count,,,10.0,,...,increase,medium,10.0,3,literature,Tanzania,Example_Trainee,2025-01-20,,Cross-platform transactions now possible
12,IMP_0013,EVT_0008,impact_link,,USAGE,EthioPay effect on P2P Count,,,15.0,,...,increase,medium,15.0,6,literature,India,Example_Trainee,2025-01-20,,India UPI showed +25% volume increase
13,IMP_0014,EVT_0010,impact_link,,AFFORDABILITY,Safaricom Price Hike effect on Data Affordability,,,10.0,,...,increase,low,10.0,1,empirical,,Example_Trainee,2025-01-20,,Direct price increase (may be offset by switch...
14,IMP_0015,EVT_0001,impact_link,,USAGE,Telebirr effect on Active MM Accounts,,,15.0,,...,increase,high,,1,empirical,,,,,Telebirr mass-onboarding leap from 12M to 60M ...
15,IMP_0016,REC_0024,impact_link,,ACCESS,Fayda effect on Bank Linkage,,,100.0,,...,increase,high,,3,regulatory,,,,,NBE mandate for 100% linkage by March 2026.
