# Task 1: Data Exploration and Enrichment

## Objective
Understand the starter dataset and enrich it with additional data useful for forecasting Access and Usage.

## 1. Setup and Load Data

In [None]:
import pandas as pd
import os

# Default (relative) path to the unified dataset CSV.
data_path = '../data/raw/ethiopia_fi_unified_data.csv'


def load_data(path=None):
    """Load the dataset CSV into a pandas DataFrame.

    Args:
        path: CSV file to read. When omitted, the module-level ``data_path``
            is used; it is looked up at call time, so reassigning
            ``data_path`` before calling still takes effect.

    Returns:
        The loaded DataFrame, or ``None`` if the file does not exist (a
        message naming the missing path is printed in that case).
    """
    csv_path = data_path if path is None else path
    try:
        # Only the read itself can raise FileNotFoundError; keep the try tight.
        df = pd.read_csv(csv_path)
    except FileNotFoundError:
        print(f"File not found at {csv_path}")
        return None
    print("Data loaded successfully.")
    return df

# Load the dataset from the default path. load_data() returns None when the
# file is missing, in which case the .head() call below raises AttributeError.
df = load_data()
# Preview the first rows of the loaded DataFrame.
df.head()

## 2. Explore Data Structure

In [None]:
# Column names, dtypes and non-null counts; DataFrame.info() prints directly.
print("--- Data Info ---")
df.info()

# Number of rows per record type.
print("\n--- Record Types ---")
print(df['record_type'].value_counts())

# Number of rows per pillar; dropna=False also counts rows with no pillar set.
print("\n--- Pillars ---")
print(df['pillar'].value_counts(dropna=False))

## 3. Review Existing Observations

In [None]:
# Keep only the rows whose record_type is 'observation'.
observations = df[df['record_type'] == 'observation']

# NOTE(review): min()/max() compare the raw column values; if observation_date
# is stored as strings the ordering is lexicographic - confirm the format sorts
# chronologically (e.g. ISO dates).
print("Observation Date Range:", observations['observation_date'].min(), "to", observations['observation_date'].max())
print("\nUnique Indicators:", observations['indicator'].unique())

## 4. Enrichment
We have added the following new data points to the dataset:
1. **Observation**: Usage - Digital Payment Adoption (30% in 2023)
2. **Event**: NBE Digital Lending Directive (Policy, June 2022)
3. **Impact Link**: Modeling the effect of the lending directive on digital payments.

In [None]:
# Verify new additions
# Shows every row whose category is 'policy', not only the newly added event.
print("--- New Event ---")
print(df[df['category'] == 'policy'])

# NOTE(review): the date filter compares against the string '2023-01-01';
# presumably observation_date is stored as text in that format - confirm.
print("\n--- New Observation ---")
print(df[(df['indicator_code'] == 'USG_DIG_PAY') & (df['observation_date'] == '2023-01-01')])