# Exploratory Data Analysis: Financial Inclusion in Ethiopia

This notebook analyzes the Ethiopia Financial Inclusion unified dataset to identify patterns, trends, and drivers of digital financial transformation.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
from datetime import datetime

# Apply seaborn's "whitegrid" theme to the figures drawn in this notebook.
sns.set_theme(style="whitegrid")

# Read the unified dataset, then convert observation_date from text to datetimes
# so it can be sorted and plotted on a time axis.
DATA_PATH = '../data/raw/ethiopia_fi_unified_data.csv'
df = pd.read_csv(DATA_PATH)
df = df.assign(observation_date=pd.to_datetime(df['observation_date']))

# Preview the first rows as the cell output.
df.head()

## 1. Dataset Overview
Summarizing the dataset by record type, pillar, and source type.

In [None]:
# Bar chart: number of rows in the dataset for each record_type.
fig, ax = plt.subplots(figsize=(10, 5))
# Colour the bars through `hue` instead of a bare `palette`: seaborn deprecates
# `palette` without `hue`. `dodge=False` keeps each bar centred on its own tick.
sns.countplot(
    data=df,
    x='record_type',
    hue='record_type',
    palette='Set2',
    dodge=False,
    ax=ax,
)
if ax.get_legend() is not None:
    # A legend would only repeat the x tick labels, so drop it when one is drawn.
    ax.get_legend().remove()
ax.set_title("Records by Type")
ax.set_xlabel("Record Type")
ax.set_ylabel("Number of Records")
plt.show()

# Pillar breakdown, restricted to rows whose record_type is 'observation'.
# `.loc[mask, column]` selects in one step instead of chained indexing.
print("Counts by Pillar (Observations only):")
print(df.loc[df['record_type'] == 'observation', 'pillar'].value_counts())

## 2. Access Analysis: Account Ownership Transition
Visualizing the trajectory from 2011 to 2024.

In [None]:
# Observations of the ACC_OWNERSHIP indicator for gender == 'all', in date order.
acc_own = df[
    (df['indicator_code'] == 'ACC_OWNERSHIP')
    & (df['record_type'] == 'observation')
    & (df['gender'] == 'all')
].sort_values('observation_date')

# Derive the year range shown in the title from the plotted data, so the title
# cannot drift from what is actually drawn. Left blank if nothing matched.
obs_years = acc_own['observation_date'].dt.year.dropna()
year_span = f" ({int(obs_years.min())}-{int(obs_years.max())})" if not obs_years.empty else ""

fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(acc_own['observation_date'], acc_own['value_numeric'], marker='o', linewidth=2, color='#1f77b4')
ax.set_title(f"Ethiopia: Account Ownership Rate{year_span}")
ax.set_xlabel("Observation Date")
ax.set_ylabel("Ownership Rate (%)")
ax.set_ylim(0, 70)
# Label each point just above its marker; `:g` drops a trailing ".0" from whole numbers.
for x, y in zip(acc_own['observation_date'], acc_own['value_numeric']):
    ax.text(x, y + 2, f"{y:g}%", ha='center')
plt.show()

## 3. Usage Analysis: Digital vs Cash
Comparing P2P transaction counts with ATM withdrawals.

In [None]:
# Rows for the P2P and ATM count indicators. Restricted to record_type ==
# 'observation' so that any other record type sharing these indicator codes
# is not averaged into the bars.
usage_mask = (
    df['indicator_code'].isin(['USG_P2P_COUNT', 'USG_ATM_COUNT'])
    & (df['record_type'] == 'observation')
)
p2p_atm = df[usage_mask]
# Keep only the fiscal year being compared.
p2p_atm_recent = p2p_atm[p2p_atm['fiscal_year'] == 'FY2024/25']

fig, ax = plt.subplots(figsize=(8, 6))
# Colour the bars through `hue` instead of a bare `palette`: seaborn deprecates
# `palette` without `hue`. `dodge=False` keeps each bar centred on its own tick.
sns.barplot(
    data=p2p_atm_recent,
    x='indicator',
    y='value_numeric',
    hue='indicator',
    palette='viridis',
    dodge=False,
    ax=ax,
)
if ax.get_legend() is not None:
    # A legend would only repeat the x tick labels, so drop it when one is drawn.
    ax.get_legend().remove()
ax.set_title("Transaction Volume: P2P vs ATM (FY2024/25)")
ax.set_xlabel("Indicator")
ax.set_ylabel("Count")
plt.show()

## 4. Event Timeline
Mapping major events that influenced the indicators.

In [None]:
# Event records in chronological order.
is_event = df['record_type'] == 'event'
events = df[is_event].sort_values('observation_date')

fig, ax = plt.subplots(figsize=(14, 5))

# Every event sits on the same horizontal line (y = 1); only the date varies.
baseline = [1] * len(events)
ax.scatter(events['observation_date'], baseline, color='darkorange', marker='s', s=100)

# Write each event's `indicator` text slightly above the line, rotated 45 degrees.
for event_date, label in zip(events['observation_date'], events['indicator']):
    ax.text(event_date, 1.02, label, rotation=45, ha='right', fontsize=9)

ax.set_title("Major Financial Inclusion Events")
# The y axis carries no information, so hide its ticks and leave room above for labels.
ax.set_yticks([])
ax.set_ylim(0.98, 1.2)
plt.show()

## 5. Key Insights & Data Quality
- **Ownership Paradox:** Account ownership grew only 3 percentage points, from 46% (2021) to 49% (2024), which is low relative to the growth in mobile money registration.
- **Crossover:** P2P transaction counts now exceed ATM withdrawal counts (FY2024/25).
- **Data Quality:** Confidence is high for survey and operator data, and medium for enriched research data (e.g., smartphone penetration).