# Africa GDP Growth and Employment Analysis

**Research Question:** How do GDP growth and the employment-to-population ratio change over time in Africa?

**Team Members:** [Add names here]


## Step 1: Setup

In [None]:
import wbgapi as wb
import pandas as pd
import sqlite3
import matplotlib.pyplot as plt
import warnings
# Suppress every Python warning from this point on so the notebook output
# stays uncluttered.
# NOTE(review): this also hides deprecation warnings raised by the libraries
# imported above — consider narrowing the filter to specific categories.
warnings.filterwarnings('ignore')

# Confirms that the setup statements above ran without raising
print("Setup complete")

## Step 2: Get Data from World Bank

In [None]:
# African countries included in the analysis (three-letter economy codes)
countries = ['DZA', 'EGY', 'ETH', 'GHA', 'KEN', 'MAR', 'NGA', 'ZAF', 'TZA', 'UGA']

# Both indicators are requested for the same years (2000 through 2023) so the
# two tables line up on (economy, year) when merged below.
years = range(2000, 2024)

# Fetch GDP growth and employment data, one table per indicator
gdp = wb.data.DataFrame('NY.GDP.MKTP.KD.ZG', countries, time=years, numericTimeKeys=True)
emp = wb.data.DataFrame('SL.EMP.TOTL.SP.ZS', countries, time=years, numericTimeKeys=True)

# Reshape to long format: one row per (economy, year) with a single value column
gdp_long = gdp.reset_index().melt(id_vars='economy', var_name='year', value_name='gdp_growth')
emp_long = emp.reset_index().melt(id_vars='economy', var_name='year', value_name='employment_ratio')

# Combine on the shared keys. The key column is renamed by name instead of
# overwriting df.columns positionally, so a change in the merge's column
# order can never silently mislabel the data.
df = gdp_long.merge(emp_long, on=['economy', 'year']).rename(columns={'economy': 'country'})

print(f"Fetched {len(df)} records")
df.head()

## Step 3: Clean Data with SQL

In [None]:
# Open (or create) the SQLite file and overwrite the 'africa_data' table with
# the merged frame. The connection is left open on purpose: the next step
# queries the same table through it.
conn = sqlite3.connect('data.db')
df.to_sql(
    name='africa_data',
    con=conn,
    if_exists='replace',
    index=False,
)

# Read back only the rows where both indicators are present
df_clean = pd.read_sql_query(
    sql="""
    SELECT country, year, gdp_growth, employment_ratio
    FROM africa_data
    WHERE gdp_growth IS NOT NULL AND employment_ratio IS NOT NULL
""",
    con=conn,
)

print(f"Clean data: {len(df_clean)} records")

## Step 4: Descriptive Statistics (SQL)

In [None]:
# SQL descriptive statistics over the complete rows only (both indicators
# non-NULL): record and country counts, the year span, and the mean/min/max
# of each indicator rounded to two decimals.
try:
    stats = pd.read_sql_query("""
    SELECT 
        COUNT(*) as total_records,
        COUNT(DISTINCT country) as num_countries,
        MIN(year) as start_year,
        MAX(year) as end_year,
        ROUND(AVG(gdp_growth), 2) as avg_gdp_growth,
        ROUND(MIN(gdp_growth), 2) as min_gdp_growth,
        ROUND(MAX(gdp_growth), 2) as max_gdp_growth,
        ROUND(AVG(employment_ratio), 2) as avg_employment,
        ROUND(MIN(employment_ratio), 2) as min_employment,
        ROUND(MAX(employment_ratio), 2) as max_employment
    FROM africa_data
    WHERE gdp_growth IS NOT NULL AND employment_ratio IS NOT NULL
""", conn)
finally:
    # Close the connection even if the query raises, so the database handle
    # is never leaked.
    conn.close()

print("\nDescriptive Statistics:")
# Transposed so each statistic is printed on its own row
print(stats.T)

## Step 5: Visualization

In [None]:
# 1. GDP Growth vs Employment over time (dual axis)
from pathlib import Path

# savefig does not create missing directories; make sure the output folder
# exists so the cell does not fail with FileNotFoundError on a fresh checkout.
Path('../figures').mkdir(parents=True, exist_ok=True)

fig, ax1 = plt.subplots(figsize=(10, 6))

# Cross-country mean of each indicator per year (single pass over the data)
yearly_means = df_clean.groupby('year')[['gdp_growth', 'employment_ratio']].mean()
yearly_gdp = yearly_means['gdp_growth']
yearly_emp = yearly_means['employment_ratio']

# Left axis: GDP growth, with a dashed zero line to separate growth from contraction
gdp_color = 'tab:blue'
ax1.set_xlabel('Year', fontsize=11)
ax1.set_ylabel('GDP Growth (%)', color=gdp_color, fontsize=11)
ax1.plot(yearly_gdp.index, yearly_gdp.values, color=gdp_color, linewidth=2.5, marker='o', markersize=5)
ax1.tick_params(axis='y', labelcolor=gdp_color)
ax1.axhline(y=0, color='red', linestyle='--', alpha=0.3)
ax1.grid(True, alpha=0.3)

# Right axis: employment ratio, sharing the x axis but with its own y scale
ax2 = ax1.twinx()
emp_color = 'tab:green'
ax2.set_ylabel('Employment Ratio (%)', color=emp_color, fontsize=11)
ax2.plot(yearly_emp.index, yearly_emp.values, color=emp_color, linewidth=2.5, marker='s', markersize=5, linestyle='--')
ax2.tick_params(axis='y', labelcolor=emp_color)

plt.title('Africa: GDP Growth vs Employment Ratio (2000-2023)', fontsize=13, fontweight='bold')
fig.tight_layout()
plt.savefig('../figures/africa_gdp_employment_trend.png', dpi=300, bbox_inches='tight')
plt.show()
print("Saved: africa_gdp_employment_trend.png")

## Step 6: Scatter Plot and Key Findings

In [None]:
# 2. Scatter plot: GDP Growth vs Employment
from pathlib import Path

import seaborn as sns
sns.set_theme(style="whitegrid")

# savefig does not create missing directories; make sure the output folder
# exists so the cell does not fail with FileNotFoundError on a fresh checkout.
Path('../figures').mkdir(parents=True, exist_ok=True)

# One point per (country, year) observation in the cleaned data
plt.figure(figsize=(8, 6))
sns.scatterplot(data=df_clean, x='gdp_growth', y='employment_ratio', s=80, alpha=0.6, color='steelblue')
plt.xlabel('GDP Growth (%)', fontsize=11)
plt.ylabel('Employment Ratio (%)', fontsize=11)
plt.title('Africa: GDP Growth vs Employment Correlation (2000-2023)', fontsize=12, fontweight='bold')
plt.grid(alpha=0.3)
plt.tight_layout()
plt.savefig('../figures/africa_gdp_employment_scatter.png', dpi=300, bbox_inches='tight')
plt.show()
print("Saved: africa_gdp_employment_scatter.png")

# Summary statistics, all computed from the cleaned (complete-row) data
print("="*60)
print("KEY FINDINGS")
print("="*60)
print(f"\nCountries analyzed: {df_clean['country'].nunique()}")
print(f"Time period: {int(df_clean['year'].min())} - {int(df_clean['year'].max())}")
print(f"Total observations: {len(df_clean)}")
print(f"\nAverage GDP Growth: {df_clean['gdp_growth'].mean():.2f}%")
print(f"Average Employment Ratio: {df_clean['employment_ratio'].mean():.2f}%")
# Correlation between the two indicators across all observations (Series.corr default method)
print(f"GDP-Employment Correlation: {df_clean['gdp_growth'].corr(df_clean['employment_ratio']):.3f}")
print("\n" + "="*60)