# Africa GDP Growth and Employment Analysis

**Research Question:** How do GDP growth and the employment-to-population ratio change over time in Africa?

**Team Members:** [Add names here]


## Step 1: Setup

In [None]:
import wbgapi as wb
import pandas as pd
import sqlite3
import matplotlib.pyplot as plt
import warnings
# Ignore all warnings from this point on; category=Warning is the base class,
# so every warning subclass is covered.
warnings.filterwarnings(action='ignore', category=Warning)

# Confirmation that the setup cell ran to the end.
setup_message = "Setup complete"
print(setup_message)

## Step 2: Get Data from World Bank

In [None]:
# African countries (three-letter economy codes passed to the World Bank API)
countries = ['DZA', 'EGY', 'ETH', 'GHA', 'KEN', 'MAR', 'NGA', 'ZAF', 'TZA', 'UGA']

# Indicator codes and the year window are defined once so the two requests
# below cannot drift apart.
gdp_indicator = 'NY.GDP.MKTP.KD.ZG'  # fetched as the GDP growth series
emp_indicator = 'SL.EMP.TOTL.SP.ZS'  # fetched as the employment ratio series
years = range(2000, 2024)            # 2000 through 2023 inclusive

# Fetch GDP growth and employment data.
# NOTE(review): the melt below relies on a wide frame indexed by 'economy'
# with one column per year -- confirm against the wbgapi version in use.
gdp = wb.data.DataFrame(gdp_indicator, countries, time=years, numericTimeKeys=True)
emp = wb.data.DataFrame(emp_indicator, countries, time=years, numericTimeKeys=True)

# Reshape to long format: one row per (economy, year) pair
gdp_long = gdp.reset_index().melt(id_vars='economy', var_name='year', value_name='gdp_growth')
emp_long = emp.reset_index().melt(id_vars='economy', var_name='year', value_name='employment_ratio')

# Combine on the shared keys. Columns are renamed and ordered by NAME rather
# than by position, so a change in the merge output order can never silently
# attach the wrong label to a series.
df = gdp_long.merge(emp_long, on=['economy', 'year'], how='inner')
df = df.rename(columns={'economy': 'country'})
df = df[['country', 'year', 'gdp_growth', 'employment_ratio']]

# Make the year an integer so SQL MIN/MAX and the plot axis treat it numerically.
df['year'] = df['year'].astype(int)

print(f"Fetched {len(df)} records")
df.head()

## Step 3: Clean Data with SQL

In [None]:
# Open (or create) the SQLite file and overwrite the staging table with df.
# The connection is left open on purpose: the statistics step reuses `conn`
# and closes it when it is done.
conn = sqlite3.connect('data.db')
df.to_sql(name='africa_data', con=conn, if_exists='replace', index=False)

# Clean: rows missing either indicator are filtered out by the WHERE clause
complete_rows_sql = """
    SELECT country, year, gdp_growth, employment_ratio
    FROM africa_data
    WHERE gdp_growth IS NOT NULL AND employment_ratio IS NOT NULL
"""
df_clean = pd.read_sql_query(complete_rows_sql, conn)

print(f"Clean data: {len(df_clean)} records")

## Step 4: Descriptive Statistics (SQL)

In [None]:
# SQL descriptive statistics over the complete rows only: record and country
# counts, the year span, and mean/min/max (rounded to 2 decimals) for both
# indicators. Returned as a single-row result.
summary_sql = """
    SELECT 
        COUNT(*) as total_records,
        COUNT(DISTINCT country) as num_countries,
        MIN(year) as start_year,
        MAX(year) as end_year,
        ROUND(AVG(gdp_growth), 2) as avg_gdp_growth,
        ROUND(MIN(gdp_growth), 2) as min_gdp_growth,
        ROUND(MAX(gdp_growth), 2) as max_gdp_growth,
        ROUND(AVG(employment_ratio), 2) as avg_employment,
        ROUND(MIN(employment_ratio), 2) as min_employment,
        ROUND(MAX(employment_ratio), 2) as max_employment
    FROM africa_data
    WHERE gdp_growth IS NOT NULL AND employment_ratio IS NOT NULL
"""
stats = pd.read_sql_query(summary_sql, conn)

# Transposed so each statistic is printed on its own line
print("\nDescriptive Statistics:")
print(stats.transpose())

# Last use of the database in this notebook; release the connection
conn.close()

## Step 5: Visualization

In [None]:
# Average GDP growth over time: unweighted mean across whichever countries
# have a complete record in each year
yearly = df_clean.groupby('year')['gdp_growth'].mean()

# Derive the period shown in the title from the years actually plotted.
# df_clean only holds rows where both indicators are present, so its span can
# be narrower than the range that was requested; a hard-coded label could
# misstate what the chart shows.
if yearly.empty:
    period_label = 'no data'
else:
    period_label = f"{int(yearly.index.min())}-{int(yearly.index.max())}"

plt.figure(figsize=(12, 6))
plt.plot(yearly.index, yearly.values, marker='o', linewidth=2, color='steelblue')
# Dashed reference line at zero separates growth from contraction
plt.axhline(y=0, color='red', linestyle='--', alpha=0.5)
plt.xlabel('Year', fontsize=12)
plt.ylabel('GDP Growth (%)', fontsize=12)
plt.title(f'Average GDP Growth in Africa ({period_label})', fontsize=14, fontweight='bold')
plt.grid(alpha=0.3)
plt.tight_layout()
plt.show()

## Step 6: Key Findings

In [None]:
# Print a short summary of the cleaned data set between two banner lines.
# Each value is computed immediately before it is printed.
banner = "=" * 60

print(banner)
print("KEY FINDINGS")
print(banner)

country_count = df_clean['country'].nunique()
print(f"\nCountries analyzed: {country_count}")

# Cast to int so the years print without a decimal part
first_year = int(df_clean['year'].min())
last_year = int(df_clean['year'].max())
print(f"Time period: {first_year} - {last_year}")

mean_gdp_growth = df_clean['gdp_growth'].mean()
print(f"\nAverage GDP Growth: {mean_gdp_growth:.2f}%")

mean_employment_ratio = df_clean['employment_ratio'].mean()
print(f"Average Employment Ratio: {mean_employment_ratio:.2f}%")

print("\n" + banner)