# Open Data Platform - Getting Started

This notebook demonstrates how to use the Open Data Platform to query and analyze economic data.

In [None]:
# Import libraries
from typing import Optional

import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Import Open Data modules
from open_data.config import COUNTRIES, COUNTRY_CODES, WORLD_BANK_INDICATORS, Region, get_countries_by_region
from open_data.db.connection import session_scope, check_connection
from open_data.db.models import Country, Indicator, Observation

## 1. Check Database Connection

In [None]:
# Report whether check_connection() succeeds; on failure, show the command to start Postgres
print(
    "Database connection OK!"
    if check_connection()
    else "Database not connected. Run: docker-compose up -d postgres"
)

## 2. Explore Available Countries

In [None]:
# Build a table with one row per entry in COUNTRIES
countries_df = pd.DataFrame(
    [
        dict(
            ISO3=country.iso3,
            ISO2=country.iso2,
            Name=country.name,
            Region=country.region.value,  # region is an enum-like object; keep only its value
            Subregion=country.subregion,
        )
        for country in COUNTRIES.values()
    ]
)

print(f"Total countries: {len(countries_df)}")
# Preview the first ten rows
countries_df.head(10)

In [None]:
# Bar chart of how many countries fall in each region
(
    countries_df
    .groupby("Region")
    .size()
    .plot.bar(title="Countries by Region")
)

## 3. Explore Available Indicators

In [None]:
# Tabulate the WORLD_BANK_INDICATORS mapping: one row per (code, name) entry
indicators_df = pd.DataFrame(
    [
        dict(Code=indicator_code, Name=indicator_name)
        for indicator_code, indicator_name in WORLD_BANK_INDICATORS.items()
    ]
)

print(f"Total indicators: {len(indicators_df)}")
indicators_df

## 4. Fetch Data Directly from World Bank API

You can fetch data without storing it in the database.

In [None]:
from open_data.ingestion.world_bank import fetch_single_indicator

# Fetch GDP per capita for a sample of Latin American countries
# (the list includes Mexico, so it is not limited to South America)
latam_countries = ["ARG", "BRA", "CHL", "COL", "MEX"]

# Request the 2000-2023 range for the NY.GDP.PCAP.CD indicator
gdp_data = fetch_single_indicator(
    indicator="NY.GDP.PCAP.CD", countries=latam_countries, start_year=2000, end_year=2023
)

print(f"Records fetched: {len(gdp_data)}")
gdp_data.head()

In [None]:
# Line chart of the fetched values, one coloured trace per country.
# Nothing is drawn when the fetch returned an empty frame.
if not gdp_data.empty:
    fig = px.line(
        gdp_data, x="year", y="value", color="country",
        title="GDP per Capita (current US$)", markers=True,
    )
    # update_layout returns the figure itself, so the call can be chained into show()
    fig.update_layout(height=500).show()

## 5. Query Data from Database

After ingesting data (for example with `opendata ingest worldbank`), you can query it from the local database.

In [None]:
# Query data from database
def query_indicator(
    indicator_code: str,
    country_codes: Optional[list] = None,
    start_year: int = 2000,
    end_year: Optional[int] = None,
) -> pd.DataFrame:
    """Query observations for a single indicator from the database.

    Args:
        indicator_code: Code matched against ``Indicator.code``
            (e.g. ``"NY.GDP.PCAP.CD"``).
        country_codes: ISO3 codes matched against ``Country.iso3_code``.
            ``None`` or an empty list applies no country filter, so every
            country with matching observations is returned.
        start_year: First year to include (inclusive).
        end_year: Last year to include (inclusive). ``None`` (the default)
            leaves the range open-ended, which is the original behaviour.

    Returns:
        A DataFrame with columns ``iso3``, ``name``, ``year`` and ``value``,
        ordered by ISO3 code and then by year.
    """
    with session_scope() as session:
        query = (
            session.query(
                Country.iso3_code,
                Country.name,
                Observation.year,
                Observation.value,
            )
            .join(Observation, Country.id == Observation.country_id)
            .join(Indicator, Indicator.id == Observation.indicator_id)
            .filter(Indicator.code == indicator_code)
            .filter(Observation.year >= start_year)
        )

        # Optional upper bound; compare against None so a falsy year is still honoured
        if end_year is not None:
            query = query.filter(Observation.year <= end_year)

        # Truthiness check on purpose: an empty list behaves like "no filter"
        if country_codes:
            query = query.filter(Country.iso3_code.in_(country_codes))

        # Execute inside the session scope so the rows are fetched before it exits
        rows = query.order_by(Country.iso3_code, Observation.year).all()

    return pd.DataFrame(rows, columns=["iso3", "name", "year", "value"])

# Example query (uncomment after ingesting data)
# df = query_indicator("NY.GDP.PCAP.CD", ["ARG", "BRA", "CHL"])
# df.head()

## 6. Country Comparison

In [None]:
# Compare multiple indicators
def compare_countries(countries: list, indicators: list, year: int = 2022):
    """Fetch several indicators for several countries and return them side by side.

    Args:
        countries: Country codes handed to ``WorldBankCollector``.
        indicators: Indicator codes handed to ``WorldBankCollector``.
        year: Used as both the start and end year of the collector.

    Returns:
        A DataFrame indexed by country with one column per indicator, or an
        empty DataFrame when the collector returns no rows.

    Note:
        Assumes the fetched frame has ``country``, ``indicator`` and ``value``
        columns, as the pivot below requires -- TODO confirm against
        ``WorldBankCollector.fetch_data``.
    """
    from open_data.ingestion.world_bank import WorldBankCollector

    # Same year on both ends of the window
    single_year_collector = WorldBankCollector(
        countries=countries, start_year=year, end_year=year, indicators=indicators
    )
    long_frame = single_year_collector.fetch_data(indicators, countries)

    # Long -> wide: rows are countries, columns are indicators; skip the pivot when empty
    return (
        pd.DataFrame()
        if long_frame.empty
        else long_frame.pivot(index="country", columns="indicator", values="value")
    )

# Example: compare five countries on three indicators for 2022
comparison = compare_countries(
    countries=[
        "USA",
        "CHN",
        "DEU",
        "JPN",
        "GBR",
    ],
    indicators=[
        "NY.GDP.PCAP.CD",
        "NY.GDP.MKTP.KD.ZG",
        "FP.CPI.TOTL.ZG",
    ],
    year=2022,
)

# Display the resulting table
comparison

## Next Steps

1. **Ingest more data**: `opendata ingest worldbank`
2. **Explore the web dashboard**: `opendata web`
3. **Add more data sources**: IMF, UNHCR, ITU
4. **Create custom analyses**: Time series forecasting, clustering, etc.