<a href="https://colab.research.google.com/github/ellozam/ciencia-datos-notebooks/blob/main/proyecto3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
# Install required packages
!pip install wbdata prophet kaleido pycountry openml pandas plotly gradio

import openml
import pandas as pd
import plotly.express as px
import socket
import gradio as gr
from PIL import Image
import pycountry
import wbdata
from prophet import Prophet
from datetime import datetime, timedelta
import numpy as np

# Function to check internet connection
def comprobar_conexion():
    """Report whether an outbound TCP connection can be established.

    Tries to open a TCP connection to ``www.google.com`` on port 80 with a
    5 second timeout.

    Returns:
        bool: True if the connection was opened, False if the attempt failed
        with any ``OSError`` (DNS failure, timeout, refused connection,
        unreachable network, ...).
    """
    try:
        # The context manager closes the probe socket as soon as the check
        # succeeds, so repeated calls do not leak file descriptors.
        with socket.create_connection(("www.google.com", 80), timeout=5):
            return True
    except OSError:
        # socket.timeout and socket.gaierror are both OSError subclasses;
        # catching the base class also covers ConnectionRefusedError and
        # "network is unreachable", which would otherwise propagate.
        return False

# Function to standardize country names using pycountry
def standardize_country(name):
    """Map a free-form country name to pycountry's canonical name.

    Uses ``pycountry.countries.search_fuzzy`` and takes the first result.
    This is a best-effort lookup: if the search fails for any reason the
    input is returned unchanged.

    Args:
        name: Country name to normalise.

    Returns:
        The ``name`` attribute of the first fuzzy match, or ``name`` itself
        when no match could be obtained.
    """
    try:
        return pycountry.countries.search_fuzzy(name)[0].name
    except Exception:
        # Deliberately broad so the fallback stays best-effort, but no longer
        # a bare ``except:`` -- that also trapped KeyboardInterrupt and
        # SystemExit, making a long batch of lookups impossible to interrupt.
        return name

# Function to fetch and predict GDP data
def fetch_and_predict_gdp(countries, year_end):
    """Download GDP per capita and extend it to ``year_end`` with Prophet.

    Fetches World Bank indicator ``NY.GDP.PCAP.CD`` for all countries, keeps
    the years from 2010 up to ``min(current year, year_end)``, and, for every
    country with at least three observations, fits a Prophet model to fill
    the years after that country's last observation up to ``year_end``.

    Args:
        countries: Comma-separated country names to restrict the result to.
            An empty string (or one containing only blanks/commas) means
            "all countries". Names are normalised with
            ``standardize_country`` before matching.
        year_end: Last year to include. Coerced to ``int``.

    Returns:
        On success, a long-format ``DataFrame`` with columns ``country``,
        ``year`` and ``gdp_per_capita`` (negative forecasts clipped to 0).
        ``year`` holds the wide-format column labels, which downstream code
        converts with ``astype(int)``. On any failure, a ``str`` starting
        with ``"Error fetching/predicting GDP data:"`` is returned instead of
        raising, so callers must check ``isinstance(result, str)``.
    """
    try:
        # UI widgets may deliver the year as a float; range() and the
        # "<year>-12-31" date strings below both need a real int.
        year_end = int(year_end)

        current_year = datetime.now().year
        years = list(range(2010, min(current_year + 1, year_end + 1)))

        # Fetch GDP per capita from World Bank
        indicator = {"NY.GDP.PCAP.CD": "gdp_per_capita"}
        df_wb = wbdata.get_dataframe(indicator, country="all")

        # Reset index to make 'country' (and 'date') regular columns
        df_wb = df_wb.reset_index()

        # Standardize country names. The fuzzy lookup is expensive, so it is
        # run once per distinct name rather than once per row.
        name_map = {raw: standardize_country(raw) for raw in df_wb['country'].unique()}
        df_wb['country'] = df_wb['country'].map(name_map)

        # Filter for relevant years
        df_wb = df_wb[df_wb['date'].astype(int).isin(years)]

        # Wide format: one row per country, one column per year
        df_wb = df_wb.pivot_table(values='gdp_per_capita', index='country', columns='date').reset_index()

        # Apply the country filter *before* forecasting so Prophet is only
        # fitted for the countries that will actually be returned. Blank
        # entries (e.g. from a trailing comma) are skipped instead of being
        # passed to the fuzzy matcher.
        if countries:
            country_list = [standardize_country(c.strip()) for c in countries.split(',') if c.strip()]
            if country_list:
                df_wb = df_wb[df_wb['country'].isin(country_list)]

        # AI prediction for missing recent data
        df_predicted = df_wb.copy()
        for country in df_predicted['country'].tolist():
            # Historical observations for this country, in long format
            country_data = df_wb[df_wb['country'] == country].melt(
                id_vars=['country'], var_name='year', value_name='gdp')
            country_data = country_data.dropna(subset=['gdp']).copy()

            if len(country_data) < 3:  # Need at least 3 points for Prophet
                continue

            country_data['year'] = country_data['year'].astype(int)

            # Years after the last observation, up to and including year_end.
            # Checked before fitting so no model is trained when there is
            # nothing to forecast.
            last_year = int(country_data['year'].max())
            forecast_years = range(last_year + 1, year_end + 1)
            if not forecast_years:
                continue

            country_data['ds'] = pd.to_datetime(country_data['year'].astype(str) + '-12-31')
            country_data['y'] = country_data['gdp']

            # Train Prophet model (annual data, so no seasonal components)
            model = Prophet(yearly_seasonality=False, weekly_seasonality=False, daily_seasonality=False)
            model.fit(country_data[['ds', 'y']])

            # Build the year-end dates explicitly rather than through
            # pd.date_range(freq='Y'): pandas 2.2 deprecated the 'Y' alias in
            # favour of 'YE', and older versions do not know 'YE'.
            future = pd.DataFrame({'ds': pd.to_datetime([f'{y}-12-31' for y in forecast_years])})
            forecast = model.predict(future)[['ds', 'yhat']]

            # Write each forecast into the wide table, creating the year
            # column on first use.
            for year, gdp in zip(forecast['ds'].dt.year, forecast['yhat']):
                column = str(year)
                if column not in df_predicted.columns:
                    df_predicted[column] = np.nan
                df_predicted.loc[df_predicted['country'] == country, column] = gdp

        # Melt back to long format
        df_final = df_predicted.melt(id_vars=['country'], var_name='year', value_name='gdp_per_capita')
        df_final = df_final[df_final['year'].astype(int) <= year_end].copy()
        df_final['gdp_per_capita'] = df_final['gdp_per_capita'].clip(lower=0)  # Ensure no negative GDP

        return df_final

    except Exception as e:
        # Errors are reported as a string; the caller distinguishes them from
        # a DataFrame with isinstance(..., str).
        return f"Error fetching/predicting GDP data: {e}"

def cargar_y_filtrar(openml_id, min_gdp, max_gdp, countries, year_start, year_end, education_levels, employment_rate):
    """Build a GDP-per-capita choropleth image for the requested filters.

    Intended as the callback of a Gradio interface whose single output is an
    image component.

    Args:
        openml_id: Unused; accepted for interface compatibility.
        min_gdp: Lower bound (inclusive) on GDP per capita.
        max_gdp: Upper bound (inclusive) on GDP per capita.
        countries: Comma-separated country names, or "" for all countries.
        year_start: First year to include. Coerced to ``int``.
        year_end: Last year to include. Coerced to ``int``.
        education_levels: Unused; accepted for interface compatibility.
        employment_rate: Unused; accepted for interface compatibility.

    Returns:
        PIL.Image.Image: The rendered map, fully loaded into memory.

    Raises:
        gr.Error: If there is no internet connection, the data fetch fails,
            the filters leave no rows, or rendering fails. Previously these
            messages were *returned* as strings, but the output component is
            an image, so the text never reached the user; raising
            ``gr.Error`` is Gradio's mechanism for surfacing a message.

    Side effects:
        Writes ``mapa_pib_per_capita.png`` in the current working directory.
    """
    try:
        # UI widgets may deliver the years as floats; normalise so the
        # title and comparisons use plain ints.
        year_start, year_end = int(year_start), int(year_end)

        # Check internet connection
        if not comprobar_conexion():
            raise gr.Error("Error: No internet connection. Please check and try again.")

        # Fetch real-time GDP data with AI predictions
        df = fetch_and_predict_gdp(countries, year_end)
        if isinstance(df, str):  # Error message from the fetch step
            raise gr.Error(df)

        # Filter by year range (convert the year labels once)
        years = df['year'].astype(int)
        df = df[(years >= year_start) & (years <= year_end)]

        # Filter by GDP range; rows with missing GDP fail both comparisons
        # and are dropped here.
        df = df[(df['gdp_per_capita'] >= min_gdp) & (df['gdp_per_capita'] <= max_gdp)]

        # Check if data is empty
        if df.empty:
            raise gr.Error(
                f"No data available after filtering. Try adjusting GDP ({min_gdp}-{max_gdp}) or years ({year_start}-{year_end})."
            )

        # Create choropleth map
        fig_map = px.choropleth(
            df,
            locations="country",
            locationmode="country names",
            color="gdp_per_capita",
            hover_name="country",
            color_continuous_scale="Viridis",
            labels={'gdp_per_capita': 'GDP per Capita'},
            title=f"GDP per Capita by Country ({year_start}-{year_end})",
            animation_frame="year"  # Add animation for year if multiple years
        )

        # Save map as image.
        # NOTE(review): a static PNG cannot animate; presumably only the
        # figure's initial frame is rendered -- confirm this is intended.
        image_file = "mapa_pib_per_capita.png"
        fig_map.write_image(image_file, engine="kaleido", width=800, height=600)

        # Image.open is lazy and keeps the file open; copy() forces a full
        # decode so the handle can be closed before returning and a later
        # request overwriting the file cannot affect this image.
        with Image.open(image_file) as img:
            return img.copy()

    except gr.Error:
        # Already a user-facing message; do not wrap it a second time.
        raise
    except Exception as e:
        raise gr.Error(f"Error generating map: {e}") from e

# Gradio interface
# Input widgets, in the positional order of cargar_y_filtrar's parameters.
# Three of them feed parameters the callback ignores; their labels say so.
input_components = [
    gr.Number(label="OpenML Dataset ID (unused, kept for compatibility)", value=45104),
    gr.Slider(minimum=1000, maximum=200000, step=100, label="Min GDP per Capita", value=1000),
    gr.Slider(minimum=1000, maximum=200000, step=100, label="Max GDP per Capita", value=100000),
    gr.Textbox(label="Countries (comma-separated, e.g., 'United States,Canada')", value=""),
    gr.Slider(minimum=2010, maximum=2025, step=1, label="Start Year", value=2015),
    gr.Slider(minimum=2010, maximum=2025, step=1, label="End Year", value=2025),
    gr.Textbox(label="Education Levels (unused)", value=""),
    gr.Slider(minimum=0, maximum=100, step=1, label="Min Employment Rate (unused)", value=0),
]

# Single output: the rendered choropleth image.
output_components = [gr.Image(label="GDP per Capita Map")]

demo = gr.Interface(
    fn=cargar_y_filtrar,
    inputs=input_components,
    outputs=output_components,
    title="Real-Time Economic Data Explorer with AI",
    description="Visualize AI-updated GDP per capita by country using World Bank data and Prophet predictions.",
)

# Start the web app.
demo.launch()



Collecting wbdata
  Downloading wbdata-1.0.0-py3-none-any.whl.metadata (2.6 kB)
Collecting kaleido
  Downloading kaleido-0.2.1-py2.py3-none-manylinux1_x86_64.whl.metadata (15 kB)
Collecting appdirs<2.0,>=1.4 (from wbdata)
  Downloading appdirs-1.4.4-py2.py3-none-any.whl.metadata (9.0 kB)
Collecting backoff<3.0.0,>=2.2.1 (from wbdata)
  Downloading backoff-2.2.1-py3-none-any.whl.metadata (14 kB)
Collecting dateparser<2.0.0,>=1.2.0 (from wbdata)
  Downloading dateparser-1.2.1-py3-none-any.whl.metadata (29 kB)
Collecting decorator<6.0.0,>=5.1.1 (from wbdata)
  Downloading decorator-5.2.1-py3-none-any.whl.metadata (3.9 kB)
Collecting shelved-cache<0.4.0,>=0.3.1 (from wbdata)
  Downloading shelved_cache-0.3.1-py3-none-any.whl.metadata (4.7 kB)
Collecting tabulate<0.9.0,>=0.8.5 (from wbdata)
  Downloading tabulate-0.8.10-py3-none-any.whl.metadata (25 kB)
Downloading wbdata-1.0.0-py3-none-any.whl (18 kB)
Downloading kaleido-0.2.1-py2.py3-none-manylinux1_x86_64.whl (79.9 MB)
[2K   [90m━━━━━━

Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://5fe3c716eb67dd5aeb.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


