<a href="https://colab.research.google.com/github/ellozam/ciencia-datos-notebooks/blob/main/proyecto2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# Install the notebook's third-party dependencies if they are not already available
!pip install openml gradio plotly seaborn scikit-learn

import openml
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import gradio as gr
import plotly.express as px
import socket
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# Most recently trained model: assigned by cargar_y_filtrar and read by
# predecir_pib; stays None until a training run has succeeded.
modelo_entrenado = None  # Module-level global holding the model

def comprobar_conexion(host="www.google.com", port=80, timeout=5):
    """Report whether an outbound TCP connection can be established.

    Args:
        host: Host name or address to probe. Defaults to ``"www.google.com"``.
        port: TCP port to probe. Defaults to ``80``.
        timeout: Seconds to wait for the connection before giving up.

    Returns:
        ``True`` if the connection was opened, ``False`` if it failed for any
        network-level reason (DNS failure, timeout, refused, unreachable).
    """
    try:
        # The context manager closes the probe socket right away; without it
        # the socket stays open until it is garbage collected.
        with socket.create_connection((host, port), timeout=timeout):
            return True
    except OSError:
        # OSError is the base class of socket.timeout and socket.gaierror and
        # also covers refused/unreachable connections, so no network failure
        # escapes as an exception.
        return False

def entrenar_modelo_regresion(data):
    """Fit a random-forest pipeline that predicts ``gdp_per_capita``.

    The predictors are ``education_level``, ``employment_rate``, ``year`` and
    ``country``. ``education_level`` and ``country`` are one-hot encoded
    (unknown categories are ignored at prediction time); the remaining
    predictors are passed through unchanged. Rows with a missing value in any
    predictor or in the target are dropped before fitting.

    Args:
        data: DataFrame that must contain the four predictors and the target.

    Returns:
        The fitted ``Pipeline``, or ``None`` if anything fails. On failure the
        error is printed instead of being raised.
    """
    objetivo = 'gdp_per_capita'
    categoricas = ['education_level', 'country']
    predictoras = ['education_level', 'employment_rate', 'year', 'country']

    try:
        necesarias = predictoras + [objetivo]
        for nombre in necesarias:
            if nombre not in data.columns:
                raise ValueError("Faltan columnas necesarias.")

        completos = data.dropna(subset=necesarias)

        codificador = ColumnTransformer(
            transformers=[
                ('cat', OneHotEncoder(handle_unknown='ignore'), categoricas),
            ],
            remainder='passthrough',
        )
        bosque = RandomForestRegressor(n_estimators=100, random_state=42)
        pipeline = Pipeline(steps=[
            ('preprocessor', codificador),
            ('regressor', bosque),
        ])
        pipeline.fit(completos[predictoras], completos[objetivo])
    except Exception as exc:
        # Best-effort training: report the problem and signal failure with None.
        print(f"Error al entrenar el modelo: {exc}")
        return None

    return pipeline

def _separar_lista(texto):
    """Split a comma-separated string into stripped, non-empty items."""
    if not texto:
        return []
    return [parte.strip() for parte in texto.split(",") if parte.strip()]


def cargar_y_filtrar(openml_id, min_gdp, max_gdp, countries, year_start, year_end,
                     education_levels, employment_rate, columnas_a_mostrar, filtro_condicional):
    """Download an OpenML dataset, filter it, plot it and train the GDP model.

    The dataset's default target is stored in a ``gdp_per_capita`` column, the
    rows are filtered, up to three charts are shown, and ``modelo_entrenado``
    is replaced with a model trained on the filtered rows.

    Args:
        openml_id: OpenML dataset id; converted to ``int`` before the lookup.
        min_gdp: Exclusive lower bound for ``gdp_per_capita``.
        max_gdp: Exclusive upper bound for ``gdp_per_capita``.
        countries: Comma-separated country names to keep; empty means no filter.
        year_start: First year to keep (inclusive). The year filter is applied
            only when both ``year_start`` and ``year_end`` are truthy.
        year_end: Last year to keep (inclusive).
        education_levels: Comma-separated education levels to keep; empty means
            no filter.
        employment_rate: Minimum ``employment_rate`` (inclusive); a falsy value
            disables the filter.
        columnas_a_mostrar: Comma-separated column names to show in the result
            and the charts; empty means all columns.
        filtro_condicional: Optional ``DataFrame.query`` expression applied to
            the rows.

    Returns:
        The first 20 rows of the filtered data as a DataFrame, or a string
        with an error message when something fails.
        NOTE(review): the UI wires this return value to a ``gr.Dataframe``;
        confirm how the string error messages are rendered there.
    """
    global modelo_entrenado
    try:
        if not comprobar_conexion():
            return "Error: No hay conexión a Internet."

        # The UI's number field can deliver a float, while OpenML only accepts
        # an int (or str) dataset id.
        dataset = openml.datasets.get_dataset(int(openml_id))
        # default_target_attribute is a plain attribute, not a method; calling
        # it raises "'str' object is not callable".
        X, y, _, _ = dataset.get_data(target=dataset.default_target_attribute)
        X['gdp_per_capita'] = y

        filtered = X[(X['gdp_per_capita'] > min_gdp) & (X['gdp_per_capita'] < max_gdp)]

        countries_list = _separar_lista(countries)
        if countries_list:
            if 'country' not in filtered.columns:
                return "Error: No se encuentra la columna 'country'."
            filtered = filtered[filtered['country'].isin(countries_list)]

        if year_start and year_end:
            if 'year' not in filtered.columns:
                return "Error: No se encuentra la columna 'year'."
            filtered = filtered[(filtered['year'] >= year_start) & (filtered['year'] <= year_end)]

        levels = _separar_lista(education_levels)
        if levels:
            if 'education_level' not in filtered.columns:
                return "Error: No se encuentra la columna 'education_level'."
            filtered = filtered[filtered['education_level'].isin(levels)]

        if employment_rate:
            if 'employment_rate' not in filtered.columns:
                return "Error: No se encuentra la columna 'employment_rate'."
            filtered = filtered[filtered['employment_rate'] >= employment_rate]

        # Apply the user's custom row condition.
        # NOTE(security): DataFrame.query evaluates an expression typed by the
        # user. Restrict or validate it before exposing this app to untrusted
        # users (e.g. through a public share link).
        if filtro_condicional:
            try:
                filtered = filtered.query(filtro_condicional)
            except Exception as e:
                return f"Error en condición de filtrado: {e}"

        # Keep the row-filtered frame with every column for training: the
        # column selection below is for display only and must not remove the
        # features the model needs.
        datos_entrenamiento = filtered

        # Keep only the columns selected for display.
        columnas = _separar_lista(columnas_a_mostrar)
        if columnas:
            missing = [col for col in columnas if col not in filtered.columns]
            if missing:
                return f"Columnas no encontradas: {missing}"
            filtered = filtered[columnas]

        # Correlation heatmap (needs at least two numeric columns).
        if filtered.select_dtypes(include='number').shape[1] > 1:
            corr = filtered.corr(numeric_only=True)
            plt.figure(figsize=(10, 8))
            sns.heatmap(corr, annot=True, cmap='coolwarm', fmt=".2f", linewidths=0.5)
            plt.title("Matriz de Correlación")
            plt.show()
            plt.close()  # release the figure so repeated clicks do not pile up

        # Interactive Plotly scatter plot.
        if 'gdp_per_capita' in filtered.columns and 'education_level' in filtered.columns:
            # Colour by country only when that column is still present;
            # otherwise px.scatter raises on the missing column name.
            color_col = 'country' if 'country' in filtered.columns else None
            fig = px.scatter(filtered, x='gdp_per_capita', y='education_level', color=color_col,
                             title="PIB vs Nivel Educativo")
            fig.show()

        # Bar chart of GDP per capita by country.
        if 'country' in filtered.columns and 'gdp_per_capita' in filtered.columns:
            plt.figure(figsize=(12, 6))
            sns.barplot(x='country', y='gdp_per_capita', data=filtered)
            plt.xticks(rotation=90)
            plt.title('Distribución del PIB per cápita por País')
            plt.tight_layout()
            plt.show()
            plt.close()

        # Train the model on the filtered rows (all columns).
        modelo_entrenado = entrenar_modelo_regresion(datos_entrenamiento)

        return filtered.head(20)

    except Exception as e:
        return f"Error: {e}"

def predecir_pib(education_level, employment_rate, year, country):
    """Estimate GDP per capita for one observation with the trained model.

    Args:
        education_level: Education level of the observation.
        employment_rate: Employment rate of the observation.
        year: Year of the observation.
        country: Country of the observation.

    Returns:
        A message string: the estimate rounded to two decimals, a prompt to
        train the model first when none is available, or the error text if
        the prediction fails.
    """
    modelo = modelo_entrenado
    if modelo is None:
        return "Primero debes cargar y filtrar los datos para entrenar el modelo."

    # Single-row frame using the feature column names the model was fitted on.
    fila = {
        'education_level': education_level,
        'employment_rate': employment_rate,
        'year': year,
        'country': country,
    }
    entrada = pd.DataFrame([fila])

    try:
        estimacion = modelo.predict(entrada)[0]
        mensaje = f"🔮 PIB per cápita estimado: ${round(estimacion, 2)}"
    except Exception as error:
        mensaje = f"Error en la predicción: {error}"
    return mensaje

# User interface: a filtering panel wired to cargar_y_filtrar and a
# prediction panel wired to predecir_pib.
with gr.Blocks() as demo:
    gr.Markdown("## 🌍 Explorador Económico + IA: Filtrado y Predicción de PIB")

    with gr.Row():
        # precision=0 makes the component hand the callback an int; without
        # it the dataset id arrives as a float, which OpenML rejects.
        openml_id = gr.Number(label="ID del Dataset en OpenML", value=45104, precision=0)
        min_gdp = gr.Slider(1000, 50000, step=100, label="PIB mínimo", value=1000)
        max_gdp = gr.Slider(1000, 50000, step=100, label="PIB máximo", value=50000)

    with gr.Row():
        countries = gr.Textbox(label="Países (ej. 'USA,Canada')", value="USA")
        year_start = gr.Slider(1900, 2025, step=1, label="Año de inicio", value=2000)
        year_end = gr.Slider(1900, 2025, step=1, label="Año de fin", value=2020)

    with gr.Row():
        education_levels = gr.Textbox(label="Niveles educativos (ej. 'High School,Bachelor')", value="High School")
        employment_rate = gr.Slider(0, 100, step=1, label="Tasa de empleo mínima (%)", value=50)

    with gr.Row():
        columnas_a_mostrar = gr.Textbox(label="Columnas a mostrar (ej. 'country,year,gdp_per_capita')")
        filtro_condicional = gr.Textbox(label="Condición de filas (ej. 'employment_rate > 60')")

    output_table = gr.Dataframe(label="Datos Filtrados")
    btn_filtrar = gr.Button("🔍 Cargar y Filtrar Datos")

    # The input order must match the positional parameters of cargar_y_filtrar.
    btn_filtrar.click(
        fn=cargar_y_filtrar,
        inputs=[
            openml_id, min_gdp, max_gdp, countries, year_start, year_end,
            education_levels, employment_rate,
            columnas_a_mostrar, filtro_condicional
        ],
        outputs=output_table
    )

    gr.Markdown("## 🤖 Predicción de PIB per cápita con IA")

    with gr.Row():
        input_edu = gr.Textbox(label="Nivel Educativo", value="High School")
        input_emp = gr.Slider(0, 100, step=1, label="Tasa de Empleo (%)", value=60)
        input_year = gr.Slider(1900, 2025, step=1, label="Año", value=2020)
        input_country = gr.Textbox(label="País", value="USA")

    btn_predecir = gr.Button("🎯 Predecir PIB per cápita")
    output_pred = gr.Textbox(label="Resultado de la Predicción")

    # The input order must match the positional parameters of predecir_pib.
    btn_predecir.click(
        fn=predecir_pib,
        inputs=[input_edu, input_emp, input_year, input_country],
        outputs=output_pred
    )

demo.launch()



Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://0002949591cd1e8ae3.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


