<a href="https://colab.research.google.com/github/ethane66/MSFT-Talent-AI/blob/main/Proyecto_Final_NLP_Terminado.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

IMPORTANTE:


*   Usar la GPU T4 de Google Colab.
*   La API de Google Cloud tiene que ser "Google Maps Platform".
Creo que con el proyecto "My First Project" que Google Cloud Platform te crea por defecto en tu consola puedes sacar la clave de API de ese mismo proyecto; así no tienes que crear otro y usas directamente el que ya tienes.



In [1]:
!pip install pysentimiento
!pip install pysentimiento gradio
import pandas as pd
import requests
import time
from datetime import datetime
from pysentimiento import create_analyzer
import gradio as gr

class GoogleMapsReviewExtractor:
    """Client for the Google Places Text Search and Place Details endpoints.

    Searches for places matching a free-text query and fetches the reviews
    returned for each place.
    """

    # Values of the JSON ``status`` field that do not indicate a failure.
    _OK_STATUSES = ("OK", "ZERO_RESULTS")

    def __init__(self, api_key, timeout=10):
        """Store the credentials and endpoint URLs.

        Args:
            api_key: API key sent with every request.
            timeout: Seconds to wait for each HTTP response before giving up.
        """
        self.api_key = api_key
        self.timeout = timeout
        self.base_url = "https://maps.googleapis.com/maps/api/place/textsearch/json"
        self.details_url = "https://maps.googleapis.com/maps/api/place/details/json"

    def _make_api_request(self, url, params):
        """GET ``url`` with ``params`` and return the decoded JSON body.

        Returns:
            The parsed JSON, or None (after printing the error) when the
            request fails, the server answers with an HTTP error status, or
            the body cannot be decoded as JSON.
        """
        try:
            # Without a timeout a stalled connection would block forever.
            response = requests.get(url, params=params, timeout=self.timeout)
            response.raise_for_status()
            return response.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers JSON decoding failures on older `requests`.
            print(f"API Error: {e}")
            return None

    def _status_ok(self, data):
        """Return True when the API-level ``status`` field reports success.

        Problems such as a rejected key or an exhausted quota are reported
        inside the JSON body, so they are printed here instead of being
        silently treated as "no results". A missing ``status`` field is
        treated as success.
        """
        status = data.get("status")
        if status is None or status in self._OK_STATUSES:
            return True
        detail = data.get("error_message", "")
        print(f"API Error: {status} {detail}".rstrip())
        return False

    def search_institutes(self, query, max_results=60, page_delay=2.0,
                          max_token_retries=3):
        """Run a text search and collect results across result pages.

        Args:
            query: Free-text search query.
            max_results: Maximum number of places to return.
            page_delay: Seconds to wait before requesting the next page (and
                between retries of a page token that is not valid yet).
            max_token_retries: How many times a page request answered with
                ``INVALID_REQUEST`` is retried before giving up.

        Returns:
            A list with at most ``max_results`` raw place dicts.
        """
        if max_results <= 0:
            return []

        params = {
            "query": query,
            "key": self.api_key,
            "language": "es",
            "region": "es"
        }

        all_results = []
        token_retries = 0
        while True:
            data = self._make_api_request(self.base_url, params)
            if not data:
                break

            # A freshly issued page token may not be usable yet; the API then
            # answers INVALID_REQUEST and the same token can be retried.
            if (
                data.get("status") == "INVALID_REQUEST"
                and "pagetoken" in params
                and token_retries < max_token_retries
            ):
                token_retries += 1
                time.sleep(page_delay)
                continue

            if not self._status_ok(data):
                break
            token_retries = 0

            all_results.extend(data.get("results", []))
            if len(all_results) >= max_results:
                break

            next_page_token = data.get("next_page_token")
            if not next_page_token:
                break

            print("⏳ Cargando más resultados...")
            time.sleep(page_delay)
            # Follow-up pages are requested with only the token and the key.
            params = {
                "pagetoken": next_page_token,
                "key": self.api_key
            }

        return all_results[:max_results]

    def get_reviews(self, place_id, max_reviews=10):
        """Fetch the reviews of one place.

        Args:
            place_id: Identifier of the place, as found in a search result.
            max_reviews: Upper bound on the number of reviews returned.
                NOTE(review): Google's documentation states Place Details
                returns at most five reviews per place — confirm whether a
                larger value can ever have an effect.

        Returns:
            A list of dicts with keys ``text``, ``rating``, ``time`` (a naive
            local-time datetime built from the review's Unix timestamp) and
            ``institute_name``. Empty when the request fails, the API reports
            an error status, or the response has no ``result``.
        """
        params = {
            "place_id": place_id,
            "key": self.api_key,
            "fields": "review,name,rating",
            "language": "es"
        }

        data = self._make_api_request(self.details_url, params)
        if not data or not self._status_ok(data) or "result" not in data:
            return []

        result = data["result"]
        reviews = result.get("reviews", [])[:max_reviews]

        return [{
            "text": review.get("text", ""),
            "rating": review.get("rating", 0),
            "time": datetime.fromtimestamp(review.get("time", 0)),
            "institute_name": result.get("name", "")
        } for review in reviews]

class SentimentAnalyzer:
    """Sentiment classifier for Spanish text that reports Spanish labels."""

    def __init__(self):
        """Create the underlying analyzer for the Spanish sentiment task."""
        self.analyzer = create_analyzer(lang="es", task="sentiment")

    def analyze_review(self, text):
        """Classify ``text`` and return its sentiment with a confidence score.

        Returns:
            A dict with ``sentiment`` ("bueno" for a "POS" label, "neutro"
            for "NEU", "malo" for anything else) and ``confidence`` (the
            probability of the predicted label rounded to two decimals), or
            None when the prediction raises; the error is printed.
        """
        # Labels that are not listed here fall through to "malo".
        label_names = {"POS": "bueno", "NEU": "neutro"}
        try:
            prediction = self.analyzer.predict(text)
            label = prediction.output
            score = prediction.probas[label]
            return {
                "sentiment": label_names.get(label, "malo"),
                "confidence": round(score, 2),
            }
        except Exception as e:
            print(f"Analysis error: {str(e)}")
            return None

# Most recent analysis results, shared between the Gradio callbacks:
# analizar() overwrites it and filtrar_datos() works on a copy of it.
global_df = pd.DataFrame()

_shared_analyzer = None


def _get_shared_analyzer():
    """Return one shared SentimentAnalyzer, building it on first use."""
    global _shared_analyzer
    if _shared_analyzer is None:
        _shared_analyzer = SentimentAnalyzer()
    return _shared_analyzer


def analizar(api_key, comunidad):
    """Search ASIR training centres in ``comunidad`` and analyse their reviews.

    Stores the analysed reviews in the module-level ``global_df`` so that
    ``filtrar_datos`` can filter them later.

    Args:
        api_key: Google Maps API key used for the search and detail requests.
        comunidad: Region name appended to the search query.

    Returns:
        A 4-tuple for the Gradio outputs: summary text, an update for the
        centre dropdown, an update for the sentiment dropdown, and the
        DataFrame of analysed reviews (None when no reviews were found).
    """
    global global_df
    extractor = GoogleMapsReviewExtractor(api_key)
    # Reuse one analyzer across clicks instead of rebuilding it every time.
    analyzer = _get_shared_analyzer()

    query = f"FP Grado Superior ASIR {comunidad}"
    resultados = extractor.search_institutes(query)

    all_reviews = []
    for centro in resultados:
        place_id = centro.get("place_id")
        if not place_id:
            continue

        reviews = extractor.get_reviews(place_id)
        for review in reviews:
            analysis = analyzer.analyze_review(review["text"])
            if analysis:
                review.update(analysis)
                all_reviews.append(review)

    if not all_reviews:
        # Drop the previous run's data so the filters cannot show stale rows.
        global_df = pd.DataFrame()
        return (
            "❌ No se encontraron reseñas.",
            gr.update(choices=[], value=None),
            gr.update(choices=[], value=None),
            None,
        )

    df = pd.DataFrame(all_reviews)
    global_df = df.copy()

    resumen = (
        f"🔢 Total reseñas: {len(df)}\n"
        f"⭐ Promedio puntuación: {round(df['rating'].mean(), 2)}\n"
        f"🏅 Mejor centro: {df.groupby('institute_name')['rating'].mean().idxmax()}"
    )

    institutos = sorted(df["institute_name"].unique())
    # Clear earlier selections: a value picked in a previous run may not be
    # among the new choices.
    return (
        resumen,
        gr.update(choices=institutos, value=None),
        gr.update(choices=["bueno", "neutro", "malo"], value=None),
        df,
    )

def filtrar_datos(nombre_centro, sentimiento, orden):
    """Build the display table from the stored reviews.

    Args:
        nombre_centro: Centre name to keep; falsy keeps every centre.
        sentimiento: Sentiment label to keep; falsy keeps every sentiment.
        orden: "Puntuación" or "Confianza" sorts descending by that column;
            any other value leaves the order untouched.

    Returns:
        A DataFrame with Spanish column headers and review texts cut to 200
        characters, or an empty DataFrame when no analysis is stored.
    """
    tabla = global_df.copy()
    if tabla.empty:
        return pd.DataFrame()

    if nombre_centro:
        tabla = tabla[tabla["institute_name"] == nombre_centro]
    if sentimiento:
        tabla = tabla[tabla["sentiment"] == sentimiento]

    # Map the radio label to the underlying column; None means "do not sort".
    columna_orden = {"Puntuación": "rating", "Confianza": "confidence"}.get(orden)
    if columna_orden is not None:
        tabla = tabla.sort_values(by=columna_orden, ascending=False)

    # Insertion order of this mapping is also the displayed column order.
    encabezados = {
        "institute_name": "Centro",
        "text": "Reseña",
        "rating": "Puntuación",
        "sentiment": "Sentimiento",
        "confidence": "Confianza",
        "time": "Fecha",
    }
    tabla = tabla.rename(columns=encabezados)
    tabla = tabla[list(encabezados.values())]

    def _acortar(texto):
        # Keep short texts intact; cut long ones and mark the cut.
        if len(texto) <= 200:
            return texto
        return texto[:200] + "..."

    tabla["Reseña"] = tabla["Reseña"].apply(_acortar)
    return tabla

# Dark-theme CSS overrides handed to gr.Blocks(css=...): near-black page and
# container backgrounds with white text, dark inputs, orange buttons, and
# black table cells limited to 300px on a single line with an ellipsis.
# NOTE(review): `.svelte-1ipelgc` looks like a build-generated class name and
# may not match other Gradio versions — confirm.
custom_css = """
body {
    background-color: #121212;
    color: white;
}
.gradio-container {
    background-color: #121212;
    color: white;
}
label, .label, .svelte-1ipelgc {
    color: white !important;
}
textarea, input, select {
    background-color: #1e1e1e !important;
    color: white !important;
}
button {
    background-color: #ff8c00 !important;
    color: white !important;
}
table {
    background-color: #000 !important;
    color: white !important;
}
td, th {
    background-color: #000 !important;
    color: white !important;
    max-width: 300px;
    overflow: hidden;
    text-overflow: ellipsis;
    white-space: nowrap;
}
"""

# Gradio interface: an input row that launches the analysis, a summary box,
# a row of filter controls, and the table showing the (filtered) reviews.
with gr.Blocks(title="🔎 Análisis de Sentimientos FP ASIR", css=custom_css) as demo:
    gr.Markdown("## 🧠 Análisis de Reseñas de Centros FP en ASIR")

    # Inputs for a new analysis run.
    with gr.Row():
        api_key = gr.Textbox(label="🔑 API Key de Google Maps", type="password")
        comunidad = gr.Textbox(label="🌍 Comunidad Autónoma", value="Madrid")
        analizar_btn = gr.Button("Analizar")

    resumen = gr.Textbox(label="📋 Resumen del análisis")

    # Filter and sort controls; the dropdowns are filled in by analizar().
    with gr.Row():
        filtro_centro = gr.Dropdown(label="🏫 Filtrar por centro", choices=[])
        filtro_sentimiento = gr.Dropdown(label="💬 Filtrar por sentimiento", choices=[])
        orden = gr.Radio(["Ninguno", "Puntuación", "Confianza"], label="📊 Ordenar por")

    tabla_resultado = gr.Dataframe(label="📄 Reseñas filtradas")

    analizar_btn.click(
        fn=analizar,
        inputs=[api_key, comunidad],
        outputs=[resumen, filtro_centro, filtro_sentimiento, tabla_resultado],
    )

    # Every filter control re-runs the same filter with the same three inputs.
    entradas_filtro = [filtro_centro, filtro_sentimiento, orden]
    for control in entradas_filtro:
        control.change(fn=filtrar_datos, inputs=entradas_filtro, outputs=tabla_resultado)

# share=True asks Gradio for a public share link in addition to the local one.
demo.launch(share=True)


Collecting pysentimiento
  Downloading pysentimiento-0.7.3-py3-none-any.whl.metadata (7.7 kB)
Collecting datasets>=2.10.1 (from pysentimiento)
  Downloading datasets-3.5.0-py3-none-any.whl.metadata (19 kB)
Collecting emoji>=1.6.1 (from pysentimiento)
  Downloading emoji-2.14.1-py3-none-any.whl.metadata (5.7 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets>=2.10.1->pysentimiento)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets>=2.10.1->pysentimiento)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets>=2.10.1->pysentimiento)
  Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.12.0,>=2023.1.0 (from fsspec[http]<=2024.12.0,>=2023.1.0->datasets>=2.10.1->pysentimiento)
  Downloading fsspec-2024.12.0-py3-none-any.whl.metadata (11 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch!=2.0.1,>=2.0.0->pyse

