In [1]:
# Transformers installation
! pip install transformers datasets
# To install from source instead of the last release, comment the command above and uncomment the following one.
# ! pip install git+https://github.com/huggingface/transformers.git

Collecting datasets
  Downloading datasets-3.3.0-py3-none-any.whl.metadata (19 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)
Downloading datasets-3.3.0-py3-none-any.whl (484 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m484.9/484.9 kB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m6.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading multiprocess-0.70.16-py311-none-any.whl (143 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m143.5/143.5 kB[0m [31m6.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading xx

# Pipeline


In [1]:
import gradio as gr
from huggingface_hub import InferenceClient
from fastapi import FastAPI
from fastapi.openapi.docs import get_swagger_ui_html
from fastapi.responses import HTMLResponse
from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel
from typing import List, Dict
import uvicorn
import threading

# Função para exibir o Swagger UI
def swagger_ui():
    return get_swagger_ui_html(
        openapi_url="/openapi.json",
        title="Swagger UI",
    )

# Inicializa a API FastAPI
app = FastAPI(title="Sugestão de Colunas API", version="1.0", description="API para sugerir colunas com base em embeddings.")

# Inicializa o cliente de inferência do Hugging Face Hub
client = InferenceClient()

# Lista de colunas esperadas
expected_columns = ["data", "valor", "categoria", "produto", "preço", "quantidade"]

# Modelo de entrada da API
class ColumnRequest(BaseModel):
    columns: List[str]

# Modelo de saída da API
class ColumnResponse(BaseModel):
    suggestions: Dict[str, Dict[str, float]]

@app.post("/suggest-columns", response_model=ColumnResponse, summary="Sugere colunas com base em embeddings")
async def suggest_columns(request: ColumnRequest):
    suggestions = {}

    # Obtém os embeddings das colunas esperadas usando a Inference API
    expected_embeddings = client.encode(expected_columns, model="Supabase/gte-small")

    for col in request.columns:
        # Obtém o embedding da coluna atual usando a Inference API
        col_embedding = client.encode(col, model="Supabase/gte-small")

        # Calcula a similaridade de cosseno
        similarities = [cosine_similarity(col_embedding, emb) for emb in expected_embeddings]

        # Encontra a coluna esperada com maior similaridade
        best_match_idx = similarities.index(max(similarities))
        best_match = expected_columns[best_match_idx]
        score = similarities[best_match_idx]

        suggestions[col] = {"match": best_match, "confidence": round(score, 2)}

    return {"suggestions": suggestions}

# Função para calcular a similaridade de cosseno
def cosine_similarity(a, b):
    dot_product = sum(x * y for x, y in zip(a, b))
    magnitude_a = sum(x**2 for x in a)**0.5
    magnitude_b = sum(x**2 for x in b)**0.5
    return dot_product / (magnitude_a * magnitude_b)

# Crie uma rota para o Swagger UI
app.mount("/static", StaticFiles(directory="static"), name="static")
app.get("/docs", response_class=HTMLResponse)(swagger_ui)

# Interface do Gradio
iface = gr.Interface(
    fn=suggest_columns,
    inputs=gr.JSON(label="Colunas"),
    outputs=gr.JSON(label="Sugestões"),
    title="Sugestão de Colunas",
    description="API para sugerir colunas com base em embeddings.",
)

# Inicie a API FastAPI e o Gradio
if __name__ == "__main__":
  config = uvicorn.Config(app, host="0.0.0.0", port=8000)
  server = uvicorn.Server(config)

  # Inicie o Gradio em uma thread separada
  threading.Thread(target=iface.launch, kwargs={"share": True}).start()

  # Inicie o servidor FastAPI
  server.run()

RuntimeError: asyncio.run() cannot be called from a running event loop

In [5]:
!pip install gradio fastapi uvicorn python-multipart huggingface_hub sentence-transformers

Collecting gradio
  Downloading gradio-5.16.0-py3-none-any.whl.metadata (16 kB)
Collecting python-multipart
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.7.0 (from gradio)
  Downloading gradio_client-1.7.0-py3-none-any.whl.metadata (7.1 kB)
Collecting markupsafe~=2.0 (from gradio)
  Downloading MarkupSafe-2.1.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.0 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting ruff>=0.9.3 (from gradio)
  Downloading ruff-0.9.6-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)
Collecting safehttpx<0.2.0,>=0.1.6 (from gradio)
  Downloading safehttpx-0.1.6-py3-none-any.whl.metadata (4.2 kB)
Collec