In [4]:
# 🔹 2. Import the required libraries
from openai import OpenAI
import os
import pandas as pd
from dotenv import load_dotenv

# Called before the key is read below so that values from a .env file
# (python-dotenv) can populate the environment.
load_dotenv()
# 🔹 3. Configure the API key
# Set your API key as an environment variable or directly here (not recommended in public notebooks)
# os.environ["OPENAI_API_KEY"] = "sk-xxxx"
# os.getenv returns None when OPENAI_API_KEY is not set; that value is passed to OpenAI unchanged.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

In [7]:
# 🔹 4. Function that sends a prompt and captures the logprobs
def ask_gpt_with_logprobs(
    prompt: str,
    model: str = "gpt-4o-mini",
    top_logprobs: int = 5,
    max_tokens: int = 10,
    top_p: float = 0.9,
):
    """
    Send a prompt to a chat model and return the reply with per-token log-probabilities.

    Args:
        prompt: Text sent as the single user message of the conversation.
        model: Name of the chat model to query.
        top_logprobs: Number of alternative tokens requested for each generated position.
        max_tokens: Upper bound on the number of generated tokens. Defaults to 10,
            the value that was previously hard-coded.
        top_p: Sampling parameter forwarded to the API. Defaults to 0.9, the value
            that was previously hard-coded.

    Returns:
        A ``(message, df)`` tuple. ``message`` is the content of the first choice
        exactly as returned by the API (it may be ``None``). ``df`` is a DataFrame
        with one row per generated token and the columns ``Token``, ``LogProb`` and
        ``Top Alternatives`` (a list of ``(token, logprob)`` tuples). ``df`` has
        zero rows when the response carries no token log-probabilities.
    """
    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        logprobs=True,
        top_p=top_p,
        top_logprobs=top_logprobs,
        max_tokens=max_tokens,
    )

    choice = response.choices[0]

    # Main generated text
    message = choice.message.content

    # Both `logprobs` and `logprobs.content` are nullable in the response schema;
    # fall back to "no tokens" instead of failing on attribute access / iteration.
    token_data = getattr(choice.logprobs, "content", None) or []

    # One entry per generated token, kept as parallel lists for the table below
    tokens = [entry.token for entry in token_data]
    logprobs = [entry.logprob for entry in token_data]
    # Keep the top alternatives of every position for later analysis
    top_alternatives = [
        [(candidate.token, candidate.logprob) for candidate in (entry.top_logprobs or [])]
        for entry in token_data
    ]

    # Build the table
    df = pd.DataFrame({
        "Token": tokens,
        "LogProb": logprobs,
        "Top Alternatives": top_alternatives
    })

    return message, df


In [8]:
# 🔹 5. Test the function
# Prompt sent to the model: it asks for a sentence to be completed.
prompt = "Complete a frase: O rato roeu a roupa do rei de ."
# Alternative prompt kept for experimentation:
# prompt = "Complete a frase: Deus ajuda quem "
# Unpack the generated text and the per-token DataFrame returned by the helper.
resposta, tabela_logprobs = ask_gpt_with_logprobs(prompt)


# 🔹 6. Display the results
print("üß† Resposta do GPT:\n", resposta)
print("\nüìä Tokens e LogProbs:\n")
# display() is the IPython rich-display helper, so the DataFrame renders as a table.
display(tabela_logprobs)

üß† Resposta do GPT:
 O rato roeu a roupa do rei de Roma

üìä Tokens e LogProbs:



Unnamed: 0,Token,LogProb,Top Alternatives
0,O,-0.006069579,"[(O, -0.006069579161703587), (""O, -5.256069660..."
1,rato,-3.128163e-07,"[( rato, -3.128163257315464e-07), ( rat, -15.8..."
2,ro,0.0,"[( ro, 0.0), (-ro, -17.875), (ro, -18.625), ( ..."
3,eu,-3.128163e-07,"[(eu, -3.128163257315464e-07), (Eu, -15.625), ..."
4,a,0.0,"[( a, 0.0), ( A, -18.75), ( √†, -20.25), ( o, -..."
5,roupa,-2.27231e-05,"[( roupa, -2.2723104848410003e-05), ( ropa, -1..."
6,do,0.0,"[( do, 0.0), ( da, -19.75), (do, -20.875), ( d..."
7,rei,-3.988843e-05,"[( rei, -3.9888433093437925e-05), ( Rei, -10.2..."
8,de,-0.02071903,"[( de, -0.02071903459727764), ( da, -3.8957190..."
9,Roma,-0.003809692,"[( Roma, -0.0038096921052783728), ( Fran√ßa, -6..."


In [None]:
!pip install matplotlib

In [61]:
# 🔹 7. Visualize the probabilities of the alternative tokens
import math
import matplotlib.pyplot as plt

def plot_token_alternatives(df, token_index=0):
    """
    Plot a bar chart of the top alternative tokens for one position of the generated text.

    Each alternative's log-probability is converted with ``exp`` and the resulting
    values are rescaled so that the plotted bars sum to 1.

    Args:
        df: DataFrame with a ``Token`` column and a ``Top Alternatives`` column whose
            cells are sequences of ``(token, logprob)`` pairs.
        token_index: Positional row index of the token to inspect. Negative values
            count from the end, as with ``DataFrame.iloc``.

    Returns:
        None. When ``token_index`` is outside ``[-len(df), len(df) - 1]`` a message is
        printed and nothing is plotted.
    """
    n_rows = len(df)
    # Guard both ends: a too-negative index would otherwise raise IndexError in iloc.
    if not -n_rows <= token_index < n_rows:
        print("√çndice fora do intervalo. Escolha entre 0 e", n_rows - 1)
        return

    token_info = df.iloc[token_index]
    alternatives = token_info["Top Alternatives"]

    tokens = [alt[0] for alt in alternatives]
    probs = [math.exp(alt[1]) for alt in alternatives]  # logprob -> probability

    # Normalize so the bars sum to 1. The total is computed once, and the division is
    # skipped when every probability underflowed to 0.0 (avoids ZeroDivisionError).
    total = sum(probs)
    if total > 0:
        probs = [p / total for p in probs]

    plt.figure(figsize=(8, 4))
    plt.bar(tokens, probs)
    plt.title(f"Distribui√ß√£o de Probabilidades - Token {token_index}: '{token_info['Token']}'")
    plt.ylabel("Probabilidade normalizada")
    plt.xlabel("Alternativas de tokens")
    plt.show()



In [None]:
# Example: plot the alternatives for the token at positional index 6
# (indices are zero-based, so this is the 7th row of the table).
plot_token_alternatives(tabela_logprobs, token_index=6)