In [1]:
import numpy as np
import pandas as pd
from openai import OpenAI

client = OpenAI()

# Mathematical Modeling

In [7]:
import numpy as np
from openai import OpenAI

# Your mathematical functions
def compute_entropy(probability_distribution): # shannon entropy
    return -np.sum(probability_distribution * np.log(probability_distribution + 1e-12)) # 정보를 인코딩할 때의 정보량이 아니라, 정보의 불확실성을 측정하는 것이라는 점에서 차이가 있음.

def compute_conditional_entropy(joint_distribution, marginal_distribution):
    return -np.sum(joint_distribution * np.log((joint_distribution / (marginal_distribution + 1e-12)) + 1e-12))

def compute_mutual_information(joint_distribution, marginal_distribution):
    return compute_entropy(marginal_distribution) - compute_conditional_entropy(joint_distribution, marginal_distribution)

def compute_information_gain(joint_distribution, marginal_distribution):
    return compute_entropy(marginal_distribution) - compute_conditional_entropy(joint_distribution, marginal_distribution)

def compute_relative_entropy(p, q):
    return np.sum(p * np.log((p / (q + 1e-12)) + 1e-12))

def compute_kl_divergence(p, q):
    return np.sum(p * np.log((p / (q + 1e-12)) + 1e-12))

def compute_jensen_shannon_divergence(p, q):
    m = 0.5 * (p + q)
    return 0.5 * compute_kl_divergence(p, m) + 0.5 * compute_kl_divergence(q, m)

def compute_chi_square_statistic(observed, expected):
    return np.sum(((observed - expected) ** 2) / (expected + 1e-12))

# Initialize the OpenAI client
client = OpenAI()

# Classification function
def classify_text(input_text):
    messages = [{"role": "user", "content": input_text}]
    
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=messages,
        logprobs=True,
        max_tokens=1,
        top_logprobs=3
    )
    
    # Extract the model's prediction
    prediction = response.choices[0].message.content.strip()
    
    # Extract the log probabilities
    logprobs = response.choices[0].logprobs.content[0].top_logprobs
    
    # Convert log probabilities to probabilities
    probabilities = np.exp(np.array(list(logprobs.values())))
    
    # Normalize the probabilities
    probability_distribution = probabilities / probabilities.sum()
    
    # Compute entropy
    entropy = compute_entropy(probability_distribution)
    
    # Compute confidence (lower entropy means higher confidence)
    max_entropy = np.log(len(probability_distribution))
    confidence = 1 - (entropy / max_entropy)
    
    return prediction, confidence

# Example usage
input_text = "Is it going to rain tomorrow?"
prediction, confidence = classify_text(input_text)

print(f"Prediction: {prediction}")
print(f"Confidence: {confidence:.2f}")


AttributeError: 'list' object has no attribute 'values'

In [14]:
from openai import OpenAI

client = OpenAI()

response = client.chat.completions.create(
    model="gpt-4o",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": """"Analyze the following text and determine if it is a positive or negative review. Answer with only one word, considering the nuanced emotional impact and potential long-term effects on the viewer: 'The film's intricate narrative weaved a tapestry of conflicting emotions, challenging societal norms while simultaneously reinforcing traditional values, leaving the audience in a state of cognitive dissonance long after the credits rolled.'"""}
    ],
    logprobs=True,
    max_tokens=1,
    top_logprobs=5,
    temperature=0
)


In [5]:
response.choices[0]

Choice(finish_reason='length', index=0, logprobs=ChoiceLogprobs(content=[ChatCompletionTokenLogprob(token='The', bytes=[84, 104, 101], logprob=-3.1281633e-07, top_logprobs=[TopLogprob(token='The', bytes=[84, 104, 101], logprob=-3.1281633e-07), TopLogprob(token='Paris', bytes=[80, 97, 114, 105, 115], logprob=-15.75), TopLogprob(token=' The', bytes=[32, 84, 104, 101], logprob=-16.875), TopLogprob(token='France', bytes=[70, 114, 97, 110, 99, 101], logprob=-20.625), TopLogprob(token='As', bytes=[65, 115], logprob=-22.125)])], refusal=None), message=ChatCompletionMessage(content='The', role='assistant', function_call=None, tool_calls=None, refusal=None))

In [6]:
response.choices[0].message

ChatCompletionMessage(content='The', role='assistant', function_call=None, tool_calls=None, refusal=None)

In [12]:
response.choices[0].logprobs

ChoiceLogprobs(content=[ChatCompletionTokenLogprob(token='The', bytes=[84, 104, 101], logprob=-6.704273e-07, top_logprobs=[TopLogprob(token='The', bytes=[84, 104, 101], logprob=-6.704273e-07), TopLogprob(token='Paris', bytes=[80, 97, 114, 105, 115], logprob=-14.375001), TopLogprob(token=' The', bytes=[32, 84, 104, 101], logprob=-18.0)])], refusal=None)

In [13]:
response.choices[0].logprobs.content

[ChatCompletionTokenLogprob(token='The', bytes=[84, 104, 101], logprob=-6.704273e-07, top_logprobs=[TopLogprob(token='The', bytes=[84, 104, 101], logprob=-6.704273e-07), TopLogprob(token='Paris', bytes=[80, 97, 114, 105, 115], logprob=-14.375001), TopLogprob(token=' The', bytes=[32, 84, 104, 101], logprob=-18.0)])]

In [16]:
response.choices[0].logprobs.content[0].top_logprobs



[TopLogprob(token='The', bytes=[84, 104, 101], logprob=-6.704273e-07),
 TopLogprob(token='Paris', bytes=[80, 97, 114, 105, 115], logprob=-14.375001),
 TopLogprob(token=' The', bytes=[32, 84, 104, 101], logprob=-18.0)]

In [21]:
i = 0

for content in response.choices[0].logprobs.content[0].top_logprobs:
    print(content.token)
    print(content.logprob)
    print(np.exp(content.logprob))
    i += 1
    
print(i)

The
-6.704273e-07
0.9999993295729247
Paris
-14.375001
5.715002021461846e-07
 The
-18.0
1.522997974471263e-08
3


In [22]:
response

ChatCompletion(id='chatcmpl-AF1CEbBCDDrW8RkTNP9UwFRiduH9e', choices=[Choice(finish_reason='length', index=0, logprobs=ChoiceLogprobs(content=[ChatCompletionTokenLogprob(token='The', bytes=[84, 104, 101], logprob=-6.704273e-07, top_logprobs=[TopLogprob(token='The', bytes=[84, 104, 101], logprob=-6.704273e-07), TopLogprob(token='Paris', bytes=[80, 97, 114, 105, 115], logprob=-14.375001), TopLogprob(token=' The', bytes=[32, 84, 104, 101], logprob=-18.0)])], refusal=None), message=ChatCompletionMessage(content='The', role='assistant', function_call=None, tool_calls=None, refusal=None))], created=1728142882, model='gpt-4o-2024-08-06', object='chat.completion', system_fingerprint='fp_143bb8492c', usage=CompletionUsage(completion_tokens=1, prompt_tokens=24, total_tokens=25, prompt_tokens_details={'cached_tokens': 0}, completion_tokens_details={'reasoning_tokens': 0}))

In [15]:
# Import necessary libraries
import json
from pprint import pprint
import numpy as np 

# Convert the response object to a dictionary
response_dict = {
    "id": response.id,
    "choices": [{
        "finish_reason": choice.finish_reason,
        "index": choice.index,
        "logprobs": {
            "content": [{
                "token": token.token,
                "logprob": token.logprob,
                "top_logprobs": [{
                    "token": top.token,
                    "logprob": top.logprob
                } for top in token.top_logprobs]
            } for token in choice.logprobs.content]
        },
        "message": {
            "content": choice.message.content,
            "role": choice.message.role
        }
    } for choice in response.choices],
    "created": response.created,
    "model": response.model,
    "object": response.object,
    "system_fingerprint": response.system_fingerprint,
    "usage": {
        "completion_tokens": response.usage.completion_tokens,
        "prompt_tokens": response.usage.prompt_tokens,
        "total_tokens": response.usage.total_tokens
    }
}

# Pretty print the response dictionary
print("Pretty-printed response:")
pprint(response_dict, width=100, compact=False)

# Optionally, you can also save the response to a JSON file
with open('response.json', 'w') as f:
    json.dump(response_dict, f, indent=4)
print("Response saved to 'response.json'")

for logprob in response.choices[0].logprobs.content[0].top_logprobs:
    print(np.exp(logprob.logprob))


Pretty-printed response:
{'choices': [{'finish_reason': 'length',
              'index': 0,
              'logprobs': {'content': [{'logprob': -0.20231643,
                                        'token': 'Positive',
                                        'top_logprobs': [{'logprob': -0.20231643,
                                                          'token': 'Positive'},
                                                         {'logprob': -1.7023164,
                                                          'token': 'Negative'},
                                                         {'logprob': -7.0773163,
                                                          'token': 'Neutral'},
                                                         {'logprob': -10.327316, 'token': 'Mixed'},
                                                         {'logprob': -10.827316, 'token': '"'}]}]},
              'message': {'content': 'Positive', 'role': 'assistant'}}],
 'created': 1728181562,
 'i

In [25]:
response

ChatCompletion(id='chatcmpl-AF1CEbBCDDrW8RkTNP9UwFRiduH9e', choices=[Choice(finish_reason='length', index=0, logprobs=ChoiceLogprobs(content=[ChatCompletionTokenLogprob(token='The', bytes=[84, 104, 101], logprob=-6.704273e-07, top_logprobs=[TopLogprob(token='The', bytes=[84, 104, 101], logprob=-6.704273e-07), TopLogprob(token='Paris', bytes=[80, 97, 114, 105, 115], logprob=-14.375001), TopLogprob(token=' The', bytes=[32, 84, 104, 101], logprob=-18.0)])], refusal=None), message=ChatCompletionMessage(content='The', role='assistant', function_call=None, tool_calls=None, refusal=None))], created=1728142882, model='gpt-4o-2024-08-06', object='chat.completion', system_fingerprint='fp_143bb8492c', usage=CompletionUsage(completion_tokens=1, prompt_tokens=24, total_tokens=25, prompt_tokens_details={'cached_tokens': 0}, completion_tokens_details={'reasoning_tokens': 0}))

[0.2, 0.2, 0.2, 0.2, 0.2]

[0.99, 0.01, 0, 0, 0]


확률 10개로 구성된 확률분포가 있을 때 
uniform -> uncertainty가 가장 높음.

p = [p1, p2, p3, p4]
p1 + p2 + p3 + p4 = 1


entropy = -sum(p * log(p))



In [18]:
top_logprobs = response.choices[0].logprobs.content[0].top_logprobs

In [19]:
linearProb = [np.exp(logprob.logprob) for logprob in top_logprobs]

In [20]:
linearProb

[0.8168364154968915,
 0.18226084567271605,
 0.0008440352490189136,
 3.272680798079951e-05,
 1.9849812434883006e-05]

In [21]:
import numpy as np

def calculate_confidence(logprobs):
    # Extract the log probabilities
    log_probs = np.array([lp.logprob for lp in logprobs])
    
    # Convert log probabilities to probabilities
    probs = np.exp(log_probs)
    
    # Normalize the probabilities
    probs = probs / np.sum(probs)
    
    # Calculate entropy
    entropy = -np.sum(probs * np.log2(probs))
    
    # Calculate confidence (1 - normalized entropy)
    max_entropy = np.log2(len(probs))  # maximum possible entropy
    normalized_entropy = entropy / max_entropy
    confidence = 1 - normalized_entropy
    
    return confidence

# Extract logprobs from the response
logprobs = response.choices[0].logprobs.content[0].top_logprobs

# Calculate and print the confidence
confidence = calculate_confidence(logprobs)
print(f"Model confidence: {confidence:.4f}")

Model confidence: 0.7005


In [28]:
logprobs

[TopLogprob(token='The', bytes=[84, 104, 101], logprob=-6.704273e-07),
 TopLogprob(token='Paris', bytes=[80, 97, 114, 105, 115], logprob=-14.375001),
 TopLogprob(token=' The', bytes=[32, 84, 104, 101], logprob=-18.0)]

In [30]:
for logprob in logprobs:
    print(logprob.token)
    print(logprob.logprob)
    print(np.exp(logprob.logprob))



The
-6.704273e-07
0.9999993295729247
Paris
-14.375001
5.715002021461846e-07
 The
-18.0
1.522997974471263e-08


# Some Other Methods

1. Maximum Softmax Probability -> 가장 확률 높은걸 취한다 
2. Prediction Margin -> 가장 확률 높은거랑 두번째로 확률 높은거의 차이를 취한다. 
3. Monte Carlo Dropout -> 모델을 여러번 실행하고 그 중 가장 확률 높은거를 취한다. -> 각각의 응답 생성시에 그 자신감을 측정하겠다. (이 컨셉은 pipeline evaluation) -> prompt tuning + temperature sampling
4. Bayesian Neural Networks -> 모델을 여러번 실행하고 그 중 가장 확률 높은거를 취한다. (finetuning)
5. Information Theory-based Confidence -> 확률분포의 불확실성을 측정한다. (entropy)
6. Ensemble Methods -> 모델을 여러가지를 동시에 실행하고 종합하여 결론을 내림
7. Gradient Magnitude -> 모델의 기울기를 측정한다. (이거는 API상으로 불가능)
8. Confidence in the Confidence (모델 응답의 확률 분포의 자신감)
9. Self-Supervised Confidence Estimation -> 모델의 자신감을 측정한다. (이거는 API상으로 불가능)
10. Adversarial Examples
11. Human-in-the-Loop

In [24]:
# Maximum Softmax Probability
import numpy as np
from openai import OpenAI

# Initialize the OpenAI client with your API key
client = OpenAI()

def classify_with_msp(input_text):
    messages = [{"role": "user", "content": input_text}]
    
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=messages,
        max_tokens=1,
        logprobs=True,
        top_logprobs=5,
        temperature=0,  # Deterministic output
    )
    
    # Extract the model's prediction
    prediction = response.choices[0].message.content.strip()
    
    # Extract log probabilities
    logprobs = [logprob.logprob for logprob in response.choices[0].logprobs.content[0].top_logprobs]
    
    # Convert log probabilities to probabilities
    probabilities = np.exp(np.array(logprobs))
    
    # Normalize probabilities
    probability_distribution = probabilities / probabilities.sum()
    
    # Maximum Softmax Probability
    confidence = np.max(probability_distribution)
    
    return prediction, confidence, probability_distribution

# Example usage
input_text = "Analyze the following text and determine if it is a positive or negative review. Answer with only one word, considering the nuanced emotional impact and potential long-term effects on the viewer: 'The film's intricate narrative weaved a tapestry of conflicting emotions, challenging societal norms while simultaneously reinforcing traditional values, leaving the audience in a state of cognitive dissonance long after the credits rolled.'"
prediction, confidence, probability_distribution = classify_with_msp(input_text)
print(f"Prediction: {prediction}")
print(f"Confidence (MSP): {confidence:.2f}")
print(f"Probability Distribution: {probability_distribution}")

Prediction: Positive
Confidence (MSP): 0.88
Probability Distribution: [8.80430608e-01 1.19153328e-01 3.79238925e-04 3.52747149e-05
 1.54986281e-06]


In [22]:
response.choices[0].logprobs.content[0].top_logprobs[0]

TopLogprob(token='Positive', bytes=[80, 111, 115, 105, 116, 105, 118, 101], logprob=-0.20231643)

In [23]:
# Prediction Margin

def classify_with_margin(input_text):
    messages = [{"role": "user", "content": input_text}]
    
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=messages,
        max_tokens=1,
        logprobs=True,
        top_logprobs=5,
        temperature=0,
        n=1
    )
    
    # Extract the model's prediction
    prediction = response.choices[0].message.content.strip()
    
    # Extract log probabilities
    logprobs = [logprob.logprob for logprob in response.choices[0].logprobs.content[0].top_logprobs]
    
    # Convert log probabilities to probabilities
    probabilities = np.exp(np.array(logprobs))
    
    # Normalize probabilities
    probability_distribution = probabilities / probabilities.sum()
    
    # Sort probabilities
    sorted_probs = np.sort(probability_distribution)
    
    # Prediction Margin
    if len(sorted_probs) > 1:
        margin = sorted_probs[-1] - sorted_probs[-2]
    else:
        margin = sorted_probs[-1]
    
    confidence = margin
    
    return prediction, confidence

# Example usage
input_text = "Analyze the following text and determine if it is a positive or negative review. Answer with only one word, considering the nuanced emotional impact and potential long-term effects on the viewer: 'The film's intricate narrative weaved a tapestry of conflicting emotions, challenging societal norms while simultaneously reinforcing traditional values, leaving the audience in a state of cognitive dissonance long after the credits rolled.'"
prediction, confidence = classify_with_margin(input_text)
print(f"Prediction: {prediction}")
print(f"Confidence (Margin): {confidence:.2f}")


Prediction: Positive
Confidence (Margin): 0.36


In [25]:
# Information Theory-based Confidence

def compute_entropy(probabilities):
    return -np.sum(probabilities * np.log(probabilities + 1e-12))

def classify_with_entropy(input_text):
    messages = [{"role": "user", "content": input_text}]
    
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=messages,
        max_tokens=1,
        logprobs=True,
        top_logprobs=5,
        temperature=0,
    )
    
    # Extract prediction
    prediction = response.choices[0].message.content.strip()
    
    # Extract log probabilities
    logprobs = [logprob.logprob for logprob in response.choices[0].logprobs.content[0].top_logprobs]
    
    # Convert to probabilities
    probabilities = np.exp(np.array(logprobs))
    probabilities /= probabilities.sum()
    
    # Compute entropy
    entropy = compute_entropy(probabilities)
    
    # Normalize entropy to get confidence between 0 and 1
    max_entropy = np.log(len(probabilities))
    confidence = 1 - (entropy / max_entropy)
    
    return prediction, confidence

# Example usage
prediction, confidence = classify_with_entropy(input_text)
print(f"Prediction: {prediction}")
print(f"Confidence (Entropy): {confidence:.2f}")


Prediction: Positive
Confidence (Entropy): 0.58


In [27]:
from openai import OpenAI
import numpy as np

client = OpenAI()

def classify_with_temperature_sampling(input_text, num_samples=300, temperature=0.7):
    messages = [{"role": "user", "content": input_text}]
    predictions = []

    for _ in range(num_samples):
        response = client.chat.completions.create(
            model="gpt-4o",
            messages=messages,
            max_tokens=1,
            temperature=temperature,
            logprobs=True,
            top_logprobs=5,
        )
        
        # for i, logprob in enumerate(response.choices[0].logprobs.content[0].top_logprobs):
        #     predictions.append(logprob.token)
        prediction = response.choices[0].message.content.strip()
        predictions.append(prediction)

    # Calculate prediction frequencies
    unique_predictions, counts = np.unique(predictions, return_counts=True)
    probabilities = counts / num_samples

    # Confidence is the probability of the most frequent prediction
    max_index = np.argmax(probabilities)
    final_prediction = unique_predictions[max_index]
    confidence = probabilities[max_index]
    
    # Entropy of the prediction distribution
    entropy = -np.sum(probabilities * np.log(probabilities + 1e-12))
    max_entropy = np.log(len(unique_predictions))
    
    # Avoid division by zero
    if max_entropy == 0:
        normalized_entropy = 0
    else:
        normalized_entropy = entropy / max_entropy
    
    confidence_from_entropy = 1 - normalized_entropy

    return final_prediction, confidence, confidence_from_entropy

# Example usage
input_text = "Analyze the following text and determine if it is a positive or negative review. Answer with only one word, considering the nuanced emotional impact and potential long-term effects on the viewer: 'The film's intricate narrative weaved a tapestry of conflicting emotions, challenging societal norms while simultaneously reinforcing traditional values, leaving the audience in a state of cognitive dissonance long after the credits rolled.'"
prediction, confidence, entropy_confidence = classify_with_temperature_sampling(input_text)

print(f"Prediction: {prediction}")
print(f"Confidence (Frequency): {confidence:.2f}")
print(f"Confidence (Entropy-based): {entropy_confidence:.2f}")


Prediction: Positive
Confidence (Frequency): 0.90
Confidence (Entropy-based): 0.53


In [28]:

for i in range(10):
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "user", "content": "Analyze the following text and determine if it is a positive or negative review. Answer with only one word, considering the nuanced emotional impact and potential long-term effects on the viewer: 'The film's intricate narrative weaved a tapestry of conflicting emotions, challenging societal norms while simultaneously reinforcing traditional values, leaving the audience in a state of cognitive dissonance long after the credits rolled.'"}
        ],
        logprobs=True,
        max_tokens=1,
        top_logprobs=5,
    )
    print(response.choices[0].message.content.strip())



Positive
Negative
Positive
Positive
Negative
Positive
Positive
Neutral
Positive
Positive


1차 검증 -> [classification]  
비교하는 분포를 다르게 설정

In [29]:
import numpy as np
from openai import OpenAI

client = OpenAI()  # Assumes API key is set in environment variable

def compute_kl_divergence(p, q):
    return np.sum(p * np.log((p + 1e-12) / (q + 1e-12)))

def classify_with_kl_divergence(input_text):
    messages = [{"role": "user", "content": input_text}]
    
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=messages,
        max_tokens=1,
        logprobs=True,
        temperature=0,
        top_logprobs=5,
    )
    
    # Extract prediction
    prediction = response.choices[0].message.content.strip()
    
    # Extract log probabilities
    logprobs = response.choices[0].logprobs.content[0].top_logprobs
    tokens = [item.token for item in logprobs]
    logprob_values = np.array([item.logprob for item in logprobs])
    
    # Convert to probabilities
    probabilities = np.exp(logprob_values)
    probabilities /= probabilities.sum()
    
    # Reference distribution (maximum entropy distribution)
    max_entropy_probs = np.ones_like(probabilities) / len(probabilities)
    
    # Compute KL Divergence
    kl_div = compute_kl_divergence(probabilities, max_entropy_probs)
    
    # Normalize KL Divergence to [0,1] for confidence
    max_kl_div = np.log(len(probabilities))
    
    # Avoid division by zero
    if max_kl_div == 0:
        confidence = 0
    else:
        confidence = kl_div / max_kl_div
    
    return prediction, confidence, probabilities, tokens

# Example usage
input_text = "Analyze the following text and determine if it is a positive or negative review. Answer with only one word, considering the nuanced emotional impact and potential long-term effects on the viewer: 'The film's intricate narrative weaved a tapestry of conflicting emotions, challenging societal norms while simultaneously reinforcing traditional values, leaving the audience in a state of cognitive dissonance long after the credits rolled.'"
prediction, confidence, probabilities, tokens = classify_with_kl_divergence(input_text)
print(f"Prediction: {prediction}")
print(f"Confidence (KL Divergence): {confidence:.2f}")
print("\nProbability distribution:")
for token, prob in zip(tokens, probabilities):
    print(f"{token}: {prob:.4f}")


Prediction: Positive
Confidence (KL Divergence): 0.61

Probability distribution:
Positive: 0.6789
Negative: 0.3207
Neutral: 0.0004
Mixed: 0.0001
Complex: 0.0000


In [30]:
def compute_information_gain(probabilities):
    max_entropy = np.log(len(probabilities))
    entropy = -np.sum(probabilities * np.log(probabilities + 1e-12))
    ig = max_entropy - entropy
    return ig

def classify_with_information_gain(input_text):
    messages = [{"role": "user", "content": input_text}]
    
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=messages,
        max_tokens=1,
        logprobs=True,
        temperature=0,
        top_logprobs=5
    )
    
    prediction = response.choices[0].message.content.strip()
    
    logprobs = response.choices[0].logprobs.content[0].top_logprobs
    logprob_values = np.array([item.logprob for item in logprobs])
    probabilities = np.exp(logprob_values)
    probabilities /= probabilities.sum()
    
    # Compute Information Gain
    ig = compute_information_gain(probabilities)
    
    # Normalize IG to [0,1]
    max_ig = np.log(len(probabilities))
    confidence = ig / max_ig
    
    return prediction, confidence

# Example usage
prediction, confidence = classify_with_information_gain(input_text)
print(f"Prediction: {prediction}")
print(f"Confidence (Information Gain): {confidence:.2f}")


Prediction: Positive
Confidence (Information Gain): 0.58


# 이쯤 생길만한 의문이 좀 있을 것 같아서...

Why Use These Information Theory Measures?   


KL Divergence: Measures how much the model's predicted distribution diverges from a uniform distribution. A higher divergence indicates that the model's predictions are concentrated on specific tokens, suggesting higher confidence.

Jensen-Shannon Divergence: A symmetric and smoothed version of KL Divergence, providing a normalized measure of similarity between distributions.

Information Gain: Reflects the reduction in uncertainty after observing the model's output. Higher information gain implies that the model's output provides significant information about the diagnosis.

# EXAMPLE USE CASE

Objective: Develop a language model pipeline that provides diagnostic suggestions based on patient symptoms, with a focus on ensuring high confidence in the model's responses.



In [33]:
def compute_entropy(probabilities):
    return -np.sum(probabilities * np.log(probabilities + 1e-12))

def compute_kl_divergence(p, q):
    return np.sum(p * np.log((p + 1e-12) / (q + 1e-12)))

def compute_jensen_shannon_divergence(p, q):
    m = 0.5 * (p + q)
    jsd = 0.5 * compute_kl_divergence(p, m) + 0.5 * compute_kl_divergence(q, m)
    return jsd

def compute_information_gain(probabilities):
    max_entropy = np.log(len(probabilities))
    entropy = compute_entropy(probabilities)
    ig = max_entropy - entropy
    return ig


In [34]:
def compute_kl_divergence_confidence(probabilities):
    uniform_probabilities = np.ones_like(probabilities) / len(probabilities)
    kl_div = compute_kl_divergence(probabilities, uniform_probabilities)
    max_kl_div = np.log(len(probabilities))
    confidence = kl_div / max_kl_div
    return confidence

def compute_jsd_confidence(probabilities):
    uniform_probabilities = np.ones_like(probabilities) / len(probabilities)
    jsd = compute_jensen_shannon_divergence(probabilities, uniform_probabilities)
    max_jsd = np.log(2)
    confidence = jsd / max_jsd
    return confidence

def compute_information_gain_confidence(probabilities):
    ig = compute_information_gain(probabilities)
    max_ig = np.log(len(probabilities))
    confidence = ig / max_ig
    return confidence


In [35]:
def diagnose_with_confidence(symptoms_text):
    messages = [{"role": "user", "content": symptoms_text}]
    
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=messages,
        max_tokens=1,  # Allow for detailed responses
        temperature=0,  # Deterministic output
        logprobs=True,
        top_logprobs=5
    )
    
    # Extract the model's response
    diagnosis = response.choices[0].message.content.strip()
    
    # Extract log probabilities of the first token (as an approximation)
    logprobs = response.choices[0].logprobs.content[0].top_logprobs
    tokens = [logprob.token for logprob in logprobs]
    logprob_values = np.array([logprob.logprob for logprob in logprobs])
    
    # Convert log probabilities to probabilities
    probabilities = np.exp(logprob_values)
    probabilities /= probabilities.sum()
    
    # Compute confidence measures
    kl_confidence = compute_kl_divergence_confidence(probabilities)
    jsd_confidence = compute_jsd_confidence(probabilities)
    ig_confidence = compute_information_gain_confidence(probabilities)
    
    # Combine confidence scores (weighted sum)
    combined_confidence = (0.4 * kl_confidence) + (0.3 * jsd_confidence) + (0.3 * ig_confidence)
    
    return diagnosis, combined_confidence


In [38]:
CONFIDENCE_THRESHOLD = 0.75  # Adjust based on validation results

# Example patient symptoms
symptoms_text = """
Patient presents with the following symptoms:
1. Persistent chest pain
2. Severe shortness of breath
3. High fever
4. Productive cough with greenish or yellow sputum
Additional notes:
- Chest pain worsens when breathing deeply or coughing
- Shortness of breath even at rest
- Fatigue and weakness
- Rapid breathing and increased heart rate
- Possible bluish tint to lips or fingernails (cyanosis)

Medical history suggests patient is suffering from pneumonia.
"""

diagnosis, confidence = diagnose_with_confidence(symptoms_text)

if confidence >= CONFIDENCE_THRESHOLD:
    print(f"Diagnosis: {diagnosis}")
    print(f"Confidence: {confidence:.2f}")
else:
    print("The model is not confident enough in its diagnosis.")
    print(f"Confidence: {confidence:.2f}")
    print("Recommendation: Consult a medical professional for further evaluation.")


The model is not confident enough in its diagnosis.
Confidence: 0.55
Recommendation: Consult a medical professional for further evaluation.
