# Self‑Consistency & Uncertainty Calibration  
Generate multiple completions for the same prompt, take a majority vote over them (self‑consistency), and prompt the model for confidence scores.

In [None]:
!pip -q install openai pandas ipywidgets textstat

In [None]:
import os, openai, pandas as pd, ipywidgets as w
from collections import Counter
from IPython.display import display, Markdown
os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY', 'sk-')

# Free-text prompt that is sent to the model on every run.
prompt_box = w.Textarea(
    description='Prompt:',
    value='What is 17 * 23? Think step‑by‑step.',
    layout=w.Layout(height='60px', width='100%'),
)

# How many completions to sample per click.
n_runs = w.IntSlider(
    description='Runs (n):',
    min=2,
    max=10,
    step=1,
    value=5,
)
# Sampling temperature forwarded to the chat completion request.
temperature = w.FloatSlider(
    description='Temp',
    min=0,
    max=2,
    step=0.1,
    value=0.7,
)
# Trigger button and the output area that the click handler renders into.
run_btn = w.Button(description='Generate n Completions')
out = w.Output()

def majority_vote(answers):
    """Return the most frequent answer together with its vote count.

    Args:
        answers: Iterable of hashable answers (exact-match comparison).

    Returns:
        A ``(answer, count)`` tuple for the most common element. When
        several answers share the top count, the one seen first wins.

    Raises:
        IndexError: If ``answers`` is empty.
    """
    tally = Counter(answers)
    ranked = tally.most_common(1)
    # ``ranked`` is empty for empty input, so indexing raises IndexError.
    top_entry = ranked[0]
    return top_entry

def run_sc(_):
    """Sample several completions for the prompt and report the majority answer.

    Click handler for ``run_btn``. Clears ``out``, requests ``n_runs.value``
    independent chat completions for ``prompt_box.value`` at
    ``temperature.value``, shows them in a DataFrame, and prints the
    completion text that occurs most often (exact string match, via
    ``majority_vote``).

    Args:
        _: The argument supplied by ``on_click``; unused.
    """
    with out:
        out.clear_output()
        # Read the slider once so the number of requests and the denominator
        # in the summary agree even if the slider is moved mid-run.
        total = n_runs.value
        completions = []
        for _run in range(total):
            # openai>=1.0 removed ``openai.ChatCompletion``; the equivalent
            # call lives at ``openai.chat.completions.create``.
            resp = openai.chat.completions.create(
                model='gpt-4o-mini',
                messages=[{'role': 'user', 'content': prompt_box.value}],
                temperature=temperature.value,
                max_tokens=128,
            )
            # ``content`` may be None; fall back to an empty string so that
            # ``strip`` does not raise.
            text = resp.choices[0].message.content or ''
            completions.append(text.strip())
        df = pd.DataFrame({'completion': completions})
        display(df)
        winner, count = majority_vote(completions)
        # The line break must be written as an escape: a plain quoted string
        # literal cannot span two physical source lines.
        print(f"🏆 Majority answer ({count}/{total}):\n{winner}")

# Register the click handler, then render all controls stacked vertically
# with the output area at the bottom.
run_btn.on_click(run_sc)
display(
    w.VBox(
        children=[
            prompt_box,
            n_runs,
            temperature,
            run_btn,
            out,
        ]
    )
)

## Prompting for Confidence / Uncertainty  
Try adding `"Rate your confidence 0–100 then answer:"` to the prompt, then compare how the reported confidence scores vary across runs.