In [1]:
#!pip install --upgrade transformers

In [9]:
!pip install triton



In [2]:
import json
from collections import Counter

In [3]:
from pathlib import Path
import os

from huggingface_hub import hf_hub_download, HfApi
from huggingface_hub.utils import GatedRepoError, HfHubHTTPError, RepositoryNotFoundError
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    pipeline,
)

2025-08-06 17:15:52.678165: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-08-06 17:15:52.691745: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1754500552.709738    1328 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1754500552.715286    1328 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-08-06 17:15:52.732978: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instr

In [4]:
!pip install -q python-dotenv  # install once per image
from dotenv import load_dotenv
load_dotenv(dotenv_path=".env", override=True)  # set True if you want to overwrite

True

In [5]:
# Hugging Face model id used for the rest of the notebook.
# Alternatives that can be swapped in:
#   "Qwen/Qwen3-0.6B"
#   "meta-llama/Llama-3.2-1B"
#   "google/gemma-3-1b-it"
model_name = "openai/gpt-oss-20b"

In [6]:
# Path to the survey JSON file
survey_path = Path('political_survey_rep.json')  # update with your own file path

# Read the file as UTF-8 explicitly: without `encoding=` Python falls back to
# the platform's locale encoding, which can mis-decode non-ASCII survey text.
with open(survey_path, encoding='utf-8') as f:
    survey = json.load(f)

# 'preamble' is optional (empty string when absent); 'questions' is required
# and a missing key raises KeyError here rather than later in the run.
preamble = survey.get('preamble', '')
questions = survey['questions']

In [7]:
def load_model(model_name: str, *, token: str | None = None):
    """Load the tokenizer and causal-LM weights for `model_name`.

    Both objects are fetched with `from_pretrained`, passing `token` through
    unchanged. Any exception raised by either call propagates to the caller.

    Returns:
        A `(tokenizer, model)` tuple.
    """
    # Tuple elements are evaluated left to right: tokenizer first, then model.
    return (
        AutoTokenizer.from_pretrained(model_name, token=token),
        AutoModelForCausalLM.from_pretrained(model_name, token=token),
    )

def _raised_from(err, exc_types):
    """Return True if `err`, or any exception it is chained from, matches `exc_types`."""
    visited = set()
    while err is not None and id(err) not in visited:
        if isinstance(err, exc_types):
            return True
        visited.add(id(err))  # guard against cyclic exception chains
        err = err.__cause__ or err.__context__
    return False


def init_generator(model_name):
    """Build a text-generation pipeline for `model_name`.

    The Hugging Face token is read from the `HF_TOKEN` environment variable
    (it may be unset). If the requested model is gated and the account has no
    access, a small open fallback model is loaded instead so work can continue.

    Args:
        model_name: Hub id of the model to load.

    Returns:
        A `transformers` text-generation pipeline. Note that it may wrap the
        fallback model rather than `model_name`.

    Raises:
        OSError / HfHubHTTPError: re-raised unchanged when loading fails for
            any reason other than a gated repository.
    """
    FALLBACK_MODEL = 'Qwen/Qwen3-0.6B'

    token = os.getenv("HF_TOKEN")  # optional; set it once and forget
    try:
        tokenizer, model = load_model(model_name, token=token)
        print(f"✅ Loaded {model_name}")
    except (OSError, HfHubHTTPError) as e:
        # `from_pretrained` re-raises hub failures as OSError chained from the
        # original hub exception, so the hub error type has to be looked up in
        # the exception chain; catching GatedRepoError directly never matches.
        if _raised_from(e, GatedRepoError):
            msg = f"{model_name} is gated and your account lacks access."
            print(msg)
            print("Falling back to an open model so you can keep working…")
            tokenizer, model = load_model(FALLBACK_MODEL, token=token)
        else:
            if _raised_from(e, (RepositoryNotFoundError, HfHubHTTPError)):
                print(f"Hub error while fetching {model_name}: {e}")
            # Not recoverable here (hub error or unrelated OSError): propagate.
            raise

    return pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        # Use the end-of-sequence token id as the padding id.
        pad_token_id=tokenizer.eos_token_id,
    )

In [8]:
# Initialize model
generator = init_generator(model_name)

# Reuse the tokenizer the pipeline was built with instead of loading a second
# copy: a separate AutoTokenizer.from_pretrained(model_name) call is made
# without the HF token (so it fails on gated repos before the fallback can
# run) and would not match the model if init_generator fell back.
tokenizer = generator.tokenizer

MXFP4 quantization requires triton >= 3.4.0 and triton_kernels installed, we will default to dequantizing the model to bf16


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/165 [00:00<?, ?B/s]

Device set to use cuda:0


✅ Loaded openai/gpt-oss-20b


OutOfMemoryError: CUDA out of memory. Tried to allocate 508.00 MiB. GPU 0 has a total capacity of 14.57 GiB of which 2.75 MiB is free. Process 7659 has 14.56 GiB memory in use. Of the allocated memory 14.38 GiB is allocated by PyTorch, and 66.25 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [None]:
# Answer labels offered to the model: 'A' through 'Z'.
choice_labels = [chr(ord('A') + i) for i in range(26)]

def ask_question_small(question, generator, preamble="You are a helpful assistant."):
    """
    Pose a multiple-choice question to a small language model and
    return the text of the chosen answer.

    Parameters
    ----------
    question : mapping
        Must contain "question" (the question text) and "choices"
        (a sequence of answer strings, at most len(choice_labels) of them).
    generator : callable
        Called as ``generator(prompt, **generation_kwargs)``; must return a
        list whose first item is a mapping with a "generated_text" string
        (the shape used by a text-generation pipeline).
    preamble : str
        Text placed at the very top of the prompt.

    Returns
    -------
    The element of ``question["choices"]`` selected by the model, or ``None``
    when the reply cannot be mapped to any choice.
    """
    q_text  = question["question"]
    choices = question["choices"]

    # Format the list of choices as "A. ...", "B. ...", one per line
    formatted_choices = "\n".join(
        f"{choice_labels[i]}. {c}" for i, c in enumerate(choices)
    )

    # One-shot demonstration so the model sees the desired behaviour once
    few_shot = (
        "Answer each multiple-choice question with **one capital letter only** "
        "and no extra text.\n"
        "Example:\n"
        "Question: The sky is blue.\n"
        "Choices:\n"
        "A. Yes\nB. No\n\n"
        "Answer: A\n###\n"
    )

    # Build the prompt
    prompt = (
        f"{preamble}\n\n"
        f"{few_shot}"
        f"Question: {q_text}\n"
        f"Choices:\n{formatted_choices}\n\n"
        f"Answer:"
    )

    # Ask the model.
    generated = generator(
        prompt,
        max_new_tokens=2,          # enough for "A" plus a newline/space
        do_sample=True,            # sampled, NOT deterministic: repeated calls may differ
        return_full_text=False     # we only need the answer
    )[0]["generated_text"].strip().upper()

    # Take the first letter/digit of the reply so that decoration such as
    # "**A" or "(B)" (the prompt itself uses markdown bold) still parses.
    valid_labels = choice_labels[:len(choices)]
    first_alnum = next((ch for ch in generated if ch.isalnum()), "")
    if first_alnum in valid_labels:
        return choices[choice_labels.index(first_alnum)]

    # Fallback: look for the full choice text inside the response.
    # Empty choices are skipped because "" is a substring of every string.
    reply = generated.lower()
    for c in choices:
        if c and c.lower() in reply:
            return c
    return None


In [None]:
N = 100  # number of sampled answers collected per question
results = {q['question']: Counter() for q in questions}

# Send the survey's own preamble with every prompt. Without this the preamble
# loaded from the survey file is never used and each prompt silently falls
# back to ask_question_small's generic default. An empty/missing preamble
# keeps that default.
prompt_kwargs = {'preamble': preamble} if preamble else {}

for _ in range(N):
    for q in questions:
        answer = ask_question_small(q, generator, **prompt_kwargs)
        # Replies that could not be mapped to a choice are tallied under ''.
        results[q['question']]['' if answer is None else answer] += 1

In [None]:
# Turn each question's answer tally into percentages rounded to two decimals.
percentages = {}
for q in questions:
    q_text = q['question']
    tally = results[q_text]
    total = sum(tally.values())
    shares = {}
    for choice, count in tally.items():
        shares[choice] = round(count/total*100, 2)
    percentages[q_text] = shares

# Bare expression so the notebook displays the result.
percentages

In [None]:
# Save results to JSON
import re

def get_op_path(name=None):
    """Build the results filename for a model.

    Args:
        name: Model id to derive the filename from. Defaults to the
            notebook-level `model_name` when omitted, so existing
            zero-argument calls behave as before.

    Returns:
        A relative `Path` of the form `<sanitized-id>_survey_results_rep.json`.
    """
    if name is None:
        name = model_name

    # Replace every run of characters outside [A-Za-z0-9.+-] (this includes
    # '/' and '_') with a single '_'. Because whole runs are replaced, the
    # result can never contain consecutive underscores, so no second
    # collapsing pass is needed.
    prefix = re.sub(r'[^A-Za-z0-9.+-]+', '_', name).strip('_')

    return Path(f'{prefix}_survey_results_rep.json')

# Resolve the destination, then serialize and write in one step.
output_path = get_op_path()
output_path.write_text(json.dumps(percentages, indent=2))
print(f'Results saved to {output_path}')