# Imports

In [1]:
import json
import re
import time
from collections import Counter

import requests

LMSTUDIO_URL = "http://localhost:1234/v1/chat/completions"

# 1. An example

In [2]:
# Request headers: the body is sent as JSON.
headers = {"Content-Type": "application/json"}

# A hand-written instance of the "step by step" prompt template,
# using a trivial arithmetic question.
full_prompt = "Answer the question step by step:\nSum 2+2\nA:"

# Chat-completion request body with a single user message.
payload = dict(
    model="local-model",  # This can be anything; LM Studio ignores it
    messages=[dict(role="user", content=full_prompt)],
    temperature=0.7,
    max_tokens=200,
)

In [3]:
# Send the example request. The timeout (in seconds) keeps a stalled or
# unreachable local server from hanging the kernel indefinitely.
response = requests.post(LMSTUDIO_URL, headers=headers, json=payload, timeout=120)

In [4]:
# Show the parsed JSON body of the response as this cell's output.
response.json()

{'id': 'chatcmpl-lpkazxdiombsc3obq1caw',
 'object': 'chat.completion',
 'created': 1746260920,
 'model': 'llama-3.2-3b-instruct',
 'choices': [{'index': 0,
   'logprobs': None,
   'finish_reason': 'stop',
   'message': {'role': 'assistant',
    'content': "Here's the step-by-step calculation:\n\n1. Start with the given numbers: 2 and 2\n2. Add the two numbers together: 2 + 2 = \n3. The result of the addition is: 4"}}],
 'usage': {'prompt_tokens': 50, 'completion_tokens': 48, 'total_tokens': 98},
 'stats': {},
 'system_fingerprint': 'llama-3.2-3b-instruct'}

# 2. Function Definitions

In [5]:
def generate_reasoning(prompt, temperature=0.7, max_tokens=200, timeout=120):
    """Generate a single reasoning trace using LM Studio's local model.

    Args:
        prompt: The question to answer. It is wrapped in a "step by step"
            instruction and sent as a single user message.
        temperature: Sampling temperature forwarded in the request.
        max_tokens: Upper bound on generated tokens forwarded in the
            request. Raise it if traces are cut off before the final answer.
        timeout: Seconds to wait for the server, so a stalled local server
            cannot hang the notebook indefinitely.

    Returns:
        The message content of the first choice in the response.

    Raises:
        Exception: If the server replies with a non-200 status, or the
            response body does not have the expected chat-completion shape.
            Connection errors and timeouts propagate as raised by
            ``requests``.
    """
    full_prompt = f"Answer the question step by step:\n{prompt}\nA:"

    headers = {"Content-Type": "application/json"}
    payload = {
        "model": "local-model",  # This can be anything; LM Studio ignores it
        "messages": [{"role": "user", "content": full_prompt}],
        "temperature": temperature,
        "max_tokens": max_tokens,
    }

    response = requests.post(
        LMSTUDIO_URL, headers=headers, json=payload, timeout=timeout
    )
    if response.status_code != 200:
        raise Exception(f"LM Studio request failed: {response.text}")

    try:
        return response.json()["choices"][0]["message"]["content"]
    except (KeyError, IndexError, TypeError, ValueError) as exc:
        # A 200 reply without a usable completion: report the raw body
        # instead of a bare KeyError/IndexError.
        raise Exception(f"Unexpected LM Studio response: {response.text}") from exc

In [6]:
def extract_answer(text):
    """Extract the final number answer from the model output.

    Lines are scanned from the bottom up. For the first line that contains a
    number, the LAST number on that line is returned, so an equation such as
    "3 * 4 = 12" yields "12" rather than every digit glued together.

    Args:
        text: The model's reasoning trace. Empty or None yields "unknown".

    Returns:
        The number as a string, with thousands separators removed and any
        decimal part or leading minus sign kept (e.g. "1200", "3.5", "-2"),
        or "unknown" when no number is found.
    """
    # Optional minus sign (only when not glued to a preceding word/digit, so
    # "10-4" is not read as -4), digits with optional ",ddd" groups, and an
    # optional decimal part.
    number_pattern = re.compile(r"(?:(?<!\w)-)?\d+(?:,\d{3})*(?:\.\d+)?")

    for line in reversed((text or "").strip().split("\n")):
        numbers = number_pattern.findall(line)
        if numbers:
            return numbers[-1].replace(",", "")
    return "unknown"

In [7]:
def self_consistent_answer(prompt, num_samples=5, temperature=0.8):
    """Sample several reasoning traces and majority-vote on the final answer.

    Each sample is generated with ``generate_reasoning`` and reduced to an
    answer with ``extract_answer``. Every trace and its extracted answer are
    printed as they arrive.

    Args:
        prompt: The question to ask the model.
        num_samples: How many reasoning traces to request.
        temperature: Sampling temperature passed to ``generate_reasoning``.

    Returns:
        A ``(answer, count)`` tuple holding the most frequent answer and how
        many samples produced it, or ``("unknown", 0)`` when no sample
        yielded a usable answer.
    """
    answers = []
    for i in range(num_samples):
        try:
            reasoning = generate_reasoning(prompt, temperature=temperature)
            answer = extract_answer(reasoning)
            print(f"Run {i+1}:\n{reasoning}\n-> Answer: {answer}\n")
            # A failed extraction is not a vote; counting it would let
            # "unknown" win the majority over real answers.
            if answer != "unknown":
                answers.append(answer)
        except Exception as e:
            # Best effort: a failed sample is reported and skipped so the
            # remaining samples can still vote.
            print(f"Error during generation {i+1}: {e}")

        # Pause between requests to avoid overwhelming the local model;
        # no need to wait after the final sample.
        if i < num_samples - 1:
            time.sleep(0.5)

    most_common = Counter(answers).most_common(1)
    return most_common[0] if most_common else ("unknown", 0)

In [8]:
# Example usage: ask one word problem, sample five reasoning traces, and
# report the most frequent extracted answer together with its vote count.
question = "If there are 3 cars and each car has 4 wheels, how many wheels are there in total?"
final_answer, count = self_consistent_answer(question, num_samples=5)
print(f"\n✅ Final Self-Consistent Answer: {final_answer} (Count: {count})")

Run 1:
To find the total number of wheels, we need to multiply the number of cars by the number of wheels each car has.

Step 1: Identify the number of cars (3)
Step 2: Identify the number of wheels per car (4)
Step 3: Multiply the number of cars by the number of wheels per car
   Total wheels = Number of cars * Number of wheels per car
                  = 3 * 4
                  = 12

Answer: There are 12 wheels in total.
-> Answer: 12

Run 2:
To find the total number of wheels, we need to follow these steps:

1. Identify the number of cars: There are 3 cars.
2. Identify the number of wheels on each car: Each car has 4 wheels.
3. Multiply the number of cars by the number of wheels on each car: 
   3 cars * 4 wheels/car = 12 wheels

Therefore, there are a total of 12 wheels.
-> Answer: 12

Run 3:
To find the total number of wheels, we need to multiply the number of cars by the number of wheels on each car.

Step 1: Identify the number of cars. 
Number of cars = 3

Step 2: Identify the 