## Setup

In [36]:
import os, json
from datetime import datetime
from concurrent.futures import ThreadPoolExecutor, as_completed
from dotenv import load_dotenv
from tqdm.notebook import tqdm
load_dotenv()

True

In [37]:
from openai import OpenAI
# Client for the OpenAI API; the key is looked up in the OPENAI_API_KEY
# environment variable (os.getenv yields None when it is not set).
openai_client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))

In [38]:
import requests

def make_gpt_response(messages, model="gpt-4o", seed=42):
    """
    Send a chat conversation to an LLM and return the text of its first reply.

    Requests for "gpt-4o" go through the Metaculus OpenAI proxy (authenticated
    with the METACULUS_TOKEN_ALT environment variable, temperature 0.5); every
    other model is requested through the module-level `openai_client`.

    Args:
        messages: list of {"role": ..., "content": ...} chat messages.
        model: model name; "gpt-4o" selects the proxy route.
        seed: seed forwarded to the completion API.

    Returns:
        The content string of the first choice in the completion.

    Raises:
        Exception: if the proxy answers with a non-success HTTP status; the
            exception message is the raw response body.
    """
    if model == "gpt-4o":
        url = "https://www.metaculus.com/proxy/openai/v1/chat/completions/"
        auth_header = {"Authorization": f"Token {os.getenv('METACULUS_TOKEN_ALT')}"}
        data = {
            "model": "gpt-4o",
            "seed": seed,
            "temperature": 0.5,
            "messages": messages
        }

        http_response = requests.post(url, headers=auth_header, json=data)
        # Surface proxy errors (auth, rate limit, ...) with their real message
        # instead of an opaque KeyError on 'choices'; same pattern as the
        # Metaculus API helpers in this notebook.
        if not http_response.ok:
            raise Exception(http_response.text)
        body = json.loads(http_response.text)
        return body['choices'][0]['message']['content']

    completion = openai_client.chat.completions.create(
        model=model,
        messages=messages,
        seed=seed
    )
    return completion.choices[0].message.content

In [39]:
# Smoke test: one trivial question through the non-proxy (openai_client) route.
messages = [
    {"role": "user", "content": "what is the capital of France?"},
]
response = make_gpt_response(messages, seed=42, model="o1-preview")
print(response)

The capital of France is Paris.


In [40]:
def _read_prompt(path):
    """Return the full text of the prompt file at `path`, closing the file afterwards."""
    with open(path, "r") as prompt_file:
        return prompt_file.read()

# Prompt templates used by the forecasting pipeline, loaded once from disk.
make_background_prompt = _read_prompt("prompts/make_background.prompt")
make_subquestions_prompt = _read_prompt("prompts/make_subquestions.prompt")
make_subanswer_prompt = _read_prompt("prompts/make_subanswer.prompt")
make_probability_prompt = _read_prompt("prompts/make_probability.prompt")

In [41]:
from openai import OpenAI
# Perplexity is reached through the OpenAI client class pointed at Perplexity's
# base URL. PERPLEXITY_KEY must be set: os.environ[...] raises KeyError otherwise.
perplexity_client = OpenAI(
    base_url="https://api.perplexity.ai",
    api_key=os.environ["PERPLEXITY_KEY"],
)

In [42]:
def make_background(question:str):
    """
    Ask the Perplexity online model for background on `question`.

    The module-level `make_background_prompt` is sent as the system message and
    the question as the user message; the text of the first choice is returned.
    """
    system_turn = {'role': 'system', 'content': make_background_prompt}
    user_turn = {'role': 'user', 'content': question}

    completion = perplexity_client.chat.completions.create(
        model="llama-3.1-sonar-large-128k-online",
        messages=[system_turn, user_turn],
        seed=42,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

In [43]:
def answer_subquestion(subquestion:str):
    """
    Answer a single subquestion with the Perplexity online model.

    The module-level `make_subanswer_prompt` is sent as the system message and
    the subquestion as the user message; the text of the first choice is returned.
    """
    conversation = [
        {'role': 'system', 'content': make_subanswer_prompt},
        {'role': 'user', 'content': subquestion},
    ]
    completion = perplexity_client.chat.completions.create(
        model="llama-3.1-sonar-large-128k-online",
        seed=42,
        messages=conversation,
    )
    answer_text = completion.choices[0].message.content
    return answer_text

In [44]:
import re
def process_forecast_probabilty(forecast_text: str):
    """
    Extract the forecast probability from the forecast text and clamp it between 1 and 99.

    The last number immediately followed by '%' is used. Decimal percentages
    such as "12.5%" are read as a whole number (previously only the digits
    after the decimal point were captured) and rounded to the nearest integer.

    Args:
        forecast_text: free-form model output expected to contain an "NN%" figure.

    Returns:
        An int in [1, 99], or None when no percentage is found.
    """
    matches = re.findall(r"(\d+(?:\.\d+)?)%", forecast_text)
    if not matches:
        return None
    # Use the last number found before a '%'.
    value = float(matches[-1])
    # Clamp before converting so absurdly large inputs cannot overflow int().
    value = min(99.0, max(1.0, value))
    return int(round(value))

In [45]:
def calibrate_probability(probability):
    """
    Calibration hook for the raw forecast probability.

    Currently a placeholder: the input is returned unchanged.
    """
    # TODO calibrate the probability via Q3 curve
    calibrated = probability
    return calibrated

In [46]:
from tenacity import retry, stop_after_attempt, wait_fixed

@retry(stop=stop_after_attempt(3), wait=wait_fixed(10))
def make_forecast(
        question:str,
        date:datetime=None,
        background:str=None,
        criteria:str=None,
        fine_print:str=None,
        model:str="gpt-4o",
        seed=42):
    """
    Produce a probability forecast for a binary question.

    Pipeline: fetch extra background (make_background), ask the LLM to break
    the question into subquestions, answer each subquestion in parallel
    (answer_subquestion), then ask the LLM for a final probability using all
    of that material. The retry decorator re-runs the whole function (up to 3
    attempts, 10 seconds apart) when any step raises.

    Args:
        question: the question title.
        date: "today" as shown to the model; defaults to datetime.now().
        background: optional question-provided background text.
        criteria: resolution criteria text (interpolated as-is, so None
            appears literally as "None" in the prompt).
        fine_print: optional additional criteria.
        model: model name passed to make_gpt_response.
        seed: seed passed to make_gpt_response.

    Returns:
        (payload, final_response, probability): the full prompt payload, the
        model's final answer text, and the extracted probability.

    Raises:
        AssertionError: when no probability can be extracted from the final
            response (this triggers a retry).
    """
    if date is None:
        date = datetime.now()

    # get perplexity background
    extra_background = make_background(question)

    # decompose question into subquestions
    payload = ""
    payload += f"QUESTION:\n{question}"
    payload += f"\n\nTODAY'S DATE:\n{date.strftime('%Y-%m-%d')}"
    payload += f"\n\nRESOLUTION CRITERIA:\n{criteria}"

    if fine_print:
        payload += f"\n\nADDITIONAL CRITERIA:\n{fine_print}"

    if background:
        payload += f"\n\nBACKGROUND:\n{background}"
        payload += f'\n\nADDITIONAL BACKGROUND:\n{extra_background}'
    else:
        payload += f"\n\nBACKGROUND:\n{extra_background}"

    messages = [
        {"role": "user", "content": make_subquestions_prompt},
        {"role": "user", "content": payload}]
    raw_subquestions = make_gpt_response(messages, model=model, seed=seed)
    # One subquestion per non-blank line of the model output.
    subquestions = [line.strip() for line in raw_subquestions.split("\n")]
    subquestions = [line for line in subquestions if line]

    # make answers to each subquestion
    # executor.map yields results in submission order, so the final prompt is
    # assembled identically on every run instead of in thread-completion order.
    with ThreadPoolExecutor(max_workers=10) as executor:
        answers = list(executor.map(answer_subquestion, subquestions))
    sub_values = list(zip(subquestions, answers))

    # make final probability
    payload += "\n\nRELEVANT SUBQUESTIONS:"
    sub_vals = "\n\n---\n\n".join(
        f"SUBQUESTION: {subquestion}\nANSWER: {value}"
        for subquestion, value in sub_values)
    payload += f"\n\n{sub_vals}"

    messages = [
        {"role": "user", "content": make_probability_prompt},
        {"role": "user", "content": payload}]
    final_response = make_gpt_response(messages, model=model, seed=seed)

    # parse final_response to extract probability
    probability = process_forecast_probabilty(final_response)

    # Validate before calibrating so the calibration step never receives None.
    # Raised explicitly (same exception type as before) so the check survives
    # `python -O`, which strips assert statements.
    if probability is None:
        raise AssertionError(
            f"ERROR: Could not extract probability from response: {final_response}")

    # calibrate probability
    probability = calibrate_probability(probability)

    return (payload, final_response, probability)

# Test

In [12]:
# End-to-end smoke test of make_forecast on a single hand-written question.
payload, resp, prob = make_forecast(
    model="o1-preview",
    question="Will the Real-time Sahm Rule Recession Indicator increase Year-over-Year for Nov 2024?",
    criteria="This question will resolve based on reporting by the St. Louis Fed here:  https://fred.stlouisfed.org/series/SAHMREALTIME. The reported number for Nov 2024 must be greater than the reported for Nov 2023.",
    fine_print="The question will resolve according to the first value published for the period in question, later updates or revisions will be immaterial.",
    background="""According to FRED: > Sahm Recession Indicator signals the start of a recession when the three-month moving average of the national unemployment rate (U3) rises by 0.50 percentage points or more relative to the minimum of the three-month averages from the previous 12 months. This indicator is based on "real-time" data, that is, the unemployment rate (and the recent history of unemployment rates) that were available in a given month. The BLS revises the unemployment rate each year at the beginning of January, when the December unemployment rate for the prior year is published. Revisions to the seasonal factors can affect estimates in recent years. Otherwise the unemployment rate does not revise.""",
)

# Prompt payload, model answer and extracted probability, separated by rules.
print(payload, '-'*80, resp, '-'*80, prob, sep='\n')

KeyboardInterrupt: 

# Questions via the Metaculus API

In [47]:
# Metaculus API settings shared by the helper functions below.
TOURNAMENT_ID = 32506 # 32506 is the tournament ID for Q4 AI Benchmarking
API_BASE_URL = "https://www.metaculus.com/api2"
# Nested under "headers" so it can be splatted into requests calls as **AUTH_HEADERS.
AUTH_HEADERS = {
    "headers": {
        "Authorization": f"Token {os.getenv('METACULUS_TOKEN')}",
    },
}

In [48]:
import requests, json

def post_question_comment(question_id: int, comment_text: str) -> None:
    """
    Publish `comment_text` as a comment on question `question_id`.

    The request is authenticated with AUTH_HEADERS. Raises Exception carrying
    the response body when the API does not report success.
    """
    comment_payload = {
        "comment_text": comment_text,
        "submit_type": "N",
        "include_latest_prediction": True,
        "question": question_id,
    }
    reply = requests.post(
        f"{API_BASE_URL}/comments/",
        json=comment_payload,
        **AUTH_HEADERS,
    )
    if reply.ok:
        return
    raise Exception(reply.text)
    
def post_question_prediction(question_id: int, prediction_percentage: float) -> None:
    """
    Submit a prediction for question `question_id`.

    `prediction_percentage` is a percentage between 1 and 100 (asserted); it
    is sent to the API as a fraction (percentage / 100). Raises Exception
    carrying the response body when the API does not report success.
    """
    assert 1 <= prediction_percentage <= 100, "Prediction must be between 1 and 100"
    fraction = float(prediction_percentage) / 100
    reply = requests.post(
        f"{API_BASE_URL}/questions/{question_id}/predict/",
        json={"prediction": fraction},
        **AUTH_HEADERS,
    )
    if reply.ok:
        return
    raise Exception(reply.text)

def get_question_details(question_id: int) -> dict:
    """
    Fetch the full record of question `question_id` and return the decoded JSON.

    Raises Exception carrying the response body when the API does not report
    success.
    """
    reply = requests.get(f"{API_BASE_URL}/questions/{question_id}/", **AUTH_HEADERS)
    if not reply.ok:
        raise Exception(reply.text)
    details = json.loads(reply.content)
    return details

def list_questions(tournament_id=TOURNAMENT_ID, offset=0, count=1000) -> dict:
    """
    Fetch up to {count} open binary questions (with descriptions) from the
    {tournament_id} project, starting at {offset}.

    Args:
        tournament_id: project id sent as the "project" query parameter.
        offset: value of the "offset" query parameter.
        count: value of the "limit" query parameter.

    Returns:
        The decoded JSON response body, not a bare list. The rest of this
        notebook reads the question records from its "results" key.

    Raises:
        Exception: carrying the response body when the API does not report
            success.
    """
    url_qparams = {
        "limit": count,
        "offset": offset,
        "has_group": "false",
        "order_by": "-activity",
        "forecast_type": "binary",
        "project": tournament_id,
        "status": "open",
        "type": "forecast",
        "include_description": "true",
    }
    url = f"{API_BASE_URL}/questions/"
    response = requests.get(url, **AUTH_HEADERS, params=url_qparams)
    if not response.ok:
        raise Exception(response.text)
    return json.loads(response.content)

In [49]:
# Fetch the tournament's questions and keep the ids of those whose status is "open".
questions = list_questions()

open_questions_ids = [
    entry["id"]
    for entry in questions["results"]
    if entry["status"] == "open"
]

In [50]:
from pathlib import Path

# Ids of questions that already have a saved forecast. The id is the last
# "_"-separated token of each JSON file name in the outputs directory.
ouputs_dir = "outputs"
output_path = Path(ouputs_dir)
completed = {
    int(saved_file.stem.split("_")[-1])
    for saved_file in output_path.glob("*.json")
}

In [51]:
SUBMIT_PREDICTION = True # set to True to publish your predictions to Metaculus
FORECAST_Q4_AIB = True # set to True to forecast Q4 AI Benchmarking.

# The list of questions to forecast
if FORECAST_Q4_AIB:
    forecast_questions_ids = open_questions_ids
else:
    forecast_questions_ids = [28830, 28706] # Only include binary questions

# Make sure the results directory exists before any (expensive) forecast is
# computed, so a fresh checkout does not fail at the moment of saving.
os.makedirs("outputs", exist_ok=True)

for question_id in tqdm(forecast_questions_ids):
    # Skip questions that already have a saved forecast.
    if int(question_id) in completed:
        continue

    question_details = get_question_details(question_id)

    title = question_details["question"]["title"]
    resolution_criteria = question_details["question"]["resolution_criteria"]
    background = question_details["question"]["description"]
    fine_print = question_details["question"]["fine_print"]

    print('FORECASTING')
    print(f"Title: {title}")
    print(f"Background: {background}")

    prompt, comment, probability = make_forecast(
        question=title,
        background=background,
        criteria=resolution_criteria,
        fine_print=fine_print,
        model="o1-mini")

    print("Comment: ", comment)
    print("Probability: ", probability)

    assert isinstance(probability, int), "Unexpected probability format"

    # Persist the forecast; the "<date>_<question id>.json" name is what the
    # "completed" scan parses to find already-forecast questions.
    current_date = datetime.now().strftime("%Y-%m-%d")
    with open(f"outputs/{current_date}_{question_id}.json", "w", encoding="utf-8") as f:
        output = {"ID": question_id, "title": title, "prompt": prompt, "comment": comment, "probability": probability}
        json.dump(output, f, ensure_ascii=False, indent=4)

    if SUBMIT_PREDICTION:
        post_question_prediction(question_id, probability)
        post_question_comment(question_id, comment)

    print('-'*80)

0it [00:00, ?it/s]