In [16]:
import sys
from common.path_utils import get_src_path, get_data_path
# Extend sys.path with the source root and the llm_forecasting directory before
# the remaining imports run. Presumably this is what lets the bare `config`,
# `ranking`, `summarize` and `ensemble` modules below resolve — confirm against
# the repository layout.
sys.path.append(str(get_src_path()))
sys.path.append(str(get_src_path() / "forecasters/llm_forecasting"))

from common.datatypes import ForecastingQuestion_stripped, ForecastingQuestion
import json

# llm_forecasting imports
from config.constants import PROMPT_DICT
import ranking
import summarize
import ensemble

### Load Data

In [17]:
# Load every record from the cleaned questions file (JSON Lines: one JSON
# object per line) into `data`, a list of parsed objects.
questions_path = get_data_path() / "fq/real/questions_cleaned_formatted.jsonl"
# Pin the encoding: without it `open` falls back to the locale default, which
# can mis-decode non-ASCII text on some platforms.
with open(questions_path, "r", encoding="utf-8") as file:
    # Skip blank lines (e.g. a trailing newline at end of file) so that
    # json.loads is never handed an empty string.
    data = [json.loads(line) for line in file if line.strip()]



In [3]:
# Use the first loaded record as the worked example for the rest of the notebook.
sample_question = data[0]

In [4]:
# Build a ForecastingQuestion from the record; its keys are passed as keyword arguments.
fq = ForecastingQuestion(**sample_question)

### Retrieval

In [5]:
# Settings passed as `config` to ranking.retrieve_summarize_and_rank_articles.
# The meaning of each key is defined by the llm_forecasting package; only
# NUM_SUMMARIES_THRESHOLD is also read directly in this notebook.
RETRIEVAL_CONFIG = {
    # --- search query generation ---
    "NUM_SEARCH_QUERY_KEYWORDS": 3,
    "MAX_WORDS_NEWSCATCHER": 5,
    "MAX_WORDS_GNEWS": 8,
    "SEARCH_QUERY_MODEL_NAME": "gpt-4-1106-preview",
    "SEARCH_QUERY_TEMPERATURE": 0.0,
    "SEARCH_QUERY_PROMPT_TEMPLATES": [
        PROMPT_DICT["search_query"]["0"],
        PROMPT_DICT["search_query"]["1"],
    ],
    "NUM_ARTICLES_PER_QUERY": 5,
    # --- summarization ---
    "SUMMARIZATION_MODEL_NAME": "gpt-3.5-turbo-1106",
    "SUMMARIZATION_TEMPERATURE": 0.2,
    "SUMMARIZATION_PROMPT_TEMPLATE": PROMPT_DICT["summarization"]["9"],
    # This key used to appear twice in the literal (10 here, 20 further down).
    # Python keeps the last value for a repeated key, so the effective setting
    # was always 20; the duplicate is removed and the effective value is kept.
    # It is also used below to cap how many ranked articles are concatenated.
    "NUM_SUMMARIES_THRESHOLD": 20,
    # --- filtering and ranking ---
    "PRE_FILTER_WITH_EMBEDDING": True,
    "PRE_FILTER_WITH_EMBEDDING_THRESHOLD": 0.32,
    "RANKING_MODEL_NAME": "gpt-3.5-turbo-1106",
    "RANKING_TEMPERATURE": 0.0,
    "RANKING_PROMPT_TEMPLATE": PROMPT_DICT["ranking"]["0"],
    "RANKING_RELEVANCE_THRESHOLD": 4,
    "RANKING_COSINE_SIMILARITY_THRESHOLD": 0.5,
    "SORT_BY": "date",
    "RANKING_METHOD": "llm-rating",
    "RANKING_METHOD_LLM": "title_250_tokens",
    "EXTRACT_BACKGROUND_URLS": True,
}

In [6]:
# Pull the pieces of the sample ForecastingQuestion that the retrieval and
# reasoning steps take as separate arguments.
question, background_info, resolution_criteria = (
    fq.title,
    fq.metadata["background_info"],
    fq.body,
)

# Pair of date strings passed to the retrieval step; artificially set for now.
retrieval_dates = ("2024-03-01", "2024-05-04")

In [7]:
(
    ranked_articles,
    all_articles,
    search_queries_list_gnews,
    search_queries_list_nc,
) = await ranking.retrieve_summarize_and_rank_articles(
    question,
    background_info,
    resolution_criteria,
    retrieval_dates,
    urls=[],
    config=RETRIEVAL_CONFIG,
    return_intermediates=True,
)

INFO:ranking:Finding 3 search query keywords via LLM...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:ranking:Search queries for NC: ['Will SpaceX land people on Mars before 2030?', 'SpaceX Mars mission progress', 'SpaceX Mars mission challenges', 'Elon Musk Mars timeline update', 'SpaceX Mars timeline update']
INFO:ranking:Search queries for GNews: ['Will SpaceX land people on Mars before 2030?', 'Elon Musk statement on Mars landing', 'SpaceX Mars mission progress 2024', 'Challenges facing SpaceX Mars landing', 'SpaceX Interplanetary Transport System updates', 'SpaceX Mars mission timeline update']
INFO:information_retrieval:Retrieved 0 articles for Will SpaceX land people on Mars before 2030? via Newscatcher.
INFO:informa

In [8]:
# Keep at most NUM_SUMMARIES_THRESHOLD of the ranked articles, then merge
# their summaries into a single block for the reasoning step.
max_summaries = RETRIEVAL_CONFIG["NUM_SUMMARIES_THRESHOLD"]
top_ranked_articles = ranked_articles[:max_summaries]
all_summaries = summarize.concat_summaries(top_ranked_articles)

In [9]:
# Preview only the first 3000 characters of the concatenated summaries to keep the output short.
print(all_summaries[:3000], "...")

---
ARTICLES
[1] SpaceX making progress on Starship in-space refueling technologies (published on 2024-04-27)
Summary: NASA says SpaceX is on track to demonstrate in-space refueling of Starship next year, a critical technology for returning humans to the lunar surface. SpaceX achieved one step towards refueling of Starship with a demonstration on the latest Starship test flight March 14. The next major milestone is a demonstration planned for 2025 where two Starships will dock in orbit, with one transferring propellants to the other. Propellant transfer technology is essential to SpaceX’s plans for Starship missions beyond low Earth orbit, including the Human Landing System (HLS) version of Starship that will be used to land astronauts on the moon starting with the Artemis 3 mission, currently scheduled for no earlier than September 2026. Elon Musk, chief executive of SpaceX, posted on social media April 26, responding to another post about Kshatriya’s presentation. “Full & rapid reusa

### Reasoning

In [10]:
# Settings for the reasoning / aggregation step; the BASE_REASONING_* and
# AGGREGATION_* entries are forwarded to ensemble.meta_reason in the next cell.
REASONING_CONFIG = {
    # Two base-model entries, paired with the two prompt-template lists below.
    "BASE_REASONING_MODEL_NAMES": ["gpt-4-1106-preview", "gpt-4-1106-preview"],
    "BASE_REASONING_TEMPERATURE": 1.0,
    # One inner list of prompt templates per base model entry (two templates each).
    "BASE_REASONING_PROMPT_TEMPLATES": [
        [
            PROMPT_DICT["binary"]["scratch_pad"]["1"],
            PROMPT_DICT["binary"]["scratch_pad"]["2"],
        ],
        [
            PROMPT_DICT["binary"]["scratch_pad"]["new_3"],
            PROMPT_DICT["binary"]["scratch_pad"]["new_6"],
        ],
    ],
    # NOTE(review): the ALIGNMENT_* entries are not read by any cell visible in
    # this notebook — confirm whether they are still needed.
    "ALIGNMENT_MODEL_NAME": "gpt-3.5-turbo-1106",
    "ALIGNMENT_TEMPERATURE": 0,
    "ALIGNMENT_PROMPT": PROMPT_DICT["alignment"]["0"],
    "AGGREGATION_METHOD": "meta",
    "AGGREGATION_PROMPT_TEMPLATE": PROMPT_DICT["meta_reasoning"]["0"],
    "AGGREGATION_TEMPERATURE": 0.2,
    "AGGREGATION_MODEL_NAME": "gpt-4",
    # NOTE(review): key is misspelled ("WEIGTHTS"). It is kept as-is because the
    # meta_reason call reads this exact key; rename both places together.
    "AGGREGATION_WEIGTHTS": None,
}

In [11]:
today_to_close_date = [retrieval_dates[1], "N/A"]
ensemble_dict = await ensemble.meta_reason(
    question=question,
    background_info=background_info,
    resolution_criteria=resolution_criteria,
    today_to_close_date_range=today_to_close_date,
    retrieved_info=all_summaries,
    reasoning_prompt_templates=REASONING_CONFIG["BASE_REASONING_PROMPT_TEMPLATES"],
    base_model_names=REASONING_CONFIG["BASE_REASONING_MODEL_NAMES"],
    base_temperature=REASONING_CONFIG["BASE_REASONING_TEMPERATURE"],
    aggregation_method=REASONING_CONFIG["AGGREGATION_METHOD"],
    answer_type="probability",
    weights=REASONING_CONFIG["AGGREGATION_WEIGTHTS"],
    meta_model_name=REASONING_CONFIG["AGGREGATION_MODEL_NAME"],
    meta_prompt_template=REASONING_CONFIG["AGGREGATION_PROMPT_TEMPLATE"],
    meta_temperature=REASONING_CONFIG["AGGREGATION_TEMPERATURE"],
)

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:model_eval:Finished 2 base reasonings generated by gpt-4-1106-preview
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:model_eval:Finished 2 base reasonings generated by gpt-4-1106-preview
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


In [12]:
# Show the "meta_reasoning" entry of the ensemble result.
print(ensemble_dict["meta_reasoning"])

1. Provide reasons why the answer might be no.
- Technical Challenges: The development of technologies necessary for a Mars mission, such as in-space refueling and a spacecraft capable of landing and taking off from Mars, is complex and still in progress. Unforeseen technical issues could arise, causing delays.
- Regulatory and Safety Hurdles: A manned mission to Mars would need to meet stringent safety standards and gain approval from regulatory bodies, which could take longer than anticipated.
- Financial Constraints: The cost of a manned Mars mission is substantial. While SpaceX has been successful in securing funding so far, unexpected costs or a decrease in investment could lead to financial constraints and delays.
- Performance Shortfall: The recent revelation of a 50% underperformance in Starship's payload capacity for lunar missions raises doubts about its ability to complete a Mars mission.

2. Provide reasons why the answer might be yes.
- Track Record: SpaceX has a history o

### Testing "Advanced Forecaster"

In [13]:
# NOTE(review): imported here rather than in the top imports cell; consider
# moving it there so all dependencies are visible in one place.
from forecasters.advanced_forecaster import AdvancedForecaster
# Instantiate the forecaster without passing any constructor arguments.
af = AdvancedForecaster()





In [14]:
# Run the forecaster on the sample ForecastingQuestion, passed as the `sentence` keyword.
res = await af.call_async(sentence=fq)

INFO:ranking:Finding 3 search query keywords via LLM...
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:ranking:Search queries for NC: ['SpaceX funding partnerships', 'SpaceX Interplanetary Transport System development', 'Will SpaceX land people on Mars before 2030?', 'SpaceX Mars mission progress', 'Elon Musk Mars 2030 update', 'Elon Musk Mars timeline update']
INFO:ranking:Search queries for GNews: ['SpaceX funding regulatory challenges Mars', 'Will SpaceX land people on Mars before 2030?', 'Elon Musk statement on Mars landing', 'Elon Musk update Mars mission timeline', 'SpaceX Mars mission progress 2024', 'Challenges advancements SpaceX Mars 2030']
INFO:information_re

In [15]:
# Display the value returned by the forecaster call.
res

0.45