In [1]:
import sys
from common.path_utils import get_src_path, get_data_path
sys.path.append(str(get_src_path()))


from common.datatypes import ForecastingQuestion_stripped, ForecastingQuestion
import json

# llm_forecasting imports
from forecasters.llm_forecasting.prompts.prompts import PROMPT_DICT
from forecasters.llm_forecasting.utils.time_utils import get_todays_date, subtract_days_from_date
from forecasters.llm_forecasting import ranking, summarize, ensemble



  from .autonotebook import tqdm as notebook_tqdm
None of PyTorch, TensorFlow >= 2.0, or Flax have been found. Models won't be available and only tokenizers, configuration and file/data utilities can be used.


LOCAL_CACHE: None


### Load Data

In [2]:
# Load the cleaned real-question dataset (JSON Lines: one JSON object per line).
questions_path = get_data_path() / "fq/real/questions_cleaned_formatted.jsonl"

# Pin the encoding: without it, open() falls back to the platform locale, so
# non-ASCII question text could decode differently (or fail) across machines.
with open(questions_path, "r", encoding="utf-8") as file:
    # Skip blank lines (e.g. trailing newlines) that json.loads would reject.
    data = [json.loads(line) for line in file if line.strip()]

In [4]:
# Use the first record of the dataset as the running example for this notebook.
sample_question = data[0]
sample_title = sample_question["title"]
print(sample_title)


Will SpaceX land people on Mars before 2030?


In [5]:
# Build a ForecastingQuestion from the raw record by passing its keys as keyword
# arguments. Later cells read fq.title, fq.body and fq.metadata from this object.
fq = ForecastingQuestion(**sample_question)

### Testing "Advanced Forecaster"

In [6]:
from forecasters.advanced_forecaster import AdvancedForecaster

# Keyword arguments override individual retrieval/reasoning config entries
# (the constructor logs each override it applies). Both base reasoning slots
# use the same model here.
af = AdvancedForecaster(
    MAX_WORDS_NEWSCATCHER=5,
    MAX_WORDS_GNEWS=8,
    BASE_REASONING_MODEL_NAMES=["gpt-3.5-turbo-1106"] * 2,
)

Loading AdvancedForecaster...
Overriding retrieval_config: MAX_WORDS_NEWSCATCHER=5
Overriding retrieval_config: MAX_WORDS_GNEWS=8
Overriding reasoning_config: BASE_REASONING_MODEL_NAMES:=['gpt-3.5-turbo-1106', 'gpt-3.5-turbo-1106']


In [9]:
# Run the forecaster end to end on the sample question. Note that the question
# object is passed through the `sentence` keyword. Top-level `await` only works
# in an async-aware kernel (IPython/Jupyter), not in a plain Python script.
final_prob = await af.call_async(sentence=fq)

An error occurred while fetching the article: Article `download()` failed with Website protected with PerimeterX, url: None on URL https://news.google.com/rss/articles/CBMiU2h0dHBzOi8vd3d3Lm15c2FuYW50b25pby5jb20vYnVzaW5lc3MvYXJ0aWNsZS9zcGFjZXgtaHVtYW4tc3BhY2VmbGlnaHQtMTk0MzA2OTUucGhw0gEA?oc=5&hl=en-US&gl=US&ceid=US:en


In [8]:
# Report the forecaster's output; `!s` mirrors print()'s own str() conversion.
print(f"Final LLM probability {final_prob!s}")

Final LLM probability 0.55


Now we test the two procedures that make up AdvancedForecaster: retrieval and reasoning.


### Retrieval

In [None]:
# Retrieval settings. The whole dict is passed as `config` to
# ranking.retrieve_summarize_and_rank_articles in a later cell.
RETRIEVAL_CONFIG = {
    # --- Search-query generation ---
    "NUM_SEARCH_QUERY_KEYWORDS": 3,
    "MAX_WORDS_NEWSCATCHER": 5,
    "MAX_WORDS_GNEWS": 8,
    "SEARCH_QUERY_MODEL_NAME": "gpt-4-1106-preview",
    "SEARCH_QUERY_TEMPERATURE": 0.0,
    "SEARCH_QUERY_PROMPT_TEMPLATES": [
        PROMPT_DICT["search_query"]["0"],
        PROMPT_DICT["search_query"]["1"],
    ],
    "NUM_ARTICLES_PER_QUERY": 5,
    # --- Summarization ---
    "SUMMARIZATION_MODEL_NAME": "gpt-3.5-turbo-1106",
    "SUMMARIZATION_TEMPERATURE": 0.2,
    "SUMMARIZATION_PROMPT_TEMPLATE": PROMPT_DICT["summarization"]["9"],
    # Defined exactly once: a repeated key in a dict literal silently keeps
    # only the last value, which hides the setting that is really in effect.
    # This value is also used in a later cell to cap how many ranked articles
    # are concatenated into the summary text.
    "NUM_SUMMARIES_THRESHOLD": 20,
    # --- Embedding pre-filter ---
    "PRE_FILTER_WITH_EMBEDDING": True,
    "PRE_FILTER_WITH_EMBEDDING_THRESHOLD": 0.32,
    # --- Ranking ---
    "RANKING_MODEL_NAME": "gpt-3.5-turbo-1106",
    "RANKING_TEMPERATURE": 0.0,
    "RANKING_PROMPT_TEMPLATE": PROMPT_DICT["ranking"]["0"],
    "RANKING_RELEVANCE_THRESHOLD": 4,
    "RANKING_COSINE_SIMILARITY_THRESHOLD": 0.5,
    "SORT_BY": "date",
    "RANKING_METHOD": "llm-rating",
    "RANKING_METHOD_LLM": "title_250_tokens",
    "EXTRACT_BACKGROUND_URLS": True,
}

In [None]:
# Pull the prompt inputs off the ForecastingQuestion.
question = fq.title
resolution_criteria = fq.body  # resolution criteria and other info live in `body`
background_info = fq.metadata["background_info"]

# Retrieval window: the 30 days leading up to today.
# If the data structure ever carries an open date, the window start should
# move to the question's open date instead.
today_date = get_todays_date()
window_start = subtract_days_from_date(today_date, 30)
retrieval_dates = (window_start, today_date)

In [None]:
(
    ranked_articles,
    all_articles,
    search_queries_list_gnews,
    search_queries_list_nc,
) = await ranking.retrieve_summarize_and_rank_articles(
    question,
    background_info,
    resolution_criteria,
    retrieval_dates,
    urls=[],
    config=RETRIEVAL_CONFIG,
    return_intermediates=True,
)

In [None]:
# Keep only the leading ranked articles, capped by the configured threshold,
# and merge their summaries into a single text.
summary_limit = RETRIEVAL_CONFIG["NUM_SUMMARIES_THRESHOLD"]
top_articles = ranked_articles[:summary_limit]
all_summaries = summarize.concat_summaries(top_articles)

# Show only the first 3000 characters to keep the cell output readable.
summary_preview = all_summaries[:3000]
print(summary_preview, "...")

---
ARTICLES
[1] SpaceX Gears Up for Starship Flight Test 4 with Unprecedented Upgrades and Preparations (published on 2024-05-04)
Summary: SpaceX is gearing up for the highly anticipated fourth flight test of its Starship spacecraft at Boca Chica, Texas. The site has been a hive of activity as engineers and technicians work tirelessly to ensure every component functions seamlessly. The preparations include innovative updates and rigorous testing regimens that could redefine space travel. The site has seen rapid construction and infrastructure modification essential for supporting the Starship behemoth. SpaceX has also been integrating new ground support equipment, including advanced tank systems and fueling mechanisms. The company's long-term goals for Starship include Mars colonization and playing a pivotal role in NASA's Artemis missions to the Moon. With continued successful development, testing, and collaboration, Starship stands on the precipice of revolutionizing space travel.



### Reasoning

In [None]:
# Reasoning settings. Every entry is forwarded to ensemble.meta_reason in a
# later cell.
REASONING_CONFIG = {
    # Base models that each produce a forecast before aggregation.
    "BASE_REASONING_MODEL_NAMES": ["gpt-4-1106-preview", "gpt-4-1106-preview"],
    "BASE_REASONING_TEMPERATURE": 1.0,
    # One inner list of prompt templates per base model entry above
    # (presumably paired by index; TODO confirm against ensemble.meta_reason).
    "BASE_REASONING_PROMPT_TEMPLATES": [
        [
            PROMPT_DICT["binary"]["scratch_pad"]["1"],
            PROMPT_DICT["binary"]["scratch_pad"]["2"],
        ],
        [
            PROMPT_DICT["binary"]["scratch_pad"]["new_3"],
            PROMPT_DICT["binary"]["scratch_pad"]["new_6"],
        ],
    ],
    # Aggregation step: passed as aggregation_method / meta_* arguments.
    "AGGREGATION_METHOD": "meta",
    "AGGREGATION_PROMPT_TEMPLATE": PROMPT_DICT["meta_reasoning"]["0"],
    "AGGREGATION_TEMPERATURE": 0.2,
    "AGGREGATION_MODEL_NAME": "gpt-4",
    # NOTE(review): this key is misspelled ("WEIGTHTS"), but the meta_reason
    # cell reads it under this exact spelling; rename both together or not at all.
    "AGGREGATION_WEIGTHTS": None,
}

In [None]:
close_date = "N/A"  # data doesn't have explicit close date, so set to N/A
today_to_close_date = [today_date, close_date]

ensemble_dict = await ensemble.meta_reason(
    question=question,
    background_info=background_info,
    resolution_criteria=resolution_criteria,
    today_to_close_date_range=today_to_close_date,
    retrieved_info=all_summaries,
    reasoning_prompt_templates=REASONING_CONFIG["BASE_REASONING_PROMPT_TEMPLATES"],
    base_model_names=REASONING_CONFIG["BASE_REASONING_MODEL_NAMES"],
    base_temperature=REASONING_CONFIG["BASE_REASONING_TEMPERATURE"],
    aggregation_method=REASONING_CONFIG["AGGREGATION_METHOD"],
    weights=REASONING_CONFIG["AGGREGATION_WEIGTHTS"],
    meta_model_name=REASONING_CONFIG["AGGREGATION_MODEL_NAME"],
    meta_prompt_template=REASONING_CONFIG["AGGREGATION_PROMPT_TEMPLATE"],
    meta_temperature=REASONING_CONFIG["AGGREGATION_TEMPERATURE"],
)

In [None]:
# Print each section of the ensemble result under its own banner:
# first the aggregated reasoning text, then the final probability.
for banner, result_key in (
    ("REASONING\n", "meta_reasoning"),
    ("PROBABILITY\n", "meta_prediction"),
):
    print(banner, "=================")
    print(ensemble_dict[result_key])

REASONING
1. Reasons why the answer might be no:
   - **Technological Challenges**: The complexity of landing humans on Mars is immense. Despite SpaceX's progress with Starship, unforeseen technical hurdles could arise that delay the timeline.
   - **Safety and Testing**: Ensuring the safety of astronauts is paramount. This requires extensive testing and possibly redesigning components based on test outcomes, which could prolong the timeline.
   - **Regulatory and Funding Constraints**: SpaceX's plans are subject to rigorous regulatory scrutiny and funding hurdles. Delays in regulatory approvals or funding could push the timeline beyond 2030.
   - **Historical Delays**: Historically, space missions often face delays due to their complexity. Given the ambitious nature of SpaceX's plan, it's possible that the timeline could be pushed back.

2. Reasons why the answer might be yes:
   - **Proven Track Record**: SpaceX has demonstrated its capability to deliver on its goals, providing a sol