In [1]:
import sys
from common.path_utils import get_src_path, get_data_path
sys.path.append(str(get_src_path()))
sys.path.append(str(get_src_path() / "forecasters/llm_forecasting"))

from common.datatypes import ForecastingQuestion_stripped, ForecastingQuestion
import json

# llm_forecasting imports
from prompts.prompts import PROMPT_DICT
from utils.time_utils import get_todays_date, subtract_days_from_date
import ranking
import summarize
import ensemble

  from .autonotebook import tqdm as notebook_tqdm


### Load Data

In [2]:
# Load the forecasting questions: each non-blank line of the JSONL file is
# parsed with json.loads and collected, in file order, into `data`.
# - encoding="utf-8" makes decoding independent of the platform default.
# - Whitespace-only lines (e.g. a trailing newline at end of file) are skipped,
#   since json.loads would raise JSONDecodeError on them.
with open(
    get_data_path() / "fq/real/questions_cleaned_formatted.jsonl",
    "r",
    encoding="utf-8",
) as file:
    data = [json.loads(line) for line in file if line.strip()]

In [3]:
# Use the first loaded record as the example question for the rest of the notebook.
sample_question = data[0]

In [4]:
# Build a ForecastingQuestion by passing the record's key/value pairs as keyword arguments.
fq = ForecastingQuestion(**sample_question)

### Retrieval

In [5]:
# Retrieval settings. The dict is passed as `config=` to
# ranking.retrieve_summarize_and_rank_articles, and "NUM_SUMMARIES_THRESHOLD"
# is also read in this notebook to cap how many ranked articles are summarized.
RETRIEVAL_CONFIG = {
    # Search-query generation
    "NUM_SEARCH_QUERY_KEYWORDS": 3,
    "MAX_WORDS_NEWSCATCHER": 5,
    "MAX_WORDS_GNEWS": 8,
    "SEARCH_QUERY_MODEL_NAME": "gpt-4-1106-preview",
    "SEARCH_QUERY_TEMPERATURE": 0.0,
    "SEARCH_QUERY_PROMPT_TEMPLATES": [
        PROMPT_DICT["search_query"]["0"],
        PROMPT_DICT["search_query"]["1"],
    ],
    "NUM_ARTICLES_PER_QUERY": 5,
    # Summarization
    "SUMMARIZATION_MODEL_NAME": "gpt-3.5-turbo-1106",
    "SUMMARIZATION_TEMPERATURE": 0.2,
    "SUMMARIZATION_PROMPT_TEMPLATE": PROMPT_DICT["summarization"]["9"],
    # This key used to be listed twice (10 here, 20 further down). Python keeps
    # the last value of a duplicated key, so 20 was always the effective value;
    # the dead 10 entry has been dropped and 20 is stated once.
    "NUM_SUMMARIES_THRESHOLD": 20,
    # Embedding pre-filter
    "PRE_FILTER_WITH_EMBEDDING": True,
    "PRE_FILTER_WITH_EMBEDDING_THRESHOLD": 0.32,
    # Ranking
    "RANKING_MODEL_NAME": "gpt-3.5-turbo-1106",
    "RANKING_TEMPERATURE": 0.0,
    "RANKING_PROMPT_TEMPLATE": PROMPT_DICT["ranking"]["0"],
    "RANKING_RELEVANCE_THRESHOLD": 4,
    "RANKING_COSINE_SIMILARITY_THRESHOLD": 0.5,
    "SORT_BY": "date",
    "RANKING_METHOD": "llm-rating",
    "RANKING_METHOD_LLM": "title_250_tokens",
    "EXTRACT_BACKGROUND_URLS": True,
}

In [6]:
# Pull the fields used by the retrieval and reasoning cells off the sample question.
question = fq.title
background_info = fq.metadata["background_info"]
resolution_criteria = fq.body # resolution criteria and other info is in |body|

today_date = get_todays_date()
# `retrieval_dates` is a (start, end) pair: start is
# subtract_days_from_date(today_date, 30) and end is today_date, i.e. roughly
# the last month.
# If the data structure provides a question open date, the start of the
# retrieval window should be changed to that open date instead.
retrieval_dates = (
    subtract_days_from_date(today_date, 30),
    today_date,
)

In [7]:
# Run the retrieval step for the sample question over `retrieval_dates`, driven
# by RETRIEVAL_CONFIG and with no extra URLs supplied (urls=[]).
# With return_intermediates=True the awaited result is unpacked into four
# values: the ranked articles, all articles, and the search-query lists for
# GNews and NC (names as unpacked here; presumably NC = Newscatcher, per the
# "MAX_WORDS_NEWSCATCHER" config key -- confirm against the ranking module).
# Top-level `await` requires an async-capable kernel such as IPython/Jupyter.
(
    ranked_articles,
    all_articles,
    search_queries_list_gnews,
    search_queries_list_nc,
) = await ranking.retrieve_summarize_and_rank_articles(
    question,
    background_info,
    resolution_criteria,
    retrieval_dates,
    urls=[],
    config=RETRIEVAL_CONFIG,
    return_intermediates=True,
)

05/07/2024 01:38:32 AM - Finding 3 search query keywords via LLM...
05/07/2024 01:38:36 AM - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
05/07/2024 01:38:37 AM - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
05/07/2024 01:38:38 AM - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
05/07/2024 01:38:39 AM - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
05/07/2024 01:38:39 AM - Search queries for NC: ['SpaceX Interplanetary Transport System tests', 'Elon Musk Mars timeline update', 'Elon Musk Mars 2030 update', 'SpaceX Mars mission progress', 'SpaceX technological advancements', 'Will SpaceX land people on Mars before 2030?']
05/07/2024 01:38:39 AM - Search queries for GNews: ['SpaceX funding regulatory challenges 2024', 'Elon Musk update Mars landing timeline', 'SpaceX Mars mission progress 2024', 'Will SpaceX land people on Mars before 2030?', 'SpaceX In

An error occurred while fetching the article: Article `download()` failed with Status code 403 for url None on URL https://news.google.com/rss/articles/CBMibGh0dHBzOi8vd3d3LmlidGltZXMuY28udWsvc3BhY2V4LW1hcnMtcGxhbnMtMTAwMC1zcGFjZXNoaXBzLWRlbGl2ZXItZmlyc3QtY29sb25pc3RzLXdpdGhpbi03LTkteWVhcnMtMTcyNDMyN9IBAA?oc=5&hl=en-US&gl=US&ceid=US:en


05/07/2024 01:39:01 AM - Retrieved full article text for https://news.google.com/rss/articles/CBMiS2h0dHBzOi8vd3d3LnNwYWNlLmNvbS9zcGFjZXgtc3RhcnNoaXAtNTAwLWZlZXQtdGFsbC1tYXJzLW1pc3Npb25zLWVsb24tbXVza9IBAA?oc=5&hl=en-US&gl=US&ceid=US:en
05/07/2024 01:39:01 AM - Retrieved full article text for https://news.google.com/rss/articles/CBMiWWh0dHBzOi8vc3BhY2VuZXdzLmNvbS9zcGFjZXgtbWFraW5nLXByb2dyZXNzLW9uLXN0YXJzaGlwLWluLXNwYWNlLXJlZnVlbGluZy10ZWNobm9sb2dpZXMv0gEA?oc=5&hl=en-US&gl=US&ceid=US:en
05/07/2024 01:39:02 AM - Retrieved full article text for https://news.google.com/rss/articles/CBMiWWh0dHBzOi8vd3d3Lm5hc2EuZ292L2NlbnRlcnMtYW5kLWZhY2lsaXRpZXMvbWFyc2hhbGwvdGhlLW1hcnNoYWxsLXN0YXItZm9yLWFwcmlsLTE3LTIwMjQv0gEA?oc=5&hl=en-US&gl=US&ceid=US:en
05/07/2024 01:39:03 AM - get_html_2XX_only(): bad status code 403 on URL: https://news.google.com/rss/articles/CBMiU2h0dHBzOi8vd3d3Lm15c2FuYW50b25pby5jb20vYnVzaW5lc3MvYXJ0aWNsZS9zcGFjZXgtaHVtYW4tc3BhY2VmbGlnaHQtMTk0MzA2OTUucGhw0gEA?oc=5&hl=en-US&gl=US&ceid

An error occurred while fetching the article: Article `download()` failed with Website protected with PerimeterX, url: None on URL https://news.google.com/rss/articles/CBMiU2h0dHBzOi8vd3d3Lm15c2FuYW50b25pby5jb20vYnVzaW5lc3MvYXJ0aWNsZS9zcGFjZXgtaHVtYW4tc3BhY2VmbGlnaHQtMTk0MzA2OTUucGhw0gEA?oc=5&hl=en-US&gl=US&ceid=US:en


05/07/2024 01:39:03 AM - Retrieved full article text for https://news.google.com/rss/articles/CBMiPmh0dHBzOi8vb2JzZXJ2ZXIuY29tLzIwMjQvMDQvc3BhY2UtbWlzc2lvbnMtdG8td2F0Y2gtbWF5LTIwMjQv0gEA?oc=5&hl=en-US&gl=US&ceid=US:en
05/07/2024 01:39:04 AM - Retrieved full article text for https://news.google.com/rss/articles/CBMiWmh0dHBzOi8vc3BhY2VuZXdzLmNvbS9qYXBhbmVzZS1hc3Ryb25hdXRzLXRvLWxhbmQtb24tbW9vbi1hcy1wYXJ0LW9mLW5ldy1uYXNhLXBhcnRuZXJzaGlwL9IBAA?oc=5&hl=en-US&gl=US&ceid=US:en
05/07/2024 01:39:04 AM - Retrieved full article text for https://news.google.com/rss/articles/CBMiSGh0dHBzOi8vd3d3Lndhc2hpbmd0b25wb3N0LmNvbS90ZWNobm9sb2d5LzIwMjQvMDUvMDIvY2hpbmEtbW9vbi1taXNzaW9uL9IBAA?oc=5&hl=en-US&gl=US&ceid=US:en
05/07/2024 01:39:06 AM - Retrieved full article text for https://news.google.com/rss/articles/CBMiOWh0dHBzOi8vd3d3LnNwYWNlLmNvbS9jaGluZXNlLXByZWVtaW5lbmNlLW1vb24tbWFycy1vcC1lZNIBAA?oc=5&hl=en-US&gl=US&ceid=US:en
05/07/2024 01:39:06 AM - Retrieved full article text for https://news.google.com/r

In [8]:
# Keep at most NUM_SUMMARIES_THRESHOLD of the ranked articles and join their
# summaries via summarize.concat_summaries.
summary_limit = RETRIEVAL_CONFIG["NUM_SUMMARIES_THRESHOLD"]
top_ranked_articles = ranked_articles[:summary_limit]
all_summaries = summarize.concat_summaries(top_ranked_articles)

# Preview only the first 3000 characters so the cell output stays readable.
preview = all_summaries[:3000]
print(preview, "...")

---
ARTICLES
[1] SpaceX has added a 'human spaceflight' portion to its web site (published on 2024-05-06)
Summary: SpaceX has added human spaceflight offerings to its website, including missions to Earth orbit, the International Space Station (ISS), lunar orbit, and Mars. The company's Starship upper stage is one of the vehicles listed for these missions. Pricing information is not available, but interested customers can inquire via email. Missions to Earth orbit will begin later this year, with seats and research opportunities available in late 2024. Ten-day commercial missions to the ISS will be available in 2025. Trips to lunar orbit and Mars do not have listed timelines. SpaceX's ambitious plan suggests the possibility of landing people on Mars before 2030.

[2] SpaceX Gears Up for Starship Flight Test 4 with Unprecedented Upgrades and Preparations (published on 2024-05-04)
Summary: SpaceX is gearing up for the highly anticipated fourth flight test of its Starship spacecraft at Boc

### Reasoning

In [9]:
# Settings for the reasoning/aggregation step; every entry is read by the
# ensemble.meta_reason call later in this notebook.
scratch_pad_prompts = PROMPT_DICT["binary"]["scratch_pad"]

REASONING_CONFIG = {
    # Two base-model entries, paired with two groups of scratch-pad prompt templates.
    "BASE_REASONING_MODEL_NAMES": ["gpt-4-1106-preview", "gpt-4-1106-preview"],
    "BASE_REASONING_TEMPERATURE": 1.0,
    "BASE_REASONING_PROMPT_TEMPLATES": [
        [scratch_pad_prompts["1"], scratch_pad_prompts["2"]],
        [scratch_pad_prompts["new_3"], scratch_pad_prompts["new_6"]],
    ],
    # Aggregation of the base reasonings.
    "AGGREGATION_METHOD": "meta",
    "AGGREGATION_PROMPT_TEMPLATE": PROMPT_DICT["meta_reasoning"]["0"],
    "AGGREGATION_TEMPERATURE": 0.2,
    "AGGREGATION_MODEL_NAME": "gpt-4",
    # NOTE: the key below is misspelled ("WEIGTHTS"), but the meta_reason call
    # looks it up under exactly this spelling -- rename both places together.
    "AGGREGATION_WEIGTHTS": None,
}

In [10]:
# Build the [today, close] date range passed to the reasoning step.
close_date = "N/A"  # data doesn't have explicit close date, so set to N/A
today_to_close_date = [today_date, close_date]

# Run ensemble.meta_reason on the question and the retrieved summaries, with
# all model/prompt/temperature settings taken from REASONING_CONFIG.
# The result is kept in `ensemble_dict`; the next cell reads its
# "meta_reasoning" and "meta_prediction" entries.
# NOTE: "AGGREGATION_WEIGTHTS" is misspelled but matches the key defined in
# REASONING_CONFIG; rename both together if it is ever corrected.
ensemble_dict = await ensemble.meta_reason(
    question=question,
    background_info=background_info,
    resolution_criteria=resolution_criteria,
    today_to_close_date_range=today_to_close_date,
    retrieved_info=all_summaries,
    reasoning_prompt_templates=REASONING_CONFIG["BASE_REASONING_PROMPT_TEMPLATES"],
    base_model_names=REASONING_CONFIG["BASE_REASONING_MODEL_NAMES"],
    base_temperature=REASONING_CONFIG["BASE_REASONING_TEMPERATURE"],
    aggregation_method=REASONING_CONFIG["AGGREGATION_METHOD"],
    weights=REASONING_CONFIG["AGGREGATION_WEIGTHTS"],
    meta_model_name=REASONING_CONFIG["AGGREGATION_MODEL_NAME"],
    meta_prompt_template=REASONING_CONFIG["AGGREGATION_PROMPT_TEMPLATE"],
    meta_temperature=REASONING_CONFIG["AGGREGATION_TEMPERATURE"],
)

05/07/2024 01:39:58 AM - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
05/07/2024 01:40:05 AM - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
05/07/2024 01:40:05 AM - Finished 2 base reasonings generated by gpt-4-1106-preview
05/07/2024 01:40:32 AM - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
05/07/2024 01:40:39 AM - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
05/07/2024 01:40:39 AM - Finished 2 base reasonings generated by gpt-4-1106-preview
05/07/2024 01:41:03 AM - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


In [11]:
# Print each section heading, a separator, then the matching ensemble_dict entry.
for heading, result_key in (
    ("REASONING\n", "meta_reasoning"),
    ("PROBABILITY\n", "meta_prediction"),
):
    print(heading, "=================")
    print(ensemble_dict[result_key])

REASONING
1. Provide reasons why the answer might be no.
- Technological Challenges: The mission to Mars is fraught with numerous technical hurdles, including the development of a reliable interplanetary transport system, mastering in-space refueling, and ensuring the safe landing of a crewed spacecraft on Mars. These challenges could potentially delay the mission.
- Regulatory and Safety Concerns: Given the high-risk nature of the mission, regulatory bodies might impose stringent safety requirements, which could lead to delays.
- Financial Constraints: The Mars mission is a costly endeavor. Despite SpaceX's successful track record in securing funding, the financial requirements for this mission could exceed the company's budget or investor appetite.
- Unforeseen Complications: Space missions are often subject to unexpected setbacks, such as technical failures or logistical issues, which could significantly delay the timeline.

2. Provide reasons why the answer might be yes.
- Progress

### Testing "Advanced Forecaster"

In [15]:
# Instantiate the packaged forecaster with its default constructor arguments.
# NOTE(review): this import sits mid-notebook rather than in the first import
# cell; it presumably resolves through the sys.path entries appended there --
# confirm, and consider moving it up.
# NOTE(review): execution counts jump from 11 to 15 before this cell, so
# confirm the notebook still works under Restart & Run All.
from forecasters.advanced_forecaster import AdvancedForecaster
af = AdvancedForecaster()

In [16]:
# Run the forecaster on the same sample question (passed as `sentence=`); the
# awaited result is printed as the final probability in the next cell.
final_prob = await af.call_async(sentence=fq)

05/07/2024 01:43:21 AM - Finding 3 search query keywords via LLM...
05/07/2024 01:43:26 AM - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
05/07/2024 01:43:27 AM - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
05/07/2024 01:43:27 AM - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
05/07/2024 01:43:31 AM - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
05/07/2024 01:43:31 AM - Search queries for NC: ['SpaceX Mars mission update', 'SpaceX Mars mission progress', 'SpaceX Mars timeline update', 'Will SpaceX land people on Mars before 2030?', 'Elon Musk Mars timeline', 'SpaceX Mars mission challenges', 'SpaceX Interplanetary Transport progress']
05/07/2024 01:43:31 AM - Search queries for GNews: ['Elon Musk update Mars landing timeline', 'SpaceX Mars mission progress 2024', 'Will SpaceX land people on Mars before 2030?', 'SpaceX Interplanetary Transport Syste

An error occurred while fetching the article: Article `download()` failed with Status code 403 for url None on URL https://news.google.com/rss/articles/CBMibGh0dHBzOi8vd3d3LmlidGltZXMuY28udWsvc3BhY2V4LW1hcnMtcGxhbnMtMTAwMC1zcGFjZXNoaXBzLWRlbGl2ZXItZmlyc3QtY29sb25pc3RzLXdpdGhpbi03LTkteWVhcnMtMTcyNDMyN9IBAA?oc=5&hl=en-US&gl=US&ceid=US:en


05/07/2024 01:43:48 AM - Retrieved full article text for https://news.google.com/rss/articles/CBMiS2h0dHBzOi8vd3d3LnNwYWNlLmNvbS9zcGFjZXgtc3RhcnNoaXAtNTAwLWZlZXQtdGFsbC1tYXJzLW1pc3Npb25zLWVsb24tbXVza9IBAA?oc=5&hl=en-US&gl=US&ceid=US:en
05/07/2024 01:43:48 AM - Retrieved full article text for https://news.google.com/rss/articles/CBMiWWh0dHBzOi8vc3BhY2VuZXdzLmNvbS9zcGFjZXgtbWFraW5nLXByb2dyZXNzLW9uLXN0YXJzaGlwLWluLXNwYWNlLXJlZnVlbGluZy10ZWNobm9sb2dpZXMv0gEA?oc=5&hl=en-US&gl=US&ceid=US:en
05/07/2024 01:43:49 AM - Retrieved full article text for https://news.google.com/rss/articles/CBMiWWh0dHBzOi8vd3d3Lm5hc2EuZ292L2NlbnRlcnMtYW5kLWZhY2lsaXRpZXMvbWFyc2hhbGwvdGhlLW1hcnNoYWxsLXN0YXItZm9yLWFwcmlsLTE3LTIwMjQv0gEA?oc=5&hl=en-US&gl=US&ceid=US:en
05/07/2024 01:43:50 AM - get_html_2XX_only(): bad status code 403 on URL: https://news.google.com/rss/articles/CBMiU2h0dHBzOi8vd3d3Lm15c2FuYW50b25pby5jb20vYnVzaW5lc3MvYXJ0aWNsZS9zcGFjZXgtaHVtYW4tc3BhY2VmbGlnaHQtMTk0MzA2OTUucGhw0gEA?oc=5&hl=en-US&gl=US&ceid

An error occurred while fetching the article: Article `download()` failed with Website protected with PerimeterX, url: None on URL https://news.google.com/rss/articles/CBMiU2h0dHBzOi8vd3d3Lm15c2FuYW50b25pby5jb20vYnVzaW5lc3MvYXJ0aWNsZS9zcGFjZXgtaHVtYW4tc3BhY2VmbGlnaHQtMTk0MzA2OTUucGhw0gEA?oc=5&hl=en-US&gl=US&ceid=US:en


05/07/2024 01:43:50 AM - Retrieved full article text for https://news.google.com/rss/articles/CBMiPmh0dHBzOi8vb2JzZXJ2ZXIuY29tLzIwMjQvMDQvc3BhY2UtbWlzc2lvbnMtdG8td2F0Y2gtbWF5LTIwMjQv0gEA?oc=5&hl=en-US&gl=US&ceid=US:en
05/07/2024 01:43:51 AM - Retrieved full article text for https://news.google.com/rss/articles/CBMiWmh0dHBzOi8vc3BhY2VuZXdzLmNvbS9qYXBhbmVzZS1hc3Ryb25hdXRzLXRvLWxhbmQtb24tbW9vbi1hcy1wYXJ0LW9mLW5ldy1uYXNhLXBhcnRuZXJzaGlwL9IBAA?oc=5&hl=en-US&gl=US&ceid=US:en
05/07/2024 01:43:52 AM - Retrieved full article text for https://news.google.com/rss/articles/CBMiSGh0dHBzOi8vd3d3Lndhc2hpbmd0b25wb3N0LmNvbS90ZWNobm9sb2d5LzIwMjQvMDUvMDIvY2hpbmEtbW9vbi1taXNzaW9uL9IBAA?oc=5&hl=en-US&gl=US&ceid=US:en
05/07/2024 01:43:55 AM - Retrieved full article text for https://news.google.com/rss/articles/CBMiOWh0dHBzOi8vd3d3LnNwYWNlLmNvbS9jaGluZXNlLXByZWVtaW5lbmNlLW1vb24tbWFycy1vcC1lZNIBAA?oc=5&hl=en-US&gl=US&ceid=US:en
05/07/2024 01:43:56 AM - Retrieved full article text for https://news.google.com/r

In [17]:
# Show the value returned by af.call_async for the sample question.
print("Final LLM probability", final_prob)

Final LLM probability 0.45
