In [1]:
# Enable IPython's autoreload extension; mode 2 re-imports modules before
# each cell runs, so edits to the local project modules imported below are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

## Prereqs

Set your OpenAI key (recommended) or TogetherAI key in `llm_forecasting/config/keys.py`. Both providers offer free credits to new users on sign-up.

## Import packages and load data

In [2]:
# Standard library imports
import pickle

# Third-party library imports
import pandas as pd

# Local application/library specific imports
from config.constants import PROMPT_DICT
from utils.data_utils import get_formatted_data
from utils.visualize_utils import visualize_all, visualize_all_ensemble
import ranking
import summarize
import ensemble

In [3]:
# Load the sample questions from the pickle file in the working directory.
# Unpickling can execute arbitrary code, so only open pickle files that come
# from a trusted source.
with open("sample_questions.pickle", "rb") as pickle_in:
    sample_qs = pickle.load(pickle_in)

In [4]:
# Serialize sample_qs back to the same path the loading cell reads from.
# NOTE(review): if sample_qs has not been modified since it was loaded, this
# rewrite of the file is redundant — confirm whether this cell is still needed.
with open("sample_questions.pickle", "wb") as pickle_out:
    pickle.dump(sample_qs, pickle_out)

In [5]:
# Build the per-question lists used by the rest of the notebook. The questions
# are supplied in memory through `data`; with return_raw_question_data=True
# the call is unpacked into the formatted data plus the raw question data.
# NOTE(review): the first positional argument is an empty string — presumably
# a data path that is ignored when `data` is given; confirm in
# utils.data_utils.
formatted_data, raw_data = get_formatted_data(
    "",
    data=sample_qs,
    questions_after="2022",
    retrieval_index=1,
    num_retrievals=5,
    return_raw_question_data=True,
)

In [6]:
# For this demo, we evaluate a single question from the sample set.
# QUESTION_INDEX is its zero-based position in each per-question list of
# formatted_data (index 9, i.e. the tenth entry — not the first one).
QUESTION_INDEX = 9

question = formatted_data["question_list"][QUESTION_INDEX]
background_info = formatted_data["background_list"][QUESTION_INDEX]
resolution_criteria = formatted_data["resolution_criteria_list"][QUESTION_INDEX]
answer = formatted_data["answer_list"][QUESTION_INDEX]
question_dates = formatted_data["question_dates_list"][QUESTION_INDEX]
retrieval_dates = formatted_data["retrieval_dates_list"][QUESTION_INDEX]
urls_in_background = formatted_data["urls_in_background_list"][QUESTION_INDEX]

In [7]:
# Show the selected question followed by its background text, one per line.
for label, value in (("Question:", question), ("Background:", background_info)):
    print(label, value)

Question: Will Reddit announce changes or a delay to its proposed API fee pricing before July 1, 2023?
Background: In April 2023, Reddit announced new terms for its Application Programming Interface (API), the tool through which developers of third-party apps access Reddit's data. This change included the introduction of pricing for API usage, a significant departure from the previous practice of providing the API for free. This led to an outcry from both developers and users of third-party Reddit apps, as these changes could result in substantial costs for developers and the potential shutdown of many popular third-party apps.
The developer of the popular third-party app, Apollo for Reddit, Christian Selig, stated he would be liable for an annual cost of $20 million under the new terms. This revelation sparked widespread discontent among Reddit users, leading to a coordinated protest involving more than 8,000 Reddit communities, including some of the largest and most active, going dar

## Retrieval

In [8]:
# Settings passed as `config` to ranking.retrieve_summarize_and_rank_articles.
# Each key must appear exactly once: a repeated key in a dict literal is not
# an error in Python, the last value silently replaces the earlier one.
RETRIEVAL_CONFIG = {
    # --- Search-query generation ---
    "NUM_SEARCH_QUERY_KEYWORDS": 3,
    "MAX_WORDS_NEWSCATCHER": 5,
    "MAX_WORDS_GNEWS": 8,
    "SEARCH_QUERY_MODEL_NAME": "gpt-4-1106-preview",
    "SEARCH_QUERY_TEMPERATURE": 0.0,
    "SEARCH_QUERY_PROMPT_TEMPLATES": [
        PROMPT_DICT["search_query"]["0"],
        PROMPT_DICT["search_query"]["1"],
    ],
    "NUM_ARTICLES_PER_QUERY": 5,
    # --- Summarization ---
    "SUMMARIZATION_MODEL_NAME": "gpt-3.5-turbo-1106",
    "SUMMARIZATION_TEMPERATURE": 0.2,
    "SUMMARIZATION_PROMPT_TEMPLATE": PROMPT_DICT["summarization"]["9"],
    # Previously listed twice (10 here, 20 further down); 20 was the value
    # that actually took effect, so it is the single value kept.
    "NUM_SUMMARIES_THRESHOLD": 20,
    # --- Embedding pre-filter ---
    "PRE_FILTER_WITH_EMBEDDING": True,
    "PRE_FILTER_WITH_EMBEDDING_THRESHOLD": 0.32,
    # --- Ranking ---
    "RANKING_MODEL_NAME": "gpt-3.5-turbo-1106",
    "RANKING_TEMPERATURE": 0.0,
    "RANKING_PROMPT_TEMPLATE": PROMPT_DICT["ranking"]["0"],
    "RANKING_RELEVANCE_THRESHOLD": 4,
    "RANKING_COSINE_SIMILARITY_THRESHOLD": 0.5,
    "SORT_BY": "date",
    "RANKING_METHOD": "llm-rating",
    "RANKING_METHOD_LLM": "title_250_tokens",
    "EXTRACT_BACKGROUND_URLS": True,
}

In [9]:
(
    ranked_articles,
    all_articles,
    search_queries_list_gnews,
    search_queries_list_nc,
) = await ranking.retrieve_summarize_and_rank_articles(
    question,
    background_info,
    resolution_criteria,
    retrieval_dates,
    urls=urls_in_background,
    config=RETRIEVAL_CONFIG,
    return_intermediates=True,
)

INFO:ranking:Finding 3 search query keywords via LLM...
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:ranking:Search queries for NC: ['Reddit API pricing update', 'Reddit CEO Steve Huffman statement', 'Reddit community API response', 'Reddit financial news 2023', 'Will Reddit announce changes or a delay to its proposed API fee pricing before July 1, 2023?', 'Reddit API fee backlash', 'Reddit CEO statement API']
INFO:ranking:Search queries for GNews: ['Reddit API pricing update June 2023', 'Reddit community response to API fees', 'Steve Huffman Reddit API pricing statement', 'Reddit negotiations third-party developers', 'Reddit API fee pricing update 2023', 'Will Reddit

An error occurred while fetching the article: Article `download()` failed with HTTPSConnectionPool(host='www.ctvnews.ca', port=443): Read timed out. (read timeout=7) on URL https://news.google.com/rss/articles/CBMiiAFodHRwczovL3d3dy5jdHZuZXdzLmNhL2J1c2luZXNzL2Rlc3BpdGUtd2lkZXNwcmVhZC1wcm90ZXN0LXJlZGRpdC1jZW8tc2F5cy1jb21wYW55LWlzLW5vdC1uZWdvdGlhdGluZy1vbi0zcmQtcGFydHktYXBwLWNoYXJnZXMtMS42NDQ0OTE50gEA?oc=5&hl=en-US&gl=US&ceid=US:en


INFO:information_retrieval:Retrieved full article text for https://news.google.com/rss/articles/CBMiswFodHRwczovL3d3dy5mb3JiZXMuY29tL3NpdGVzL2FudG9uaW9wZXF1ZW5vaXYvMjAyMy8wNi8xNS9yZWRkaXQtYmxhY2tvdXQtcm9sbHMtb24tZm9yLW1vcmUtdGhhbi01MDAwLXN1YnJlZGRpdHMtcGFzdC1wbGFubmVkLWVuZC1kYXRlLXNvbWUtb2Ytd2hpY2gtcGxhbi10by1zdGF5LWRhcmstaW5kZWZpbml0ZWx5L9IBtwFodHRwczovL3d3dy5mb3JiZXMuY29tL3NpdGVzL2FudG9uaW9wZXF1ZW5vaXYvMjAyMy8wNi8xNS9yZWRkaXQtYmxhY2tvdXQtcm9sbHMtb24tZm9yLW1vcmUtdGhhbi01MDAwLXN1YnJlZGRpdHMtcGFzdC1wbGFubmVkLWVuZC1kYXRlLXNvbWUtb2Ytd2hpY2gtcGxhbi10by1zdGF5LWRhcmstaW5kZWZpbml0ZWx5L2FtcC8?oc=5&hl=en-US&gl=US&ceid=US:en
INFO:information_retrieval:Retrieved full article text for https://news.google.com/rss/articles/CBMiaGh0dHBzOi8vd3d3LmNuYmMuY29tLzIwMjMvMDYvMTYvcmVkZGl0LWluLWNyaXNpcy1hcy1wcm9taW5lbnQtbW9kZXJhdG9ycy1wcm90ZXN0LWFwaS1wcmljZS1pbmNyZWFzZS5odG1s0gFsaHR0cHM6Ly93d3cuY25iYy5jb20vYW1wLzIwMjMvMDYvMTYvcmVkZGl0LWluLWNyaXNpcy1hcy1wcm9taW5lbnQtbW9kZXJhdG9ycy1wcm90ZXN0LWFwaS1wcmljZS1pbmNyZ

In [10]:
# Concatenate the summaries of the leading entries of ranked_articles,
# keeping at most NUM_SUMMARIES_THRESHOLD of them.
max_summaries = RETRIEVAL_CONFIG["NUM_SUMMARIES_THRESHOLD"]
all_summaries = summarize.concat_summaries(ranked_articles[:max_summaries])

In [11]:
# Preview only the first 3000 characters of the concatenated summaries.
summary_preview = all_summaries[:3000]
print(summary_preview, "...")

---
ARTICLES
[1] Reddit CEO lashes out on protests, moderators and third-party apps (published on 2023-06-16)
Summary: Reddit CEO Steve Huffman is standing firm on API changes despite protests. The platform announced plans to charge for API access, sparking backlash from third-party app developers. The developer of Apollo for Reddit, Christian Selig, revealed that the new pricing could cost him $20 million annually, leading to widespread discontent. Huffman has defended the changes, emphasizing the need for profitability. Thousands of subreddits have gone dark in protest, but Huffman has shown no signs of backing down. The company has made exceptions for non-commercial apps, but the majority of third-party developers are facing potential shutdown. Despite the uproar, Huffman has indicated that Reddit is unlikely to make changes to the pricing structure.

[2] Reddit CEO compares moderators to aristocracy as blackout stretches on (published on 2023-06-16)
Summary: Reddit CEO Steve Huffma

## Reasoning

In [12]:
# Settings for the reasoning / ensembling step. Individual entries are looked
# up by key when ensemble.meta_reason is called.
scratch_pad_prompts = PROMPT_DICT["binary"]["scratch_pad"]

REASONING_CONFIG = {
    # Base reasoning: two model names and two inner lists of prompt templates
    # (presumably one inner list per model name — confirm in ensemble).
    "BASE_REASONING_MODEL_NAMES": ["gpt-4-1106-preview"] * 2,
    "BASE_REASONING_TEMPERATURE": 1.0,
    "BASE_REASONING_PROMPT_TEMPLATES": [
        [scratch_pad_prompts["1"], scratch_pad_prompts["2"]],
        [scratch_pad_prompts["new_3"], scratch_pad_prompts["new_6"]],
    ],
    # Alignment settings.
    "ALIGNMENT_MODEL_NAME": "gpt-3.5-turbo-1106",
    "ALIGNMENT_TEMPERATURE": 0,
    "ALIGNMENT_PROMPT": PROMPT_DICT["alignment"]["0"],
    # Aggregation of the base reasonings into a single prediction.
    "AGGREGATION_METHOD": "meta",
    "AGGREGATION_PROMPT_TEMPLATE": PROMPT_DICT["meta_reasoning"]["0"],
    "AGGREGATION_TEMPERATURE": 0.2,
    "AGGREGATION_MODEL_NAME": "gpt-4",
    # The key spelling below (sic) is what the meta_reason call reads; rename
    # it only together with that lookup.
    "AGGREGATION_WEIGTHTS": None,
}

In [13]:
today_to_close_date = [retrieval_dates[1], question_dates[1]]
ensemble_dict = await ensemble.meta_reason(
    question=question,
    background_info=background_info,
    resolution_criteria=resolution_criteria,
    today_to_close_date_range=today_to_close_date,
    retrieved_info=all_summaries,
    reasoning_prompt_templates=REASONING_CONFIG["BASE_REASONING_PROMPT_TEMPLATES"],
    base_model_names=REASONING_CONFIG["BASE_REASONING_MODEL_NAMES"],
    base_temperature=REASONING_CONFIG["BASE_REASONING_TEMPERATURE"],
    aggregation_method=REASONING_CONFIG["AGGREGATION_METHOD"],
    answer_type="probability",
    weights=REASONING_CONFIG["AGGREGATION_WEIGTHTS"],
    meta_model_name=REASONING_CONFIG["AGGREGATION_MODEL_NAME"],
    meta_prompt_template=REASONING_CONFIG["AGGREGATION_PROMPT_TEMPLATE"],
    meta_temperature=REASONING_CONFIG["AGGREGATION_TEMPERATURE"],
)

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:model_eval:Finished 2 base reasonings generated by gpt-4-1106-preview
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:model_eval:Finished 2 base reasonings generated by gpt-4-1106-preview
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


## Analysis

In [14]:
# Brier score of every base prediction: the squared difference between the
# predicted probability and the answer. The result keeps the nesting of
# ensemble_dict["base_predictions"], i.e. one inner list per base model name.
base_brier_scores = [
    [(probability - answer) ** 2 for probability in model_predictions]
    for model_predictions in ensemble_dict["base_predictions"]
]

print(base_brier_scores)

[[0.48999999999999994, 0.010000000000000002], [0.36, 0.0225]]


In [15]:
# Show the ensembled (meta) prediction. No Brier score is computed in this
# cell; only the prediction itself is printed.

print(ensemble_dict["meta_prediction"])

0.25


In [16]:
# Visualization: build the HTML for the base reasonings and for the ensemble.
# Both calls receive the same question / retrieval arguments, so those are
# collected once and unpacked into each call.
# NOTE(review): question_data is raw_data[0], while the question analysed in
# this notebook is taken at index 9 of the formatted lists — confirm that both
# refer to the same question.
shared_view_args = dict(
    question_data=raw_data[0],
    retrieval_dates=retrieval_dates,
    search_queries_gnews=search_queries_list_gnews,
    search_queries_nc=search_queries_list_nc,
    all_articles=all_articles,
    ranked_articles=ranked_articles,
)

# Per-model view: prompts, reasonings, predictions and their Brier scores.
base_html = visualize_all(
    **shared_view_args,
    all_summaries=all_summaries,
    model_names=REASONING_CONFIG["BASE_REASONING_MODEL_NAMES"],
    base_reasoning_prompt_templates=REASONING_CONFIG[
        "BASE_REASONING_PROMPT_TEMPLATES"
    ],
    base_reasoning_full_prompts=ensemble_dict["base_reasoning_full_prompts"],
    base_reasonings=ensemble_dict["base_reasonings"],
    base_predictions=ensemble_dict["base_predictions"],
    base_brier_scores=base_brier_scores,
)

# Ensemble view: the meta prompt, reasoning and prediction.
meta_html = visualize_all_ensemble(
    **shared_view_args,
    meta_reasoning=ensemble_dict["meta_reasoning"],
    meta_full_prompt=ensemble_dict["meta_prompt"],
    meta_prediction=ensemble_dict["meta_prediction"],
)

In [17]:
# Write the two HTML reports to the current working directory. The encoding
# is given explicitly so the result does not depend on the platform's default
# text encoding, which can fail on non-ASCII characters in the HTML.
base_file_path = "sample_q_base_output.html"
meta_file_path = "sample_q_meta_output.html"

with open(base_file_path, "w", encoding="utf-8") as base_file:
    base_file.write(base_html)

with open(meta_file_path, "w", encoding="utf-8") as meta_file:
    meta_file.write(meta_html)