# Base forecaster against open questions -- test community forecast alignment

## Imports

In [1]:
from forecasting_tools import MetaculusApi, ApiFilter
from datetime import datetime, timedelta
import asyncio, os
import numpy as np
from predict import predict

In [2]:
from load_secrets import load_secrets
# Run the project-local secrets loader.
# NOTE(review): presumably makes API credentials available to later cells — confirm in load_secrets.
load_secrets()

In [3]:
%matplotlib inline
import matplotlib.pyplot as plt

In [4]:
from tqdm import tqdm
# Register tqdm with pandas so that DataFrame.progress_apply (used for the forecasting step) is available.
tqdm.pandas()

In [8]:
# Number of questions requested from the API. The trailing "42" is presumably the
# size used for a full run — TODO confirm.
num_of_questions_to_return = 4 # 42

In [9]:
# Cut-off for the scheduled resolve time: 365 days after the moment this cell runs.
resolve_horizon = timedelta(days=365)
one_year_from_now = datetime.now() + resolve_horizon

# Filter criteria handed to the Metaculus API wrapper.
# NOTE(review): the exact meaning of each keyword is defined by
# forecasting_tools.ApiFilter — confirm against its documentation.
filter_settings = dict(
    allowed_statuses=["open"],
    num_forecasters_gte=40,
    scheduled_resolve_time_lt=one_year_from_now,
    includes_bots_in_aggregates=False,
)
api_filter = ApiFilter(**filter_settings)

In [10]:
# Build the awaitable that fetches a random sample of questions matching the
# filter, then drive it to completion.
fetch_questions = MetaculusApi.get_questions_matching_filter(
    api_filter,
    num_questions=num_of_questions_to_return,
    randomly_sample=True,
)
questions = asyncio.run(fetch_questions)

In [12]:
# Show the concrete class of every fetched question.
list(map(type, questions))

[forecasting_tools.data_models.questions.BinaryQuestion,
 forecasting_tools.data_models.questions.BinaryQuestion,
 forecasting_tools.data_models.questions.BinaryQuestion,
 forecasting_tools.data_models.questions.BinaryQuestion]

In [13]:
# Import only the helper that is actually used; a wildcard import could silently
# shadow other notebook names (e.g. `error`, `predict`).
from community_forecast import community_forecast

In [14]:
# Map each question's id (taken from its raw API JSON) to the value returned by
# community_forecast for that question.
id_to_forecast = dict(
    (q.api_json['id'], community_forecast(q))
    for q in questions
)

In [15]:
# Display the question-id -> community-forecast mapping.
id_to_forecast

{35563: 0.11, 35577: 0.82, 35578: 0.9, 35572: 0.75}

## Forecast the questions

In [None]:
from flatten_dict import flatten_dict
import pandas as pd
from prompt_question import prompt_question
# Remove the column limit so displayed DataFrames show every column.
pd.set_option('display.max_columns', None)

In [None]:
# Flatten each question's raw API JSON into a single-level dict whose nested
# keys are joined with '_'.
raw_records = [item.api_json for item in questions]
qflat = [flatten_dict(record, sep='_') for record in raw_records]

In [None]:
# One row per question, one column per flattened JSON key.
df = pd.DataFrame(qflat)

In [None]:
def _crowd_for_row(row):
    """Look up the community forecast recorded for this row's question id."""
    return id_to_forecast[row.id]


df['crowd'] = df.apply(_crowd_for_row, axis=1)

In [None]:
# Replace each options value with its repr() string.
# NOTE(review): presumably done so list-valued options serialize cleanly — confirm.
options_as_text = df['question_options'].apply(repr)
df['question_options'] = options_as_text

In [None]:
# Columns kept for the forecasting pipeline, in display order.
kept_columns = [
    'id',
    'open_time',
    'scheduled_resolve_time',
    'title',
    'question_description',
    'question_resolution_criteria',
    'question_fine_print',
    'question_type',
    'question_options',
    'question_group_variable',
    'question_question_weight',
    'question_unit',
    'question_open_upper_bound',
    'question_open_lower_bound',
    'question_scaling_range_max',
    'question_scaling_range_min',
    'crowd',
]
# .copy() makes the selection an independent frame, so the column assignments
# that follow (df['today'], df['research'], ...) do not write into a slice of
# the wider frame and do not raise SettingWithCopyWarning.
df = df[kept_columns].copy()

In [None]:
# Directory name later passed to predict() for every question; created here if
# it does not already exist.
dfn = 'forecast_community'
os.makedirs(dfn, exist_ok=True)

In [None]:
# Stamp every row with the run date as YYYY-MM-DD.
run_moment = datetime.now()
df['today'] = f"{run_moment:%Y-%m-%d}"

In [None]:
from ResearchProModule import ResearchProModule

In [None]:
# Instantiate the research module and run it over the question table.
# NOTE(review): the return value is discarded, so process_dataframe presumably
# works through side effects (e.g. writing research that load_research reads
# later) — confirm in ResearchProModule.
bot = ResearchProModule()
bot.process_dataframe(df)

In [None]:
from load_research import load_research

In [None]:
# Call load_research once per row (axis=1) and store the result in 'research'.
df['research'] = df.apply(load_research, axis=1)

In [None]:
# Call prompt_question once per row (axis=1) and store the result in 'prompt'.
df['prompt'] = df.apply(prompt_question, axis=1)

In [None]:
# Display only the multiple-choice questions.
is_multiple_choice = df['question_type'].eq('multiple_choice')
df[is_multiple_choice]

In [None]:
def _forecast_row(question):
    """Run predict for one question row, passing the dfn directory name."""
    return predict(dfn, question)


# progress_apply is the tqdm-registered variant of apply.
df['forecast'] = df.progress_apply(_forecast_row, axis=1)

In [None]:
from extract_forecast import extract_forecast

In [None]:
# Call extract_forecast once per row (axis=1) and store the result in 'prediction'.
df['prediction'] = df.apply(extract_forecast, axis=1)

## Compare crowd and forecast

In [None]:
from error import error

## Assess performance

In [None]:
# Keep only questions that have a community forecast. notna() is used rather
# than an `is None` identity test because pandas stores a missing value in a
# float column as NaN, which `x is None` never matches; notna() rejects both
# None and NaN. .copy() detaches the result so later assignments are safe.
df = df[df['crowd'].notna()].copy()

In [None]:
# Call error() once per row (axis=1) and store the result in 'error'.
# NOTE(review): presumably compares 'prediction' with 'crowd' — confirm in error.py.
df['error'] = df.apply(error, axis=1)

In [None]:
# Display the full results table.
df

In [None]:
# Histogram of the per-question error values; the trailing semicolon keeps the
# notebook from echoing plt.hist's return value.
error_values = df['error'].values
plt.hist(error_values);

In [None]:
# Write the full results table as indented JSON to the current working directory.
df.to_json('community_results.json', indent=4)

In [None]:
# Compact summary: just the columns needed to compare forecast and crowd.
summary_columns = ['title', 'question_type', 'prediction', 'crowd', 'error']
df1 = df[summary_columns]

In [None]:
# Display the summary table.
df1

In [None]:
# Write the summary table to CSV in the current working directory (index
# included, since to_csv writes it by default).
df1.to_csv('community.csv')

https://www.perplexity.ai/search/here-are-some-questions-a-ques-fgZ1.vMOS1Sa.rOC1G3b7w