# Base forecaster against open questions -- test community forecast alignment

## Imports

In [None]:
from forecasting_tools import MetaculusApi, ApiFilter
from datetime import datetime, timedelta
import asyncio, os
import numpy as np
from predict import predict

In [None]:
# Project-local helper, called once before any API access below.
# NOTE(review): presumably loads API keys/credentials into the environment --
# confirm against load_secrets.py.
from load_secrets import load_secrets
load_secrets()

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt

In [None]:
from tqdm import tqdm
# Register tqdm's pandas integration; this is what provides the
# DataFrame.progress_apply method used in the forecasting step.
tqdm.pandas()

## Question sample

In [None]:
# Sample size: passed as num_questions when fetching questions from Metaculus.
num_of_questions_to_return = 42

In [None]:
# Filter for the question sample. Going by the filter's field names: only open
# questions, with at least 40 forecasters, scheduled to resolve within the next
# 365 days, and with bots left out of the aggregates.
# NOTE(review): datetime.now() is timezone-naive -- confirm ApiFilter accepts that.
one_year_from_now = timedelta(days=365) + datetime.now()
api_filter = ApiFilter(
    includes_bots_in_aggregates=False,
    scheduled_resolve_time_lt=one_year_from_now,
    num_forecasters_gte=40,
    allowed_statuses=["open"],
)

In [None]:
# Fetch a random sample of questions matching api_filter.
# NOTE(review): asyncio.run() raises RuntimeError when an event loop is already
# running (as in many notebook kernels) -- confirm how this notebook is executed.
questions = asyncio.run(
    MetaculusApi.get_questions_matching_filter(
        api_filter,
        randomly_sample=True,
        num_questions=num_of_questions_to_return,
    )
)

In [None]:
# Pick the first binary question in the sample as a worked example.
# The sample is random, so it is not guaranteed to contain every question
# type; fail with an explicit message rather than "list index out of range".
question_binary = next(
    (question for question in questions
     if question.api_json['question']['type'] == 'binary'),
    None,
)
if question_binary is None:
    raise IndexError(
        "no 'binary' question in the sampled questions; re-run the sampling cell"
    )

In [None]:
# Pick the first multiple-choice question in the sample as a worked example.
# The sample is random, so it is not guaranteed to contain every question
# type; fail with an explicit message rather than "list index out of range".
question_multiple_choice = next(
    (question for question in questions
     if question.api_json['question']['type'] == 'multiple_choice'),
    None,
)
if question_multiple_choice is None:
    raise IndexError(
        "no 'multiple_choice' question in the sampled questions; "
        "re-run the sampling cell"
    )

In [None]:
# Pick the first numeric question in the sample as a worked example.
# The sample is random, so it is not guaranteed to contain every question
# type; fail with an explicit message rather than "list index out of range".
question_numeric = next(
    (question for question in questions
     if question.api_json['question']['type'] == 'numeric'),
    None,
)
if question_numeric is None:
    raise IndexError(
        "no 'numeric' question in the sampled questions; re-run the sampling cell"
    )

## Community forecast

In [None]:
from community_forecast import *

### Numeric

In [None]:
# Spot-check the numeric helper on one example; the return value is shown as
# the cell output.
community_forecast_numeric(question_numeric)

### Binary

In [None]:
# Spot-check the binary helper on one example; the return value is shown as
# the cell output.
community_forecast_binary(question_binary)

### Multiple choice

In [None]:
# Spot-check the multiple-choice helper on one example; the return value is
# shown as the cell output.
community_forecast_multiple_choice(question_multiple_choice)

### All questions

In [None]:
# Map each sampled question's id to its community forecast.
id_to_forecast = {
    q.api_json['id']: community_forecast(q)
    for q in questions
}

In [None]:
# Display the id -> community forecast mapping for inspection.
id_to_forecast

## Forecast the questions

In [None]:
from flatten_dict import flatten_dict
import pandas as pd
from prompt_question import prompt_question
pd.set_option('display.max_columns', None)

# Flatten each question's nested API JSON into one flat record, joining nested
# keys with '_' (the columns selected below, e.g. 'question_type', rely on it).
qflat = [flatten_dict(q.api_json, sep='_') for q in questions]

df = pd.DataFrame(qflat)

# Attach the community forecast computed earlier, looked up by question id.
df['crowd'] = df.apply(lambda row: id_to_forecast[row.id], axis=1)

# Store the options as their repr() string.
# NOTE(review): presumably so list-valued cells are handled uniformly
# downstream -- confirm against prompt_question / predict.
df['question_options'] = df['question_options'].apply(repr)

# Keep only the columns used downstream. The explicit .copy() makes this an
# independent frame, so the column assignments below do not trigger pandas'
# SettingWithCopyWarning.
df = df[['id',
 'open_time',
 'scheduled_resolve_time',
 'title',
 'question_description',
 'question_resolution_criteria',
 'question_fine_print',
 'question_type',
 'question_options',
 'question_group_variable',
 'question_question_weight',
 'question_unit',
 'question_open_upper_bound',
 'question_open_lower_bound',
 'question_scaling_range_max',
 'question_scaling_range_min',
 'crowd']].copy()

# Directory name handed to predict(); created up front if missing.
dfn = 'forecast_community'
os.makedirs(dfn, exist_ok=True)

# Stamp every row with the run date (YYYY-MM-DD).
df['today'] = datetime.now().strftime("%Y-%m-%d")

from ResearchProModule import ResearchProModule

# NOTE(review): presumably generates the per-question research that
# load_research reads back below -- confirm against ResearchProModule.
bot = ResearchProModule()
bot.process_dataframe(df)

from load_research import load_research

df['research'] = df.apply(load_research, axis=1)

df['prompt'] = df.apply(prompt_question, axis=1)

# progress_apply is the tqdm-wrapped apply (requires tqdm.pandas() to have run).
df['forecast'] = df.progress_apply(lambda question: predict(dfn, question), axis=1)

from extract_forecast import extract_forecast

df['prediction'] = df.apply(extract_forecast, axis=1)

## Compare crowd and forecast

In [None]:
from error import error

## Assess performance

In [None]:
# Keep only the questions that actually have a community forecast; .copy()
# gives an independent frame for the column assignments that follow.
df = df[df.crowd.apply(lambda crowd: crowd is not None)].copy()

In [None]:
# Score each row with the project-local error() function (called once per row).
df['error'] = df.apply(error, axis=1)

In [None]:
# Display the full results table.
df

In [None]:
# Histogram of the per-question error; the trailing semicolon keeps the
# notebook from echoing plt.hist's return value.
plt.hist(df.error.values);

In [None]:
# Persist the full results table as indented JSON.
df.to_json('community_results.json', indent=4)

In [None]:
# Compact summary: one row per question with the model prediction, the
# community forecast and the resulting error.
df1 = df[['title', 'question_type', 'prediction', 'crowd', 'error']]

In [None]:
# Display the summary table.
df1

In [None]:
# Save the summary table as CSV; index=False is not passed, so the DataFrame
# index is written as the first column.
df1.to_csv('community.csv')

Reference (Perplexity search): https://www.perplexity.ai/search/here-are-some-questions-a-ques-fgZ1.vMOS1Sa.rOC1G3b7w