In [386]:
import json, os
from openai import OpenAI

# OpenAI client: used below for the gpt-4o calls.
openai_client = OpenAI(
    api_key=os.environ["OPENAI_API_KEY"],
)
# Perplexity is reached through the same client class, pointed at Perplexity's base URL.
perplexity_client = OpenAI(
    base_url="https://api.perplexity.ai",
    api_key=os.environ["PERPLEXITY_KEY"],
)

In [387]:
def _read_prompt(path):
    """Return the full text of the prompt template stored at `path`.

    The file is opened in a `with` block so the handle is closed deterministically
    instead of being left for the garbage collector.
    """
    with open(path, "r") as prompt_file:
        return prompt_file.read()

# Prompt templates; the ones used with str.format() below contain {placeholders}.
background_prompt = _read_prompt("prompts/background.prompt")
subquestions_prompt = _read_prompt("prompts/subquestions.prompt")
probabilities_prompt = _read_prompt("prompts/probabilities.prompt")
jsonify_prompt = _read_prompt("prompts/jsonify.prompt")
boolean_prompt = _read_prompt("prompts/boolean.prompt")

In [388]:
# Inputs for one forecasting run: the question, its resolution criteria,
# and any background text supplied with the question ("NA" when there is none).
question = (
    "Will the average house price in Roswell, Georgia, USA, "
    "be higher this time next year?"
)
criteria = (
    "Use your best judgement to determine a reasonable "
    "resolution criteria for this question."
)
metaculus_background = "NA"

In [None]:
#step one: get background info on question from perplexity
# The background template is filled with the question and sent as a single user turn.
prompt = background_prompt.format(question=question)
messages = [dict(role='user', content=prompt)]
response = perplexity_client.chat.completions.create(
    model="llama-3.1-sonar-huge-128k-online",
    messages=messages,
    seed=42,
)
# Keep the text of the first choice; step two feeds it into the subquestions prompt.
perplexity_background = response.choices[0].message.content
print(perplexity_background)

In [None]:
#step two: break down question into subquestions
from datetime import datetime

# today's date, formatted yyyy-mm-dd, is substituted into the prompt
current_date = f"{datetime.now():%Y-%m-%d}"
prompt = subquestions_prompt.format(
    perplexity_background=perplexity_background,
    metaculus_background=metaculus_background,
    date=current_date,
    criteria=criteria,
    question=question,
)
# Start a fresh gpt-4o conversation; steps three and four keep appending to `messages`.
messages = [dict(role='user', content=prompt)]
response = openai_client.chat.completions.create(
    model="gpt-4o",
    messages=messages,
    seed=42,
)
subquestions = response.choices[0].message.content
# Record the model's reply so the next step sees it as conversation history.
messages.append(dict(role='assistant', content=subquestions))
print(subquestions)

In [None]:
#step three: estimate probabilities for each subquestion
# NOTE: this cell extends `messages` in place, so running it twice without
# re-running step two sends the probabilities prompt twice.
messages.append(dict(role='user', content=probabilities_prompt))
response = openai_client.chat.completions.create(
    model="gpt-4o",
    messages=messages,
    seed=42,
)
probabilities = response.choices[0].message.content
# Keep the reply in the history for the jsonify step.
messages.append(dict(role='assistant', content=probabilities))
print(probabilities)

In [None]:
#step four: jsonify the results
# NOTE: like step three, this appends to `messages` in place.
messages.append(dict(role='user', content=jsonify_prompt))
response = openai_client.chat.completions.create(
    model="gpt-4o",
    messages=messages,
    response_format={"type":"json_object"},
    seed=42,
)
# The reply is parsed as JSON and its 'questions' value is kept; the later cells
# read 'question', 'ifTrue' and 'ifFalse' from each element.
prob_values = json.loads(response.choices[0].message.content)['questions']
display(prob_values)

In [None]:
#step five: for each subquestion, evaluate the truth value
# Loop-local names (`subquestion`, `lookup_messages`, ...) are used on purpose: reusing
# `question` / `messages` here would overwrite the top-level forecasting question and the
# gpt-4o conversation that steps one to four depend on when they are re-run.
for entry in prob_values:
    subquestion = entry['question']
    print(subquestion)

    # Ask the online Perplexity model to answer the subquestion directly.
    lookup_messages = [{'role':'user','content':subquestion}]
    lookup = perplexity_client.chat.completions.create(
        seed=42,
        model="llama-3.1-sonar-small-128k-online",
        messages=lookup_messages
    )
    answer = lookup.choices[0].message.content
    print(answer)

    # Have gpt-4o reduce the free-text answer to a single YES / NO / UNCLEAR verdict.
    verdict_prompt = boolean_prompt.format(
                question=subquestion,
                answer=answer)
    verdict_response = openai_client.chat.completions.create(
        seed=42,
        model="gpt-4o",
        messages=[{'role':'user','content':verdict_prompt}]
    )
    verdict = verdict_response.choices[0].message.content.strip()
    print(verdict)
    print('------')

    assert verdict in ['YES', 'NO','UNCLEAR'], f"unexpected verdict: {verdict!r}"
    if verdict == 'UNCLEAR':
        # Drop any truth value left over from a previous run of this cell so the
        # combination cells do not use a stale answer.
        entry.pop('truth', None)
        continue
    entry['truth'] = verdict == 'YES'

In [None]:
#step six: combine the results (geo average)
# For every subquestion that step five resolved, take the estimate matching its truth value.
probs = [
    entry['ifTrue'] if entry['truth'] else entry['ifFalse']
    for entry in prob_values
    if 'truth' in entry
]
if not probs:
    # Without this guard the exponent 1/len(probs) fails with an opaque ZeroDivisionError.
    raise ValueError("no subquestion was resolved to YES/NO in step five; nothing to combine")
prob = 1
for p in probs:
    prob *= p
# Geometric mean: n-th root of the product of the n selected estimates.
final_prob = prob**(1/len(probs))
print(final_prob)

In [None]:
#step six: combine the results (weighted average)
# Each selected estimate is weighted by its squared distance from 0.5, so estimates
# far from 0.5 count for more.
probs = []
weights = []
for entry in prob_values:
    if 'truth' not in entry:
        continue
    prob = entry['ifTrue'] if entry['truth'] else entry['ifFalse']
    probs.append(prob)
    weight = abs(0.5-prob)**2
    weights.append(weight)
if not probs:
    raise ValueError("no subquestion was resolved to YES/NO in step five; nothing to combine")
total_weight = sum(weights)
if total_weight == 0:
    # Every estimate sits at 0.5, so all weights are zero and the weighted mean
    # would be 0/0; fall back to the plain mean of the estimates.
    final_prob = sum(probs)/len(probs)
else:
    final_prob = sum([p*w for p,w in zip(probs,weights)])/total_weight
print(final_prob)

In [None]:
#step six: combine the results (arithmetic averaging)
# For every subquestion that step five resolved, take the estimate matching its truth value.
probs = [
    entry['ifTrue'] if entry['truth'] else entry['ifFalse']
    for entry in prob_values
    if 'truth' in entry
]
if not probs:
    # Without this guard the division below fails with an opaque ZeroDivisionError.
    raise ValueError("no subquestion was resolved to YES/NO in step five; nothing to combine")
final_prob = sum(probs)/len(probs)
print(final_prob)

In [None]:
#step six: combine the results (median)
import numpy as np

# Entries without a 'truth' key (skipped in step five) are ignored; for the rest,
# the estimate matching the resolved truth value is used.
probs = [
    entry['ifTrue' if entry['truth'] else 'ifFalse']
    for entry in prob_values
    if 'truth' in entry
]
final_prob = np.median(probs)
print(final_prob)

In [None]:
#step six: combine the results (minimum)
# Only entries that carry a 'truth' key contribute; the most pessimistic estimate wins.
probs = [
    entry['ifTrue' if entry['truth'] else 'ifFalse']
    for entry in prob_values
    if 'truth' in entry
]
final_prob = min(probs)
print(final_prob)

In [None]:
#step six: combine the results (maximum)
# Only entries that carry a 'truth' key contribute; the most optimistic estimate wins.
probs = [
    entry['ifTrue' if entry['truth'] else 'ifFalse']
    for entry in prob_values
    if 'truth' in entry
]
final_prob = max(probs)
print(final_prob)

In [None]:
#step six: combine the results (noisy-OR)
# Accumulate the product of (1 - p) over the resolved entries, then take the complement.
prob_not = 1
for entry in prob_values:
    if 'truth' in entry:
        prob = entry['ifTrue' if entry['truth'] else 'ifFalse']
        prob_not *= 1 - prob
final_prob = 1 - prob_not
print(final_prob)

In [None]:
#step six: combine the results (noisy-AND)
# Plain product of the selected estimates; entries without a 'truth' key are skipped.
prob = 1
for entry in prob_values:
    if 'truth' in entry:
        prob *= entry['ifTrue' if entry['truth'] else 'ifFalse']
final_prob = prob
print(final_prob)

In [None]:
#step six: combine the results (avg between noisy-OR and noisy-AND)
# Both accumulators are filled in a single pass over the resolved entries:
# prob_not collects the product of (1 - p), prob_and the product of p.
prob_not = 1
prob_and = 1
for entry in prob_values:
    if 'truth' in entry:
        prob = entry['ifTrue' if entry['truth'] else 'ifFalse']
        prob_not *= 1 - prob
        prob_and *= prob
prob_or = 1 - prob_not

# Midpoint of the two combinations.
final_prob = (prob_or + prob_and) / 2
print(final_prob)

In [None]:
#step six: combine the results (naive bayes)
# is_event is the product of the selected estimates, no_event the product of their
# complements; the result is is_event normalised by the sum of the two.
# NOTE: if both products end up 0 the final division raises ZeroDivisionError.
is_event = 1
no_event = 1
for entry in prob_values:
    if 'truth' in entry:
        prob = entry['ifTrue' if entry['truth'] else 'ifFalse']
        is_event *= prob
        no_event *= 1 - prob

final_prob = is_event / (no_event + is_event)
print(final_prob)

## use AskNews

In [86]:
import os

from asknews_sdk import AskNewsSDK
# AskNews client; both credentials are read from the environment.
ask = AskNewsSDK(client_id=os.environ["ASKNEWS_ID"], client_secret=os.environ["ASKNEWS_SECRET"])


In [87]:
# Run one AskNews search and show the raw response object.
response = ask.news.search_news(
    query="tesla stock share price", # your keyword query
    n_articles=10, # control the number of articles to include in the context
    return_type="dicts",  # structured results; the next cell iterates over response.as_dicts
    method="both"  # use "nl" for natural language for your search, or "kw" for keyword search; "both" presumably combines them - confirm in the SDK docs
)

display(response)



In [91]:
# Title, summary and a dashed separator, one per line, for every returned article.
for article in response.as_dicts:
    print(article.title, article.summary, '------', sep='\n')

Tesla (NASDAQ:TSLA) Stock Price Up 1.1% Following Analyst Upgrade
Tesla's (NASDAQ:TSLA) stock price rose 1.1% after Royal Bank of Canada upgraded its price target to $236.00. Several other brokerages have also commented on TSLA, with Morgan Stanley maintaining an 'overweight' rating and a $310.00 target price, while Citigroup decreased its price objective to $258.00 and set a 'neutral' rating. In the last 90 days, insiders have sold 74,661 shares of company stock worth $16,663,291. 25.10% of the stock is currently owned by insiders. The company has a debt-to-equity ratio of 0.08, a current ratio of 1.91 and a quick ratio of 1.40. Tesla's 50-day moving average price is $224.18 and its 200-day moving average price is $202.26. The stock has a market capitalization of $776.41 billion, a price-to-earnings ratio of 62.10, a PEG ratio of 6.79 and a beta of 2.29. According to MarketBeat, Tesla has a consensus rating of 'Hold' and an average target price of $210.90.
------
Tesla, Inc. (NASDAQ:T

# recreate perplexity

In [None]:
# google search api + wayback machine + page minification (pandoc?) + gpt4o interpretation of pages

In [26]:
from datetime import datetime
# Cut-off passed as `end_date` to google_search and get_historical_page below.
end_date = datetime(year=2022, month=1, day=1)

In [None]:
# Credentials for the Google Custom Search API call in google_search; both are read
# from the environment and raise KeyError when unset.
google_api_key = os.environ["GOOGLE_KEY"]
cse_id = os.environ["GOOGLE_CSE"]

import requests
import json

def google_search(query, end_date:datetime=None, pages=3):
    """Collect result links for `query` from the Google Custom Search JSON API.

    Args:
        query: Free-text search query. It is sent through `params=` so that spaces,
            `&`, `#`, `?` and similar characters are URL-encoded instead of
            corrupting the request URL.
        end_date: Optional cut-off. When given, `sort=date:r::YYYYMMDD` is added
            to the request.
        pages: Maximum number of result pages to request; page i is requested with
            `start = i * 10 + 1`.

    Returns:
        A list with the `link` field of every returned item, in the order received.

    Raises:
        requests.HTTPError: if the API answers with an error status (bad key,
            exhausted quota, ...), instead of a later KeyError on the missing 'items'.
    """
    links = []

    # Uses the notebook-level google_api_key / cse_id credentials.
    params = {"key": google_api_key, "cx": cse_id, "q": query}
    if end_date:
        params["sort"] = "date:r::" + end_date.strftime("%Y%m%d")

    for page_index in range(pages):
        params["start"] = page_index * 10 + 1
        response = requests.get(
            "https://www.googleapis.com/customsearch/v1",
            params=params,
            timeout=30,  # never hang the notebook on a stalled connection
        )
        response.raise_for_status()

        # A page without results has no 'items' key; stop paging in that case.
        items = json.loads(response.text).get("items")
        if not items:
            break
        links.extend(item["link"] for item in items)

    return links

# Try the search helper on a sample question, passing the notebook's end_date cut-off.
links = google_search(
    query="Has Roswell seen a consistent trend of increasing house prices over the past five years?",
    end_date=end_date,
)
display(links)

In [None]:
import time
from markdownify import markdownify as md

def get_historical_page(page, end_date:datetime=None):
    """Fetch the newest Wayback Machine snapshot of `page` dated on or before `end_date`.

    Args:
        page: URL of the page to look up on web.archive.org.
        end_date: Latest acceptable capture date. Defaults to the current time when
            omitted, i.e. the most recent capture is used.

    Returns:
        The body text of the archived page, or None when the archive reports no
        capture on or before `end_date`.

    Side effects:
        Prints the chosen capture timestamp, and sleeps one second before each
        per-year calendar request.
    """
    if end_date is None:
        # The declared default used to crash on `end_date.year`; treat it as "no cut-off".
        end_date = datetime.now()

    referer = "https://web.archive.org"
    headers = {"Referer": referer}
    timeout = 60  # seconds; never hang the notebook on a stalled connection

    #get the years that are available
    response = requests.get(
        "https://web.archive.org/__wb/sparkline",
        params={"output": "json", "url": page, "collection": "web"},
        headers=headers,
        timeout=timeout,
    )
    sparkline = json.loads(response.text)

    #candidate years, youngest to oldest; a missing 'years' key means no captures
    years = sorted(
        (int(year) for year in (sparkline.get('years') or {}) if int(year) <= end_date.year),
        reverse=True,
    )

    #walk the capture calendar of each year until a day on or before end_date shows up
    found_date = None
    for year in years:

        time.sleep(1) #avoid rate limiting
        response = requests.get(
            "https://web.archive.org/__wb/calendarcaptures/2",
            params={"url": page, "groupby": "day", "date": year},
            headers=headers,
            timeout=timeout,
        )
        captures = json.loads(response.text)

        # The first element of each item encodes the capture day as month*100 + day.
        capture_days = sorted((item[0] for item in captures.get('items') or []), reverse=True)
        for month_day in capture_days:
            month, day = divmod(month_day, 100)
            candidate = datetime(year, month, day)
            if candidate <= end_date:
                found_date = candidate
                break

        if found_date:
            break

    if not found_date:
        return None

    timestamp = found_date.strftime('%Y%m%d%H%M%S')
    print(timestamp)

    #get the page contents
    response = requests.get(f"https://web.archive.org/web/{timestamp}/{page}", timeout=timeout)
    return response.text

# Fetch the archived copy of one search hit and save it both as raw HTML and as markdown.
N = 7
print(links[N])
page_html = get_historical_page(links[N], end_date)
if not page_html:
    print("No page found")
else:
    with open("page.html", "w", encoding="utf-8") as html_file:
        html_file.write(page_html)
    # markdownify converts the HTML into markdown text
    markdown = md(page_html)
    with open("page.md", "w", encoding="utf-8") as markdown_file:
        markdown_file.write(markdown)