In [None]:
# Notebook setup: install the ipython-autotime extension into the kernel's environment.
%pip install -q ipython-autotime
# autoreload mode 2: re-import edited modules before each cell executes.
%load_ext autoreload
%autoreload 2
# autotime (from ipython-autotime) reports how long each cell took to run.
%load_ext autotime
# Turn off Jedi-based tab completion.
%config Completer.use_jedi = False
import pandas as pd

# Show up to 30 columns when a DataFrame is displayed.
pd.options.display.max_columns = 30

In [None]:
import pandas as pd
import clickhouse_connect

# Connect to ClickHouse; only the host is specified here.
client = clickhouse_connect.get_client(host="localhost")
# Self-join of hacker_news: `p` is one of the listed posts, `c` is a row whose
# parent is that post and whose type is 'comment' (i.e. direct replies only).
# The trailing SQL comments label each post id with a year.
query = """
SELECT
    p.id AS post_id,
    p.by AS post_author,
    p.time AS post_time,
    p.title AS post_title,
    p.text AS post_text,
    c.id AS comment_id,
    c.by AS comment_author,
    c.time AS comment_time,
    c.text AS comment_text
FROM
    hacker_news p
JOIN
    hacker_news c ON c.parent = p.id
WHERE
    p.id IN (
        36643772,  -- 2024
        34125628,  -- 2023
        29746236,  -- 2022
        25594068,  -- 2021
        21802596,  -- 2020
        18753859,  -- 2019
        16007988,  -- 2018
        10809767,  -- 2016
        8822723,   -- 2015
        6994370,   -- 2014
        3395201,   -- 2012
        1970023,   -- 2011
        1025681    -- 2010
    )
    AND c.type = 'comment'
ORDER BY
    p.time DESC, c.time DESC
"""
# Run the query and load the result set into a DataFrame.
df = client.query_df(query)
# Re-order by comment_time ascending; this replaces the ordering requested in
# the SQL ORDER BY clause above.
df = df.sort_values("comment_time").reset_index(drop=True)
# Persist the result so later cells can reload it from disk.
df.to_parquet("df.parquet")

In [None]:
# Reload the comments frame from the parquet file written by the query cell.
df = pd.read_parquet("df.parquet")
df

In [None]:
# Number of distinct comment ids in the frame.
df.comment_id.nunique()

In [None]:
# Hand-written lookup (originally produced with Claude) from a post's exact
# title to the two dates that gen_prompt puts into the LLM prompt:
#   creation_date   - the date the predictions are treated as having been made
#   evaluation_date - the date as of which the predictions are to be judged
# Keys must match `post_title` exactly, because gen_prompt indexes this dict
# with t.post_title.
# NOTE(review): the ClickHouse query lists 13 post ids but this map has 12
# titles; any post whose title is missing here raises KeyError in gen_prompt.
predictions_dates = {
    "Ask HN: What are your predictions for 2023?": {
        "creation_date": "2022-12-31",
        "evaluation_date": "2024-01-01",
    },
    "Ask HN: What are you predictions for 2022?": {
        "creation_date": "2021-12-31",
        "evaluation_date": "2023-01-01",
    },
    "Ask HN: Predictions for 2021?": {
        "creation_date": "2020-12-31",
        "evaluation_date": "2022-01-01",
    },
    "Ask HN: Predictions for 2020 and the New Decade?": {
        "creation_date": "2019-12-31",
        "evaluation_date": "2021-01-01",
    },
    "Ask HN: Your predictions for 2019?": {
        "creation_date": "2018-12-31",
        "evaluation_date": "2020-01-01",
    },
    "Ask HN: Your predictions for 2018?": {
        "creation_date": "2017-12-31",
        "evaluation_date": "2019-01-01",
    },
    "Ask HN: What are your predictions for 2016?": {
        "creation_date": "2015-12-31",
        "evaluation_date": "2017-01-01",
    },
    "Ask HN: What are your predictions for 2015?": {
        "creation_date": "2014-12-31",
        "evaluation_date": "2016-01-01",
    },
    "Ask HN: What are your 2014 Predictions?": {
        "creation_date": "2013-12-31",
        "evaluation_date": "2015-01-01",
    },
    "Ask HN: Your predictions for 2012?": {
        "creation_date": "2011-12-31",
        "evaluation_date": "2013-01-01",
    },
    "Ask HN: 2011 Predictions": {
        "creation_date": "2010-12-31",
        "evaluation_date": "2012-01-01",
    },
    # Decade-long thread: evaluated ten years after it was posted.
    "Ask HN: A New Decade. Any Predictions?": {
        "creation_date": "2009-12-31",
        "evaluation_date": "2020-01-01",
    },
}

In [None]:
# JSON schema describing the expected model output: an object with a
# `predictions` array, one item per extracted prediction. It is embedded in the
# system prompt and also sent as the `json_schema` field of each request.
# For grammar https://github.com/ggerganov/llama.cpp/blob/master/grammars/README.md
json_schema = {
    "type": "object",
    "properties": {
        "predictions": {
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    "text": {
                        "type": "string",
                        "description": "The text of the prediction",
                    },
                    # Optional (not listed under "required" below).
                    "result_evaluation": {
                        "type": "string",
                        "description": "The evaluation of the prediction result",
                    },
                    # Verdict label; restricted to the five values below.
                    "result_enum": {
                        "type": "string",
                        "enum": [
                            "correct",
                            "mostly correct",
                            "mostly wrong",
                            "wrong",
                            "can not be decided",
                        ],
                        "description": "The result of the prediction",
                    },
                    "can_be_evaluated_already": {
                        "type": "boolean",
                        "description": "Whether the prediction can be evaluated already",
                    },
                    "category": {
                        "type": "string",
                        "description": "The general category of the prediction (e.g., Markets, Technology, Politics, etc.)",
                    },
                    "sub_category": {
                        "type": "string",
                        "description": "A more specific sub-category (e.g., S&P, Crypto, AI, etc.)",
                    },
                    # Optional (not listed under "required" below).
                    "explanation": {
                        "type": "string",
                        "description": "Explanation of the result evaluation",
                    },
                },
                # Fields every prediction object must carry; insert_prediction
                # reads exactly these keys plus the optional "explanation".
                "required": [
                    "text",
                    "result_enum",
                    "can_be_evaluated_already",
                    "category",
                    "sub_category",
                ],
            },
        }
    },
    "required": ["predictions"],
}

In [None]:
import json


# Prompt for https://huggingface.co/NousResearch/Hermes-2-Theta-Llama-3-70B-GGUF
#
# `core_prompt` is a str.format template: `{schema}` is its only placeholder and
# the literal braces of the JSON example are escaped as `{{` / `}}`.
# The example's prediction date (2022-12-31) and evaluation date (2024-01-01)
# follow the same pattern as the 2023 entry in `predictions_dates`.
core_prompt = """You are a helpful assistant that extracts the predictions and evaluates them based on their correctness and answers in JSON. Here's the json schema you must adhere to:
<schema>
{schema}
</schema>
Example input:
"Evaluation date: 2024-01-01. Prediction date: 2022-12-31. Text: Markets: S&P -15% by Q2, crypto -40%, Fed funds rate >=5%, energy best performing sector. Ukraine war continues. Layoffs continue."
    
Expected JSON output:
    {{
        "predictions": [
            {{
                "text": "S&P -15% by Q2",
                "result_evaluation": "The S&P fell by 10% by Q2",
                "result_enum": "mostly correct",
                "can_be_evaluated_already": true,
                "category": "Markets",
                "sub_category": "S&P",
                "explanation": "The S&P did not fall by the predicted 15%, but it came close."
            }},
            {{
                "text": "Ukraine war continues",
                "result_evaluation": "The war in Ukraine is still ongoing.",
                "result_enum": "correct",
                "can_be_evaluated_already": true,
                "category": "Politics",
                "sub_category": "Geopolitics",
                "explanation": "The prediction was straightforward and correct as the conflict is still active."
            }},
            {{
                "text": "Layoffs continue",
                "result_evaluation": "Layoffs have continued in several sectors, especially in tech.",
                "result_enum": "correct",
                "can_be_evaluated_already": true,
                "category": "Economy",
                "sub_category": "Employment",
                "explanation": "Data shows a continued trend of layoffs in the tech industry and beyond."
            }}
        ]
    }}"""

# Render the template with str.format rather than str.replace so that the
# escaped `{{` / `}}` collapse to single braces; with a plain replace the model
# was shown an example full of doubled braces. Only the template is parsed by
# format(), so braces inside the substituted schema are left untouched.
system_prompt = core_prompt.format(schema=json.dumps(json_schema))

# System turn in ChatML form; the per-comment user turn is appended to this.
prompt = f"""<|im_start|>system
{system_prompt}
<|im_end|>"""

In [None]:
import requests
from tqdm import tqdm

# Endpoint of the local inference server that each prompt is POSTed to
# (presumably a llama.cpp server, given the grammar link above - confirm).
url = "http://localhost:8080/completion"
# The request body is sent as a JSON document.
headers = {"Content-Type": "application/json"}


def gen_prompt(t):
    """Build the ChatML user turn and the assistant header for one comment.

    Args:
        t: A row of the comments frame (as produced by ``df.itertuples()``);
            ``t.post_title`` selects the dates and ``t.comment_text`` is the
            text to evaluate.

    Returns:
        str: Text meant to be appended directly after the system prompt. It
        starts with a newline so that the preceding ``<|im_end|>`` and this
        turn's ``<|im_start|>`` end up on separate lines, and it ends with the
        assistant header so that the model continues with its answer.

    Raises:
        KeyError: If ``t.post_title`` has no entry in ``predictions_dates``.
    """
    dates = predictions_dates[t.post_title]
    # Assemble the turn line by line. The previous triple-quoted f-string was
    # indented with the function body, which leaked four spaces into every
    # continuation line of the prompt and left no newline between the
    # `<|im_end|>` / `<|im_start|>` markers.
    return (
        "\n<|im_start|>user\n"
        "Evaluate the following prediction:\n"
        f"Evaluation date: {dates['evaluation_date']}. "
        f"Prediction date: {dates['creation_date']}. "
        f"Text: {t.comment_text}<|im_end|>\n"
        "<|im_start|>assistant\n"
    )


def parse_incomplete_json(json_string):
    """Parse a JSON object that may have been cut off mid-generation.

    The text is scanned once while tracking string literals and the stack of
    open ``{`` / ``[``. Everything after the last ``}`` that closes an object
    is discarded and the containers still open at that point are closed in
    the correct (innermost-first) order before parsing.

    Compared with counting brackets, this keeps the closers properly nested
    (e.g. ``}]}`` instead of ``]}}``) and ignores braces or brackets that
    appear inside string values.

    Args:
        json_string: Raw (possibly truncated) JSON text.

    Returns:
        The parsed value, or ``None`` when the input is empty, contains no
        completed object, or still fails to parse after the repair.
    """
    if not json_string:
        return None

    closer_for = {"{": "}", "[": "]"}
    open_stack = []  # closers still owed, outermost first
    in_string = False
    escaped = False
    cut = -1  # index of the last "}" that closed an object
    pending = ()  # closers still owed right after `cut`

    for idx, ch in enumerate(json_string):
        if in_string:
            # Inside a string literal only escapes and the closing quote matter.
            if escaped:
                escaped = False
            elif ch == "\\":
                escaped = True
            elif ch == '"':
                in_string = False
            continue
        if ch == '"':
            in_string = True
        elif ch in closer_for:
            open_stack.append(closer_for[ch])
        elif ch == "}" or ch == "]":
            if not open_stack or open_stack[-1] != ch:
                # Unbalanced closer: stop and keep what was complete so far.
                break
            open_stack.pop()
            if ch == "}":
                cut = idx
                pending = tuple(open_stack)

    if cut == -1:
        return None  # No complete object found

    repaired = json_string[: cut + 1] + "".join(reversed(pending))

    try:
        return json.loads(repaired)
    except json.JSONDecodeError as e:
        print(f"Error parsing JSON: {e}")
        return None

In [None]:
import sqlite3


def create_predictions_table():
    """Create the ``predictions`` table in ``predictions.db`` if it is missing.

    The statement uses ``CREATE TABLE IF NOT EXISTS``, so calling this more
    than once is harmless. The connection is closed even if the statement
    fails.
    """
    connection = sqlite3.connect("predictions.db")
    try:
        connection.execute("""
            CREATE TABLE IF NOT EXISTS predictions (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                text TEXT NOT NULL,
                result_enum TEXT NOT NULL,
                can_be_evaluated_already BOOLEAN NOT NULL,
                category TEXT NOT NULL,
                sub_category TEXT NOT NULL,
                explanation TEXT,
                comment_author TEXT NOT NULL,
                post_id INTEGER NOT NULL,
                comment_id INTEGER NOT NULL
            )
        """)
        connection.commit()
    finally:
        connection.close()


def insert_prediction(prediction, model="NousResearch/Hermes-2-Theta-Llama-3-70B-GGUF"):
    """Insert one prediction row into the ``predictions`` table.

    Args:
        prediction: Mapping with the keys ``text``, ``result_enum``,
            ``can_be_evaluated_already``, ``category``, ``sub_category``,
            ``comment_author``, ``post_id`` and ``comment_id``;
            ``explanation`` is optional and stored as NULL when absent.
        model: Accepted for backward compatibility; the table has no column
            for it, so the value is not stored.

    Raises:
        KeyError: If a mandatory key is missing from ``prediction``.
        sqlite3.Error: If the insert itself fails.
    """
    # Build the row first so that a missing key fails before a connection is opened.
    row = (
        prediction["text"],
        prediction["result_enum"],
        prediction["can_be_evaluated_already"],
        prediction["category"],
        prediction["sub_category"],
        prediction.get("explanation", None),
        prediction["comment_author"],
        prediction["post_id"],
        prediction["comment_id"],
    )

    connection = sqlite3.connect("predictions.db")
    try:
        # Nine columns, nine placeholders, nine values. The statement used to
        # carry a tenth "?", which made every insert fail.
        connection.execute(
            """
            INSERT INTO predictions (
                text,
                result_enum,
                can_be_evaluated_already,
                category,
                sub_category,
                explanation,
                comment_author,
                post_id,
                comment_id
            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
        """,
            row,
        )
        connection.commit()
    finally:
        connection.close()


def comment_id_exists(comment_id):
    """Return True when at least one stored prediction belongs to ``comment_id``.

    Args:
        comment_id: Comment id to look up in ``predictions.db``.

    Returns:
        bool: Whether the ``predictions`` table has a row with that
        ``comment_id``.
    """
    connection = sqlite3.connect("predictions.db")
    try:
        # One matching row is enough to answer the question.
        result = connection.execute(
            "SELECT 1 FROM predictions WHERE comment_id = ? LIMIT 1",
            (comment_id,),
        ).fetchone()
    finally:
        # Close the connection even if the query fails (e.g. missing table).
        connection.close()

    return result is not None

# create_predictions_table()

In [None]:
# Make sure the results table exists before the first lookup. The statement is
# CREATE TABLE IF NOT EXISTS, so this is a no-op once the table is there.
create_predictions_table()

# Titles already reported as missing from predictions_dates (warn once each).
unmapped_titles = set()

for t in tqdm(df.itertuples(), total=len(df)):
    # Resume support: comments that already have stored predictions are skipped.
    if comment_id_exists(t.comment_id):
        continue
    # Skip missing (non-string) and near-empty comment bodies.
    if not isinstance(t.comment_text, str) or len(t.comment_text) < 5:
        continue
    # gen_prompt indexes predictions_dates by title; skip posts without an
    # entry instead of letting a KeyError abort the whole run.
    if t.post_title not in predictions_dates:
        if t.post_title not in unmapped_titles:
            unmapped_titles.add(t.post_title)
            print(f"no prediction dates for post {t.post_id}: {t.post_title!r}")
        continue

    data = {
        "prompt": prompt + gen_prompt(t),
        "json_schema": json_schema,
    }
    # Best effort per comment: any failure (network, bad JSON, insert error) is
    # reported and the loop moves on; the comment is retried on the next run.
    try:
        response = requests.post(url, headers=headers, data=json.dumps(data))
        if response.status_code != 200:
            print(f"Request failed with status code {response.status_code}")
            continue
        res = parse_incomplete_json(response.json()["content"])
        # parse_incomplete_json returns None when nothing usable came back.
        if not isinstance(res, dict) or "predictions" not in res:
            print(f"error no predictions parsed for comment {t.comment_id}")
            continue
        pred_list = res["predictions"]
        for prediction in pred_list:
            # Attach the comment's provenance to each extracted prediction.
            insert_prediction(
                {
                    **prediction,
                    "comment_author": t.comment_author,
                    "post_id": t.post_id,
                    "comment_id": t.comment_id,
                }
            )
        print(pred_list)
    except Exception as e:
        print(f"error {e}")
        continue

In [None]:
import pandas as pd
import json

# Load every stored prediction into a DataFrame, closing the connection even
# if the read fails.
# NOTE: this rebinds `df`, which until now held the comments frame. The
# extraction loop reads t.comment_text from `df`, so reload df.parquet before
# re-running that loop.
conn = sqlite3.connect("predictions.db")
try:
    df = pd.read_sql_query("SELECT * FROM predictions", conn)
finally:
    conn.close()
df

In [None]:
# Verdict counts for predictions whose text mentions "bitcoin" (case-insensitive).
df[df.text.str.lower().str.contains("bitcoin")].result_enum.value_counts()

In [None]:
# Prepare the predictions for export. The cell can be re-run safely: the key
# column is dropped by name (and ignored when already gone) instead of
# slicing off "the first column", which removed one more column on every run.
# NOTE(review): assumes the first column returned by SELECT * is `id`, as in
# the CREATE TABLE statement in this notebook - confirm for older databases.
df = (
    # SQLite hands booleans back as integers; restore a real bool dtype.
    df.assign(can_be_evaluated_already=df["can_be_evaluated_already"].astype(bool))
    # Discard rows with an empty prediction text.
    .loc[lambda frame: frame["text"] != ""]
    .drop(columns=["id"], errors="ignore")
)

# Emit the rows as the default export of a TypeScript module.
data = df.to_dict("records")
ts_content = "export default " + json.dumps(data, indent=2) + ";"
with open("data.ts", "w") as f:
    f.write(ts_content)

In [None]:
import pandas as pd
import numpy as np


def calculate_success_rate(group):
    """Score a collection of verdict labels.

    "correct" counts as 1, "mostly correct" as 0.5 and every other label
    (including "can not be decided") as 0.

    Args:
        group: pandas Series (or NumPy array) of ``result_enum`` labels.

    Returns:
        tuple: ``(success_rate, total)`` where ``total`` is ``len(group)`` and
        ``success_rate`` is the weighted share of successful predictions. An
        empty group yields ``(nan, 0)`` instead of raising ZeroDivisionError.
    """
    total = len(group)
    if total == 0:
        return float("nan"), 0
    # Vectorised counts; the builtin sum() walked the Series element by element.
    correct = int((group == "correct").sum())
    mostly_correct = int((group == "mostly correct").sum())
    success_rate = (correct + 0.5 * mostly_correct) / total
    return success_rate, total


MIN_PREDICTIONS = 10
TOP_N = 10


def _rank_by_weighted_success(frame, key, min_predictions=MIN_PREDICTIONS, top_n=TOP_N):
    """Rank the groups of ``frame[key]`` by a volume-weighted success rate.

    Args:
        frame: Predictions frame with a ``result_enum`` column.
        key: Column to group by (e.g. "category" or "comment_author").
        min_predictions: Groups with fewer predictions are dropped.
        top_n: Number of best-scoring groups to return.

    Returns:
        tuple: ``(stats, weighted_score, top)`` where ``stats`` has the columns
        ``success_rate`` and ``total_predictions`` for the retained groups,
        ``weighted_score`` is ``success_rate * log1p(total_predictions)`` and
        ``top`` holds the ``top_n`` highest weighted scores in descending order.
    """
    per_group = frame.groupby(key)["result_enum"].apply(calculate_success_rate)
    # Each element is a (success_rate, total) tuple; expand into two columns.
    stats = pd.DataFrame(
        per_group.tolist(),
        index=per_group.index,
        columns=["success_rate", "total_predictions"],
    )
    stats = stats[stats["total_predictions"] >= min_predictions]
    # log1p lets volume raise the score without letting it dominate the rate.
    weighted_score = stats["success_rate"] * np.log1p(stats["total_predictions"])
    top = weighted_score.sort_values(ascending=False).head(top_n)
    return stats, weighted_score, top


# Weighted success rate for categories
category_stats, category_weighted_score, top_10_categories = _rank_by_weighted_success(
    df, "category"
)

# Weighted success rate for authors
author_stats, author_weighted_score, top_10_authors = _rank_by_weighted_success(
    df, "comment_author"
)

# Print results
print(f"Top 10 categories (minimum {MIN_PREDICTIONS} predictions):")
for category in top_10_categories.index:
    success_rate = category_stats.loc[category, "success_rate"]
    total_preds = category_stats.loc[category, "total_predictions"]
    print(f"{category}: {success_rate:.2%} success rate, {total_preds} predictions")

print(f"\nTop 10 authors (minimum {MIN_PREDICTIONS} predictions):")
for author in top_10_authors.index:
    success_rate = author_stats.loc[author, "success_rate"]
    total_preds = author_stats.loc[author, "total_predictions"]
    # First stored comment of this author, shown as a pointer for manual checks.
    example_comment = df.loc[df["comment_author"] == author, "comment_id"].iloc[0]
    print(
        f"{author}: {success_rate:.2%} success rate, {total_preds} predictions, Example comment ID: {example_comment}"
    )

# Additional statistics
total_predictions = len(df)
overall_success_rate, _ = calculate_success_rate(df["result_enum"])
print(f"\nTotal predictions: {total_predictions}")
print(f"Overall success rate: {overall_success_rate:.2%}")