I found "Ask HN: Most interesting tech you built for just yourself?" interesting, and I want to get inspired by the ideas there. However, the thread is difficult to read as is. For now, I grabbed only the top-level comments. I also added a one-sentence summary of each comment from GPT-3.5 to make them quicker to read.

In [1]:
import os

import requests

from bs4 import BeautifulSoup
from itertools import count

import json
from pathlib import Path
from functools import lru_cache

from dataclasses import dataclass

from IPython.display import display, HTML

In [2]:
@dataclass(eq=True, frozen=True, kw_only=True)
class Comment():
    comment: str
    author: str
    id_: str
        
@dataclass(frozen=True, kw_only=True)
class CommentWithSummary:
    """Pairs a comment with its one-sentence summary.

    Immutable, hashable, compared by value, and keyword-only to construct.
    Field annotations are not enforced at runtime.
    """

    # The comment that was summarised.
    c: Comment
    # Summary text generated for that comment.
    summary: str

In [3]:
def top_level_comments(page, timeout=10):
    """Download one Hacker News thread page and return its top-level comments.

    Args:
        page: URL of the thread page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the scrape indefinitely.

    Returns:
        A list of ``Comment`` objects, one per top-level comment found on the
        page. The list is empty when the response status is not OK or when
        the page has no matching comments.

    Raises:
        requests.exceptions.RequestException: If the request fails or times
            out.
    """
    def is_top_level(el):
        # A comment counts as top-level when the first 'clicky' link in its
        # container reads 'prev'.
        # NOTE(review): relies on the current HN markup - confirm if it changes.
        return el.parent.find('a', class_='clicky').text == 'prev'

    def make_comment(el):
        author = el.parent.find('a', class_='hnuser').text
        # The element whose id looks like 'unv_<item id>' carries the comment's
        # numeric id; strip the 4-character prefix and convert to int.
        id_holder = el.parent.find('span').find(
            lambda x: x.has_attr('id') and x['id'].startswith('unv_'))
        id_ = int(id_holder['id'][4:])

        return Comment(comment=el.find('span').text, author=author, id_=id_)

    response = requests.get(page, timeout=timeout)
    if not response.ok:
        # Best-effort: callers treat an empty list as "no more pages".
        return []

    soup = BeautifulSoup(response.content, 'html.parser')
    return [make_comment(el) for el in soup.find_all('div', class_='comment') if is_top_level(el)]

In [4]:
# Walk the thread's pages (p=1, 2, ...) and collect every top-level comment,
# stopping at the first page that yields none.
all_comments = []

p = 1
while True:
    more = top_level_comments(f"https://news.ycombinator.com/item?id=35729232&p={p}")
    if not more:
        break
    all_comments.extend(more)
    p += 1

In [5]:
import openai

# Take the API key from the OPENAI_API_KEY environment variable (None if unset).
openai.api_key = os.environ.get("OPENAI_API_KEY")

@lru_cache(maxsize=None)
def get_completion(instruction):
    """Return the completion text for ``instruction``, stripped of whitespace.

    Results are memoised per distinct ``instruction`` for the lifetime of the
    process, so repeating a prompt does not issue another request.
    """
    create_completion = openai.Completion.create
    response = create_completion(
        model="text-davinci-003",
        # The prompt is the instruction followed by a newline.
        prompt=instruction + "\n",
        temperature=0,
        max_tokens=1000,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
    )
    # Only the first returned choice is used.
    first_choice = response["choices"][0]
    return first_choice["text"].strip()

In [6]:
# JSON file in which the summarised comments are kept between runs.
file_ = Path('ideas.json')

# Resume from the saved entries when the file exists; otherwise start empty.
data = json.loads(file_.read_text()) if file_.exists() else []

In [7]:
from dataclasses import asdict

# Ids of comments that already have a stored summary; these are skipped so
# they are not sent to the model again.
existing = {c['c']['id_'] for c in data}

try:
    for c in all_comments:
        if c.id_ in existing:
            continue

        # NOTE(review): the prompt interpolates the whole Comment (its repr,
        # including author and id), not just the comment text - confirm that
        # this is intended.
        summary = get_completion(f"""
summarise key idea from the following comment in one-sentence: {c}
""")
        # asdict() converts the nested dataclasses into plain dicts that
        # json.dump can serialise.
        data.append(asdict(CommentWithSummary(c=c, summary=summary)))
        # Track the id so a duplicate later in all_comments is not stored twice.
        existing.add(c.id_)
finally:
    # Write even when a request fails part-way, so summaries already obtained
    # in this run are not lost.
    with file_.open(mode='w') as f:
        json.dump(data, f, indent=4)

In [8]:
# Order the entries by ascending comment id, working on a fresh list.
data = list(data)
data.sort(key=lambda entry: entry['c']['id_'])

In [9]:
from html import escape

# Render each summary as a small HTML snippet linking back to the comment.
for c in data:
    # Author names and model-generated summaries are external text; escape
    # them so stray markup is shown literally instead of being interpreted.
    id_ = escape(str(c['c']['id_']))
    author = escape(str(c['c']['author']))
    summary = escape(str(c['summary']))

    link = f"<a href='https://news.ycombinator.com/item?id={id_}'>{id_}</a>"
    title = f"--- Summary #{link} by {author} ---"
    display(HTML(f"{title}<br>{summary}"))