In [1]:
import requests as r
import time
from datetime import datetime
from datetime import timedelta
import streamlit as st
import pandas as pd
import altair as alt
from streamlit_jupyter import StreamlitPatcher, tqdm

StreamlitPatcher().jupyter()  # register streamlit with jupyter-compatible wrappers

'''
All HN API requests are constructed using the following reference: https://github.com/HackerNews/API
'''

base_url = "https://hacker-news.firebaseio.com/v0/"
human_display_item_url = "https://news.ycombinator.com/item?id="
new_stories_slug = "newstories"
item_slug = "item/"
json_append_slug = ".json"

In [2]:
### Get Story IDs for the newest stories from the HN New Stories endpoint
new_stories_response = r.get(base_url + new_stories_slug + json_append_slug)
new_stories_json = new_stories_response.json()

In [3]:
# returns datetime.timedelta object representing time between execution and story posting
def get_td_from_story(story):
    story_unix_time = story['time']
    story_datetime = datetime.fromtimestamp(story_unix_time)
    timedelta_since_post = datetime.now() - story_datetime
    return timedelta_since_post

def get_comments_per_minute_from_story(timedelta_since_post, num_comments):
    minutes_since_post = timedelta_since_post.seconds / 60
    comments_per_minute = (num_comments * 1.0) / minutes_since_post
    return comments_per_minute

def format_timedelta(timedelta_since_post):
    hours_since_post, remainder = divmod(timedelta_since_post.seconds, 3600)
    minutes_since_post, seconds = divmod(remainder, 60)
    time_since_post = str(hours_since_post) + 'h' + str(minutes_since_post) + 'm'
    return time_since_post

def get_comment_timestamps(story_json, delay_in_seconds):
    comments = story_json['kids']
    print("Total number of timestamps we're getting: " + str(len(comments)))
    comment_timestamps = []
    for comment_id in comments:
        comment_response = r.get(base_url + item_slug + str(comment_id) + json_append_slug)
        comment = comment_response.json()
        comment_timestamps.append(comment['time'])
        time.sleep(delay_in_seconds)
    return comment_timestamps

# Structure: story_id: {num_comments, }
story_info_tracker = {}
num_stories = len(new_stories_json)
for i, story_id in enumerate(new_stories_json):
    story_response = r.get(base_url + item_slug + str(story_id) + json_append_slug)
    story = story_response.json()
    if 'descendants' in story:
        num_comments = story['descendants']
    elif ('dead' in story and story['dead']):
        continue
    else:
        raise Exception("Couldn't find descendants in story: " + str(story))
    title = story['title']
    timedelta_since_post = get_td_from_story(story)
    comments_per_minute = get_comments_per_minute_from_story(timedelta_since_post, num_comments)
    time_since_post = format_timedelta(timedelta_since_post)
    if num_comments > 0:
        print("Pulling comment info for story with " + str(num_comments) + " comments")
        comment_timestamps = get_comment_timestamps(story, 0.1)
    else:
        comment_timestamps = []
    story_info_tracker[story_id] = {
        'num_comments': num_comments,
        'title': title,
        'timedelta_since_post': timedelta_since_post,
        'time_since_post': time_since_post,
        'comments_per_minute': comments_per_minute,
        'url': human_display_item_url + str(story_id),
        'comment_timestamps': comment_timestamps,
    }
    print("Pulled info for story " + str(i) + "/" + str(num_stories))
    if i == 50:
        break

Pulled info for story 0/500
Pulled info for story 1/500
Pulled info for story 2/500
Pulling comment info for story with 1 comments
Total number of timestamps we're getting: 1
Pulled info for story 3/500
Pulled info for story 4/500
Pulled info for story 5/500
Pulled info for story 6/500
Pulled info for story 7/500
Pulled info for story 8/500
Pulled info for story 9/500
Pulled info for story 10/500
Pulled info for story 11/500
Pulling comment info for story with 1 comments
Total number of timestamps we're getting: 1
Pulled info for story 12/500
Pulled info for story 13/500
Pulled info for story 14/500
Pulled info for story 15/500
Pulled info for story 16/500
Pulled info for story 17/500
Pulled info for story 18/500
Pulled info for story 19/500
Pulled info for story 20/500
Pulled info for story 21/500
Pulled info for story 22/500
Pulled info for story 23/500
Pulled info for story 24/500
Pulled info for story 25/500
Pulled info for story 26/500
Pulled info for story 27/500
Pulled info for 

In [4]:
# Gets comments per minute from an array of unix timestamps representing comment times, and minutes in the past to consider recent.
def get_comments_per_minute_from_recent_comments(comment_unix_timestamp_array, minutes_to_consider_recent):
    recency_datetime = datetime.now() - timedelta(minutes=minutes_to_consider_recent)
    filtered_comments = [comment for comment in comment_unix_timestamp_array if comment >= recency_datetime.timestamp()]
    filtered_comments_sorted = sorted(filtered_comments)
    if len(filtered_comments_sorted) <= 1:
        return 0
    first_comment_timestamp = datetime.fromtimestamp(filtered_comments_sorted[0])
    last_comment_timestamp = datetime.fromtimestamp(filtered_comments_sorted[-1])
    timedelta_between_first_last_comment = last_comment_timestamp - first_comment_timestamp
    minutes_between_first_last_comment = (timedelta_between_first_last_comment.seconds * 1.0) / 60
    return (len(comment_unix_timestamp_array) * 1.0) / minutes_between_first_last_comment

def get_stories_with_recent_comment_velocity(stories, recency_in_minutes):
    stories_with_recent_comment_velocity = {}
    for story_id, story in stories.items():
        recent_comment_velocity = get_comments_per_minute_from_recent_comments(story['comment_timestamps'], recency_in_minutes)
        story['recent_comments_per_minute'] = recent_comment_velocity
        stories_with_recent_comment_velocity[story_id] = story
    return stories_with_recent_comment_velocity

def convert_sorted_stories_to_display_list(sorted_stories):
    original_list = [sorted_story[1] for sorted_story in sorted_stories]
    display_list = []
    for story in original_list:
        display_story = story.copy()
        if 'comment_timestamps' in display_story:
            del display_story['comment_timestamps']
        display_list.append(display_story)
    return display_list

def get_recent(sorted_stories_display_list, minutes_to_consider_recent):
    return [sorted_story for sorted_story in sorted_stories_display_list \
            if (sorted_story['timedelta_since_post'].seconds / 60) < minutes_to_consider_recent]


###
###
###

## EDIT THIS TO CHANGE DEFINITION OF "RECENT COMMENT"
MINUTES_TO_CONSIDER_RECENT = 120


###
###
###

## SORT AND FORMAT STORIES BY RELEVANT METRICS

story_info_tracker = get_stories_with_recent_comment_velocity(story_info_tracker, MINUTES_TO_CONSIDER_RECENT)

num_comments_sorted_stories = sorted(story_info_tracker.items(), key=lambda x: x[1]['num_comments'], reverse=True)
comment_velocity_sorted_stories = sorted(story_info_tracker.items(), key=lambda x: x[1]['comments_per_minute'], reverse=True)
recent_comment_velocity_sorted_stories = sorted(story_info_tracker.items(), key=lambda x: x[1]['recent_comments_per_minute'], reverse=True)

num_comments_sorted_stories_display = convert_sorted_stories_to_display_list(num_comments_sorted_stories)
comment_velocity_sorted_stories_display = convert_sorted_stories_to_display_list(comment_velocity_sorted_stories)
recent_comment_velocity_sorted_stories_display = convert_sorted_stories_to_display_list(recent_comment_velocity_sorted_stories)

In [5]:
# get_recent(comment_velocity_sorted_stories_display, (3 * 60))
# get_recent(num_comments_sorted_stories_display, (3*60))

# recent_comment_velocity_sorted_stories_display

get_recent(comment_velocity_sorted_stories_display, (6 * 60))

[{'num_comments': 1,
  'title': 'How Better Tech Could Save Lives in a World of Bigger, Faster Fires',
  'timedelta_since_post': datetime.timedelta(seconds=504, microseconds=725621),
  'time_since_post': '0h8m',
  'comments_per_minute': 0.11904761904761904,
  'url': 'https://news.ycombinator.com/item?id=37366962',
  'recent_comments_per_minute': 0},
 {'num_comments': 7,
  'title': 'Vigil, the eternal morally vigilant programming language',
  'timedelta_since_post': datetime.timedelta(seconds=4103, microseconds=60187),
  'time_since_post': '1h8m',
  'comments_per_minute': 0.10236412381184498,
  'url': 'https://news.ycombinator.com/item?id=37366678',
  'recent_comments_per_minute': 0.1448692152917505},
 {'num_comments': 12,
  'title': 'How to type “blimpy” in Emacs [video]',
  'timedelta_since_post': datetime.timedelta(seconds=8131, microseconds=397846),
  'time_since_post': '2h15m',
  'comments_per_minute': 0.08854999385069487,
  'url': 'https://news.ycombinator.com/item?id=37366341',
 

In [6]:
comment_velocity_df = pd.DataFrame.from_records(comment_velocity_sorted_stories_display)

In [7]:
comment_velocity_df

Unnamed: 0,num_comments,title,timedelta_since_post,time_since_post,comments_per_minute,url,recent_comments_per_minute
0,1,How Better Tech Could Save Lives in a World of...,0 days 00:08:24.725621,0h8m,0.119048,https://news.ycombinator.com/item?id=37366962,0.0
1,7,"Vigil, the eternal morally vigilant programmin...",0 days 01:08:23.060187,1h8m,0.102364,https://news.ycombinator.com/item?id=37366678,0.144869
2,12,How to type “blimpy” in Emacs [video],0 days 02:15:31.397846,2h15m,0.08855,https://news.ycombinator.com/item?id=37366341,0.189274
3,1,Elon Musk's X will use public data to train AI...,0 days 00:33:36.330213,0h33m,0.029762,https://news.ycombinator.com/item?id=37366837,0.0
4,2,CIA Chilling Document Reveals Presence of Gian...,0 days 01:17:49.541946,1h17m,0.025701,https://news.ycombinator.com/item?id=37366636,0.122951
5,2,Thousands of Old Wind Turbine Blades Pile Up i...,0 days 02:12:41.550839,2h12m,0.015073,https://news.ycombinator.com/item?id=37366353,0.0
6,1,"Yes, There Are Rich Men North of Richmond",0 days 01:24:13.578987,1h24m,0.011874,https://news.ycombinator.com/item?id=37366607,0.036552
7,1,A Hacker-Friendly Software Package for Your Ne...,0 days 01:46:23.152857,1h46m,0.0094,https://news.ycombinator.com/item?id=37366491,0.0
8,1,Light Years Ahead: The 1969 Apollo Guidance Co...,0 days 01:47:39.681291,1h47m,0.009289,https://news.ycombinator.com/item?id=37366486,0.0
9,1,Hurricane Idalia floodwaters cause Tesla to co...,0 days 01:54:13.482377,1h54m,0.008755,https://news.ycombinator.com/item?id=37366448,0.0


In [11]:
c = (alt.Chart(comment_velocity_df)
        .mark_area(opacity=0.3)
        .encode(
            x='timedelta_since_post',
            y='comments_per_minute',
            color='title',
    ))
st.altair_chart(c, use_container_width=True)

DeltaGenerator()

In [12]:
st.dataframe(comment_velocity_df)

Unnamed: 0,num_comments,title,timedelta_since_post,time_since_post,comments_per_minute,url,recent_comments_per_minute
0,1,How Better Tech Could Save Lives in a World of...,0 days 00:08:24.725621,0h8m,0.119048,https://news.ycombinator.com/item?id=37366962,0.0
1,7,"Vigil, the eternal morally vigilant programmin...",0 days 01:08:23.060187,1h8m,0.102364,https://news.ycombinator.com/item?id=37366678,0.144869
2,12,How to type “blimpy” in Emacs [video],0 days 02:15:31.397846,2h15m,0.08855,https://news.ycombinator.com/item?id=37366341,0.189274
3,1,Elon Musk's X will use public data to train AI...,0 days 00:33:36.330213,0h33m,0.029762,https://news.ycombinator.com/item?id=37366837,0.0
4,2,CIA Chilling Document Reveals Presence of Gian...,0 days 01:17:49.541946,1h17m,0.025701,https://news.ycombinator.com/item?id=37366636,0.122951
5,2,Thousands of Old Wind Turbine Blades Pile Up i...,0 days 02:12:41.550839,2h12m,0.015073,https://news.ycombinator.com/item?id=37366353,0.0
6,1,"Yes, There Are Rich Men North of Richmond",0 days 01:24:13.578987,1h24m,0.011874,https://news.ycombinator.com/item?id=37366607,0.036552
7,1,A Hacker-Friendly Software Package for Your Ne...,0 days 01:46:23.152857,1h46m,0.0094,https://news.ycombinator.com/item?id=37366491,0.0
8,1,Light Years Ahead: The 1969 Apollo Guidance Co...,0 days 01:47:39.681291,1h47m,0.009289,https://news.ycombinator.com/item?id=37366486,0.0
9,1,Hurricane Idalia floodwaters cause Tesla to co...,0 days 01:54:13.482377,1h54m,0.008755,https://news.ycombinator.com/item?id=37366448,0.0
