In [490]:
from csv import reader
from prettytable import PrettyTable
from typing import List, Iterable
import datetime as dt

# Black formatter for Jupyter Notebooks
%reload_ext jupyter_black

In [491]:
# Read the whole CSV into memory. The csv module requires files to be opened
# with newline="" so that line breaks embedded in quoted fields are parsed
# correctly instead of being altered by universal-newline translation.
with open("hacker_news.csv", "r", newline="") as csv_file:
    rows = list(reader(csv_file))

# The first row holds the column names; every row after it is a post.
headers = rows[0]
hn = rows[1:]

headers

['id', 'title', 'url', 'num_points', 'num_comments', 'author', 'created_at']

In [492]:
# Partition the posts by title prefix: "Ask HN", "Show HN", or anything else.
ask_posts = []
show_posts = []
other_posts = []

for row in hn:
    # str.startswith is case-sensitive, so the title (column 1) is lowercased
    # first to make the prefix checks below case-insensitive.
    title = row[1].lower()

    if title.startswith("ask hn"):
        ask_posts.append(row)
    elif title.startswith("show hn"):
        show_posts.append(row)
    else:
        other_posts.append(row)

# View the number of posts in each list. The second column holds post counts
# (len of each list), not comment counts, and is labelled accordingly.
num_posts_per_type = PrettyTable(["Type of Posts", "Number of Posts"])
num_posts_per_type.add_rows(
    [
        ["Ask", len(ask_posts)],
        ["Show", len(show_posts)],
        ["Other", len(other_posts)],
    ]
)
num_posts_per_type

Type of Posts,Total Comments
Ask,1744
Show,1162
Other,17194


In [493]:
def calculate_total_comments(posts: List[List[str]] | Iterable, index: int) -> int:
    """Calculate the total number of comments from the provided posts.

    Args:
      posts: The list of posts.
      index: The index of the field containing the number of comments.

    Returns:
      The total number of comments.
    """
    return sum([int(post[index]) for post in posts])


def calculate_avg_comments(posts: List[List[str]] | Iterable, index: int) -> float:
    """Calculate the average number of comments for the posts.

    Args:
      posts: The list of posts.
      index: The index of the field containing the number of comments.

    Returns:
      The average number of comments, formatted to 4 decimal places.
    """
    return float(f"{calculate_total_comments(posts, index) / len(posts):.4f}")

In [494]:
# Summarise total and average comment counts (column 4) for Ask and Show posts.
posts_per_type = PrettyTable(["Type of Posts", "Total Comments", "Average Comments"])
posts_per_type.add_rows(
    [
        [
            label,
            calculate_total_comments(group, 4),
            calculate_avg_comments(group, 4),
        ]
        for label, group in (("Ask", ask_posts), ("Show", show_posts))
    ]
)
posts_per_type

Type of Posts,Total Comments,Average Comments
Ask,24483,14.0384
Show,11988,10.3167


In [495]:
# Pair each Ask post's creation timestamp (column 6) with its comment count
# (column 4, converted to int).
result_list = [[ask_post[6], int(ask_post[4])] for ask_post in ask_posts]

# Keyed by zero-padded hour string: how many Ask posts were created in that
# hour, and how many comments those posts received in total.
counts_per_hour = {}
comments_per_hour = {}

for created_at, num_comments in result_list:
    hour = dt.datetime.strptime(created_at, "%m/%d/%Y %H:%M").strftime("%H")
    counts_per_hour[hour] = counts_per_hour.get(hour, 0) + 1
    comments_per_hour[hour] = comments_per_hour.get(hour, 0) + num_comments

In [496]:
# One [hour, average] pair per hour: total comments divided by the number of
# posts created in that hour, rounded to 2 decimal places.
avg_by_hour = [
    [hr, round(total_comments / counts_per_hour[hr], 2)]
    for hr, total_comments in comments_per_hour.items()
]
avg_by_hour

[['09', 5.58],
 ['13', 14.74],
 ['10', 13.44],
 ['14', 13.23],
 ['16', 16.8],
 ['23', 7.99],
 ['12', 9.41],
 ['17', 11.46],
 ['15', 38.59],
 ['21', 16.01],
 ['20', 21.52],
 ['02', 23.81],
 ['18', 13.2],
 ['03', 7.8],
 ['05', 10.09],
 ['19', 10.8],
 ['01', 11.38],
 ['22', 6.75],
 ['08', 10.25],
 ['04', 7.17],
 ['00', 8.13],
 ['06', 9.02],
 ['07', 7.85],
 ['11', 11.05]]

In [497]:
# Put the average first so ordering is by average, with the hour string
# breaking ties.
swap_avg_by_hour = [[avg, hour] for hour, avg in avg_by_hour]
# Lists compare element by element, so the default ordering is the same as an
# explicit (average, hour) key; reverse=True puts the highest average first.
sorted_swap = sorted(swap_avg_by_hour, reverse=True)
sorted_swap

[[38.59, '15'],
 [23.81, '02'],
 [21.52, '20'],
 [16.8, '16'],
 [16.01, '21'],
 [14.74, '13'],
 [13.44, '10'],
 [13.23, '14'],
 [13.2, '18'],
 [11.46, '17'],
 [11.38, '01'],
 [11.05, '11'],
 [10.8, '19'],
 [10.25, '08'],
 [10.09, '05'],
 [9.41, '12'],
 [9.02, '06'],
 [8.13, '00'],
 [7.99, '23'],
 [7.85, '07'],
 [7.8, '03'],
 [7.17, '04'],
 [6.75, '22'],
 [5.58, '09']]

In [498]:
# Report the five hours with the highest average comment count per Ask post.
for avg_comments, hour in sorted_swap[:5]:
    # Parse the "HH" string and render it as a clock time via %X.
    clock_time = dt.datetime.strptime(hour, "%H").strftime("%X")
    print(f"{clock_time}: {avg_comments:.2f} average comments per post")

15:00:00: 38.59 average comments per post
02:00:00: 23.81 average comments per post
20:00:00: 21.52 average comments per post
16:00:00: 16.80 average comments per post
21:00:00: 16.01 average comments per post
