# Hacker News Blog Post Analysis

Examines the HN posts to determine which kinds get more comments.

In [1]:
from csv import reader
# Load every row of the CSV into a list of lists (the first row is the header).
# The context manager closes the file handle as soon as the rows are read,
# instead of leaving it open for the lifetime of the kernel.
# newline='' is what the csv module asks for, so that newlines embedded in
# quoted fields are handled by the reader rather than by the file object.
with open('hacker_news.csv', newline='') as open_file:
    read_file = reader(open_file)
    hn = list(read_file)

hn[:5]

[['id', 'title', 'url', 'num_points', 'num_comments', 'author', 'created_at'],
 ['12224879',
  'Interactive Dynamic Video',
  'http://www.interactivedynamicvideo.com/',
  '386',
  '52',
  'ne0phyte',
  '8/4/2016 11:52'],
 ['10975351',
  'How to Use Open Source and Shut the Fuck Up at the Same Time',
  'http://hueniverse.com/2016/01/26/how-to-use-open-source-and-shut-the-fuck-up-at-the-same-time/',
  '39',
  '10',
  'josep2',
  '1/26/2016 19:30'],
 ['11964716',
  "Florida DJs May Face Felony for April Fools' Water Joke",
  'http://www.thewire.com/entertainment/2013/04/florida-djs-april-fools-water-joke/63798/',
  '2',
  '1',
  'vezycash',
  '6/23/2016 22:20'],
 ['11919867',
  'Technology ventures: From Idea to Enterprise',
  'https://www.amazon.com/Technology-Ventures-Enterprise-Thomas-Byers/dp/0073523429',
  '3',
  '1',
  'hswarna',
  '6/17/2016 0:01']]

In [2]:
# Separate the header row from the data rows.
# The guard makes this cell safe to re-run: without it, every extra execution
# would slice off one more real data row, because `hn` is rebound in place.
# The header is recognised by its first column name, 'id'.
if hn and hn[0] and hn[0][0] == 'id':
    headers = hn[0]
    hn = hn[1:]

hn[:5]

[['12224879',
  'Interactive Dynamic Video',
  'http://www.interactivedynamicvideo.com/',
  '386',
  '52',
  'ne0phyte',
  '8/4/2016 11:52'],
 ['10975351',
  'How to Use Open Source and Shut the Fuck Up at the Same Time',
  'http://hueniverse.com/2016/01/26/how-to-use-open-source-and-shut-the-fuck-up-at-the-same-time/',
  '39',
  '10',
  'josep2',
  '1/26/2016 19:30'],
 ['11964716',
  "Florida DJs May Face Felony for April Fools' Water Joke",
  'http://www.thewire.com/entertainment/2013/04/florida-djs-april-fools-water-joke/63798/',
  '2',
  '1',
  'vezycash',
  '6/23/2016 22:20'],
 ['11919867',
  'Technology ventures: From Idea to Enterprise',
  'https://www.amazon.com/Technology-Ventures-Enterprise-Thomas-Byers/dp/0073523429',
  '3',
  '1',
  'hswarna',
  '6/17/2016 0:01'],
 ['10301696',
  'Note by Note: The Making of Steinway L1037 (2007)',
  'http://www.nytimes.com/2007/11/07/movies/07stein.html?_r=0',
  '8',
  '2',
  'walterbell',
  '9/30/2015 4:12']]

Here we split the posts into ask posts, show posts, and other posts so that we can compare the categories, and then count how many posts fall into each one.

In [3]:
# Bucket every post by the prefix of its (case-insensitive) title.
ask_posts = []
show_posts = []
other_posts = []

for post in hn:
    lowered = post[1].lower()
    if lowered.startswith('ask hn'):
        bucket = ask_posts
    elif lowered.startswith('show hn'):
        bucket = show_posts
    else:
        bucket = other_posts
    bucket.append(post)

# Report how many posts landed in each bucket.
print("Totals")
for label, group in (
    ("Ask posts: ", ask_posts),
    ("Show posts: ", show_posts),
    ("Other posts: ", other_posts),
):
    print(label, len(group))


Totals
Ask posts:  1744
Show posts:  1162
Other posts:  17194


We now create a simple function that loops over one category of posts and returns the average number of comments per post.

In [4]:
def avg_comments(posts):
    """Return the mean number of comments per post.

    Args:
        posts: Sequence of rows in which index 4 holds the comment count as a
            value that ``int()`` can parse.

    Returns:
        The average as a float, or 0.0 when ``posts`` is empty, so that a
        category with no posts does not raise ZeroDivisionError.
    """
    post_count = len(posts)
    if post_count == 0:
        return 0.0

    total_comments = sum(int(row[4]) for row in posts)
    return total_comments / post_count

# Show the per-category averages side by side for comparison.
print("Average amount of comments per post:")
for label, group in (
    ("Ask posts: ", ask_posts),
    ("Show posts: ", show_posts),
    ("Other posts: ", other_posts),
):
    print(label, avg_comments(group))

Average amount of comments per post:
Ask posts:  14.038417431192661
Show posts:  10.31669535283993
Other posts:  26.8730371059672


The ask posts seem to generate more responses than the show posts. Interestingly enough, the other posts seem to generate roughly twice as many comments on average as ask and show posts (about 26.9 versus 14.0 and 10.3). I guess people respond better when you are not looking for a reaction.

In [38]:
import datetime as dt


def find_avg_post_times(posts):
    """Rank the hours of the day by average comments per post.

    Each row's index 6 is parsed as a ``month/day/year hour:minute`` timestamp
    and index 4 as an integer comment count. Posts are grouped by the
    zero-padded hour string of their timestamp.

    Returns:
        A list of ``[average_comments, hour]`` pairs sorted in descending
        order (highest average first; equal averages fall back to comparing
        the hour strings).
    """
    # Phase 1: pull out just the two fields we need, converting the counts.
    stamped_counts = [[row[6], int(row[4])] for row in posts]

    # Phase 2: accumulate the number of posts and total comments per hour.
    template = "%m/%d/%Y %H:%M"
    counts_by_hour = {}
    comments_by_hour = {}
    for timestamp, comments in stamped_counts:
        hour = dt.datetime.strptime(timestamp, template).strftime('%H')
        counts_by_hour[hour] = counts_by_hour.get(hour, 0) + 1
        comments_by_hour[hour] = comments_by_hour.get(hour, 0) + comments

    # Phase 3: turn the totals into averages and order them best-first.
    avg_by_hour = [
        [comments_by_hour[hour] / post_count, hour]
        for hour, post_count in counts_by_hour.items()
    ]
    avg_by_hour.sort(reverse=True)
    return avg_by_hour

# Build the hour-by-hour ranking for each category and for all posts together.
ask_avg_times, show_avg_times, other_avg_times, total_avg_times = (
    find_avg_post_times(group)
    for group in (ask_posts, show_posts, other_posts, hn)
)

In [39]:
def print_top_hours(heading, avg_times, limit=5):
    """Print a heading followed by the first ``limit`` hourly averages.

    Args:
        heading: Line printed above the entries.
        avg_times: List of ``[average_comments, hour]`` pairs, already ordered
            with the highest average first.
        limit: Maximum number of entries to print.
    """
    print(heading)
    for average, hour in avg_times[:limit]:
        print("{}:00: {:.2f} average comments per post.".format(hour, average))
    # print("\n") emits two newline characters, leaving a gap between sections.
    print("\n")


# One call per category replaces four copies of the same print loop.
print_top_hours("Top 5 Hours for Ask Posts Comments", ask_avg_times)
print_top_hours("Top 5 Hours for Show Posts Comments", show_avg_times)
print_top_hours("Top 5 Hours for Other Posts Comments", other_avg_times)
print_top_hours("Top 5 Hours for HN Posts Comments", total_avg_times)



Top 5 Hours for Ask Posts Comments
15:00: 38.59 average comments per post.
02:00: 23.81 average comments per post.
20:00: 21.52 average comments per post.
16:00: 16.80 average comments per post.
21:00: 16.01 average comments per post.


Top 5 Hours for Show Posts Comments
18:00: 15.77 average comments per post.
00:00: 15.71 average comments per post.
14:00: 13.44 average comments per post.
23:00: 12.42 average comments per post.
22:00: 12.39 average comments per post.


Top 5 Hours for Other Posts Comments
14:00: 32.33 average comments per post.
13:00: 30.90 average comments per post.
12:00: 30.35 average comments per post.
11:00: 29.59 average comments per post.
15:00: 29.52 average comments per post.


Top 5 Hours for HN Posts Comments
14:00: 29.14 average comments per post.
15:00: 29.02 average comments per post.
13:00: 27.73 average comments per post.
12:00: 27.47 average comments per post.
11:00: 27.12 average comments per post.


