# Analysis of Ask HN and Show HN Submissions to Hacker News

Hacker News is a popular website, particularly in technology and start-up communities. This project will analyze user submitted posts to Hacker News that either ask the HN community a specific question or share an interesting project or topic. 

Project completed in Python: Intermediate course in the Dataquest Data Engineering Track.

In [45]:
from csv import reader
opened_file = open("hacker_news.csv")
read_file = reader(opened_file)
hn = list(read_file)
print(hn[:5])


[['id', 'title', 'url', 'num_points', 'num_comments', 'author', 'created_at'], ['12224879', 'Interactive Dynamic Video', 'http://www.interactivedynamicvideo.com/', '386', '52', 'ne0phyte', '8/4/2016 11:52'], ['10975351', 'How to Use Open Source and Shut the Fuck Up at the Same Time', 'http://hueniverse.com/2016/01/26/how-to-use-open-source-and-shut-the-fuck-up-at-the-same-time/', '39', '10', 'josep2', '1/26/2016 19:30'], ['11964716', "Florida DJs May Face Felony for April Fools' Water Joke", 'http://www.thewire.com/entertainment/2013/04/florida-djs-april-fools-water-joke/63798/', '2', '1', 'vezycash', '6/23/2016 22:20'], ['11919867', 'Technology ventures: From Idea to Enterprise', 'https://www.amazon.com/Technology-Ventures-Enterprise-Thomas-Byers/dp/0073523429', '3', '1', 'hswarna', '6/17/2016 0:01']]


In [46]:
print(hn[:5])


[['id', 'title', 'url', 'num_points', 'num_comments', 'author', 'created_at'], ['12224879', 'Interactive Dynamic Video', 'http://www.interactivedynamicvideo.com/', '386', '52', 'ne0phyte', '8/4/2016 11:52'], ['10975351', 'How to Use Open Source and Shut the Fuck Up at the Same Time', 'http://hueniverse.com/2016/01/26/how-to-use-open-source-and-shut-the-fuck-up-at-the-same-time/', '39', '10', 'josep2', '1/26/2016 19:30'], ['11964716', "Florida DJs May Face Felony for April Fools' Water Joke", 'http://www.thewire.com/entertainment/2013/04/florida-djs-april-fools-water-joke/63798/', '2', '1', 'vezycash', '6/23/2016 22:20'], ['11919867', 'Technology ventures: From Idea to Enterprise', 'https://www.amazon.com/Technology-Ventures-Enterprise-Thomas-Byers/dp/0073523429', '3', '1', 'hswarna', '6/17/2016 0:01']]


In [47]:
ask_posts = []
show_posts = []
other_posts = []

for row in hn:
    title = row[1]
    if title.lower().startswith('ask hn'):
        ask_posts.append(row)
    elif title.lower().startswith('show hn'):
        show_posts.append(row)
    else:
        other_posts.append(row)
print("Number of Ask HN posts:", len(ask_posts))
print("Number of Show HN posts:", len(show_posts))
print("Number of all other posts:", len(other_posts))


Number of Ask HN posts: 1744
Number of Show HN posts: 1162
Number of all other posts: 17195


In [48]:
total_ask_comments = 0
for row in ask_posts:
    num_comments = int(row[4])
    total_ask_comments += num_comments
avg_ask_comments = total_ask_comments / len(ask_posts)
print("Ask HN - Average Number of Comments:", avg_ask_comments)

total_show_comments = 0
for row in show_posts:
    num_comments = int(row[4])
    total_show_comments += num_comments
avg_show_comments = total_show_comments / len(show_posts)
print("Show HN - Average Number of Comments:", avg_show_comments)

Ask HN - Average Number of Comments: 14.038417431192661
Show HN - Average Number of Comments: 10.31669535283993


Based on the analysis above, "Ask HN" posts receive on average four more comments than "Show HN" posts.

In [49]:
import datetime as dt

result_list = []
for row in ask_posts:
    created_at = row[6]
    num_comments = int(row[4])
    result_list.append([created_at, num_comments])

print(result_list[:5])

counts_by_hour = {}
comments_by_hour = {}

for row in result_list:
        date = row[0]
        comments = row[1]
        dt_object = dt.datetime.strptime(date, "%m/%d/%Y %H:%M")
        hour = dt_object.hour
        
        if hour not in counts_by_hour:
            counts_by_hour[hour] = 1
            comments_by_hour[hour] = comments
            
        else:
            counts_by_hour[hour] += 1
            comments_by_hour[hour] += comments
comments_by_hour           

[['8/16/2016 9:55', 6], ['11/22/2015 13:43', 29], ['5/2/2016 10:14', 1], ['8/2/2016 14:20', 3], ['10/15/2015 16:38', 17]]


{0: 447,
 1: 683,
 2: 1381,
 3: 421,
 4: 337,
 5: 464,
 6: 397,
 7: 267,
 8: 492,
 9: 251,
 10: 793,
 11: 641,
 12: 687,
 13: 1253,
 14: 1416,
 15: 4477,
 16: 1814,
 17: 1146,
 18: 1439,
 19: 1188,
 20: 1722,
 21: 1745,
 22: 479,
 23: 543}

In [50]:
avg_by_hour = []

for hour in comments_by_hour:
    avg_by_hour.append([hour, comments_by_hour[hour] / counts_by_hour[hour]])

avg_by_hour

[[0, 8.127272727272727],
 [1, 11.383333333333333],
 [2, 23.810344827586206],
 [3, 7.796296296296297],
 [4, 7.170212765957447],
 [5, 10.08695652173913],
 [6, 9.022727272727273],
 [7, 7.852941176470588],
 [8, 10.25],
 [9, 5.5777777777777775],
 [10, 13.440677966101696],
 [11, 11.051724137931034],
 [12, 9.41095890410959],
 [13, 14.741176470588234],
 [14, 13.233644859813085],
 [15, 38.5948275862069],
 [16, 16.796296296296298],
 [17, 11.46],
 [18, 13.20183486238532],
 [19, 10.8],
 [20, 21.525],
 [21, 16.009174311926607],
 [22, 6.746478873239437],
 [23, 7.985294117647059]]

In [62]:
swap_avg_by_hour = []

for row in avg_by_hour:
    swap_avg_by_hour.append([row[1], row[0]])
    
print(swap_avg_by_hour)
print('\n')
sorted_swap = sorted(swap_avg_by_hour, reverse=True)

print("Top 5 Hours for 'Ask HN' Comments")
for row in sorted_swap[:5]:
    avg = round(row[0], 2)
    hour = str(row[1])
    time_form = "%H"
    
    convert_to_cst = dt.datetime.strptime(hour, time_form)
    cst = convert_to_cst - dt.timedelta(hours=1)
    cst = cst.strftime("%H:%M")     
   
   
    print("{0}: {1} average comments per post".format(cst, avg))

[[8.127272727272727, 0], [11.383333333333333, 1], [23.810344827586206, 2], [7.796296296296297, 3], [7.170212765957447, 4], [10.08695652173913, 5], [9.022727272727273, 6], [7.852941176470588, 7], [10.25, 8], [5.5777777777777775, 9], [13.440677966101696, 10], [11.051724137931034, 11], [9.41095890410959, 12], [14.741176470588234, 13], [13.233644859813085, 14], [38.5948275862069, 15], [16.796296296296298, 16], [11.46, 17], [13.20183486238532, 18], [10.8, 19], [21.525, 20], [16.009174311926607, 21], [6.746478873239437, 22], [7.985294117647059, 23]]


Top 5 Hours for 'Ask HN' Comments
14:00: 38.59 average comments per post
01:00: 23.81 average comments per post
19:00: 21.52 average comments per post
15:00: 16.8 average comments per post
20:00: 16.01 average comments per post
