Skip to content

Commit

Permalink
Add Mission 267 Solutions
Browse files Browse the repository at this point in the history
  • Loading branch information
spiside committed Jan 4, 2018
1 parent 178fee9 commit 4fcdc73
Showing 1 changed file with 123 additions and 0 deletions.
123 changes: 123 additions & 0 deletions Mission267Solutions.ipynb
@@ -0,0 +1,123 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[('new', 186), ('google', 168), ('bitcoin', 102), ('open', 93), ('programming', 91), ('web', 89), ('data', 86), ('video', 80), ('python', 76), ('code', 73), ('facebook', 72), ('released', 72), ('using', 71), ('javascript', 66), ('2013', 66), ('free', 65), ('source', 65), ('game', 64), ('internet', 63), ('microsoft', 60), ('c', 60), ('linux', 59), ('app', 58), ('pdf', 56), ('language', 55), ('work', 55), ('software', 53), ('2014', 53), ('startup', 52), ('apple', 51), ('make', 51), ('use', 51), ('time', 49), ('yc', 49), ('security', 49), ('github', 46), ('nsa', 46), ('windows', 45), ('like', 42), ('world', 42), ('way', 42), ('computer', 41), ('heartbleed', 41), ('1', 41), ('project', 41), ('design', 38), ('users', 38), ('dont', 38), ('ios', 38), ('git', 38), ('vs', 37), ('developer', 37), ('os', 37), ('life', 37), ('twitter', 37), ('ceo', 37), ('day', 36), ('big', 36), ('online', 35), ('android', 35), ('years', 34), ('simple', 34), ('court', 34), ('mt', 33), ('apps', 33), ('says', 33), ('api', 33), ('browser', 33), ('guide', 33), ('learning', 33), ('mozilla', 32), ('site', 32), ('gox', 32), ('firefox', 32), ('engine', 32), ('problem', 32), ('server', 32), ('fast', 32), ('amazon', 31), ('year', 31), ('introducing', 31), ('support', 30), ('better', 30), ('stop', 30), ('million', 30), ('text', 30), ('people', 30), ('built', 30), ('does', 29), ('development', 29), ('tech', 29), ('3', 29), ('just', 28), ('inside', 28), ('did', 28), ('library', 28), ('money', 28), ('website', 28), ('chrome', 28), ('2048', 28)]\n"
]
}
],
"source": [
"import csv\n",
"import io\n",
"import json\n",
"import string\n",
"from datetime import datetime\n",
"\n",
"from pipeline import build_csv, Pipeline\n",
"from stop_words import stop_words\n",
"\n",
"pipeline = Pipeline()\n",
"\n",
"@pipeline.task()\n",
"def file_to_json():\n",
"    \"\"\"Load hn_stories_2014.json and return the value under its 'stories' key.\"\"\"\n",
"    # JSON interchange text is UTF-8; do not depend on the platform default.\n",
"    with open('hn_stories_2014.json', 'r', encoding='utf-8') as f:\n",
"        data = json.load(f)\n",
"    return data['stories']\n",
"\n",
"@pipeline.task(depends_on=file_to_json)\n",
"def filter_stories(stories):\n",
"    \"\"\"Lazily keep stories with >50 points, >1 comment and a non-'Ask HN' title.\"\"\"\n",
"    def is_popular(story):\n",
"        return (\n",
"            story['points'] > 50\n",
"            and story['num_comments'] > 1\n",
"            and not story['title'].startswith('Ask HN')\n",
"        )\n",
"\n",
"    # Generator expression: stories are filtered as the next task consumes them.\n",
"    return (story for story in stories if is_popular(story))\n",
"\n",
"@pipeline.task(depends_on=filter_stories)\n",
"def json_to_csv(stories):\n",
"    \"\"\"Pass selected story fields, a header and a StringIO buffer to build_csv.\"\"\"\n",
"    lines = [\n",
"        (\n",
"            story['objectID'],\n",
"            # 'created_at' is parsed into a datetime using the format below.\n",
"            datetime.strptime(story['created_at'], \"%Y-%m-%dT%H:%M:%SZ\"),\n",
"            story['url'],\n",
"            story['points'],\n",
"            story['title'],\n",
"        )\n",
"        for story in stories\n",
"    ]\n",
"    return build_csv(\n",
"        lines,\n",
"        header=['objectID', 'created_at', 'url', 'points', 'title'],\n",
"        file=io.StringIO(),\n",
"    )\n",
"\n",
"@pipeline.task(depends_on=json_to_csv)\n",
"def extract_titles(csv_file):\n",
"    \"\"\"Return a generator over the 'title' column of the rows in csv_file.\"\"\"\n",
"    reader = csv.reader(csv_file)\n",
"    # The first row is the header; use it to locate the title column.\n",
"    header = next(reader)\n",
"    idx = header.index('title')\n",
"\n",
"    return (line[idx] for line in reader)\n",
"\n",
"@pipeline.task(depends_on=extract_titles)\n",
"def clean_title(titles):\n",
"    \"\"\"Yield each title lowercased with string.punctuation characters removed.\"\"\"\n",
"    # Build the deletion table once; translate() strips in a single pass per title.\n",
"    strip_punctuation = str.maketrans('', '', string.punctuation)\n",
"    for title in titles:\n",
"        yield title.lower().translate(strip_punctuation)\n",
"\n",
"@pipeline.task(depends_on=clean_title)\n",
"def build_keyword_dictionary(titles):\n",
"    \"\"\"Return a dict mapping each non-stop word in titles to its occurrence count.\"\"\"\n",
"    word_freq = {}\n",
"    for title in titles:\n",
"        for word in title.split(' '):\n",
"            # Splitting on a single space yields '' for repeated spaces; skip those.\n",
"            if word and word not in stop_words:\n",
"                # Start from 0 so the first occurrence counts once, not twice.\n",
"                word_freq[word] = word_freq.get(word, 0) + 1\n",
"    return word_freq\n",
"\n",
"@pipeline.task(depends_on=build_keyword_dictionary)\n",
"def top_keywords(word_freq):\n",
"    \"\"\"Return up to 100 (word, count) pairs ordered by count, highest first.\"\"\"\n",
"    # sorted() is stable, so words with equal counts keep the dict's order.\n",
"    ranked = sorted(word_freq.items(), key=lambda item: item[1], reverse=True)\n",
"    return ranked[:100]\n",
"\n",
"ran = pipeline.run()\n",
"print(ran[top_keywords])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
}
},
"nbformat": 4,
"nbformat_minor": 1
}

0 comments on commit 4fcdc73

Please sign in to comment.