In [66]:
import pickle

import psycopg2

import pandas as pd
import pandas.io.sql as sqlio

from gensim.similarities.docsim import Similarity
from gensim.corpora import MmCorpus
from gensim.matutils import corpus2csc

from joblib import Parallel, delayed, cpu_count
import funcy as fp

In [103]:
def save_pickle(file_name, object_to_pickle):
    with open(file_name, 'wb') as f:
        pickle.dump(object_to_pickle, f)
        
def load_pickle(file_name):
    with open(file_name, 'rb') as f:
        unpickled_object = pickle.load(f)
    return unpickled_object

In [62]:
def _series_chunks(s, n_jobs):
    if n_jobs < 0:
        # so, have n chunks if we are using all n cores/cpus = cpu_count() + 1 + n_jobs
        n_jobs = cpu_count() + 1 + n_jobs
    n = len(s)
    n_chunks = int(n / n_jobs)
    return (s.iloc[ilocs] for ilocs in fp.chunks(n_chunks, range(n)))

def series_pmap(s, f, n_jobs=-1):
    if n_jobs == 0:
        return s.map(f)
    return pd.concat(Parallel(n_jobs=n_jobs)(delayed(series_pmap)(sub_series, f, n_jobs=0) \
                                                 for sub_series in _series_chunks(s, n_jobs)))

In [28]:
conn = psycopg2.connect("host=localhost dbname=postgres user=postgres")
stories_df = sqlio.read_sql_query("SELECT id, score, author, title, url FROM bq_all WHERE all_text IS NOT NULL AND all_text != '' AND article_content IS NOT NULL AND article_content != '' ORDER BY score DESC, story_time DESC LIMIT 25000", conn, index_col='id')

CPU times: user 43.3 ms, sys: 15.6 ms, total: 58.9 ms
Wall time: 615 ms


In [83]:
tag_names = ['Python',
 'Mobile',
 'Design',
 'Security',
 'Blockchain',
 'AI/Machine Learning',
 'Google',
 'Microsoft',
 'Apple',
 'Facebook',
 'Amazon',
 'Startups',
 'Politics',
 'Databases',
 'Linux',
 'Data Science',
 'Science',
 'Math',
 'Javascript',
 'Web Dev',
 'DevOps',
 'Hardware/IoT',
 'AR/VR',
 'Games']

id2index_map = {story_id: index for index, story_id in enumerate(stories_df.index)}

In [99]:
def get_series_index(story_id):
    return id2index_map[story_id]

def get_sim_content_ids(story_id):
    series_index = get_series_index(story_id)
    vec = content_corpus[series_index]
    sims = content_indexer[vec]
    sim_stories = [stories_df.index[sim_index] for sim_index, sim_score in sims]
    return sim_stories[1:]

def get_sim_user_ids(story_id):
    series_index = get_series_index(story_id)
    vec = user_corpus[series_index]
    sims = user_indexer[vec]
    sim_stories = [stories_df.index[sim_index] for sim_index, sim_score in sims]
    return sim_stories[1:]

In [7]:
content_indexer = Similarity.load('lsi_300_indexer.model')
content_corpus = MmCorpus('lsi_300_corpus.mm')
content_csc = corpus2csc(content_corpus)

In [98]:
user_indexer = Similarity.load('commenters_indexer.model')
user_corpus = MmCorpus('commenters_corpus.mm')

In [71]:
tagger_model = load_pickle('tagger_model.pkl')
tag_results = tagger_model.predict(content_csc.T).todense()

In [75]:
tagged_stories_df = pd.DataFrame(tag_results, columns=tag_names)
tagged_stories_df["story_id"] = stories_df.index.values
tagged_stories_df = tagged_stories_df.set_index(keys=["story_id"], drop=False)
stories_df[tag_names] = tagged_stories_df[tag_names]

In [76]:
stories_df.head()

Unnamed: 0_level_0,score,author,title,url,Python,Mobile,Design,Security,Blockchain,AI/Machine Learning,...,Data Science,Science,Math,Javascript,Web Dev,DevOps,Hardware/IoT,AR/VR,Games,story_id
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
16582136,6015,Cogito,Stephen Hawking has died,http://www.bbc.com/news/uk-43396008,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,16582136
11116274,5771,epaga,A Message to Our Customers,http://www.apple.com/customer-letter/,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,11116274
13682022,4107,grey-area,"Reflecting on one very, very strange year at Uber",https://www.susanjfowler.com/blog/2017/2/19/re...,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,13682022
13718752,3238,tptacek,Cloudflare Reverse Proxies Are Dumping Uniniti...,https://bugs.chromium.org/p/project-zero/issue...,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,13718752
11966167,3125,dmmalam,UK votes to leave EU,http://www.bbc.co.uk/news/uk-politics-36615028,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,11966167


In [92]:
%%time 

stories_df["content_recs"] = series_pmap(stories_df["story_id"], get_sim_content_ids)

CPU times: user 485 ms, sys: 152 ms, total: 636 ms
Wall time: 8min 40s


In [97]:
stories_df["content_recs"].head()

id
16582136    [12238197, 16583822, 12062988, 10768391, 12994...
11116274    [11150107, 11116801, 11236416, 11133082, 11155...
13682022    [13747414, 13684439, 15524669, 13725529, 13784...
13718752    [10192273, 13766339, 12105188, 10257932, 12091...
11966167    [13985192, 11964880, 13856961, 11967478, 15568...
Name: content_recs, dtype: object

In [100]:
%%time 

stories_df["user_recs"] = series_pmap(stories_df["story_id"], get_sim_user_ids)

CPU times: user 574 ms, sys: 168 ms, total: 742 ms
Wall time: 9min 31s


In [101]:
stories_df["user_recs"].head()

id
16582136      [11771737, 4372563, 8504931, 14112748, 3719005]
11116274    [12772925, 15253659, 16476454, 12456569, 12180...
13682022      [4755470, 13902938, 7250505, 11377716, 8598652]
13718752    [16077873, 15948489, 6991590, 16216329, 15843957]
11966167     [15178970, 12657249, 6312100, 6859245, 14958329]
Name: user_recs, dtype: object

In [104]:
save_pickle('stories_df.pkl', stories_df)

In [105]:
stories_df.head()

Unnamed: 0_level_0,score,author,title,url,Python,Mobile,Design,Security,Blockchain,AI/Machine Learning,...,Math,Javascript,Web Dev,DevOps,Hardware/IoT,AR/VR,Games,story_id,content_recs,user_recs
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
16582136,6015,Cogito,Stephen Hawking has died,http://www.bbc.com/news/uk-43396008,0,0,0,0,0,0,...,0,0,0,0,0,0,0,16582136,"[12238197, 16583822, 12062988, 10768391, 12994...","[11771737, 4372563, 8504931, 14112748, 3719005]"
11116274,5771,epaga,A Message to Our Customers,http://www.apple.com/customer-letter/,0,1,0,0,0,0,...,0,0,0,0,0,0,0,11116274,"[11150107, 11116801, 11236416, 11133082, 11155...","[12772925, 15253659, 16476454, 12456569, 12180..."
13682022,4107,grey-area,"Reflecting on one very, very strange year at Uber",https://www.susanjfowler.com/blog/2017/2/19/re...,0,0,0,0,0,0,...,0,0,0,0,0,0,0,13682022,"[13747414, 13684439, 15524669, 13725529, 13784...","[4755470, 13902938, 7250505, 11377716, 8598652]"
13718752,3238,tptacek,Cloudflare Reverse Proxies Are Dumping Uniniti...,https://bugs.chromium.org/p/project-zero/issue...,0,0,0,0,0,0,...,0,0,0,0,0,0,0,13718752,"[10192273, 13766339, 12105188, 10257932, 12091...","[16077873, 15948489, 6991590, 16216329, 15843957]"
11966167,3125,dmmalam,UK votes to leave EU,http://www.bbc.co.uk/news/uk-politics-36615028,0,0,0,0,0,0,...,0,0,0,0,0,0,0,11966167,"[13985192, 11964880, 13856961, 11967478, 15568...","[15178970, 12657249, 6312100, 6859245, 14958329]"


In [106]:
summaries_df = sqlio.read_sql_query("SELECT id, article_summary FROM bq_all WHERE all_text IS NOT NULL AND all_text != '' AND article_content IS NOT NULL AND article_content != '' ORDER BY score DESC, story_time DESC LIMIT 25000", conn, index_col='id')

In [126]:
save_pickle('summaries_df.pkl', summaries_df)

In [112]:
pd.set_option('display.max_colwidth', 500)

In [None]:
Media playback is unsupported on your device Media caption

In [125]:
summaries_df.sample(500)

Unnamed: 0_level_0,article_summary
id,Unnamed: 1_level_1
12573173,"Society has a rich history of people seizing on social evolution as an excuse for bad manners.\nGood manners are mere mannerisms, the argument goes, which serve only to put barriers in the way of deeper connections.\nIt’s the looser ties, the ones that have to be created or re-created at each meeting, that are tough.\nWhen I skip big gatherings of strangers, I’m not just being a little rude to the individual people around me, I’m being uncivil in a larger sense.\nThat doesn’t have to mean we..."
8644175,"Their team is growing because of advancements they've made, and you want to recognize the work they've done with something.\nThe obvious answer is to put them in charge of the team they've built, especially as they're the de-facto leader of the team already.\nThose are the fun parts of any engineer's job, and indeed all engineers must exercise some degree of technical leadership.\nDeveloping a strategy for positioning your technical thought leaders as the technical thought leaders is empower..."
4095814,"The MacBook Pro got a widely anticipated Retina Display graphics update today, along with a new design that makes it almost as lightweight as a MacBook Air.\nNo updates to any 17-inch MacBook Pro models were announced.\nThe premium 15-inch model is even thinner than the 13-inch MacBook Pro, which comes in at 0.95 inches thick and 4.5 pounds.\nThe ""low-end"" 15-inch MacBook Pro is also 0.95 inches thick, weighing 5.6 pounds, with prices starting at $1,799.\nWhile the 13-inch MacBook Pro will o..."
16258263,"Riot.im is the powerful decentralized collaboration tool built on the Matrix protocol.\n“It gives us the freedom to expand our team, and continue improving Riot.im and Matrix in an open source environment.\nBuilt on the Matrix open protocol, Riot.im bridges users to any Matrix-bridged apps (such as IRC and Slack) and any Matrix-compliant clients.\nOur investment in Riot.im represents our commitment to open source projects and the communities that drive them.\nBecause we believe thoughtful, w..."
5065089,"If I was starting out with iOS development today these are the things I would hope somebody would tell me.\nAlthough it's valuable to understand memory management, ARC makes life a whole lot easier.\nEmbrace Open SourceThere are so many amazing libraries and components available for iOS development.\nThere are many examples of this, Tweet Sheets in iOS 5, SKStoreProductViewController & UIActivityViewController in iOS 6, there are many examples.\nHopefully it will be useful to at least a few ..."
9759241,"Next, the gag order.\nReason has published the gag order.\nThe government hasn't unsealed its application for a gag order.\nThat is, it will merely say ""these people said these things, we want their information, therefore, give us a gag order.""\nReason also published AUSA Velamoor's letter asking that the gag order be lifted, and the order lifting it."
13935590,"This article is all about colour switching on the Commodore 64.\nThe co-author of C64 Dragon Breed, Ashley Routledge read the story and took the time to comment with his memories!\nWe're going to generate every unique pair of colours possible on the Commodore 64 in one glorious epilepsy-bomb.\nThe EndAnd that is the story of how I read about secret C64 colours in 1991, and then twenty-six years later finally got to see them.\nCOMMODORE FORMAT issue 5, Dragon Breed review: ""Extra colours are ..."
11521079,"Bill Campbell — who garnered the name “The Coach” for the sage advice and counsel he gave numerous tech leaders from Apple’s Steve Jobs to Google’s Larry Page to Amazon’s Jeff Bezos — has died.\nAnd his family sent this statement: “Bill Campbell passed peacefully in his sleep after a long battle with cancer.\nHe was also an actual football coach at Columbia University way back when, which got him his famous nickname.\n“Steve would say, ‘If you’re helping them you’re hurting me.’ He would yel..."
12476597,"Dynamic Programming: The NameRichard Bellman, Eye of the Hurricane: an autobiography, 1984.\nAn interesting question is, Where did the name, dynamic programming, come from?\nHe was Secretary of Defense, and he actually had a pathological fear and hatred of the word, research.\nHis face would suffuse, he would turn red, and he would get violent if people used the term, research, in his presence.\nBut planning, is not a good word for various reasons."
14409269,"Little Things I Like to Do with GitWritten by Harry Roberts on CSS Wizardry.\nThankfully we can use Git to give us a head start:$ git log --oneline --no-merges <last tag>..HEADN.B.\n$ git log --oneline --no-merges HEAD..<remote>/<branch>N.B.\nFor example, let’s see what someone has been doing in a particular feature branch whilst you were on holiday:$ git checkout feature/fonts $ git fetch $ git log --oneline --no-merges ..origin/feature/fontsI have this aliased to $ git upstream .\nIn order..."


In [None]:
null
This repository has been archived by the owner.\nIt is now read-only.
You can’t perform that action at this time.\nYou signed in with another tab or window.\nReload to refresh your session.\nYou signed out in another tab or window.\nReload to refresh your session.
Tweet with a locationYou can add location information to your Tweets, such as your city or precise location, from the web and via third-party applications.\nYou always have the option to delete your Tweet location history.
Unfortunately, your browser does not support all of the required features.\nYou can still view a video.
The interactive transcript could not be loaded.\nRating is available when the video has been rented.\nThis feature is not available right now.\nPlease try again later.
Chrome Browser RecommendedThis site may not work correctly in other browsers.\nIf you would like to continue anyway, please click the button below:ContinueOr, check out the downloadable app
This Account has been suspended.\nContact your hosting provider for more information.
The page has moved to: this page
Your report has been saved.\nThanks for your input!
Media playback is unsupported on your device Media caption
Sorry, we cannot display this file.\nSorry, this file is invalid so it cannot be displayed.\nViewer requires iframe

In [156]:
import requests

def fetch_comments(story_id):
    story_json = requests.get('https://hacker-news.firebaseio.com/v0/item/{}.json'.format(story_id)).json()
    comments_ids = story_json["kids"]
    comments = []
    for each_comment_id in comments_ids:
        comment_json = requests.get('https://hacker-news.firebaseio.com/v0/item/{}.json'.format(each_comment_id)).json()
        try:
            commenter = comment_json["by"]
        except:
            commenter = "unknown"
        try:
            comment_text = comment_json["text"]
            comments.append((commenter, comment_text)) 
        except:
            pass
    return comments

In [152]:
comments_df = stories_df[["story_id"]]
comments_df

Unnamed: 0_level_0,story_id
id,Unnamed: 1_level_1
16582136,16582136
11116274,11116274
13682022,13682022
13718752,13718752
11966167,11966167
8532261,8532261
13713480,13713480
15800676,15800676
16319505,16319505
7373566,7373566


In [157]:
%%time
comments_df["comments"] = series_pmap(comments_df["story_id"], fetch_comments)

JoblibSSLError: JoblibSSLError
___________________________________________________________________________
Multiprocessing exception:
...........................................................................
/Users/jasminetan/anaconda2/envs/py36/lib/python3.6/runpy.py in _run_module_as_main(mod_name='ipykernel_launcher', alter_argv=1)
    188         sys.exit(msg)
    189     main_globals = sys.modules["__main__"].__dict__
    190     if alter_argv:
    191         sys.argv[0] = mod_spec.origin
    192     return _run_code(code, main_globals, None,
--> 193                      "__main__", mod_spec)
        mod_spec = ModuleSpec(name='ipykernel_launcher', loader=<_f...b/python3.6/site-packages/ipykernel_launcher.py')
    194 
    195 def run_module(mod_name, init_globals=None,
    196                run_name=None, alter_sys=False):
    197     """Execute a module's code without importing it

...........................................................................
/Users/jasminetan/anaconda2/envs/py36/lib/python3.6/runpy.py in _run_code(code=<code object <module> at 0x104a28780, file "/Use...3.6/site-packages/ipykernel_launcher.py", line 5>, run_globals={'__annotations__': {}, '__builtins__': <module 'builtins' (built-in)>, '__cached__': '/Users/jasminetan/anaconda2/envs/py36/lib/python...ges/__pycache__/ipykernel_launcher.cpython-36.pyc', '__doc__': 'Entry point for launching an IPython kernel.\n\nTh...orts until\nafter removing the cwd from sys.path.\n', '__file__': '/Users/jasminetan/anaconda2/envs/py36/lib/python3.6/site-packages/ipykernel_launcher.py', '__loader__': <_frozen_importlib_external.SourceFileLoader object>, '__name__': '__main__', '__package__': '', '__spec__': ModuleSpec(name='ipykernel_launcher', loader=<_f...b/python3.6/site-packages/ipykernel_launcher.py'), 'app': <module 'ipykernel.kernelapp' from '/Users/jasmi.../python3.6/site-packages/ipykernel/kernelapp.py'>, ...}, init_globals=None, mod_name='__main__', mod_spec=ModuleSpec(name='ipykernel_launcher', loader=<_f...b/python3.6/site-packages/ipykernel_launcher.py'), pkg_name='', script_name=None)
     80                        __cached__ = cached,
     81                        __doc__ = None,
     82                        __loader__ = loader,
     83                        __package__ = pkg_name,
     84                        __spec__ = mod_spec)
---> 85     exec(code, run_globals)
        code = <code object <module> at 0x104a28780, file "/Use...3.6/site-packages/ipykernel_launcher.py", line 5>
        run_globals = {'__annotations__': {}, '__builtins__': <module 'builtins' (built-in)>, '__cached__': '/Users/jasminetan/anaconda2/envs/py36/lib/python...ges/__pycache__/ipykernel_launcher.cpython-36.pyc', '__doc__': 'Entry point for launching an IPython kernel.\n\nTh...orts until\nafter removing the cwd from sys.path.\n', '__file__': '/Users/jasminetan/anaconda2/envs/py36/lib/python3.6/site-packages/ipykernel_launcher.py', '__loader__': <_frozen_importlib_external.SourceFileLoader object>, '__name__': '__main__', '__package__': '', '__spec__': ModuleSpec(name='ipykernel_launcher', loader=<_f...b/python3.6/site-packages/ipykernel_launcher.py'), 'app': <module 'ipykernel.kernelapp' from '/Users/jasmi.../python3.6/site-packages/ipykernel/kernelapp.py'>, ...}
     86     return run_globals
     87 
     88 def _run_module_code(code, init_globals=None,
     89                     mod_name=None, mod_spec=None,

...........................................................................
/Users/jasminetan/anaconda2/envs/py36/lib/python3.6/site-packages/ipykernel_launcher.py in <module>()
     11     # This is added back by InteractiveShellApp.init_path()
     12     if sys.path[0] == '':
     13         del sys.path[0]
     14 
     15     from ipykernel import kernelapp as app
---> 16     app.launch_new_instance()

...........................................................................
/Users/jasminetan/anaconda2/envs/py36/lib/python3.6/site-packages/traitlets/config/application.py in launch_instance(cls=<class 'ipykernel.kernelapp.IPKernelApp'>, argv=None, **kwargs={})
    653 
    654         If a global instance already exists, this reinitializes and starts it
    655         """
    656         app = cls.instance(**kwargs)
    657         app.initialize(argv)
--> 658         app.start()
        app.start = <bound method IPKernelApp.start of <ipykernel.kernelapp.IPKernelApp object>>
    659 
    660 #-----------------------------------------------------------------------------
    661 # utility functions, for convenience
    662 #-----------------------------------------------------------------------------

...........................................................................
/Users/jasminetan/anaconda2/envs/py36/lib/python3.6/site-packages/ipykernel/kernelapp.py in start(self=<ipykernel.kernelapp.IPKernelApp object>)
    481         if self.poller is not None:
    482             self.poller.start()
    483         self.kernel.start()
    484         self.io_loop = ioloop.IOLoop.current()
    485         try:
--> 486             self.io_loop.start()
        self.io_loop.start = <bound method BaseAsyncIOLoop.start of <tornado.platform.asyncio.AsyncIOMainLoop object>>
    487         except KeyboardInterrupt:
    488             pass
    489 
    490 launch_new_instance = IPKernelApp.launch_instance

...........................................................................
/Users/jasminetan/anaconda2/envs/py36/lib/python3.6/site-packages/tornado/platform/asyncio.py in start(self=<tornado.platform.asyncio.AsyncIOMainLoop object>)
    122         except (RuntimeError, AssertionError):
    123             old_loop = None
    124         try:
    125             self._setup_logging()
    126             asyncio.set_event_loop(self.asyncio_loop)
--> 127             self.asyncio_loop.run_forever()
        self.asyncio_loop.run_forever = <bound method BaseEventLoop.run_forever of <_Uni...EventLoop running=True closed=False debug=False>>
    128         finally:
    129             asyncio.set_event_loop(old_loop)
    130 
    131     def stop(self):

...........................................................................
/Users/jasminetan/anaconda2/envs/py36/lib/python3.6/asyncio/base_events.py in run_forever(self=<_UnixSelectorEventLoop running=True closed=False debug=False>)
    417             sys.set_asyncgen_hooks(firstiter=self._asyncgen_firstiter_hook,
    418                                    finalizer=self._asyncgen_finalizer_hook)
    419         try:
    420             events._set_running_loop(self)
    421             while True:
--> 422                 self._run_once()
        self._run_once = <bound method BaseEventLoop._run_once of <_UnixS...EventLoop running=True closed=False debug=False>>
    423                 if self._stopping:
    424                     break
    425         finally:
    426             self._stopping = False

...........................................................................
/Users/jasminetan/anaconda2/envs/py36/lib/python3.6/asyncio/base_events.py in _run_once(self=<_UnixSelectorEventLoop running=True closed=False debug=False>)
   1427                         logger.warning('Executing %s took %.3f seconds',
   1428                                        _format_handle(handle), dt)
   1429                 finally:
   1430                     self._current_handle = None
   1431             else:
-> 1432                 handle._run()
        handle._run = <bound method Handle._run of <Handle BaseAsyncIOLoop._handle_events(15, 1)>>
   1433         handle = None  # Needed to break cycles when an exception occurs.
   1434 
   1435     def _set_coroutine_wrapper(self, enabled):
   1436         try:

...........................................................................
/Users/jasminetan/anaconda2/envs/py36/lib/python3.6/asyncio/events.py in _run(self=<Handle BaseAsyncIOLoop._handle_events(15, 1)>)
    140             self._callback = None
    141             self._args = None
    142 
    143     def _run(self):
    144         try:
--> 145             self._callback(*self._args)
        self._callback = <bound method BaseAsyncIOLoop._handle_events of <tornado.platform.asyncio.AsyncIOMainLoop object>>
        self._args = (15, 1)
    146         except Exception as exc:
    147             cb = _format_callback_source(self._callback, self._args)
    148             msg = 'Exception in callback {}'.format(cb)
    149             context = {

...........................................................................
/Users/jasminetan/anaconda2/envs/py36/lib/python3.6/site-packages/tornado/platform/asyncio.py in _handle_events(self=<tornado.platform.asyncio.AsyncIOMainLoop object>, fd=15, events=1)
    112             self.writers.remove(fd)
    113         del self.handlers[fd]
    114 
    115     def _handle_events(self, fd, events):
    116         fileobj, handler_func = self.handlers[fd]
--> 117         handler_func(fileobj, events)
        handler_func = <function wrap.<locals>.null_wrapper>
        fileobj = <zmq.sugar.socket.Socket object>
        events = 1
    118 
    119     def start(self):
    120         try:
    121             old_loop = asyncio.get_event_loop()

...........................................................................
/Users/jasminetan/anaconda2/envs/py36/lib/python3.6/site-packages/tornado/stack_context.py in null_wrapper(*args=(<zmq.sugar.socket.Socket object>, 1), **kwargs={})
    271         # Fast path when there are no active contexts.
    272         def null_wrapper(*args, **kwargs):
    273             try:
    274                 current_state = _state.contexts
    275                 _state.contexts = cap_contexts[0]
--> 276                 return fn(*args, **kwargs)
        args = (<zmq.sugar.socket.Socket object>, 1)
        kwargs = {}
    277             finally:
    278                 _state.contexts = current_state
    279         null_wrapper._wrapped = True
    280         return null_wrapper

...........................................................................
/Users/jasminetan/anaconda2/envs/py36/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py in _handle_events(self=<zmq.eventloop.zmqstream.ZMQStream object>, fd=<zmq.sugar.socket.Socket object>, events=1)
    445             return
    446         zmq_events = self.socket.EVENTS
    447         try:
    448             # dispatch events:
    449             if zmq_events & zmq.POLLIN and self.receiving():
--> 450                 self._handle_recv()
        self._handle_recv = <bound method ZMQStream._handle_recv of <zmq.eventloop.zmqstream.ZMQStream object>>
    451                 if not self.socket:
    452                     return
    453             if zmq_events & zmq.POLLOUT and self.sending():
    454                 self._handle_send()

...........................................................................
/Users/jasminetan/anaconda2/envs/py36/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py in _handle_recv(self=<zmq.eventloop.zmqstream.ZMQStream object>)
    475             else:
    476                 raise
    477         else:
    478             if self._recv_callback:
    479                 callback = self._recv_callback
--> 480                 self._run_callback(callback, msg)
        self._run_callback = <bound method ZMQStream._run_callback of <zmq.eventloop.zmqstream.ZMQStream object>>
        callback = <function wrap.<locals>.null_wrapper>
        msg = [<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>]
    481         
    482 
    483     def _handle_send(self):
    484         """Handle a send event."""

...........................................................................
/Users/jasminetan/anaconda2/envs/py36/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py in _run_callback(self=<zmq.eventloop.zmqstream.ZMQStream object>, callback=<function wrap.<locals>.null_wrapper>, *args=([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],), **kwargs={})
    427         close our socket."""
    428         try:
    429             # Use a NullContext to ensure that all StackContexts are run
    430             # inside our blanket exception handler rather than outside.
    431             with stack_context.NullContext():
--> 432                 callback(*args, **kwargs)
        callback = <function wrap.<locals>.null_wrapper>
        args = ([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],)
        kwargs = {}
    433         except:
    434             gen_log.error("Uncaught exception in ZMQStream callback",
    435                           exc_info=True)
    436             # Re-raise the exception so that IOLoop.handle_callback_exception

...........................................................................
/Users/jasminetan/anaconda2/envs/py36/lib/python3.6/site-packages/tornado/stack_context.py in null_wrapper(*args=([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],), **kwargs={})
    271         # Fast path when there are no active contexts.
    272         def null_wrapper(*args, **kwargs):
    273             try:
    274                 current_state = _state.contexts
    275                 _state.contexts = cap_contexts[0]
--> 276                 return fn(*args, **kwargs)
        args = ([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],)
        kwargs = {}
    277             finally:
    278                 _state.contexts = current_state
    279         null_wrapper._wrapped = True
    280         return null_wrapper

...........................................................................
/Users/jasminetan/anaconda2/envs/py36/lib/python3.6/site-packages/ipykernel/kernelbase.py in dispatcher(msg=[<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>])
    278         if self.control_stream:
    279             self.control_stream.on_recv(self.dispatch_control, copy=False)
    280 
    281         def make_dispatcher(stream):
    282             def dispatcher(msg):
--> 283                 return self.dispatch_shell(stream, msg)
        msg = [<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>]
    284             return dispatcher
    285 
    286         for s in self.shell_streams:
    287             s.on_recv(make_dispatcher(s), copy=False)

...........................................................................
/Users/jasminetan/anaconda2/envs/py36/lib/python3.6/site-packages/ipykernel/kernelbase.py in dispatch_shell(self=<ipykernel.ipkernel.IPythonKernel object>, stream=<zmq.eventloop.zmqstream.ZMQStream object>, msg={'buffers': [], 'content': {'allow_stdin': True, 'code': '%%time\ncomments_df["comments"] = series_pmap(comments_df["story_id"], fetch_comments)', 'silent': False, 'stop_on_error': True, 'store_history': True, 'user_expressions': {}}, 'header': {'date': datetime.datetime(2018, 5, 17, 15, 32, 38, 144562, tzinfo=tzutc()), 'msg_id': '342536b5c5a48fb843892920125bbd84', 'msg_type': 'execute_request', 'session': '0abad2c076419c6978d4dea13169edeb', 'username': '', 'version': '5.2'}, 'metadata': {}, 'msg_id': '342536b5c5a48fb843892920125bbd84', 'msg_type': 'execute_request', 'parent_header': {}})
    228             self.log.warn("Unknown message type: %r", msg_type)
    229         else:
    230             self.log.debug("%s: %s", msg_type, msg)
    231             self.pre_handler_hook()
    232             try:
--> 233                 handler(stream, idents, msg)
        handler = <bound method Kernel.execute_request of <ipykernel.ipkernel.IPythonKernel object>>
        stream = <zmq.eventloop.zmqstream.ZMQStream object>
        idents = [b'0abad2c076419c6978d4dea13169edeb']
        msg = {'buffers': [], 'content': {'allow_stdin': True, 'code': '%%time\ncomments_df["comments"] = series_pmap(comments_df["story_id"], fetch_comments)', 'silent': False, 'stop_on_error': True, 'store_history': True, 'user_expressions': {}}, 'header': {'date': datetime.datetime(2018, 5, 17, 15, 32, 38, 144562, tzinfo=tzutc()), 'msg_id': '342536b5c5a48fb843892920125bbd84', 'msg_type': 'execute_request', 'session': '0abad2c076419c6978d4dea13169edeb', 'username': '', 'version': '5.2'}, 'metadata': {}, 'msg_id': '342536b5c5a48fb843892920125bbd84', 'msg_type': 'execute_request', 'parent_header': {}}
    234             except Exception:
    235                 self.log.error("Exception in message handler:", exc_info=True)
    236             finally:
    237                 self.post_handler_hook()

...........................................................................
/Users/jasminetan/anaconda2/envs/py36/lib/python3.6/site-packages/ipykernel/kernelbase.py in execute_request(self=<ipykernel.ipkernel.IPythonKernel object>, stream=<zmq.eventloop.zmqstream.ZMQStream object>, ident=[b'0abad2c076419c6978d4dea13169edeb'], parent={'buffers': [], 'content': {'allow_stdin': True, 'code': '%%time\ncomments_df["comments"] = series_pmap(comments_df["story_id"], fetch_comments)', 'silent': False, 'stop_on_error': True, 'store_history': True, 'user_expressions': {}}, 'header': {'date': datetime.datetime(2018, 5, 17, 15, 32, 38, 144562, tzinfo=tzutc()), 'msg_id': '342536b5c5a48fb843892920125bbd84', 'msg_type': 'execute_request', 'session': '0abad2c076419c6978d4dea13169edeb', 'username': '', 'version': '5.2'}, 'metadata': {}, 'msg_id': '342536b5c5a48fb843892920125bbd84', 'msg_type': 'execute_request', 'parent_header': {}})
    394         if not silent:
    395             self.execution_count += 1
    396             self._publish_execute_input(code, parent, self.execution_count)
    397 
    398         reply_content = self.do_execute(code, silent, store_history,
--> 399                                         user_expressions, allow_stdin)
        user_expressions = {}
        allow_stdin = True
    400 
    401         # Flush output before sending the reply.
    402         sys.stdout.flush()
    403         sys.stderr.flush()

...........................................................................
/Users/jasminetan/anaconda2/envs/py36/lib/python3.6/site-packages/ipykernel/ipkernel.py in do_execute(self=<ipykernel.ipkernel.IPythonKernel object>, code='%%time\ncomments_df["comments"] = series_pmap(comments_df["story_id"], fetch_comments)', silent=False, store_history=True, user_expressions={}, allow_stdin=True)
    203 
    204         self._forward_input(allow_stdin)
    205 
    206         reply_content = {}
    207         try:
--> 208             res = shell.run_cell(code, store_history=store_history, silent=silent)
        res = undefined
        shell.run_cell = <bound method ZMQInteractiveShell.run_cell of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        code = '%%time\ncomments_df["comments"] = series_pmap(comments_df["story_id"], fetch_comments)'
        store_history = True
        silent = False
    209         finally:
    210             self._restore_input()
    211 
    212         if res.error_before_exec is not None:

...........................................................................
/Users/jasminetan/anaconda2/envs/py36/lib/python3.6/site-packages/ipykernel/zmqshell.py in run_cell(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, *args=('%%time\ncomments_df["comments"] = series_pmap(comments_df["story_id"], fetch_comments)',), **kwargs={'silent': False, 'store_history': True})
    532             )
    533         self.payload_manager.write_payload(payload)
    534 
    535     def run_cell(self, *args, **kwargs):
    536         self._last_traceback = None
--> 537         return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
        self.run_cell = <bound method ZMQInteractiveShell.run_cell of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        args = ('%%time\ncomments_df["comments"] = series_pmap(comments_df["story_id"], fetch_comments)',)
        kwargs = {'silent': False, 'store_history': True}
    538 
    539     def _showtraceback(self, etype, evalue, stb):
    540         # try to preserve ordering of tracebacks and print statements
    541         sys.stdout.flush()

...........................................................................
/Users/jasminetan/anaconda2/envs/py36/lib/python3.6/site-packages/IPython/core/interactiveshell.py in run_cell(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, raw_cell='%%time\ncomments_df["comments"] = series_pmap(comments_df["story_id"], fetch_comments)', store_history=True, silent=False, shell_futures=True)
   2657         -------
   2658         result : :class:`ExecutionResult`
   2659         """
   2660         try:
   2661             result = self._run_cell(
-> 2662                 raw_cell, store_history, silent, shell_futures)
        raw_cell = '%%time\ncomments_df["comments"] = series_pmap(comments_df["story_id"], fetch_comments)'
        store_history = True
        silent = False
        shell_futures = True
   2663         finally:
   2664             self.events.trigger('post_execute')
   2665             if not silent:
   2666                 self.events.trigger('post_run_cell', result)

...........................................................................
/Users/jasminetan/anaconda2/envs/py36/lib/python3.6/site-packages/IPython/core/interactiveshell.py in _run_cell(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, raw_cell='%%time\ncomments_df["comments"] = series_pmap(comments_df["story_id"], fetch_comments)', store_history=True, silent=False, shell_futures=True)
   2780                 self.displayhook.exec_result = result
   2781 
   2782                 # Execute the user code
   2783                 interactivity = 'none' if silent else self.ast_node_interactivity
   2784                 has_raised = self.run_ast_nodes(code_ast.body, cell_name,
-> 2785                    interactivity=interactivity, compiler=compiler, result=result)
        interactivity = 'last_expr'
        compiler = <IPython.core.compilerop.CachingCompiler object>
   2786                 
   2787                 self.last_execution_succeeded = not has_raised
   2788                 self.last_execution_result = result
   2789 

...........................................................................
/Users/jasminetan/anaconda2/envs/py36/lib/python3.6/site-packages/IPython/core/interactiveshell.py in run_ast_nodes(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, nodelist=[<_ast.Expr object>], cell_name='<ipython-input-157-80ac4e680b5f>', interactivity='last', compiler=<IPython.core.compilerop.CachingCompiler object>, result=<ExecutionResult object at 106c0e128, execution_...rue silent=False shell_futures=True> result=None>)
   2904                     return True
   2905 
   2906             for i, node in enumerate(to_run_interactive):
   2907                 mod = ast.Interactive([node])
   2908                 code = compiler(mod, cell_name, "single")
-> 2909                 if self.run_code(code, result):
        self.run_code = <bound method InteractiveShell.run_code of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        code = <code object <module> at 0x1a1d7e86f0, file "<ipython-input-157-80ac4e680b5f>", line 1>
        result = <ExecutionResult object at 106c0e128, execution_...rue silent=False shell_futures=True> result=None>
   2910                     return True
   2911 
   2912             # Flush softspace
   2913             if softspace(sys.stdout, 0):

...........................................................................
/Users/jasminetan/anaconda2/envs/py36/lib/python3.6/site-packages/IPython/core/interactiveshell.py in run_code(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, code_obj=<code object <module> at 0x1a1d7e86f0, file "<ipython-input-157-80ac4e680b5f>", line 1>, result=<ExecutionResult object at 106c0e128, execution_...rue silent=False shell_futures=True> result=None>)
   2958         outflag = True  # happens in more places, so it's easier as default
   2959         try:
   2960             try:
   2961                 self.hooks.pre_run_code_hook()
   2962                 #rprint('Running code', repr(code_obj)) # dbg
-> 2963                 exec(code_obj, self.user_global_ns, self.user_ns)
        code_obj = <code object <module> at 0x1a1d7e86f0, file "<ipython-input-157-80ac4e680b5f>", line 1>
        self.user_global_ns = {'In': ['', 'content_indexer = Similarity.load(lsi_300_indexer.model)\n# content_corpus', 'import pickle\n\nimport psycopg2\n\nimport pandas as...from gensim.similarities.docsim import Similarity', 'content_indexer = Similarity.load(lsi_300_indexer.model)\n# content_corpus', "content_indexer = Similarity.load('lsi_300_indexer.model')\n# content_corpus", "content_indexer = Similarity.load('lsi_300_index...')\ncontent_corpus = MmCorpus('lsi_300_corpus.mm')", 'import pickle\n\nimport psycopg2\n\nimport pandas as...rt Similarity\nfrom gensim.corpora import MmCorpus', "content_indexer = Similarity.load('lsi_300_index...')\ncontent_corpus = MmCorpus('lsi_300_corpus.mm')", "user_indexer = Similarity.load('commenters_index...del')\nuser_corpus = MmCorpus('lsi_300_corpus.mm')", "tagger_model = load_pickle('tagger_model.pkl')", 'def load_pickle(file_name):\n    with open(file_n...ject = pickle.load(f)\n    return unpickled_object', "tagger_model = load_pickle('tagger_model.pkl')", 'content_corpus', 'print(content_corpus)', 'print(user_corpus)', "get_ipython().run_cell_magic('time', '', 'content_csc = corpus2csc(content_corpus)')", 'import pickle\n\nimport psycopg2\n\nimport pandas as...t MmCorpus\nfrom gensim.matutils import corpus2csc', "get_ipython().run_cell_magic('time', '', 'content_csc = corpus2csc(content_corpus)')", "get_ipython().run_cell_magic('time', '', 'tag_results = tagger_model.predict(content_csc)')", "get_ipython().run_cell_magic('time', '', 'tag_results = tagger_model.predict(content_csc.T)')", ...], 'MmCorpus': <class 'gensim.corpora.mmcorpus.MmCorpus'>, 'Out': {12: <gensim.corpora.mmcorpus.MmCorpus object>, 20: <25000x24 sparse matrix of type '<class 'numpy.i...with 26905 stored elements in LInked List format>, 23:        0   1   2   3   4   5   6   7   8   9  .....  
24999   0   0   0  

[25000 rows x 24 columns], 25:        Python  Mobile  Design  Security  Blockch...     0      0      0  

[25000 rows x 24 columns], 26:    Python  Mobile  Design  Security  Blockchain ...         0      0      0  

[5 rows x 24 columns], 29:           score     author  \
id                ... http://www.bbc.co.uk/news/uk-politics-36615028  , 30:     Python  Mobile  Design  Security  Blockchain...        0      0      0  

[20 rows x 24 columns], 31:    Python  Mobile  Design  Security  Blockchain ...        0      0      0  

[10 rows x 24 columns], 32:           score          author  \
id           ...         http://gabrielecirulli.github.io/2048/  , 34: {0: 16582136, 1: 11116274, 2: 13682022, 3: 13718752, 4: 11966167, 5: 8532261, 6: 13713480, 7: 15800676, 8: 16319505, 9: 7373566, ...}, ...}, 'Parallel': <class 'joblib.parallel.Parallel'>, 'Similarity': <class 'gensim.similarities.docsim.Similarity'>, '_':           story_id
id                
16582136  ...6063
4333774    4333774

[25000 rows x 1 columns], '_101': id
16582136       [11771737, 4372563, 8504931, 1...6859245, 14958329]
Name: user_recs, dtype: object, '_105':           score     author  \
id                ...2100, 6859245, 14958329]  

[5 rows x 31 columns], '_107':                                                 ...on.\nMr Johnson said the UK was "no less unite..., '_109':                                                 ...on.\nMr Johnson said the UK was "no less unite..., ...}
        self.user_ns = {'In': ['', 'content_indexer = Similarity.load(lsi_300_indexer.model)\n# content_corpus', 'import pickle\n\nimport psycopg2\n\nimport pandas as...from gensim.similarities.docsim import Similarity', 'content_indexer = Similarity.load(lsi_300_indexer.model)\n# content_corpus', "content_indexer = Similarity.load('lsi_300_indexer.model')\n# content_corpus", "content_indexer = Similarity.load('lsi_300_index...')\ncontent_corpus = MmCorpus('lsi_300_corpus.mm')", 'import pickle\n\nimport psycopg2\n\nimport pandas as...rt Similarity\nfrom gensim.corpora import MmCorpus', "content_indexer = Similarity.load('lsi_300_index...')\ncontent_corpus = MmCorpus('lsi_300_corpus.mm')", "user_indexer = Similarity.load('commenters_index...del')\nuser_corpus = MmCorpus('lsi_300_corpus.mm')", "tagger_model = load_pickle('tagger_model.pkl')", 'def load_pickle(file_name):\n    with open(file_n...ject = pickle.load(f)\n    return unpickled_object', "tagger_model = load_pickle('tagger_model.pkl')", 'content_corpus', 'print(content_corpus)', 'print(user_corpus)', "get_ipython().run_cell_magic('time', '', 'content_csc = corpus2csc(content_corpus)')", 'import pickle\n\nimport psycopg2\n\nimport pandas as...t MmCorpus\nfrom gensim.matutils import corpus2csc', "get_ipython().run_cell_magic('time', '', 'content_csc = corpus2csc(content_corpus)')", "get_ipython().run_cell_magic('time', '', 'tag_results = tagger_model.predict(content_csc)')", "get_ipython().run_cell_magic('time', '', 'tag_results = tagger_model.predict(content_csc.T)')", ...], 'MmCorpus': <class 'gensim.corpora.mmcorpus.MmCorpus'>, 'Out': {12: <gensim.corpora.mmcorpus.MmCorpus object>, 20: <25000x24 sparse matrix of type '<class 'numpy.i...with 26905 stored elements in LInked List format>, 23:        0   1   2   3   4   5   6   7   8   9  .....  
24999   0   0   0  

[25000 rows x 24 columns], 25:        Python  Mobile  Design  Security  Blockch...     0      0      0  

[25000 rows x 24 columns], 26:    Python  Mobile  Design  Security  Blockchain ...         0      0      0  

[5 rows x 24 columns], 29:           score     author  \
id                ... http://www.bbc.co.uk/news/uk-politics-36615028  , 30:     Python  Mobile  Design  Security  Blockchain...        0      0      0  

[20 rows x 24 columns], 31:    Python  Mobile  Design  Security  Blockchain ...        0      0      0  

[10 rows x 24 columns], 32:           score          author  \
id           ...         http://gabrielecirulli.github.io/2048/  , 34: {0: 16582136, 1: 11116274, 2: 13682022, 3: 13718752, 4: 11966167, 5: 8532261, 6: 13713480, 7: 15800676, 8: 16319505, 9: 7373566, ...}, ...}, 'Parallel': <class 'joblib.parallel.Parallel'>, 'Similarity': <class 'gensim.similarities.docsim.Similarity'>, '_':           story_id
id                
16582136  ...6063
4333774    4333774

[25000 rows x 1 columns], '_101': id
16582136       [11771737, 4372563, 8504931, 1...6859245, 14958329]
Name: user_recs, dtype: object, '_105':           score     author  \
id                ...2100, 6859245, 14958329]  

[5 rows x 31 columns], '_107':                                                 ...on.\nMr Johnson said the UK was "no less unite..., '_109':                                                 ...on.\nMr Johnson said the UK was "no less unite..., ...}
   2964             finally:
   2965                 # Reset our crash handler in place
   2966                 sys.excepthook = old_excepthook
   2967         except SystemExit as e:

...........................................................................
/Users/jasminetan/code/jt-dsi-capstone-v6/<ipython-input-157-80ac4e680b5f> in <module>()
----> 1 get_ipython().run_cell_magic('time', '', 'comments_df["comments"] = series_pmap(comments_df["story_id"], fetch_comments)')

...........................................................................
/Users/jasminetan/anaconda2/envs/py36/lib/python3.6/site-packages/IPython/core/interactiveshell.py in run_cell_magic(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, magic_name='time', line='', cell='comments_df["comments"] = series_pmap(comments_df["story_id"], fetch_comments)')
   2162             # This will need to be updated if the internal calling logic gets
   2163             # refactored, or else we'll be expanding the wrong variables.
   2164             stack_depth = 2
   2165             magic_arg_s = self.var_expand(line, stack_depth)
   2166             with self.builtin_trap:
-> 2167                 result = fn(magic_arg_s, cell)
        result = undefined
        fn = <bound method ExecutionMagics.time of <IPython.core.magics.execution.ExecutionMagics object>>
        magic_arg_s = ''
        cell = 'comments_df["comments"] = series_pmap(comments_df["story_id"], fetch_comments)'
   2168             return result
   2169 
   2170     def find_line_magic(self, magic_name):
   2171         """Find and return a line magic by name.

...........................................................................
/Users/jasminetan/code/jt-dsi-capstone-v6/<decorator-gen-62> in time(self=<IPython.core.magics.execution.ExecutionMagics object>, line='', cell='comments_df["comments"] = series_pmap(comments_df["story_id"], fetch_comments)', local_ns=None)

...........................................................................
/Users/jasminetan/anaconda2/envs/py36/lib/python3.6/site-packages/IPython/core/magic.py in <lambda>(f=<function ExecutionMagics.time>, *a=(<IPython.core.magics.execution.ExecutionMagics object>, '', 'comments_df["comments"] = series_pmap(comments_df["story_id"], fetch_comments)', None), **k={})
    182     validate_type(magic_kind)
    183 
    184     # This is a closure to capture the magic_kind.  We could also use a class,
    185     # but it's overkill for just that one bit of state.
    186     def magic_deco(arg):
--> 187         call = lambda f, *a, **k: f(*a, **k)
        f = <function ExecutionMagics.time>
        a = (<IPython.core.magics.execution.ExecutionMagics object>, '', 'comments_df["comments"] = series_pmap(comments_df["story_id"], fetch_comments)', None)
        k = {}
    188 
    189         if callable(arg):
    190             # "Naked" decorator call (just @foo, no args)
    191             func = arg

...........................................................................
/Users/jasminetan/anaconda2/envs/py36/lib/python3.6/site-packages/IPython/core/magics/execution.py in time(self=<IPython.core.magics.execution.ExecutionMagics object>, line='', cell='comments_df["comments"] = series_pmap(comments_df["story_id"], fetch_comments)', local_ns=None)
   1233                 return
   1234             end = clock2()
   1235         else:
   1236             st = clock2()
   1237             try:
-> 1238                 exec(code, glob, local_ns)
        code = <code object <module> at 0x1a21feec00, file "<timed exec>", line 1>
        glob = {'In': ['', 'content_indexer = Similarity.load(lsi_300_indexer.model)\n# content_corpus', 'import pickle\n\nimport psycopg2\n\nimport pandas as...from gensim.similarities.docsim import Similarity', 'content_indexer = Similarity.load(lsi_300_indexer.model)\n# content_corpus', "content_indexer = Similarity.load('lsi_300_indexer.model')\n# content_corpus", "content_indexer = Similarity.load('lsi_300_index...')\ncontent_corpus = MmCorpus('lsi_300_corpus.mm')", 'import pickle\n\nimport psycopg2\n\nimport pandas as...rt Similarity\nfrom gensim.corpora import MmCorpus', "content_indexer = Similarity.load('lsi_300_index...')\ncontent_corpus = MmCorpus('lsi_300_corpus.mm')", "user_indexer = Similarity.load('commenters_index...del')\nuser_corpus = MmCorpus('lsi_300_corpus.mm')", "tagger_model = load_pickle('tagger_model.pkl')", 'def load_pickle(file_name):\n    with open(file_n...ject = pickle.load(f)\n    return unpickled_object', "tagger_model = load_pickle('tagger_model.pkl')", 'content_corpus', 'print(content_corpus)', 'print(user_corpus)', "get_ipython().run_cell_magic('time', '', 'content_csc = corpus2csc(content_corpus)')", 'import pickle\n\nimport psycopg2\n\nimport pandas as...t MmCorpus\nfrom gensim.matutils import corpus2csc', "get_ipython().run_cell_magic('time', '', 'content_csc = corpus2csc(content_corpus)')", "get_ipython().run_cell_magic('time', '', 'tag_results = tagger_model.predict(content_csc)')", "get_ipython().run_cell_magic('time', '', 'tag_results = tagger_model.predict(content_csc.T)')", ...], 'MmCorpus': <class 'gensim.corpora.mmcorpus.MmCorpus'>, 'Out': {12: <gensim.corpora.mmcorpus.MmCorpus object>, 20: <25000x24 sparse matrix of type '<class 'numpy.i...with 26905 stored elements in LInked List format>, 23:        0   1   2   3   4   5   6   7   8   9  .....  
24999   0   0   0  

[25000 rows x 24 columns], 25:        Python  Mobile  Design  Security  Blockch...     0      0      0  

[25000 rows x 24 columns], 26:    Python  Mobile  Design  Security  Blockchain ...         0      0      0  

[5 rows x 24 columns], 29:           score     author  \
id                ... http://www.bbc.co.uk/news/uk-politics-36615028  , 30:     Python  Mobile  Design  Security  Blockchain...        0      0      0  

[20 rows x 24 columns], 31:    Python  Mobile  Design  Security  Blockchain ...        0      0      0  

[10 rows x 24 columns], 32:           score          author  \
id           ...         http://gabrielecirulli.github.io/2048/  , 34: {0: 16582136, 1: 11116274, 2: 13682022, 3: 13718752, 4: 11966167, 5: 8532261, 6: 13713480, 7: 15800676, 8: 16319505, 9: 7373566, ...}, ...}, 'Parallel': <class 'joblib.parallel.Parallel'>, 'Similarity': <class 'gensim.similarities.docsim.Similarity'>, '_':           story_id
id                
16582136  ...6063
4333774    4333774

[25000 rows x 1 columns], '_101': id
16582136       [11771737, 4372563, 8504931, 1...6859245, 14958329]
Name: user_recs, dtype: object, '_105':           score     author  \
id                ...2100, 6859245, 14958329]  

[5 rows x 31 columns], '_107':                                                 ...on.\nMr Johnson said the UK was "no less unite..., '_109':                                                 ...on.\nMr Johnson said the UK was "no less unite..., ...}
        local_ns = None
   1239             except:
   1240                 self.shell.showtraceback()
   1241                 return
   1242             end = clock2()

...........................................................................
/Users/jasminetan/code/jt-dsi-capstone-v6/<timed exec> in <module>()

...........................................................................
/Users/jasminetan/code/jt-dsi-capstone-v6/<ipython-input-62-2f9ee080a0f8> in series_pmap(s=id
16582136    16582136
11116274    11116274
136...33774
Name: story_id, Length: 25000, dtype: int64, f=<function fetch_comments>, n_jobs=-1)
      7     return (s.iloc[ilocs] for ilocs in fp.chunks(n_chunks, range(n)))
      8 
      9 def series_pmap(s, f, n_jobs=-1):
     10     if n_jobs == 0:
     11         return s.map(f)
---> 12     return pd.concat(Parallel(n_jobs=n_jobs)(delayed(series_pmap)(sub_series, f, n_jobs=0)                                                  for sub_series in _series_chunks(s, n_jobs)))

...........................................................................
/Users/jasminetan/anaconda2/envs/py36/lib/python3.6/site-packages/joblib/parallel.py in __call__(self=Parallel(n_jobs=-1), iterable=<generator object series_pmap.<locals>.<genexpr>>)
    784             if pre_dispatch == "all" or n_jobs == 1:
    785                 # The iterable was consumed all at once by the above for loop.
    786                 # No need to wait for async callbacks to trigger to
    787                 # consumption.
    788                 self._iterating = False
--> 789             self.retrieve()
        self.retrieve = <bound method Parallel.retrieve of Parallel(n_jobs=-1)>
    790             # Make sure that we get a last message telling us we are done
    791             elapsed_time = time.time() - self._start_time
    792             self._print('Done %3i out of %3i | elapsed: %s finished',
    793                         (len(self._output), len(self._output),

---------------------------------------------------------------------------
Sub-process traceback:
---------------------------------------------------------------------------
SSLError                                           Fri May 18 03:35:45 2018
PID: 61490   Python 3.6.5: /Users/jasminetan/anaconda2/envs/py36/bin/python
...........................................................................
/Users/jasminetan/anaconda2/envs/py36/lib/python3.6/site-packages/joblib/parallel.py in __call__(self=<joblib.parallel.BatchedCalls object>)
    126     def __init__(self, iterator_slice):
    127         self.items = list(iterator_slice)
    128         self._size = len(self.items)
    129 
    130     def __call__(self):
--> 131         return [func(*args, **kwargs) for func, args, kwargs in self.items]
        self.items = [(<function series_pmap>, (id
16582136    16582136
11116274    11116274
136...526446
Name: story_id, Length: 6250, dtype: int64, <function fetch_comments>), {'n_jobs': 0})]
    132 
    133     def __len__(self):
    134         return self._size
    135 

...........................................................................
/Users/jasminetan/anaconda2/envs/py36/lib/python3.6/site-packages/joblib/parallel.py in <listcomp>(.0=<list_iterator object>)
    126     def __init__(self, iterator_slice):
    127         self.items = list(iterator_slice)
    128         self._size = len(self.items)
    129 
    130     def __call__(self):
--> 131         return [func(*args, **kwargs) for func, args, kwargs in self.items]
        func = <function series_pmap>
        args = (id
16582136    16582136
11116274    11116274
136...526446
Name: story_id, Length: 6250, dtype: int64, <function fetch_comments>)
        kwargs = {'n_jobs': 0}
    132 
    133     def __len__(self):
    134         return self._size
    135 

...........................................................................
/Users/jasminetan/code/jt-dsi-capstone-v6/<ipython-input-62-2f9ee080a0f8> in series_pmap(s=id
16582136    16582136
11116274    11116274
136...526446
Name: story_id, Length: 6250, dtype: int64, f=<function fetch_comments>, n_jobs=0)
      6     n_chunks = int(n / n_jobs)
      7     return (s.iloc[ilocs] for ilocs in fp.chunks(n_chunks, range(n)))
      8 
      9 def series_pmap(s, f, n_jobs=-1):
     10     if n_jobs == 0:
---> 11         return s.map(f)
     12     return pd.concat(Parallel(n_jobs=n_jobs)(delayed(series_pmap)(sub_series, f, n_jobs=0)                                                  for sub_series in _series_chunks(s, n_jobs)))

...........................................................................
/Users/jasminetan/anaconda2/envs/py36/lib/python3.6/site-packages/pandas/core/series.py in map(self=id
16582136    16582136
11116274    11116274
136...526446
Name: story_id, Length: 6250, dtype: int64, arg=<function fetch_comments>, na_action=None)
   2349             # arg is a Series
   2350             indexer = arg.index.get_indexer(values)
   2351             new_values = algorithms.take_1d(arg._values, indexer)
   2352         else:
   2353             # arg is a function
-> 2354             new_values = map_f(values, arg)
        new_values = undefined
        map_f = <built-in function map_infer>
        values = array([16582136, 11116274, 13682022, ..., 5476025, 3571167, 3526446],
      dtype=object)
        arg = <function fetch_comments>
   2355 
   2356         return self._constructor(new_values,
   2357                                  index=self.index).__finalize__(self)
   2358 

...........................................................................
/Users/jasminetan/anaconda2/envs/py36/lib/python3.6/site-packages/pandas/_libs/lib.cpython-36m-darwin.so in pandas._libs.lib.map_infer()

...........................................................................
/Users/jasminetan/code/jt-dsi-capstone-v6/<ipython-input-156-05ae9d3209bd> in fetch_comments(story_id=11807450)
      3 def fetch_comments(story_id):
      4     story_json = requests.get('https://hacker-news.firebaseio.com/v0/item/{}.json'.format(story_id)).json()
      5     comments_ids = story_json["kids"]
      6     comments = []
      7     for each_comment_id in comments_ids:
----> 8         comment_json = requests.get('https://hacker-news.firebaseio.com/v0/item/{}.json'.format(each_comment_id)).json()
      9         try:
     10             commenter = comment_json["by"]
     11         except:
     12             commenter = "unknown"

...........................................................................
/Users/jasminetan/anaconda2/envs/py36/lib/python3.6/site-packages/requests/api.py in get(url='https://hacker-news.firebaseio.com/v0/item/11807774.json', params=None, **kwargs={'allow_redirects': True})
     67     :return: :class:`Response <Response>` object
     68     :rtype: requests.Response
     69     """
     70 
     71     kwargs.setdefault('allow_redirects', True)
---> 72     return request('get', url, params=params, **kwargs)
        url = 'https://hacker-news.firebaseio.com/v0/item/11807774.json'
        params = None
        kwargs = {'allow_redirects': True}
     73 
     74 
     75 def options(url, **kwargs):
     76     r"""Sends an OPTIONS request.

...........................................................................
/Users/jasminetan/anaconda2/envs/py36/lib/python3.6/site-packages/requests/api.py in request(method='get', url='https://hacker-news.firebaseio.com/v0/item/11807774.json', **kwargs={'allow_redirects': True, 'params': None})
     53 
     54     # By using the 'with' statement we are sure the session is closed, thus we
     55     # avoid leaving sockets open which can trigger a ResourceWarning in some
     56     # cases, and look like a memory leak in others.
     57     with sessions.Session() as session:
---> 58         return session.request(method=method, url=url, **kwargs)
        session.request = <bound method Session.request of <requests.sessions.Session object>>
        method = 'get'
        url = 'https://hacker-news.firebaseio.com/v0/item/11807774.json'
        kwargs = {'allow_redirects': True, 'params': None}
     59 
     60 
     61 def get(url, params=None, **kwargs):
     62     r"""Sends a GET request.

...........................................................................
/Users/jasminetan/anaconda2/envs/py36/lib/python3.6/site-packages/requests/sessions.py in request(self=<requests.sessions.Session object>, method='get', url='https://hacker-news.firebaseio.com/v0/item/11807774.json', params=None, data=None, headers=None, cookies=None, files=None, auth=None, timeout=None, allow_redirects=True, proxies={}, hooks=None, stream=None, verify=None, cert=None, json=None)
    503         send_kwargs = {
    504             'timeout': timeout,
    505             'allow_redirects': allow_redirects,
    506         }
    507         send_kwargs.update(settings)
--> 508         resp = self.send(prep, **send_kwargs)
        resp = undefined
        self.send = <bound method Session.send of <requests.sessions.Session object>>
        prep = <PreparedRequest [GET]>
        send_kwargs = {'allow_redirects': True, 'cert': None, 'proxies': OrderedDict(), 'stream': False, 'timeout': None, 'verify': True}
    509 
    510         return resp
    511 
    512     def get(self, url, **kwargs):

...........................................................................
/Users/jasminetan/anaconda2/envs/py36/lib/python3.6/site-packages/requests/sessions.py in send(self=<requests.sessions.Session object>, request=<PreparedRequest [GET]>, **kwargs={'cert': None, 'proxies': OrderedDict(), 'stream': False, 'timeout': None, 'verify': True})
    613 
    614         # Start time (approximately) of the request
    615         start = preferred_clock()
    616 
    617         # Send the request
--> 618         r = adapter.send(request, **kwargs)
        r = undefined
        adapter.send = <bound method HTTPAdapter.send of <requests.adapters.HTTPAdapter object>>
        request = <PreparedRequest [GET]>
        kwargs = {'cert': None, 'proxies': OrderedDict(), 'stream': False, 'timeout': None, 'verify': True}
    619 
    620         # Total elapsed time of the request (approximately)
    621         elapsed = preferred_clock() - start
    622         r.elapsed = timedelta(seconds=elapsed)

...........................................................................
/Users/jasminetan/anaconda2/envs/py36/lib/python3.6/site-packages/requests/adapters.py in send(self=<requests.adapters.HTTPAdapter object>, request=<PreparedRequest [GET]>, stream=False, timeout=<urllib3.util.timeout.Timeout object>, verify=True, cert=None, proxies=OrderedDict())
    501             if isinstance(e.reason, _ProxyError):
    502                 raise ProxyError(e, request=request)
    503 
    504             if isinstance(e.reason, _SSLError):
    505                 # This branch is for urllib3 v1.22 and later.
--> 506                 raise SSLError(e, request=request)
        e = undefined
        request = <PreparedRequest [GET]>
    507 
    508             raise ConnectionError(e, request=request)
    509 
    510         except ClosedPoolError as e:

SSLError: HTTPSConnectionPool(host='hacker-news.firebaseio.com', port=443): Max retries exceeded with url: /v0/item/11807774.json (Caused by SSLError(SSLError("bad handshake: SysCallError(-1, 'Unexpected EOF')",),))
___________________________________________________________________________

In [158]:
comments_df.head()

Unnamed: 0_level_0,story_id
id,Unnamed: 1_level_1
16582136,16582136
11116274,11116274
13682022,13682022
13718752,13718752
11966167,11966167


In [159]:
comments_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 25000 entries, 16582136 to 4333774
Data columns (total 1 columns):
story_id    25000 non-null int64
dtypes: int64(1)
memory usage: 1.0 MB
