In [1]:
import re
import pickle
from itertools import chain
from collections import namedtuple
from datetime import datetime

import pandas as pd

In [2]:
# Record type for one scraped schedule entry; its field order is reused as
# the DataFrame column order further down.
Event = namedtuple(
    typename='Event',
    field_names='id, card, abstract, authors, title, datetime, etype',
)

In [3]:
# NOTE: pickle.load can execute arbitrary code while deserialising, so only
# run this cell on a pickle file that comes from a trusted source.
with open('nips2016-presenting.pk', mode='rb') as pickle_file:
    events = pickle.load(pickle_file)

In [4]:
# One row per value stored in `events`; the column names are taken from the
# Event namedtuple's field list.
events_frame = pd.DataFrame(
    data=list(events.values()),
    columns=Event._fields,
)

In [5]:
# Preview the first five events.
events_frame.head(n=5)

Unnamed: 0,id,card,abstract,authors,title,datetime,etype
0,6190,"<div class=""maincard narrower InvitedTalkBreim...",Modern data sets usually present multiple leve...,Susan Holmes,Reproducible Research: the Case of the Human M...,Thu Dec 8th 09:50 -- 10:40 AM @ Area 1 + 2,Invited Talk (Breiman Lecture)
1,6192,"<div class=""maincard narrower InvitedTalk"" id=...","Robust, efficient, and low-cost networks are a...",Saket Navlakha,Engineering Principles From Stable and Develop...,Tue Dec 6th 03:00 -- 03:50 PM @ Area 1 + 2,Invited Talk
2,6193,"<div class=""maincard narrower InvitedTalk"" id=...",The biosphere is a stupendously complex and po...,Drew Purves,Intelligent Biosphere,Tue Dec 6th 09:00 -- 09:50 AM @ Area 1+2,Invited Talk
3,6194,"<div class=""maincard narrower InvitedTalk"" id=...",A new generation of high-performance robots is...,Marc Raibert,Dynamic Legged Robots,Wed Dec 7th 03:00 -- 03:50 PM @ Area 1 + 2,Invited Talk
4,6195,"<div class=""maincard narrower InvitedTalk"" id=...",Particle physics aims to answer profound quest...,Kyle Cranmer,Machine Learning and Likelihood-Free Inference...,Wed Dec 7th 09:00 -- 09:50 AM @ Area 1 + 2,Invited Talk


## Let's stalk authors!!

In [6]:
# Flatten every event's '·'-separated author string into one list of
# whitespace-trimmed names (repeats are kept).
author_list = [
    name.strip()
    for authors in events_frame['authors']
    for name in authors.split('·')
]

In [7]:
# Total author credits vs. distinct author names. The gap comes from authors
# who appear on several events, or from different people sharing a name.
(len(author_list), len(set(author_list)))

(2485, 1839)

## Only 6 Machine Translation papers?!

In [8]:
# Count abstracts that mention the exact lowercase phrase 'machine translation'
# (the match is case-sensitive).
sum(
    1
    for abstract in events_frame['abstract']
    if 'machine translation' in abstract.strip()
)

6

## Date munging

In [9]:
def munge_time(s, year=2016):
    """Split a schedule string into a weekday and start/end timestamps.

    Parameters
    ----------
    s : str
        Schedule text such as ``'Tue Dec 6th 03:00 -- 03:50 PM @ Area 1 + 2'``
        (one AM/PM marker shared by both times) or
        ``'Mon Dec 5th 11:00 AM -- 12:30 PM @ Area 3'`` (one marker per time).
        Everything after the first ``' @ '`` is treated as the location and
        ignored; the location may be absent.
    year : int or str, optional
        Year inserted into both timestamps, because the schedule text itself
        carries none. Defaults to 2016.

    Returns
    -------
    tuple of (str, str, str)
        ``(day, start_time, end_time)``. The timestamps look like
        ``'Dec 6 2016 03:00PM'`` and can be parsed with the
        ``'%b %d %Y %I:%M%p'`` strptime format.

    Raises
    ------
    ValueError
        If the time part does not consist of 7 or 8 whitespace-separated fields.
    IndexError
        If the day-of-month field contains no digits.
    """
    # partition (unlike a two-way unpack of split) tolerates a missing
    # location as well as a location that itself contains ' @ '.
    when, _, _ = s.partition(' @ ')
    fields = when.split()
    if len(fields) == 7:
        # A single AM/PM marker applies to both the start and the end time.
        day, month, date, start, _, end, ampm2 = fields
        ampm1 = ampm2
    elif len(fields) == 8:
        # Start and end each carry their own AM/PM marker.
        day, month, date, start, ampm1, _, end, ampm2 = fields
    else:
        raise ValueError('unrecognised schedule string: {!r}'.format(s))
    date = re.findall(r'\d+', date)[0]  # drop the ordinal suffix: '6th' -> '6'
    date_prefix = ' '.join([month, date, str(year)])
    start_time = ' '.join([date_prefix, start + ampm1])
    end_time = ' '.join([date_prefix, end + ampm2])
    return day, start_time, end_time

In [10]:
# Try munge_time on a sample schedule string whose single PM marker is shared
# by the start and end times.
s = 'Tue Dec 6th 03:00 -- 03:50 PM @ Area 1 + 2'
day, start, end = munge_time(s)

In [11]:
# Check that the munged start string parses; %I together with %p reads the
# 12-hour clock value, so 03:00PM becomes hour 15.
datetime.strptime(start, '%b %d %Y %I:%M%p')

datetime.datetime(2016, 12, 6, 15, 0)

In [12]:
# Same parse as above, shown in the datetime's str() form rather than its repr.
print(datetime.strptime(start, '%b %d %Y %I:%M%p'))

2016-12-06 15:00:00


In [13]:
# Derive the day / start_time / end_time columns from each event's raw
# schedule text. The columns are built in one pass and then attached, which
# replaces the per-cell DataFrame.set_value calls (deprecated in pandas 0.21
# and removed in pandas 1.0). Re-running this cell gives the same result.
munged_times = pd.DataFrame(
    [munge_time(stamp) for stamp in events_frame['datetime']],
    index=events_frame.index,
    columns=['day', 'start_time', 'end_time'],
)
for column in munged_times.columns:
    events_frame[column] = munged_times[column]

In [14]:
# Show only the first ten rows instead of dumping the whole frame; that is
# enough to check the new day / start_time / end_time columns.
events_frame.head(10)

Unnamed: 0,id,card,abstract,authors,title,datetime,etype,day,start_time,end_time
0,6190,"<div class=""maincard narrower InvitedTalkBreim...",Modern data sets usually present multiple leve...,Susan Holmes,Reproducible Research: the Case of the Human M...,Thu Dec 8th 09:50 -- 10:40 AM @ Area 1 + 2,Invited Talk (Breiman Lecture),Thu,Dec 8 2016 09:50AM,Dec 8 2016 10:40AM
1,6192,"<div class=""maincard narrower InvitedTalk"" id=...","Robust, efficient, and low-cost networks are a...",Saket Navlakha,Engineering Principles From Stable and Develop...,Tue Dec 6th 03:00 -- 03:50 PM @ Area 1 + 2,Invited Talk,Tue,Dec 6 2016 03:00PM,Dec 6 2016 03:50PM
2,6193,"<div class=""maincard narrower InvitedTalk"" id=...",The biosphere is a stupendously complex and po...,Drew Purves,Intelligent Biosphere,Tue Dec 6th 09:00 -- 09:50 AM @ Area 1+2,Invited Talk,Tue,Dec 6 2016 09:00AM,Dec 6 2016 09:50AM
3,6194,"<div class=""maincard narrower InvitedTalk"" id=...",A new generation of high-performance robots is...,Marc Raibert,Dynamic Legged Robots,Wed Dec 7th 03:00 -- 03:50 PM @ Area 1 + 2,Invited Talk,Wed,Dec 7 2016 03:00PM,Dec 7 2016 03:50PM
4,6195,"<div class=""maincard narrower InvitedTalk"" id=...",Particle physics aims to answer profound quest...,Kyle Cranmer,Machine Learning and Likelihood-Free Inference...,Wed Dec 7th 09:00 -- 09:50 AM @ Area 1 + 2,Invited Talk,Wed,Dec 7 2016 09:00AM,Dec 7 2016 09:50AM
5,6196,"<div class=""maincard narrower InvitedTalk"" id=...","Quantifying mental states and identifying ""sta...",Irina Rish,Learning About the Brain: Neuroimaging and Beyond,Thu Dec 8th 09:00 -- 09:50 AM @ Area 1 + 2,Invited Talk,Thu,Dec 8 2016 09:00AM,Dec 8 2016 09:50AM
6,6197,"<div class=""maincard narrower InvitedTalkPosne...",Deep learning has been at the root of signific...,Yann LeCun,Predictive Learning,Mon Dec 5th 05:30 -- 06:20 PM @ area 1 + 2,Invited Talk (Posner Lecture),Mon,Dec 5 2016 05:30PM,Dec 5 2016 06:20PM
7,6198,"<div class=""maincard narrower Tutorial"" id=""ma...",Deep Reinforcement Learning (Deep RL) has seen...,Pieter Abbeel · John Schulman,Deep Reinforcement Learning Through Policy Opt...,Mon Dec 5th 08:30 -- 10:30 AM @ Rooms 211 + 212,Tutorial,Mon,Dec 5 2016 08:30AM,Dec 5 2016 10:30AM
8,6199,"<div class=""maincard narrower Tutorial"" id=""ma...",One of the core problems of modern statistics ...,David Blei · Shakir Mohamed · Rajesh Ranganath,Variational Inference: Foundations and Modern ...,Mon Dec 5th 08:30 -- 10:30 AM @ Area 1 + 2,Tutorial,Mon,Dec 5 2016 08:30AM,Dec 5 2016 10:30AM
9,6200,"<div class=""maincard narrower Tutorial"" id=""ma...",Stochastic optimization lies at the heart of m...,Suvrit Sra · Francis Bach,Large-Scale Optimization: Beyond Stochastic Gr...,Mon Dec 5th 02:30 -- 04:30 PM @ Rooms 211 + 212,Tutorial,Mon,Dec 5 2016 02:30PM,Dec 5 2016 04:30PM
