In [13]:
import requests
import bs4
import pandas
import qgrid

In [2]:
def get_presentation_soups(root_url, presentation_links, parser='html.parser', timeout=30):
    """Download every presentation page and parse it with BeautifulSoup.

    Parameters
    ----------
    root_url : str
        Prefix that is concatenated in front of each link.
    presentation_links : iterable of str
        Link suffixes; each one is appended to ``root_url`` to form the URL.
    parser : str, optional
        Parser name handed to ``bs4.BeautifulSoup``. Naming it explicitly keeps
        the parse tree independent of which optional parsers are installed.
    timeout : float, optional
        Seconds passed to ``requests.get`` so a stalled server cannot hang the cell.

    Returns
    -------
    list
        One ``BeautifulSoup`` object per link, in input order.

    Raises
    ------
    requests.HTTPError
        If a page answers with a 4xx/5xx status.
    """
    soups = []
    for presentation_link in presentation_links:
        response = requests.get(root_url + presentation_link, timeout=timeout)
        # Fail here rather than parsing an error page that the field
        # extractors would later choke on with an unrelated exception.
        response.raise_for_status()
        soups.append(bs4.BeautifulSoup(response.text, parser))
    return soups

In [105]:
def get_attributes(presentation_soup):
    """Gather the fields extracted from one parsed presentation page.

    Returns a dict with the keys ``description``, ``abstract``, ``level``,
    ``time``, ``Time`` (the ``time`` text upper-cased), ``speaker`` and
    ``title``, each produced by the matching ``get_*`` helper.
    """
    attributes = dict(
        description=get_description(presentation_soup),
        abstract=get_abstract(presentation_soup),
        level=get_level(presentation_soup),
    )
    # Extract the time text once and derive the upper-cased variant from it
    # instead of scanning the page a second time.
    time_text = get_time(presentation_soup)
    attributes.update(
        time=time_text,
        Time=time_text.upper(),
        speaker=get_speaker(presentation_soup),
        title=get_title(presentation_soup),
    )
    return attributes
def get_description(presentation_soup):
    """Return the text of the first <div> whose class list contains 'description'.

    Divs without a ``class`` attribute are skipped.
    Raises IndexError when no div matches.
    """
    matches = [
        div.text
        for div in presentation_soup.findAll('div')
        # .get() rather than div['class']: indexing raises KeyError on a
        # div that has no class attribute at all.
        if 'description' in (div.get('class') or ())
    ]
    return matches[0]
def get_abstract(presentation_soup):
    """Return the text of the first <div> whose class list contains 'abstract'.

    Divs without a ``class`` attribute are skipped.
    Raises IndexError when no div matches.
    """
    matches = [
        div.text
        for div in presentation_soup.findAll('div')
        # .get() rather than div['class']: indexing raises KeyError on a
        # div that has no class attribute at all.
        if 'abstract' in (div.get('class') or ())
    ]
    return matches[0]
def get_level(presentation_soup):
    """Return the ``dd`` text of the first <div> whose class list contains 'col-md-8'.

    Divs without a ``class`` attribute are skipped.
    Raises IndexError when no div matches.
    """
    matches = [
        div.dd.text
        for div in presentation_soup.findAll('div')
        # .get() rather than div['class']: indexing raises KeyError on a
        # div that has no class attribute at all.
        if 'col-md-8' in (div.get('class') or ())
    ]
    return matches[0]
def get_time(presentation_soup):
    """Return the whitespace-normalised text of the first <h4> inside a 'col-md-8' div.

    Runs of whitespace (including newlines) in the heading are collapsed to
    single spaces. Divs without a ``class`` attribute are skipped.
    Raises IndexError when no such <h4> exists.
    """
    headings = [
        ' '.join(h4.text.split())
        for div in presentation_soup.findAll('div')
        # .get() rather than div['class']: indexing raises KeyError on a
        # div that has no class attribute at all.
        if 'col-md-8' in (div.get('class') or ())
        for h4 in div.findAll('h4')
    ]
    return headings[0]
def get_speaker(presentation_soup):
    """Return the whitespace-normalised text of the second <h4> inside 'col-md-8' divs.

    Runs of whitespace (including newlines) in the heading are collapsed to
    single spaces. Divs without a ``class`` attribute are skipped.
    Raises IndexError when fewer than two such <h4> elements exist.
    """
    headings = [
        ' '.join(h4.text.split())
        for div in presentation_soup.findAll('div')
        # .get() rather than div['class']: indexing raises KeyError on a
        # div that has no class attribute at all.
        if 'col-md-8' in (div.get('class') or ())
        for h4 in div.findAll('h4')
    ]
    return headings[1]
def get_title(presentation_soup):
    """Return the text of the first <h2> inside a <div> whose class list contains 'col-md-8'.

    Divs without a ``class`` attribute are skipped.
    Raises IndexError when no such <h2> exists.
    """
    titles = [
        h2.text
        for div in presentation_soup.findAll('div')
        # .get() rather than div['class']: indexing raises KeyError on a
        # div that has no class attribute at all.
        if 'col-md-8' in (div.get('class') or ())
        for h2 in div.findAll('h2')
    ]
    return titles[0]
def get_colmd8(presentation_soup):
    """Return the text of every <div> whose class list contains 'col-md-8'.

    Divs without a ``class`` attribute are skipped; the result is an empty
    list when nothing matches.
    """
    return [
        div.text
        for div in presentation_soup.findAll('div')
        # .get() rather than div['class']: indexing raises KeyError on a
        # div that has no class attribute at all.
        if 'col-md-8' in (div.get('class') or ())
    ]


In [4]:
root_url = "http://london.pydata.org"
schedule_url = root_url + "/schedule/"
# A timeout keeps a stalled server from hanging the cell; raise_for_status
# stops the run on an HTTP error instead of scraping an error page.
schedule_r = requests.get(schedule_url, timeout=30)
schedule_r.raise_for_status()
# Name the parser so the parse tree does not depend on which optional
# parsers happen to be installed.
schedule_soup = bs4.BeautifulSoup(schedule_r.text, 'html.parser')
# href=True restricts the search to anchors that carry an href; without it
# `'presentation' in a.get('href')` raises TypeError on href-less anchors.
presentation_links = [
    a['href']
    for a in schedule_soup.findAll('a', href=True)
    if 'presentation' in a['href']
]

In [106]:
# Fetch and parse every presentation page linked from the schedule (one HTTP request per link).
presentation_soups = get_presentation_soups(
    root_url=root_url,
    presentation_links=presentation_links,
)

In [107]:
# Drop pages whose speaker field is '--' (presumably non-talk schedule slots;
# verify against the site), then extract the attributes of the remaining pages.
schedule_data = list(
    map(
        get_attributes,
        filter(lambda soup: get_speaker(soup) != '--', presentation_soups),
    )
)
schedule_df = pandas.DataFrame(data=schedule_data)

In [81]:
# Copied from Google Docs: one row per speaker, tab-separated as name, slides URL, code URL.
raw = """
Ian Huston		
Graham Markall		
Sarah Bird	https://github.com/bokeh/bokeh-notebooks/tree/master/tutorial	
Peadar Coyle		https://github.com/springcoil/TutorialPyMCRugby
Tejas Khot		
Demeter Sztanko		
Chih-Chun Chen	https://github.com/cambridgecoding/pydata-tutorial	
Norberto Leite		
Sahan Bulathwela	http://nbviewer.ipython.org/gist/skimaria/94f2a09ef090f6132f62	
Yves Hilpisch		https://github.com/yhilpisch/pydlon15
Saeed Amen		
Ron Roostan		
Eddie Bell		
Romain Guillebert		
Tyler Reddy		https://github.com/tylerjereddy/py_sphere_Voronoi
Kyran Dale		
Miguel Vaz		
Ricardo Corral Corral		
Yiannis Pavlosoglou		
Kevin Keraudren	http://www.doc.ic.ac.uk/~kpk09/PyData_KevinKeraudren.pdf	
Ronert Obst		
Maria Mestre		
Alex Chamberlain		
Hendrik Heuer	https://dl.dropboxusercontent.com/u/5041011/Heuer_Hacking_Human_Language_PyData.pdf	https://github.com/h10r/topic_comparison_tool
Emma Prest		
Grigori Fursin		
Andrew Grieve		
Juan Luis Cano		
Thomas French	http://www.slideshare.net/Sandtable/pydata-london-2015-49684898	
Eric Drass		
Dylan Barth		
Thomas Greg Corcoran		
Éléonore Mayola		
Armando Vieira		
Paul Agapow		
Ajay Thampi		
Roelof Pieters		
Frank Kelly		
Camilla Montonen		
Giles Greenway		
martin goodson		
Linda Uruchurtu		
Philip Nye		
Teodora Baeva		
Tom Hunger		
Ian Ozsvald	https://speakerdeck.com/ianozsvald/ship-it-pydatalondon-2015	http://ianozsvald.com/2015/06/21/pydatalondon-2015-write-up-and-my-ship-it-talk-on-publishing-data-science-products/
James Powell		
Jeff Reback	https://github.com/jreback/pydata2015-london	https://github.com/jreback/pydata2015-london
Natalie Hockham		
Benjamin Chamberlain		
Ryan Wang	http://www.slideshare.net/wgyn/pydata-london2015	
Oleksandr Pryymak	https://docs.google.com/presentation/d/1fVMYTXcWD40aKo6_Z4iTpX2IKLZcAO13U7d5hcUN2EU/edit?usp=sharing	
Marek Mroz		
Will Franklin		
Will Usher		
Aeneas Wiener		
Arik Fraimovich		
Russel Winder		
Katie Barr		
Helena Bengtsson		
"""

In [89]:
# Turn each tab-separated row of `raw` into a {'name', 'slides', 'code'} record.
# Every row is split once and padded with empty strings, so a row whose
# trailing tabs were lost (e.g. stripped by an editor) still yields all three
# keys instead of raising IndexError. Blank lines are skipped.
content_data = [
    dict(zip(('name', 'slides', 'code'), line.split('\t') + ['', '']))
    for line in raw.strip('\n').split('\n')
    if line.strip()
]
content_df = pandas.DataFrame(content_data)

In [140]:
# One markdown bullet per talk: upper-cased time slot left-aligned in a
# 30-character column, followed by the title and the speaker string.
s_tmp = "* {Time: <30} {title} - {speaker}"
for data in schedule_data:
    s = s_tmp.format(**data)
    print(s)
    # A talk may list several speakers separated by ', '; look each one up.
    speakers = data['speaker'].split(', ')
    for speaker in speakers:
        content = content_df[content_df['name'] == speaker]
        if len(content) == 0:
            # No row for this speaker in the pasted table.
            continue
        code = content['code'].iloc[0]
        slides = content['slides'].iloc[0]
        # Empty strings mean no link was supplied; print only the links that exist.
        if slides:
            print('    + [slides]({})'.format(slides))
        if code:
            print('    + [code]({})'.format(code))

* FRIDAY 10:25 A.M.–NOON         Spark.. A View from the Trenches - Sahan Bulathwela, Maria Mestre
    + [slides](http://nbviewer.ipython.org/gist/skimaria/94f2a09ef090f6132f62)
* FRIDAY 10:25 A.M.–NOON         Getting Started with Cloud Foundry for Data Science - Ian Huston
* FRIDAY 12:10 P.M.–1:45 P.M.    Probabilistic programming in sports analytics - Peadar Coyle
    + [code](https://github.com/springcoil/TutorialPyMCRugby)
* FRIDAY 12:10 P.M.–1:45 P.M.    Getting started with Bokeh / Let's build an interactive data visualization for the web..in Python! - Sarah Bird, Bryan Van de Ven
    + [slides](https://github.com/bokeh/bokeh-notebooks/tree/master/tutorial)
* FRIDAY 2:15 P.M.–3:45 P.M.     Analysis and transformation of geospatial data using Python - Demeter Sztanko
* FRIDAY 2:15 P.M.–3:45 P.M.     Accelerating Scientific Code with Numba - Graham Markall
* FRIDAY 3:55 P.M.–5:25 P.M.     Open Source Tools for Financial Time Series Analysis and Visualization - Yves Hilpisch
    + 

* FRIDAY 10:25 A.M.–NOON         Spark.. A View from the Trenches - Sahan Bulathwela, Maria Mestre
    + [slides](http://nbviewer.ipython.org/gist/skimaria/94f2a09ef090f6132f62)
* FRIDAY 10:25 A.M.–NOON         Getting Started with Cloud Foundry for Data Science - Ian Huston
* FRIDAY 12:10 P.M.–1:45 P.M.    Probabilistic programming in sports analytics - Peadar Coyle
    + [code](https://github.com/springcoil/TutorialPyMCRugby)
* FRIDAY 12:10 P.M.–1:45 P.M.    Getting started with Bokeh / Let's build an interactive data visualization for the web..in Python! - Sarah Bird, Bryan Van de Ven
    + [slides](https://github.com/bokeh/bokeh-notebooks/tree/master/tutorial)
* FRIDAY 2:15 P.M.–3:45 P.M.     Analysis and transformation of geospatial data using Python - Demeter Sztanko
* FRIDAY 2:15 P.M.–3:45 P.M.     Accelerating Scientific Code with Numba - Graham Markall
* FRIDAY 3:55 P.M.–5:25 P.M.     Open Source Tools for Financial Time Series Analysis and Visualization - Yves Hilpisch
    + [code](https://github.com/yhilpisch/pydlon15)
* FRIDAY 3:55 P.M.–5:25 P.M.     How “good” is your model, and how can you make it better?  - Chih-Chun Chen, Dimitry Foures, Elena Chatzimichali, Giuseppe Vettigli, Raoul-Gabriel Urma
    + [slides](https://github.com/cambridgecoding/pydata-tutorial)
* SATURDAY 9 A.M.–9:50 A.M.      Keynote - How to Find Stories in Data - Helena Bengtsson
* SATURDAY 10 A.M.–10:40 A.M.    Political risk event extraction using Python and Apache Storm - Aeneas Wiener
* SATURDAY 10 A.M.–10:40 A.M.    The Dark Art of Search Relevancy - Eddie Bell
* SATURDAY 10 A.M.–10:40 A.M.    Veni, Vidi, Voronoi: Attacking Viruses using spherical Voronoi diagrams in Python - Tyler Reddy
    + [code](https://github.com/tylerjereddy/py_sphere_Voronoi)
* SATURDAY 10:50 A.M.–11:30 A.M. Using the SALib Library for Conducting Sensitivity Analyses of Models - Will Usher
* SATURDAY 10:50 A.M.–11:30 A.M. Data-visualisation with Python and Javascript: crafting a data-viz toolchain for the web - Kyran Dale
* SATURDAY 10:50 A.M.–11:30 A.M. Agent-Based Modelling, the London riots, and Python - Thomas French, Fred Farrell
    + [slides](http://www.slideshare.net/Sandtable/pydata-london-2015-49684898)
* SATURDAY 11:40 A.M.–12:20 P.M. How We Turned Everyone at Our Company into Analysts with Python and SQL - Arik Fraimovich
* SATURDAY 11:40 A.M.–12:20 P.M. Python and scikit-learn based open research SDK for collaborative data management and exchange - Grigori Fursin, Anton Lokhmotov
* SATURDAY 11:40 A.M.–12:20 P.M. The London Air Quality API - Andrew Grieve
* SATURDAY 1:20 P.M.–2:10 P.M.   Keynote: What's it Like to be a Bot? - Eric Drass
* SATURDAY 2:20 P.M.–3 P.M.      Getting Meaning from Scientific Articles - Éléonore Mayola
* SATURDAY 2:20 P.M.–3 P.M.      If It Weighs the Same as a Duck: Detecting Fraud with Python and Machine Learning - Ryan Wang
    + [slides](http://www.slideshare.net/wgyn/pydata-london2015)
* SATURDAY 2:20 P.M.–3 P.M.      PyPy, The Python Scientific Community and C extensions - Romain Guillebert
* SATURDAY 3:20 P.M.–4 P.M.      Rescuing and Exploring Complex Life Science Data - Paul Agapow
* SATURDAY 3:20 P.M.–4 P.M.      Financial Risk Management: Analytics and Aggregation with the PyData stack - Miguel Vaz
* SATURDAY 4:10 P.M.–4:50 P.M.   Smart Cars of Tomorrow: Real-Time Driving Patterns - Ronert Obst
* SATURDAY 4:10 P.M.–4:50 P.M.   Making Computations Execute Very Quickly - Russel Winder
* SATURDAY 4:10 P.M.–4:50 P.M.   A Tube Story: How can Python help us understand London's most important transportation network? - Camilla Montonen
* SATURDAY 5 P.M.–5:30 P.M.      Simulating Quantum Physics in Less Than 20 Lines of Pure Python - Katie Barr
* SATURDAY 5 P.M.–5:30 P.M.      How DataKind UK helped Citizens Advice get more from their data - Emma Prest, Billy Wong
* SATURDAY 5 P.M.–5:30 P.M.      Hyperparameter Optimisation for Machine Learning in Python: Building an automatic scientist - Thomas Greg Corcoran
* SUNDAY 9 A.M.–9:50 A.M.        Keynote: CRISP-DM: The Dominant Process for Data Mining - Meta S. Brown
* SUNDAY 10 A.M.–10:40 A.M.      A practical guide to conquering social network data - Benjamin Chamberlain, davide donato, Josh Levy-Kramer
* SUNDAY 10 A.M.–10:40 A.M.       Information Surprise or How to Find Data - Oleksandr Pryymak
    + [slides](https://docs.google.com/presentation/d/1fVMYTXcWD40aKo6_Z4iTpX2IKLZcAO13U7d5hcUN2EU/edit?usp=sharing)
* SUNDAY 10 A.M.–10:40 A.M.      Localising Organs of the Fetus in MRI Data Using Python - Kevin Keraudren
    + [slides](http://www.doc.ic.ac.uk/~kpk09/PyData_KevinKeraudren.pdf)
* SUNDAY 10:50 A.M.–11:30 A.M.   Our Data, Ourselves - Giles Greenway
* SUNDAY 10:50 A.M.–11:30 A.M.   Python for Image and Text Understanding: One Model to rule them all! - Roelof Pieters
* SUNDAY 10:50 A.M.–11:30 A.M.   Collect and Visualise Metrics With InfluxDB and Grafana - Marek Mroz
* SUNDAY 11:40 A.M.–12:20 P.M.   Jointly Embedding knowledge from large graph databases with textual data using deep learning   - Armando Vieira
* SUNDAY 11:40 A.M.–12:20 P.M.   Ship It! - Ian Ozsvald
    + [slides](https://speakerdeck.com/ianozsvald/ship-it-pydatalondon-2015)
    + [code](http://ianozsvald.com/2015/06/21/pydatalondon-2015-write-up-and-my-ship-it-talk-on-publishing-data-science-products/)
* SUNDAY 11:40 A.M.–12:20 P.M.   Constructing protein structural features for Machine Learning.  - Ricardo Corral Corral
* SUNDAY 1:20 P.M.–2 P.M.        A Fast, Offline Reverse Geocoder in Python - Ajay Thampi
* SUNDAY 1:20 P.M.–2 P.M.        Performance Pandas - Jeff Reback
    + [slides](https://github.com/jreback/pydata2015-london)
    + [code](https://github.com/jreback/pydata2015-london)
* SUNDAY 1:20 P.M.–2 P.M.        Hacking Human Language - Hendrik Heuer
    + [slides](https://dl.dropboxusercontent.com/u/5041011/Heuer_Hacking_Human_Language_PyData.pdf)
    + [code](https://github.com/h10r/topic_comparison_tool)
* SUNDAY 2:10 P.M.–2:50 P.M.     Hierarchical Data Clustering in Python - Frank Kelly
* SUNDAY 2:10 P.M.–2:50 P.M.     Sudo Make me a (London) Map - Linda Uruchurtu
* SUNDAY 2:10 P.M.–2:50 P.M.     Jupyter (IPython): how a notebook is changing science - Juan Luis Cano
* SUNDAY 3:05 P.M.–3:45 P.M.     A Beginner's Guide to Building Data Pipelines with Luigi - Dylan Barth, Stuart Coleman
* SUNDAY 3:05 P.M.–3:45 P.M.     Machine Learning with Imbalanced Data Sets - Natalie Hockham
* SUNDAY 3:05 P.M.–3:45 P.M.     Integration with the Vernacular - James Powell
* SUNDAY 3:55 P.M.–4:35 P.M.     Defining Degrees of Separation in Data Classifications Using Predictive Modelling  - Yiannis Pavlosoglou, Adam Reviczky, Neri Van Otten
* SUNDAY 3:55 P.M.–4:35 P.M.     NLP on a Billion Documents: Scalable machine learning with spark - martin goodson
* SUNDAY 3:55 P.M.–4:35 P.M.     Deploying a Model to Production - Alex Chamberlain