In [1]:
from pathlib import Path
import re
import urllib

from bs4 import BeautifulSoup
import requests

import pandas as pd

In [2]:
from pyapacweb import SiteConnector

In [6]:
# Log in to the PyCon APAC 2015 site (English pages) and display the
# response so the login status can be checked by eye in the cell output.
# NOTE(review): presumably `keychain_pth` points at a local credentials
# file -- confirm it is kept out of version control.
site = SiteConnector(url_base='https://tw.pycon.org/2015apac', lang='en')
r = site.login(keychain_pth='./.web_keychain')
r

<Response [200]>

In [7]:
# URL of the page that lists every proposal open for review.
proposal_review_url = 'https://tw.pycon.org/2015apac/en/proposal_review/proposals'

In [8]:
# Fetch the proposal listing through the logged-in session.
# NOTE(review): `_session` is a private attribute of SiteConnector; prefer a
# public accessor if the connector offers one.
r = site._session.get(proposal_review_url)
# Surface HTTP errors here instead of parsing an error page below.
# Assumes the session returns a requests.Response -- TODO confirm.
r.raise_for_status()
# Name the parser explicitly: without it BeautifulSoup picks whichever parser
# happens to be installed, so results can differ between machines (and it
# emits a warning about the guessed parser).
soup = BeautifulSoup(r.content, 'html.parser')

In [9]:
# The review listing is taken to be the first <table> on the page.
table = soup.find('table')
if table is None:
    # `find` returns None when nothing matches; stop here with a clear message
    # rather than failing later while parsing the string "None" as HTML.
    raise RuntimeError(
        'No <table> found on the proposal review page; '
        'the login may have failed or the page layout changed.'
    )

In [40]:
def make_review_table(table):
    """Convert the proposal-review HTML table into a tidy DataFrame.

    Parameters
    ----------
    table : object
        Anything whose ``str()`` is the HTML of one ``<table>`` element
        (for example a BeautifulSoup ``Tag``).

    Returns
    -------
    pandas.DataFrame
        The columns ``ID``, ``Author``, ``Lang``, ``Title``, ``Speech Type``,
        ``Reviews``, ``Sum. Rank`` and ``Avg. Rank`` from the parsed table,
        plus ``Review Status``: ``True`` where the ``Action`` cell reads
        'Update', ``False`` where it reads 'Review', NaN otherwise.
        The three numeric columns are returned as float, with the
        placeholder '－－－' turned into NaN.

    Raises
    ------
    KeyError
        If the parsed table lacks one of the expected columns.
    """
    # Local import keeps this cell runnable on its own.
    from io import StringIO

    missing_marker = '－－－'   # placeholder the site prints for "no value yet"
    nan = float('nan')       # replaces `pd.np.nan`; `pd.np` was removed from pandas
    kept_cols = ['ID', 'Author', 'Lang', 'Title', 'Speech Type',
                 'Reviews', 'Sum. Rank', 'Avg. Rank']
    numeric_cols = ['Reviews', 'Sum. Rank', 'Avg. Rank']
    status_by_action = {'Update': True, 'Review': False}

    def convert_col(col, dtype):
        # Work on an object-dtype copy so NaN can be stored whatever dtype
        # the HTML parser inferred for the column.
        cleaned = col.astype(object)
        cleaned[cleaned == missing_marker] = nan
        return cleaned.astype(dtype)

    # Wrap the markup in a file-like object: passing literal HTML text to
    # read_html is deprecated in recent pandas releases.
    df = pd.read_html(StringIO(str(table)))[0]

    # Explicit copy so the column assignments below never write into `df`.
    df_out = df.loc[:, kept_cols].copy()

    # Action labels missing from the mapping become NaN.
    df_out['Review Status'] = df['Action'].map(status_by_action)

    # All three columns stay float: a plain integer column cannot hold the
    # NaN used for missing values. (The original zipped in `int` for two of
    # them but never used it, so the returned dtypes are unchanged.)
    for col in numeric_cols:
        df_out[col] = convert_col(df_out[col], float)

    return df_out

In [43]:
def get_review_url(talk_id):
    """Return the review-page URL for the proposal with ID ``talk_id``."""
    url_template = 'https://tw.pycon.org/2015apac/en/proposal_review/review/{}/'
    return url_template.format(talk_id)

In [41]:
# Build the proposal overview DataFrame from the scraped listing table.
df_talks = make_review_table(table)

In [45]:
# Preview the first rows only, rather than dumping the whole table.
df_talks.head()

Unnamed: 0,ID,Author,Lang,Title,Speech Type,Reviews,Sum. Rank,Avg. Rank,Review Status
0,106,,English,test,Regular Talk,7,20,2.86,True
1,104,Scott Tsai,English,Programmatic Debugging with GDB and Python,Regular Talk,2,5,2.5,False
2,103,Summit Suen,English,"Play Data, Play Ball!",Regular Talk,3,12,4.0,False
3,102,Ko-Lung Yuan,Mandarin,Python與電子設計自動化：用愛與堅持實現專業,Regular Talk,6,26,4.33,True
4,101,Chia-Chi Chang,English,How to build a recommendation system with python,Regular Talk,3,10,3.33,False


In [47]:
# Proposal whose review page is inspected in the cells below.
review_talk_id = 46

# Fetch that proposal's review page through the logged-in session.
# NOTE(review): `_session` is a private attribute of SiteConnector; prefer a
# public accessor if the connector offers one.
r = site._session.get(get_review_url(review_talk_id))
# Surface HTTP errors here instead of parsing an error page below.
# Assumes the session returns a requests.Response -- TODO confirm.
r.raise_for_status()
# Name the parser explicitly so parsing does not depend on which optional
# parser libraries happen to be installed.
soup = BeautifulSoup(r.content, 'html.parser')

In [52]:
import textwrap
import operator

In [57]:
# Collect the text of each <pre> inside the proposal's info list. Exactly five
# are expected; the first one is discarded.
pre_texts = [pre.text for pre in soup.select('dl.dl-info dd > pre')]
_, tags, abstract, description, reference = pre_texts

In [68]:
# Re-flow long proposal text to 70 columns, leaving the original whitespace
# characters (including newlines) unreplaced.
wrapper_options = {'width': 70, 'replace_whitespace': False}
wrapper = textwrap.TextWrapper(**wrapper_options)

In [71]:
# Show the abstract re-flowed by `wrapper`; as a bare expression the cell
# displays the string repr, so line breaks appear as "\n".
wrapper.fill(abstract)

'With the scope of cloud services grow from local to worldwide, the\nneed of hosting cloud services across IaaS providers emerged. This\nbrings new challenges to cloud services monitor and managements. The\nCcloud is our solution to provide a unified user interface for cloud\nservice managements across IaaS providers. Moreover, the Ccloud serves\nas an automation service management utility that greatly improves the\nefficiency and minimizes the possible operation errors.'

['Dealing with cloudy days: managing cloud services across IaaS providers in Python',
 '          by',
 '          Sean Hsueh']