In [1]:
from pathlib import Path
import re
import urllib

from bs4 import BeautifulSoup
import requests

import pandas as pd

In [2]:
import pyapacweb

In [3]:
# login
site = pyapacweb.SiteConnector(url_base='https://tw.pycon.org/2015apac', lang='en')
r = site.login(keychain_pth='./.web_keychain')
r

<Response [200]>

In [4]:
proposal_review_url = (
    r'https://tw.pycon.org/2015apac/en/'
    r'proposal_review/proposals'
)
r = site._session.get(proposal_review_url)
soup = BeautifulSoup(r.content)
table = soup.find('table')
df_talks = pyapacweb.make_review_table(table)

In [5]:
wanted_columns = ['ID', 'Author', 'Lang', 'Title', 'Speech Type']

In [6]:
accepted_talks = df_talks[df_talks['Decision'] == 'Accepted'].loc[:, wanted_columns]

In [7]:
undecided_talks = df_talks[~df_talks.Decision.isin(['Accepted', 'Rejected'])].loc[:, wanted_columns]

In [8]:
accepted_talks.groupby('Speech Type').size()

Speech Type
Lightning Talk                7
Regular Talk                 36
SciPy Talk (English Only)     5
Tutorial                      3
Workshop                      4
dtype: int64

In [10]:
undecided_talks

Unnamed: 0,ID,Author,Lang,Title,Speech Type
22,50,Takayuki Shimizukawa,English,Easy contributable internationalizatoin proces...,Regular Talk
23,88,Adrian Liaw,Mandarin,國中打拍 自學經驗談,Regular Talk
37,34,ChunHan Lai,Mandarin,雲端語音合成技術應用於長篇文章音文同步有聲書之建立,Regular Talk
39,27,Apua Juan,Mandarin,....when thinking functionally with Python,Regular Talk
40,63,Victor Gau,Mandarin,Creating Map Applications Using Python,Regular Talk


In [12]:
accepted_talks.head()

Unnamed: 0,ID,Author,Lang,Title,Speech Type
0,84,Tzu-ping Chung,Mandarin,UnicodeEncodeError: can't encode character in ...,Regular Talk
1,56,Melvin Foo Hannie Ching Tan Le Xuan,English,RPyScan,Regular Talk
2,24,Liang Bo Wang,Mandarin,Python HDF5 Use Case,Regular Talk
3,25,Dan Maas,Mandarin,MMO Game Server Design with Twisted Python,Regular Talk
4,91,Chia-Yi Yen,English,Millions way to avoid overfitting when buildin...,Regular Talk


In [20]:
grouped = accepted_talks.sort(['Speech Type', 'Lang', 'Author', 'Title']).groupby(['Speech Type'])

In [28]:
from jinja2 import Template

In [39]:
talk_table_tpl = Template('''\
<table>
  <thead>
    <tr>
      <th>ID</th> <th>Speaker</th> <th>Lang</th> <th>Title</th>
    </tr>
  </thead>
  <tbody>
    {% for _, talk in talks %}
    <tr>
      <td>{{ talk.ID }}</td> <td>{{ talk.Author }}</td> <td>{{ talk.Lang }}</td> <td>{{ talk.Title }}</td>
    </tr>
    {% endfor %}
  </tbody>
</table>
''')

In [42]:
regular_talks = grouped.get_group('Regular Talk')
scipy_talks = grouped.get_group('SciPy Talk (English Only)')
lightning_talks = grouped.get_group('Lightning Talk')

In [50]:
def create_talk_table(talk_df):
    rendered_html = talk_table_tpl.render(talks=talk_df.iterrows())
    return '\n'.join(
        line for line in rendered_html.splitlines() if line.strip()  # clean trailing whitespace
    )

In [54]:
create_talk_table(lightning_talks)

"<table>\n  <thead>\n    <tr>\n      <th>ID</th> <th>Speaker</th> <th>Lang</th> <th>Title</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <td>100</td> <td>Chih-Cheng Liang</td> <td>English</td> <td>Learning machine learning Python agilely</td>\n    </tr>\n    <tr>\n      <td>23</td> <td>Chiu-Hsiang Hsu</td> <td>English</td> <td>CPython's bug in feature that nobody uses</td>\n    </tr>\n    <tr>\n      <td>51</td> <td>Lee Yang Peng</td> <td>English</td> <td>Network Security and Analysis system with Python</td>\n    </tr>\n    <tr>\n      <td>75</td> <td>Shunsuke Hida</td> <td>English</td> <td>The Effectiveness of Unit Tests in Django Backend Engineering of Large-scale Web Service</td>\n    </tr>\n    <tr>\n      <td>33</td> <td>Albert Huang</td> <td>Mandarin</td> <td>Preprocessing Chinese Characters for LaTeX: pycwtex</td>\n    </tr>\n    <tr>\n      <td>28</td> <td>Apua Juan</td> <td>Mandarin</td> <td>開發小工具的心得</td>\n    </tr>\n    <tr>\n      <td>44</td> <td>meng-hao chung</td>