In [1]:
import requests
import pandas as pd
from bs4 import BeautifulSoup
from pathlib import Path

In [2]:
# Disable cell text truncation so long titles/prerequisites are shown in full.
# `None` means "no limit"; the older `-1` spelling is deprecated in pandas 1.0
# and rejected by newer releases.
pd.set_option('display.max_colwidth', None)

In [3]:
# Markdown output files (plain table, and table with a prerequisites column)
md_file_path = Path('output') / 'amld_workshop_schedule.md'
md_file_path_w_prereqs = Path('output') / 'amld_workshop_schedule_w_prereqs.md'

# Site root and the overview page that lists all workshops
base_url = 'https://www.appliedmldays.org'
front_url = f'{base_url}/workshops.html'

# Fetch the overview page. A timeout keeps the cell from hanging forever and
# raise_for_status() makes an HTTP error fail here instead of producing an
# error page that is silently parsed as if it were the workshop list.
r = requests.get(front_url, timeout=30)
r.raise_for_status()
# Name the parser explicitly so the parse result does not depend on which
# optional parser libraries happen to be installed.
front_soup = BeautifulSoup(r.text, 'html.parser')

In [39]:
def get_time_from_soup(soup):
    """Extract the time span and date from a workshop page's masthead heading.

    The text of the `<h1>` inside `.masthead-text` is read after emptying its
    `<strong>` child (if there is one), then split on whitespace. The last two
    tokens are returned as the date; any tokens before them are concatenated
    without separators and returned as the time.

    Note: the `<strong>` element of the passed soup is cleared in place.

    Args:
        soup: Parsed workshop page (BeautifulSoup-like object).

    Returns:
        A `(time, date)` tuple of strings. `time` is `''` when the heading
        holds two tokens or fewer, in which case `date` is the whole text.
    """
    h1 = soup.select_one('.masthead-text').find('h1')
    strong = h1.find('strong')
    # The <strong> part is not part of the time/date text; drop it when present.
    # Guarding against None avoids an AttributeError on headings without it.
    if strong is not None:
        strong.clear()
    time_str = h1.text.strip()
    parts = time_str.split()
    if len(parts) > 2:
        # Joining with '' collapses e.g. "09:00 - 12:00" into "09:00-12:00".
        return ''.join(parts[:-2]), ' '.join(parts[-2:])
    return '', time_str

# Three-character markers used to draw one half of the day in the timeline.
free = 3 * '-'
busy = 3 * '█'


def get_timeline_blocks(time):
    """Render a time span such as '09:00-12:00' as a two-part timeline string.

    Only the hour of each end of the span is used. The first part is `busy`
    when the start hour is 10 or earlier, the second part is `busy` when the
    end hour is 13 or later; otherwise the respective part is `free`.

    Args:
        time: String of the form 'HH:MM-HH:MM'.

    Returns:
        The concatenation of the two parts.

    Raises:
        ValueError: If `time` does not consist of exactly two '-'-separated
            parts, or an hour is not an integer.
    """
    first, last = time.split('-')
    start_hour = int(first.partition(':')[0])
    end_hour = int(last.partition(':')[0])

    morning = busy if start_hour <= 10 else free
    afternoon = busy if end_hour >= 13 else free
    return morning + afternoon

def get_level_from_soup(soup):
    """Return the first word of the `.badge-grey` element's text.

    Args:
        soup: Parsed workshop page (BeautifulSoup-like object).

    Returns:
        The first whitespace-separated word of the badge text, or `''` when
        the page has no such badge or the badge contains no text.
    """
    badge = soup.select_one('.badge-grey')
    if badge is None:
        return ''
    words = badge.text.split()
    # An empty badge would otherwise raise IndexError on words[0].
    return words[0] if words else ''

def get_prerequisites_from_soup(soup):
    """Collect the list items that follow the 'Prerequisites' heading.

    Looks for an `<h3>` whose string is exactly 'Prerequisites', takes the
    next `<ul>` after it, and returns the stripped text of each `<li>`.

    Args:
        soup: Parsed workshop page (BeautifulSoup-like object).

    Returns:
        A list of strings; empty when the heading or the list is missing.
    """
    # `string=` and the snake_case finders replace the deprecated `text=`
    # argument and the findNext/findAll aliases.
    heading = soup.find('h3', string='Prerequisites')
    if heading is None:
        return []
    item_list = heading.find_next('ul')
    if item_list is None:
        return []
    return [li.text.strip() for li in item_list.find_all('li')]
    
def get_prerequisite_html_from(prerequisites):
    """Format prerequisites as an HTML unordered list.

    Each entry is prefixed with ' - ' and wrapped in its own `<li>`.

    Args:
        prerequisites: Sequence of prerequisite strings (may be empty/None).

    Returns:
        The `<ul>...</ul>` markup, or `''` when there are no prerequisites.
    """
    if prerequisites:
        items = [f' - {entry}' for entry in prerequisites]
        return '<ul><li>' + '</li><li>'.join(items) + '</li></ul>'
    return ''

def generate_rows():
    """Yield one row of schedule data per workshop linked from the front page.

    For every `.track-name` element in `front_soup`, the parent element's
    `href` is resolved against `base_url`, the workshop page is downloaded,
    and its time, date, level and prerequisites are extracted.

    Yields:
        Tuples of `(timeline, time, date, title, level, prerequisites, url,
        is_full_day)`.

    Raises:
        ValueError: If a workshop entry has no link.
        requests.HTTPError: If a workshop page responds with an error status.
    """
    for workshop_title in front_soup.find_all(class_='track-name'):
        title = workshop_title.text
        a = workshop_title.parent
        href = a.attrs.get('href')
        if not href:
            # Fail loudly instead of requesting a bogus '<base_url>None' URL.
            raise ValueError(f'No link found for workshop {title!r}')
        url = f'{base_url}{href}'

        response = requests.get(url, timeout=30)
        # Do not parse an error page as if it were a workshop page.
        response.raise_for_status()
        # Explicit parser: result must not depend on installed parser libraries.
        ws_soup = BeautifulSoup(response.text, 'html.parser')

        time, date = get_time_from_soup(ws_soup)
        level = get_level_from_soup(ws_soup)
        prerequisites = get_prerequisites_from_soup(ws_soup)
        timeline = get_timeline_blocks(time)
        # Full day means both halves are busy. Comparing only the first and
        # last character would also flag a workshop whose halves are both free.
        is_full_day = timeline == busy + busy
        yield timeline, time, date, title, level, prerequisites, url, is_full_day

In [40]:
# bss = '▀ ▁ ▂ ▃ ▄ ▅ ▆ ▇ █ ▉ ▊ ▋ ▌ ▍ ▎ ▏'.split()
# bn = '█ ▀ ▄'

# for bi, b in enumerate(bss):
#     print(bn)
#     print(b * 5, bi, b)
#     print('')


In [41]:
# Scrape every workshop page and collect the rows into one frame.
# NOTE: the 'Prerequsites' spelling is kept as-is because later cells read
# the column under exactly this name.
df = pd.DataFrame(
    data=generate_rows(),
    columns=[
        'Timeline',
        'Time',
        'Date',
        'Title',
        'Level',
        'Prerequsites',
        'Link',
        'FullDay',
    ],
)

In [43]:
# Sorted copy used for the generated tables: by date first, then FullDay
# (False sorts before True), then time and level.
sdf = df.sort_values(
    by=['Date', 'FullDay', 'Time', 'Level'],
)
sdf.head()

Unnamed: 0,Timeline,Time,Date,Title,Level,Prerequsites,Link,FullDay
8,███---,09:00-12:00,January 26,ML in your organization: a practical toolbox to identify and seize highest value opportunities in Machine Learning,Beginner,[],https://www.appliedmldays.org/workshops/ml-in-your-organization-a-practical-toolbox-to-identify-and-seize-highest-value-opportunities-in-machine-learning,False
10,███---,09:00-12:00,January 26,Tutorial: Build your first predictive model to forecast and detect anomalies,Beginner,[],https://www.appliedmldays.org/workshops/tutorial-build-your-first-predictive-model-to-forecast-and-detect-anomalies,False
6,███---,09:00-12:00,January 26,Hands-on deep learning with TensorFlow.js,Intermediate,"[experience with machine learning in Python or JS, or in building web applications with JavaScript]",https://www.appliedmldays.org/workshops/hands-on-deep-learning-with-tensorflow-js,False
7,███---,09:00-12:00,January 26,Data Augmentation and Segmentation with Generative Networks for Medical Imaging,Intermediate,"[You must bring your own laptop in order to run the training., A current browser is needed. For optimal performance, Chrome, Firefox or Safari for Macs are recommended. IE is operational but does not provide the best performance., Create an account at https://courses.nvidia.com/join, Ensure your laptop will run smoothly by going to http://websocketstest.com/, Make sure that WebSockets work for you by seeing under Environment, WebSockets is supported and Data Receive, Send and Echo Test all check Yes under WebSockets (Port 80)., If there are issues with WebSockets, try updating your browser.]",https://www.appliedmldays.org/workshops/data-augmentation-and-segmentation-with-generative-networks-for-medical-imaging,False
9,███---,09:00-12:00,January 26,PySpark: Big Data Processing and Machine Learning with Python,Intermediate,"[be familiar with Pandas and Scikit-learn libraries of Python, laptop with Jupyter notebook or Jupyterlab already installed, it is highly recommended that participants install Pyspark in advance (recommended way of installation is via Anaconda).]",https://www.appliedmldays.org/workshops/pyspark-big-data-processing-and-machine-learning-with-python,False


In [44]:
# Markdown preamble shared by both generated files. The backslashes before the
# square brackets are doubled so the string contains a literal `\[` / `\]`
# without relying on an invalid escape sequence (which triggers a warning).
markdown_str = f"""
## Applied Machine Learning Days
Table of workshops generated from [the overview page][ws]. 

Versions: [normal][n] | [with prerequisites][wp] 



\\[ For the curious on [how it was created][jnb] \\]

[n]: ./{md_file_path.name}
[wp]: ./{md_file_path_w_prereqs.name}
[ws]: https://www.appliedmldays.org/workshops.html
[jnb]: https://nbviewer.jupyter.org/github/fauskanger/public/blob/master/AppliedMachineLearningDaysWorkshopTableGenerator.ipynb

"""

In [45]:
def create_md_table():
    """Build the Markdown schedule table from the sorted frame `sdf`.

    Returns:
        A string with a header row, a delimiter row and one table row per
        workshop (timeline, time, date, level, and the title linked to the
        workshop page).
    """
    def cell(value):
        # A newline ends a table row and a bare '|' starts a new cell, so
        # collapse all whitespace runs and escape pipes in scraped text.
        return ' '.join(str(value).split()).replace('|', '\\|')

    lines = [
        '| Timeline | Time | Date | Level | Title \n',
        '|---|---|---|---|---|\n',
    ]
    for r in sdf.itertuples(index=False):
        lines.append(
            f'| `{r.Timeline}` | {r.Time} | {r.Date} | {r.Level} | [{cell(r.Title)}]({r.Link})| \n'
        )
    # Join once instead of growing a string with += inside the loop.
    return ''.join(lines)

def create_md_table_with_prereqs(use_br=True):
    """Build the Markdown schedule table including a prerequisites column.

    Args:
        use_br: When true, each prerequisite is prefixed with ' - ' and the
            entries are separated by `<br/>`; otherwise they are separated
            by ' :: '. In both cases the first character of each entry is
            upper-cased.

    Returns:
        A string with a header row, a delimiter row and one table row per
        workshop in `sdf`.
    """
    def cell(value):
        # A newline ends a table row and a bare '|' starts a new cell, so
        # collapse all whitespace runs and escape pipes in scraped text.
        return ' '.join(str(value).split()).replace('|', '\\|')

    def capitalized(text):
        # Upper-case only the first character; leave the rest untouched.
        return text[:1].upper() + text[1:]

    lines = [
        '| Timeline | Time | Date | Level | Title | Prerequisites \n',
        '|---|---|---|---|---|---|\n',
    ]
    for r in sdf.itertuples(index=False):
        entries = [capitalized(cell(k)) for k in r.Prerequsites]
        if use_br:
            prereqs = '<br/>'.join(' - ' + entry for entry in entries)
        else:
            prereqs = ' :: '.join(entries)
        lines.append(
            f'| `{r.Timeline}` | {r.Time} | {r.Date} | {r.Level} | [{cell(r.Title)}]({r.Link}) | {prereqs} | \n'
        )
    # Join once instead of growing a string with += inside the loop.
    return ''.join(lines)

In [46]:
# Create the output folder(s) first; write_text() raises FileNotFoundError
# when the parent directory does not exist (e.g. on a fresh checkout).
md_file_path.parent.mkdir(parents=True, exist_ok=True)
md_file_path_w_prereqs.parent.mkdir(parents=True, exist_ok=True)

# Write both variants; each file starts with the shared Markdown preamble.
md_file_path.write_text(markdown_str + create_md_table(), encoding='utf8')
md_file_path_w_prereqs.write_text(markdown_str + create_md_table_with_prereqs(), encoding='utf8')

11137