In [1]:
from pathlib import Path
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup

In [2]:
# Disable cell text truncation so long titles and URLs are shown in full.
# None is the supported "no limit" value; the older -1 sentinel was deprecated
# in pandas 1.0 and is rejected by pandas 2.x.
pd.set_option('display.max_colwidth', None)

In [3]:
# Markdown output file
md_file_path = Path('output') / 'amld_workshop_schedule.md'

# Site root and the overview page that links to every workshop page.
base_url = 'https://www.appliedmldays.org'
front_url = f'{base_url}/workshops.html'

# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() stops the notebook here instead of parsing an error page.
r = requests.get(front_url, timeout=30)
r.raise_for_status()

# Name the parser explicitly: without it Beautiful Soup picks whichever parser
# happens to be installed (and warns), so results could differ per machine.
front_soup = BeautifulSoup(r.text, 'html.parser')

In [4]:
def get_time_from_soup(soup):
    """Extract the time slot and date from a workshop page.

    The text of the ``<h1>`` inside the ``.masthead-text`` element is split on
    whitespace. The last two tokens are returned as the date (for example
    ``'January 26'``); any tokens before them are concatenated without a
    separator and returned as the time (for example ``'09:00-12:00'``).

    Note: the contents of the ``<strong>`` inside the heading are emptied in
    place so they do not end up in the extracted text. This mutates ``soup``.

    Args:
        soup: Parsed workshop page (a ``BeautifulSoup`` object or anything
            offering the same ``select_one`` / ``find`` / ``text`` interface).

    Returns:
        A ``(time, date)`` tuple of strings. ``time`` is ``''`` when the
        heading holds at most two tokens; both are ``''`` when the page has no
        masthead heading at all.
    """
    masthead = soup.select_one('.masthead-text')
    h1 = masthead.find('h1') if masthead is not None else None
    if h1 is None:
        # Page layout differs from what is expected: report "unknown" rather
        # than crashing the whole scrape with an AttributeError.
        return '', ''

    strong = h1.find('strong')
    if strong is not None:
        strong.clear()

    time_str = h1.text.strip()
    parts = time_str.split()
    if len(parts) > 2:
        return ''.join(parts[:-2]), ' '.join(parts[-2:])
    return '', time_str

def get_level_from_soup(soup):
    """Return the first word of the ``.badge-grey`` element's text.

    In the scraped data this word is the difficulty level (for example
    ``'Beginner'`` or ``'Intermediate'``).

    Args:
        soup: Parsed workshop page (a ``BeautifulSoup`` object or anything
            offering the same ``select_one`` interface).

    Returns:
        The first whitespace-separated token of the badge text, or ``''`` when
        the page has no such badge or the badge contains no text.
    """
    badge = soup.select_one('.badge-grey')
    if badge is None:
        return ''
    words = badge.text.split()
    # An empty badge would otherwise raise IndexError on words[0].
    return words[0] if words else ''

def generate_rows():
    """Yield one ``(time, date, title, level, url)`` tuple per workshop.

    Walks every ``track-name`` element of the module-level ``front_soup``,
    follows the link on its parent element to the workshop's own page, and
    extracts time, date and level from that page.

    Yields:
        Tuples of five strings: time, date, title, level, absolute URL.

    Raises:
        requests.HTTPError: If a workshop page answers with an error status.
        requests.RequestException: On connection problems or timeouts.
    """
    # One session for all pages lets the connection to the host be reused.
    with requests.Session() as session:
        for workshop_title in front_soup.find_all(class_='track-name'):
            # The parent of the title element is expected to be the <a> tag.
            href = workshop_title.parent.attrs.get('href')
            if not href:
                # No link to follow; previously this requested ".../None".
                continue

            # urljoin copes with "/path", "path" and already-absolute hrefs.
            url = urljoin(base_url, href)

            response = session.get(url, timeout=30)
            response.raise_for_status()
            ws_soup = BeautifulSoup(response.text, 'html.parser')

            time, date = get_time_from_soup(ws_soup)
            level = get_level_from_soup(ws_soup)
            yield time, date, workshop_title.text, level, url

In [5]:
# Run the scrape to completion, then load the tuples into a DataFrame whose
# column order matches the order in which generate_rows() yields its fields.
scraped_rows = list(generate_rows())
df = pd.DataFrame(
    scraped_rows,
    columns=['Tid', 'Dato', 'Tittel', 'Level', 'Link'],
)

In [6]:
# Order the schedule by date, then time slot, then level. All three columns
# hold plain strings, so the ordering is lexicographic.
sort_columns = ['Dato', 'Tid', 'Level']
sdf = df.sort_values(by=sort_columns)
sdf

Unnamed: 0,Tid,Dato,Tittel,Level,Link
1,,January 26,ML in your organization: a practical toolbox to identify and seize highest value opportunities in Machine Learning,Beginner,https://www.appliedmldays.org/workshops/ml-in-your-organization-a-practical-toolbox-to-identify-and-seize-highest-value-opportunities-in-machine-learning
3,,January 26,Tutorial: Build your first predictive model to forecast and detect anomalies,Beginner,https://www.appliedmldays.org/workshops/tutorial-build-your-first-predictive-model-to-forecast-and-detect-anomalies
4,,January 26,Reatching into the Rabbit Hole: Should we replace teachers with AI?,Beginner,https://www.appliedmldays.org/workshops/reatching-into-the-rabbit-hole-should-we-replace-teachers-with-ai
0,,January 26,Hands-on deep learning with TensorFlow.js,Intermediate,https://www.appliedmldays.org/workshops/hands-on-deep-learning-with-tensorflow-js
5,,January 26,Engineering for good - detecting pneumonia in X-Ray images,Intermediate,https://www.appliedmldays.org/workshops/engineering-for-good-detecting-pneumonia-in-x-ray-images
2,09:00-12:00,January 26,PySpark: Big Data Processing and Machine Learning with Python,Intermediate,https://www.appliedmldays.org/workshops/pyspark-big-data-processing-and-machine-learning-with-python
9,09:00-16:30,January 26,Data exploration and preparation for Machine Learning,Beginner,https://www.appliedmldays.org/workshops/data-exploration-and-preparation-for-machine-learning
8,09:00-16:30,January 26,TDA crash course: theory and practice for ML applications,Intermediate,https://www.appliedmldays.org/workshops/tda-crash-course-theory-and-practice-for-ml-applications
10,09:00-16:30,January 26,Learning and Processing over Networks,Intermediate,https://www.appliedmldays.org/workshops/learning-and-processing-over-networks
11,09:00-16:30,January 26,Applied Machine Learning for Anomaly Detection on Equipment,Intermediate,https://www.appliedmldays.org/workshops/applied-machine-learning-for-anomaly-detection-on-equipment


In [7]:
# Header of the generated Markdown file. A raw string is used because the
# backslashes that escape the literal square brackets for Markdown must reach
# the output unchanged, and "\[" is not a valid escape sequence in a normal
# Python string literal (it triggers a Deprecation/SyntaxWarning).
markdown_str = r"""
## Applied Machine Learning Days
Table of workshops generated from [the overview page][ws].

\[ For the curious on [how it was created][jnb] \]

[ws]: https://www.appliedmldays.org/workshops.html
[jnb]: https://nbviewer.jupyter.org/github/fauskanger/public/blob/master/AppliedMachineLearningDaysWorkshopTableGenerator.ipynb

"""

In [8]:
def _md_cell(value):
    """Return ``value`` as text that is safe inside one Markdown table cell.

    Runs of whitespace (including newlines) are collapsed to single spaces and
    pipe characters are backslash-escaped, since either would otherwise break
    the row into the wrong number of columns.
    """
    return ' '.join(str(value).split()).replace('|', '\\|')


# Collect the lines first and join once, instead of growing a string in a loop.
md_lines = [
    '| Tid | Dato | Level | Tittel |',
    '|---|---|---|---|',
]

# itertuples() exposes each row as a namedtuple keyed by column name.
for row in sdf.itertuples(index=False):
    title_link = f'[{_md_cell(row.Tittel)}]({row.Link})'
    md_lines.append(
        f'| {_md_cell(row.Tid)} | {_md_cell(row.Dato)} | {_md_cell(row.Level)} | {title_link} |'
    )

# Every line, including the last, ends with a newline.
md_table = '\n'.join(md_lines) + '\n'

In [9]:
# Create the target directory if needed so a fresh checkout can run the
# notebook top to bottom without a FileNotFoundError.
md_file_path.parent.mkdir(parents=True, exist_ok=True)

# Write as UTF-8 explicitly: the default encoding is platform dependent and
# scraped titles may contain non-ASCII characters.
md_file_path.write_text(f'{markdown_str}{md_table}', encoding='utf-8')

5282