In [70]:
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup

In [90]:
# Disable cell text truncation so long titles and URLs are displayed in full.
# `None` means "no limit"; the older `-1` sentinel was deprecated in pandas 1.0
# and is rejected by newer releases.
pd.set_option('display.max_colwidth', None)

In [71]:
base_url = 'https://www.appliedmldays.org'
front_url = f'{base_url}/workshops.html'

# Fetch the workshop overview page. The timeout keeps the cell from hanging
# forever on a stalled connection, and raise_for_status() makes an HTTP error
# fail here instead of an error page being parsed as if it were the listing.
r = requests.get(front_url, timeout=30)
r.raise_for_status()
# Name the parser explicitly so the parse result does not depend on which
# optional parsers (lxml, html5lib) happen to be installed.
front_soup = BeautifulSoup(r.text, 'html.parser')

In [82]:
def get_time_from_soup(soup):
    """Extract the ``(time, date)`` strings from a workshop page heading.

    The heading is the ``<h1>`` inside the element with class
    ``masthead-text``. The contents of its ``<strong>`` child are cleared
    first (presumably a label that is not part of the schedule -- confirm
    against the page markup), and the remaining text is split on whitespace.
    The last two tokens are taken as the date; any tokens before them are
    concatenated without separators to form the time.

    Note: clearing the ``<strong>`` element modifies ``soup`` in place.

    Args:
        soup: Parsed workshop page (a BeautifulSoup object or anything with a
            compatible ``select_one`` / ``find`` / ``text`` interface).

    Returns:
        A ``(time, date)`` tuple of strings. ``time`` is ``''`` when the
        heading holds at most two tokens. Both are ``''`` when the page has
        no masthead heading at all.
    """
    masthead = soup.select_one('.masthead-text')
    h1 = masthead.find('h1') if masthead is not None else None
    if h1 is None:
        # No heading to parse; report empty fields instead of raising
        # AttributeError on None.
        return '', ''

    strong = h1.find('strong')
    if strong is not None:
        strong.clear()

    parts = h1.text.split()
    if len(parts) > 2:
        return ''.join(parts[:-2]), ' '.join(parts[-2:])
    # Join with single spaces so the date is normalised the same way as in
    # the branch above (it is later used as a sort key).
    return '', ' '.join(parts)

In [83]:
def generate_rows():
    """Yield one ``(time, date, title, url)`` tuple per workshop.

    Walks every element with class ``track-name`` in the module-level
    ``front_soup``, takes the ``href`` of its parent element, downloads that
    workshop page and extracts the schedule with ``get_time_from_soup``.

    Entries whose parent has no ``href`` are skipped, since there is no page
    to fetch for them.

    Yields:
        Tuples ``(time, date, title, url)`` of strings.

    Raises:
        requests.HTTPError: If a workshop page responds with an error status.
        requests.RequestException: On connection problems or timeouts.
    """
    # One session for all pages so the connection to the site is reused.
    with requests.Session() as session:
        for workshop_title in front_soup.find_all(class_='track-name'):
            href = workshop_title.parent.attrs.get('href')
            if not href:
                # Plain concatenation would have produced ".../None" here.
                continue

            # urljoin copes with absolute, root-relative and relative hrefs.
            url = urljoin(base_url, href)

            response = session.get(url, timeout=30)
            response.raise_for_status()
            ws_soup = BeautifulSoup(response.text, 'html.parser')
            time_of_day, date = get_time_from_soup(ws_soup)

            yield time_of_day, date, workshop_title.text.strip(), url

In [84]:
# Collect every scraped workshop into one table. The column labels line up,
# in order, with the (time, date, title, url) tuples from generate_rows().
df = pd.DataFrame(
    data=generate_rows(),
    columns=['Tid', 'Dato', 'Tittel', 'Link'],
)

In [91]:
# Order the workshops by date, then by time slot. Both columns hold plain
# strings, so the ordering is lexicographic rather than chronological.
sdf = df.sort_values(by=['Dato', 'Tid'], ascending=True)
sdf

Unnamed: 0,Tid,Dato,Tittel,Link
0,,January 26,Hands-on deep learning with TensorFlow.js,https://www.appliedmldays.org/workshops/hands-on-deep-learning-with-tensorflow-js
2,,January 26,ML in your organization: a practical toolbox to identify and seize highest value opportunities in Machine Learning,https://www.appliedmldays.org/workshops/ml-in-your-organization-a-practical-toolbox-to-identify-and-seize-highest-value-opportunities-in-machine-learning
4,,January 26,Tutorial: Build your first predictive model to forecast and detect anomalies,https://www.appliedmldays.org/workshops/tutorial-build-your-first-predictive-model-to-forecast-and-detect-anomalies
5,,January 26,Engineering for good - detecting pneumonia in X-Ray images,https://www.appliedmldays.org/workshops/engineering-for-good-detecting-pneumonia-in-x-ray-images
3,09:00-12:00,January 26,PySpark: Big Data Processing and Machine Learning with Python,https://www.appliedmldays.org/workshops/pyspark-big-data-processing-and-machine-learning-with-python
7,09:00-16:30,January 26,TDA crash course: theory and practice for ML applications,https://www.appliedmldays.org/workshops/tda-crash-course-theory-and-practice-for-ml-applications
8,09:00-16:30,January 26,Data exploration and preparation for Machine Learning,https://www.appliedmldays.org/workshops/data-exploration-and-preparation-for-machine-learning
9,09:00-16:30,January 26,Learning and Processing over Networks,https://www.appliedmldays.org/workshops/learning-and-processing-over-networks
10,09:00-16:30,January 26,Applied Machine Learning for Anomaly Detection on Equipment,https://www.appliedmldays.org/workshops/applied-machine-learning-for-anomaly-detection-on-equipment
11,09:00-16:30,January 26,TensorFlow Basics - Saturday,https://www.appliedmldays.org/workshops/tensorflow-basics-saturday


In [94]:
# Emit the sorted table as raw HTML, without the index column, so the markup
# can be pasted into a markdown file.
print(
    sdf.to_html(index=False)
)

<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;">
      <th>Tid</th>
      <th>Dato</th>
      <th>Tittel</th>
      <th>Link</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <td></td>
      <td>January 26</td>
      <td>Hands-on deep learning with TensorFlow.js</td>
      <td>https://www.appliedmldays.org/workshops/hands-on-deep-learning-with-tensorflow-js</td>
    </tr>
    <tr>
      <td></td>
      <td>January 26</td>
      <td>ML in your organization: a practical toolbox to identify and seize highest value opportunities in Machine Learning</td>
      <td>https://www.appliedmldays.org/workshops/ml-in-your-organization-a-practical-toolbox-to-identify-and-seize-highest-value-opportunities-in-machine-learning</td>
    </tr>
    <tr>
      <td></td>
      <td>January 26</td>
      <td>Tutorial: Build your first predictive model to forecast and detect anomalies</td>
      <td>https://www.appliedmldays.org/workshops/tutorial-build-your-first-predictive-