In [216]:
# Imports and setup 
from bs4 import BeautifulSoup
import bs4
import urllib.request

from ics import Calendar, Event
from ics.parse import ContentLine
from collections import namedtuple
from datetime import datetime, time, date, timedelta
import arrow
from copy import deepcopy
import re

# Set these for your specific use case:
semester_start = "08/24/2020"
semester_end = "12/04/2020"
timezone = "America/Denver"
url = 'https://student.apps.utah.edu/uofu/stu/ClassSchedules/main/1208/class_list.html?subject=CS'

# Convert these to Arrow objects.
semester_start = arrow.get(semester_start, 'MM/DD/YYYY')
semester_end = arrow.get(semester_end, 'MM/DD/YYYY')

# Tuple definition.
DTLInfo = namedtuple('DTLInfo', 
                    ['days',
                     'time',
                     'location'])
ClassInfo = namedtuple('ClassInfo', 
                       ['identifier', 
                        'section', 
                        'title', 
                        'component', 
                        'dtls', 
                        'instructor'])

In [217]:
# Get HTML of webpage.
with urllib.request.urlopen(url) as response:
    html = response.read().decode('utf-8')

    bs = BeautifulSoup(html, 'html.parser')

In [218]:
# Helper methods to parse HTML.
def process_title(div):
    return list(x.contents[0].strip() for x in div if type(x) != bs4.element.NavigableString)

def process_subtitle(div):
    subtitle = {}
    
    for block in div.children:
        if type(block) == bs4.element.NavigableString:
            continue
            
        key = None
        val = None

        for fields in block.get_text().split(':'):
            for content in fields.split('\n'):
                content = content.strip()

                if content == '':
                    continue

                if key == None:
                    key = content
                elif val == None:
                    val = content
                    break

        if key == None:
            raise RuntimeError("Invalid block {}".format(block))
        if val == None:
            val = ""            
            
        if key not in subtitle:
            subtitle[key] = []
        subtitle[key].append(val)
        
    return subtitle


def process_body(div):
    body = {}

    headers = []
    for header in div.find('thead').find_all('th'):
        header = header.get_text()
        
        if '/' not in header:
            headers.append(header.strip())
        else:
            headers.append("/".join(spl.strip() for spl in header.split('/')))
    
    values = [[] for _ in range(len(headers))]
    for row in div.find('tbody').find_all('tr'):
        for idx, val in enumerate(row.find_all('th')):
            val = val.get_text()

            if '/' not in val:
                values[idx].append(val.strip())
            else:
                values[idx].append("/".join(spl.strip() for spl in val.split('/')))
    
    for key, val in zip(headers, values):
        body[key] = val
    
    return body


def process_dtls(dtls_dict):
    dtls = []
    for times, location in zip(dtls_dict['Days/Times'], dtls_dict['Locations']):
        dtls.append(DTLInfo(
            times.split('/')[0],
            times.split('/')[1],
            location
        ))
        
    return dtls


def make_class_info(class_card):
    title = process_title(card.find("h3"))
    subtitle = process_subtitle(card.find("ul", attrs={'class': 'row breadcrumb-list list-unstyled'}))
    body = card.find('table', attrs={'class': 'table time-table'})
    
    dtls = None
    if body is not None:
        dtls = process_dtls(process_body(body))
            
    return ClassInfo(
        title[0],
        title[1],
        title[2],
        subtitle['Component'][0],
        dtls,
        " & ".join(subtitle['Instructor'])
    )
    
    

In [219]:
# Parse HTML of webpage.
table = bs.find(id='class-details')
class_info = []

# Generate list of class infos.
for card in table.children:
    if type(card) == bs4.element.NavigableString:
        continue
        
    class_info.append(make_class_info(card))
    
# print(class_info)

In [220]:
# Calendar helper methods.
def weekday_str_to_int(weekday):
    if weekday == "Mo":
        return 0
    elif weekday == "Tu":
        return 1
    elif weekday == "We":
        return 2
    elif weekday == "Th":
        return 3
    elif weekday == "Fr":
        return 4
    elif weekday == "Sa":
        return 5
    elif weekday == "Su":
        return 6
        

def weekday_int_to_str(weekday):
    if weekday == 0:
        return "Mo"
    elif weekday == 1:
        return "Tu"
    elif weekday == 2:
        return "We"
    elif weekday == 3:
        return "Th"
    elif weekday == 4:
        return "Fr"
    elif weekday == 5:
        return "Sa"
    elif weekday == 6:
        return "Su"
    
    
def get_next_weekday(date: arrow.Arrow, weekday: int):
    if date.weekday() == int:
        return deepcopy(date)
    
    return date.shift(days=((weekday - date.weekday()) % 7))    

In [221]:
# Class info to calendar event methods.
def get_start_end_time(dtl):
    start_day = get_next_weekday(semester_start, weekday_str_to_int(dtl.days[:2]))
    
    start_time, end_time = (arrow.get(t, 'HH:mmA').replace(tzinfo=timezone) for t in dtl.time.split('-'))
    
    start_time = start_time.replace(month=start_day.month, day=start_day.day, year=start_day.year)
    end_time = end_time.replace(month=start_day.month, day=start_day.day, year=start_day.year)
    
    return start_time, end_time

def generate_events(class_info: ClassInfo):
    events = []
    
    if class_info.dtls is None:
        return events
    
    for dtl in class_info.dtls:
        start_time, end_time = get_start_end_time(dtl)
        repeat_days = [d.upper() for d in (dtl.days[i:i+2] for i in range(0, len(dtl.days), 2))]

        e = Event()
        e.name = f"{class_info.identifier}-{class_info.section} {class_info.title} {class_info.component}"
        e.begin = start_time
        e.end = end_time
        
        repeat_rule = f"FREQ=WEEKLY;WKST=SU;UNTIL={ semester_end.format('YYYYMMDDTHHmmss') + 'Z' };BYDAY={ ','.join(repeat_days) }"
        e.extra.append(ContentLine(name="RRULE", value=repeat_rule))
        
        e.location = dtl.location
        e.description = f"Taught by {class_info.instructor}"
        
        events.append(e)
    
    return events

def generate_calendar(class_info):
    c = Calendar()
    events = []
    
    for ci in class_info:
        for event in generate_events(ci):
            events.append(event)
        
    c.events = events
    c.extra.append(ContentLine(name="TZID", value=timezone))
    return c

In [222]:
cal = str(generate_calendar(class_info))
cal = cal.replace("DTSTART", f"DTSTART;TZID={timezone}")
cal = cal.replace("DTEND", f"DTEND;TZID={timezone}")

print(cal)

with open("cs_class_schedule.ics", "w") as file:
    file.writelines(cal)

BEGIN:VCALENDAR
TZID:America/Denver
VERSION:2.0
PRODID:ics.py - http://git.io/lLljaA
BEGIN:VEVENT
RRULE:FREQ=WEEKLY;WKST=SU;UNTIL=20201204T000000Z;BYDAY=TU,TH
DESCRIPTION:Taught by RILOFF\, E. M.
DTEND;TZID=America/Denver:20200825T212000Z
LOCATION:CANVAS .
DTSTART;TZID=America/Denver:20200825T200000Z
SUMMARY:CS 534-001 Natural Language Lecture
UID:b38c2f47-e6b9-47f0-a186-5863fe930f76@b38c.org
END:VEVENT
BEGIN:VEVENT
RRULE:FREQ=WEEKLY;WKST=SU;UNTIL=20201204T000000Z;BYDAY=WE
DESCRIPTION:Taught by BROWN\, C. S. & REGEHR\, J.
DTEND;TZID=America/Denver:20200826T152500Z
LOCATION:MEK 3550
DTSTART;TZID=America/Denver:20200826T143500Z
SUMMARY:CS 1030-011 Foundations of CS Laboratory
UID:bc0c7c87-073b-47e6-9072-b7a29bd58b49@bc0c.org
END:VEVENT
BEGIN:VEVENT
RRULE:FREQ=WEEKLY;WKST=SU;UNTIL=20201204T000000Z;BYDAY=MO,FR
DESCRIPTION:Taught by BROWN\, C. S. & REGEHR\, J.
DTEND;TZID=America/Denver:20200824T152500Z
LOCATION:CANVAS .
DTSTART;TZID=America/Denver:20200824T143500Z