This notebook scrapes the Alma and Primo release notes in the Ex Libris Knowledge Center and creates a CSV file for each product, with one row per release-note item heading. We recommend reviewing the results for completeness, since the scraper may miss items.

In [2]:
import csv
import requests
from bs4 import BeautifulSoup

In [3]:
def get_headings(url, year, month):
    """Scrape the item headings from one month's release notes page.

    Args:
        url: URL of the release notes page for the target month.
        year: Release year as an int or str, e.g. 2018.
        month: Release month as an int or str, e.g. 12, 1 or "01".
            Single-digit values are zero-padded to two digits.

    Returns:
        A list of strings: the text of every <h3> heading that sits inside
        a <div> whose class list contains the month's date key, followed by
        the divider entry "RESOLVED ISSUES", followed by one string per
        <li> element that carries the date key as a class.

    Raises:
        requests.RequestException: if the request fails, times out, or the
            server responds with an HTTP error status.
    """
    # Fail fast instead of hanging indefinitely or silently parsing an
    # error page into an empty result.
    r = requests.get(url, timeout=30)
    r.raise_for_status()

    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed (and to avoid bs4's warning).
    soup = BeautifulSoup(r.text, "html.parser")

    # class used in current month's sections, e.g. 201812BASE
    # also found in the release notes URL
    # zfill keeps the key six digits long when month is given as an int < 10.
    datekey = str(year) + str(month).zfill(2) + 'BASE'

    # get the major items
    items = []
    for heading in soup.find_all("h3"):
        # .get() tolerates ancestor divs without a class attribute, and
        # any() adds the heading once even if several ancestors match.
        if any(datekey in parent.get("class", [])
               for parent in heading.find_parents("div")):
            items.append(heading.get_text())

    # get additional enhancements
    enhancements = []
    for small in soup.find_all("li", class_=datekey):
        # The text of interest follows the first <br>; when an <li> has no
        # <br>, fall back to all of its children instead of crashing.
        line_break = small.find("br")
        nodes = line_break.next_siblings if line_break is not None else small.children

        pieces = []
        for node in nodes:
            # .string is None for tags with several children; those are skipped.
            if node.string:
                # Collapse newlines and runs of whitespace into single spaces
                # so words separated only by a line break are not glued together.
                piece = " ".join(node.string.split())
                # Skip whitespace-only nodes to avoid doubled spaces below.
                if piece:
                    pieces.append(piece)
        enhancements.append(" ".join(pieces))

    # add the enhancements to the major items, with a divider header
    items.append("RESOLVED ISSUES")
    items.extend(enhancements)

    return items


def make_csv(filename, rows):
    """Write the scraped headings to a CSV worksheet with review columns.

    The first row is a fixed header. Each entry in ``rows`` is then written
    as its own row containing only that entry, so it lands in the first
    ("Heading") column and the remaining columns are left for reviewers.
    All fields are quoted.

    Args:
        filename: Path of the CSV file to create; an existing file is
            overwritten.
        rows: Iterable of heading values, one per output row.
    """
    header = ['Heading','Reviewer(s)','Relevant to GW?','Needs discussion?', 'Notes']
    # Write UTF-8 explicitly: the default encoding is platform dependent and
    # can fail or garble headings that contain non-ASCII characters.
    # newline='' lets the csv module control line endings.
    with open(filename, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f, delimiter=',', quoting=csv.QUOTE_ALL)
        writer.writerow(header)
        writer.writerows([row] for row in rows)

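`alma_url` is the URL of the Alma release notes for the target month. Before running, update the URL and the year and month arguments in the call to `get_headings()` so that they all refer to the same release (for example, `201812` in the URL with year `2018` and month `12`).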

In [3]:
# Alma release notes page for December 2018. The year and month passed below
# form the date key (201812BASE) that also appears in the URL's "mon" value.
alma_url = "https://knowledge.exlibrisgroup.com/Alma/Release_Notes/010_2018/001Alma_2018_Release_Notes?mon=201812BASE"
alma_results = get_headings(url=alma_url, year=2018, month=12)

In [4]:
# Save the Alma headings to a CSV file named after the release month.
filename = "alma-201812.csv"
make_csv(filename=filename, rows=alma_results)

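`primo_url` is the URL of the Primo release notes for the target month. Before running, update the URL and the year and month arguments in the call to `get_headings()` so that they all refer to the same release. Pass a single-digit month as a zero-padded string (for example, `"01"`) so that it matches the six-digit date in the URL.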

In [4]:
# Primo VE release notes page for January 2019. The month is given as the
# zero-padded string "01" so the date key reads 201901BASE, as in the URL.
primo_url = "https://knowledge.exlibrisgroup.com/Primo/Release_Notes/002Primo_VE/0972019/002Primo_VE_2019_Release_Notes?mon=201901BASE"
primo_results = get_headings(url=primo_url, year=2019, month="01")

In [6]:
# Save the Primo headings to a CSV file named after the release month.
primo_filename = "primo-201901.csv"
make_csv(filename=primo_filename, rows=primo_results)

Import the resulting CSV files into Google Sheets, and add a link to the source release notes pages so reviewers can find further information.
