In [None]:
import requests
import re
import pandas as pd
from io import StringIO
import bs4
import icalendar
from datetime import datetime, timedelta

# Scrape data from libraries

## Köln (Cologne)

In [None]:
# Cologne library accounts: display label -> card number ("Nr") and PIN.
# Both values are posted as "BRWR" and "PIN" by the login request in the next cell.
# NOTE(review): the values here are placeholders; avoid committing real
# credentials to the notebook (e.g. read them from the environment instead).
users = {"Cologne account 1": {"Nr":"aXXX", "PIN": "XXXX"}}

In [None]:
# Log in to the Cologne catalogue for every account in `users` and collect the
# loan list, page by page. Each borrowed item becomes one single-row frame;
# the frames are concatenated once at the end into `df_koeln`.
koeln_frames = []
# Every response embeds the object name that the *next* request has to address.
pattern = r'<META NAME="ZonesObjName" +CONTENT="(.*)">'
for selectedUser in users:
    print(selectedUser)
    s = requests.Session()
    r1 = s.get("https://katalog.stbib-koeln.de/alswww2.dll/APS_ZONES?fn=MyZone&Style=Portal3&SubStyle=&Lang=GER&ResponseEncoding=utf-8")
    cookie = r1.cookies.get_dict()
    objname = re.findall(pattern, r1.text)[0]
    payload = {'Method': "CheckID", 'SHA1': "", 'ZonesLogin': '1', 'Interlock': objname, "BrowseAsHloc": "", "Style": "Portal3", "SubStyle": "", "Lang": "GER", "ResponseEncoding":"utf-8", "BRWR": users[selectedUser]["Nr"], "PIN":users[selectedUser]["PIN"]}
    r2 = s.post("https://katalog.stbib-koeln.de/alswww2.dll/" + objname, cookies=cookie, data=payload)
    objname = re.findall(pattern, r2.text)[0]
    cookie = r2.cookies.get_dict()

    # The first summary counter on the account page is used as the number of loans.
    counters = re.findall(r'<td class="AccountSummaryCounterValueCellStripe">(.*?)</td>', r2.text)
    if not counters:
        raise RuntimeError(f"No loan counter found for {selectedUser!r}: login may have failed or the page layout changed.")
    cntMedia = int(counters[0])

    # The loan list is served ten entries per page (see PageSize below).
    pageCount = -(-cntMedia // 10)  # ceiling division
    for i in range(pageCount):
        # NOTE(review): the first page appends "?Method=ShowLoans" after an
        # existing query string (a second "?"). Kept byte-identical because the
        # server's behaviour cannot be verified here; confirm whether "&" was meant.
        r3 = s.get(f"https://katalog.stbib-koeln.de/alswww2.dll/{objname}?Style=Portal3&SubStyle=&Lang=GER&ResponseEncoding=utf-8" + ("?Method=ShowLoans" if i == 0 else "&Method=PageDown&PageSize=10") , cookies=cookie)
        objname = re.findall(pattern, r3.text)[0]
        cookie = r3.cookies.get_dict()
        tbl = re.findall(r'<table id="BrowseList".*?>.*</table>.*SearchResultsTrailer', r3.text, re.MULTILINE | re.DOTALL)
        soup = bs4.BeautifulSoup(tbl[0], 'lxml')
        results = soup.find(attrs = {'id': 'BrowseList'})
        for row in results.find_all('tr', recursive=False):
            df_row = None
            for col in row.find_all('td', recursive=False):
                if col.find_all('table'):
                    # The nested table holds key/value pairs; transposing turns
                    # the keys into column headers and the values into one row.
                    df_row = pd.read_html(StringIO(str(col)))[0].transpose()
                    df_row.columns = df_row.iloc[0]
                    df_row = df_row.iloc[1:, :].copy()
                    df_row["Konto"] = selectedUser
            # Rows without a nested detail table carry no loan data; skip them
            # instead of feeding an empty list into pd.concat.
            if df_row is not None:
                koeln_frames.append(df_row)

df_koeln = pd.concat(koeln_frames) if koeln_frames else pd.DataFrame()

# Normalise the scraped Cologne loans to the column schema shared with the other library.
df_koeln = df_koeln.rename(columns={"Publikationsform": "Medientyp"})
# Assign the filled column back: an in-place fillna on a column selection is
# chained assignment and does not update the frame under pandas Copy-on-Write.
# The renewal count may be parsed as text, so coerce it to integers here.
df_koeln["Anz.Verl."] = pd.to_numeric(df_koeln["Anz.Verl."], errors="coerce").fillna(0).astype(int)
# Drop every column that still contains missing values.
df_koeln = df_koeln.dropna(axis=1)
df_koeln["Ausleih- Datum"] = pd.to_datetime(df_koeln["Ausleih- Datum"], format="%d/%m/%Y")
# The due-date cell may carry extra text after the date; keep only the first token.
df_koeln["Fälligkeitsdatum"] = pd.to_datetime(df_koeln["Fälligkeits datum"].str.split(" ").str[0], format="%d/%m/%Y")
# `|` binds tighter than `>`, so the comparison must be parenthesised explicitly.
# The "?" is matched literally (regex=False) rather than via an escaped pattern.
df_koeln["Verlängerbar"] = ~df_koeln["Fälligkeits datum"].str.contains("?", regex=False) | (df_koeln["Anz.Verl."] > 0)
df_koeln["Bibliothek"] = "Köln"
df_koeln

## Kreuztal

In [None]:
# Kreuztal library accounts: display label -> user number ("Nr") and PIN.
# Both values are posted as "userid" and "password" by the login request in the next cell.
# This rebinds `users`, replacing the Cologne accounts assigned earlier, so the
# cells have to be run in order.
# NOTE(review): the values here are placeholders; avoid committing real credentials.
users = {"Kreuztal account 1": {"Nr":"XXX", "PIN": "XX.XX.XXXX"}}

In [None]:
# Log in to the Kreuztal OPAC for every account in `users` and read the
# checkout table from the returned page. One frame per account is collected
# and concatenated once at the end into `df_kreuztal`.
kreuztal_frames = []
for selectedUser in users:
    print(selectedUser)
    s = requests.Session()
    payload = {"koha_login_context":"opac", "userid": users[selectedUser]["Nr"], "password":users[selectedUser]["PIN"]}
    r2 = s.post("https://sb-kreuztal.lmscloud.net/cgi-bin/koha/opac-user.pl", data=payload)
    tbl = re.findall(r'<table id="checkoutst".*</table>', r2.text, re.MULTILINE | re.DOTALL)
    # No checkout table in the response: nothing to record for this account.
    if not tbl:
        continue
    # Wrap the markup in StringIO: passing literal HTML to read_html is deprecated.
    df_row = pd.read_html(StringIO(tbl[0]))[0]
    df_row["Konto"] = selectedUser
    kreuztal_frames.append(df_row)

df_kreuztal = pd.concat(kreuztal_frames) if kreuztal_frames else pd.DataFrame()

# Normalise the scraped Kreuztal loans to the column schema shared with the other library.
# Extract the digit that precedes " von" in the renewal column (presumably the
# renewals still available; confirm against the site). Rows without a match count as 0.
digit_before_von = df_kreuztal["Verlängern"].str.extract(r".*(\d) von.*", expand=False)
df_kreuztal["Anz.Verl."] = 2 - digit_before_von.fillna(0).astype(int)
# Assign the filled column back: an in-place fillna on a column selection is
# chained assignment and does not update the frame under pandas Copy-on-Write.
df_kreuztal["Verfasser"] = df_kreuztal["Verfasser"].fillna("Kein Autor")
df_kreuztal["Fälligkeitsdatum"] = pd.to_datetime(df_kreuztal["Fällig"].str.replace("Fälligkeitsdatum:  ", "", regex=False), format="%d.%m.%Y")
df_kreuztal["Verlängerbar"] = ~df_kreuztal["Verlängern"].str.contains("Nicht verlängerbar")
df_kreuztal["Bibliothek"] = "Kreuztal"
df_kreuztal

# Create calendar

In [None]:
# Columns that both library frames provide; only these go into the combined table.
cols = ["Fälligkeitsdatum", "Bibliothek", "Konto", "Verlängerbar", "Anz.Verl.", "Medientyp", "Verfasser", "Titel"]
df = pd.concat([df_koeln.loc[:, cols], df_kreuztal.loc[:, cols]], ignore_index=True)


def _max_due_date(loan):
    """Return the latest possible due date for one loan row.

    Starts from the current due date and adds one renewal period for each of
    the (2 - "Anz.Verl.") renewals not yet used. A period is 2 weeks when the
    media type starts with DVD, Games or Video-Spiel, and 4 weeks otherwise.
    """
    due = loan["Fälligkeitsdatum"]
    renewals_left = 2 - int(loan["Anz.Verl."])
    if re.match("DVD|Games|Video-Spiel", loan["Medientyp"]):
        weeks_per_renewal = 2
    else:
        weeks_per_renewal = 4
    return due + timedelta(renewals_left * 7 * weeks_per_renewal)


# Calculate maximum possible date from due date, extension count and media type
df["Max. Datum"] = df.apply(_max_due_date, axis=1)

In [None]:
# Build an iCalendar with one all-day event per due date and library. Groups
# larger than `entriesPerEvent` loans are split over several events so that a
# single description does not grow too long.
def _shorten(text, limit=25):
    """Return `text` cut to `limit` characters, with ".." appended if it was longer."""
    return text[:limit] + (text[limit:] and "..")


def _describe_loans(rows):
    """Return one description line per loan in `rows`.

    The first loan of each account is prefixed with an "<account>:" header
    line. Renewable loans are marked "- " and show the remaining renewals and
    latest date; non-renewable loans are marked "x ".
    """
    lines = []
    previous_konto = None
    for _, loan in rows.iterrows():
        header = loan["Konto"] + ":\n" if loan["Konto"] != previous_konto else ""
        previous_konto = loan["Konto"]
        if loan["Verlängerbar"]:
            marker = "- "
            suffix = " (" + str(2 - int(loan["Anz.Verl."])) + "x verlängerbar bis " + loan["Max. Datum"].strftime("%d.%m.%Y") + ")"
        else:
            marker = "x "
            suffix = " (NICHT verlängerbar)"
        lines.append(header + marker + _shorten(loan["Verfasser"]) + " - " + _shorten(loan["Titel"]) + suffix)
    return lines


cal = icalendar.Calendar()
# PRODID and VERSION are required calendar properties in RFC 5545.
cal.add("prodid", "-//Library due dates//DE")
cal.add("version", "2.0")
entriesPerEvent = 25
# One timestamp for the whole run, so all events report the same "Stand".
generated_at = datetime.now()
for due_date, due_group in df.groupby("Fälligkeitsdatum"):
    for library, rws in due_group.groupby("Bibliothek"):
        # Sort a copy; sorting a groupby slice in place triggers SettingWithCopyWarning.
        rws = rws.sort_values(["Konto", "Verlängerbar", "Max. Datum"])
        # Stepping over the start offsets never yields an empty chunk, even when
        # the group size is an exact multiple of entriesPerEvent.
        for i in range(0, len(rws), entriesPerEvent):
            rows = rws[i:i+entriesPerEvent].reset_index(drop=True)
            event = icalendar.Event()
            # RFC 5545 requires a UID per event; this one is derived from date, library and chunk offset.
            event.add("uid", f"{due_date:%Y%m%d}-{library}-{i}@library-due-dates")
            event.add("summary", library + " (" + str(rows["Verlängerbar"].sum()) + "/" + str(rows["Verlängerbar"].count()) + " Medien verlängerbar)")
            event.add('dtstart', due_date.date())
            event.add('dtstamp', generated_at)
            # Reminder one day before the due date.
            alarm = icalendar.Alarm()
            alarm.add("action", "DISPLAY")
            alarm.add("trigger", timedelta(days=-1))
            alarm.add("description", "Medien fällig")
            event.add_component(alarm)
            # Raise the priority when at least one item can no longer be renewed.
            if not rows["Verlängerbar"].all():
                event.add("priority", 1)
            event.add("description", "Stand: " + generated_at.strftime("%d.%m.%Y %H:%M Uhr") + "\n" + "\n".join(_describe_loans(rows)))
            cal.add_component(event)
cal.to_ical()