In [None]:
%pip install beautifulsoup4 pqdm pulp networkx

In [8]:
import pulp
import pandas as pd
from datetime import datetime

In [4]:
# Planpunkt IDs of the CBK courses of the WINF curriculum.
# They are compared (as strings) against the `planpunkte_ids` column below.
winf_cbk = [
    "5105",  # Financial Statements and Corporate Reporting
    "5107",  # Global Business
    "5106",  # Foundations of Business Informatics
    "5108",  # Cross-Functional Business Administration - Processes and Decisions
    "5056",  # Microeconomics (6056 is Applied Microeconomics)
    "5059",  # Macroeconomics (6059 is International Macroeconomics)
    "5117",  # Sustainable Business: Advanced Topics and Application
    "5109",  # Private Business Law (6021 is Business in a Legal Context - Private Business Law I)
    "6023",  # Mathematics
    "6024",  # Statistics
    "5136",  # Standards of Academic Writing and Citation (6911 is Fundamentals of Academic Work)
]

# Prerequisite for taking courses from the main study phase (Hauptstudium):
# at least 20 ECTS from the CBK.

# Planpunkt IDs of the main study phase (Hauptstudium) courses.
winf_hauptstudium = [
    "6012",  # Procurement, Logistics, Production
    "5155",  # Foundations and Methods of Data and Knowledge Engineering
    "9485",  # Algorithmic Thinking and Programming
    "5158",  # Computer Networks and Data Transmission: Fundamentals and Security
    "5160",  # Design of Business Information Systems
    "5161",  # Governance and Management of IT Projects
    "5162",  # Research Methods in Business Informatics
]

# NOTE(review): pd.read_pickle can execute arbitrary code while unpickling;
# only load files that were produced by this project's own pipeline.
vvzModel = pd.read_pickle("../0_daten/vvzModel.pkl")

# Build the lookup set once instead of concatenating both lists for every
# single ID of every row inside the filter lambda.
winf_planpunkte = frozenset(winf_cbk + winf_hauptstudium)

# Keep only the courses that are attached to at least one relevant Planpunkt.
relevantVvz = vvzModel[vvzModel["planpunkte_ids"].apply(lambda ids: any(str(id_) in winf_planpunkte for id_ in ids))]

In [7]:
def select_best_course(picked_df, available_df, cbk_ids, hs_ids, *,
                       excluded_weekdays=(0,), max_overlap_minutes=15,
                       min_cbk_ects=20, now=None):
    """Pick the next course for a greedy schedule, or ``None`` if none fits.

    A candidate from ``available_df`` is admissible when

    * none of its sessions starts on an excluded weekday,
    * it is not a Hauptstudium course, or the CBK requirement is met by the
      already picked courses (see ``min_cbk_ects`` / ``now``), and
    * it does not overlap any already picked course by more than
      ``max_overlap_minutes`` minutes.

    Among the admissible candidates the one with the most ECTS wins; ties are
    broken by the smallest time distance between the candidate's first session
    start and the end of the last picked session (or 1970-01-01 when nothing
    has been picked yet, i.e. the earliest start wins).

    Parameters
    ----------
    picked_df : pandas.DataFrame or None
        Courses picked so far. ``None`` and an empty frame are both treated
        as "nothing picked yet".
    available_df : pandas.DataFrame
        Candidate courses. Rows need the columns ``dates`` (iterable of dicts
        with datetime-like ``'start'`` and ``'end'``), ``planpunkte_ids``
        (iterable of IDs) and ``ects``.
    cbk_ids, hs_ids : iterable
        Planpunkt IDs of the CBK and the Hauptstudium. IDs are compared as
        strings, so ``5105`` and ``"5105"`` are considered equal.
    excluded_weekdays : collection of int, keyword-only
        Weekdays (``datetime.weekday()`` numbering, Monday is 0) on which no
        session may start. Defaults to Monday only.
    max_overlap_minutes : number, keyword-only
        Largest tolerated overlap between two sessions, in minutes.
    min_cbk_ects : number, keyword-only
        ECTS from picked CBK courses required before Hauptstudium courses
        become admissible.
    now : datetime-like or None, keyword-only
        Reference time for "CBK course has ended"; defaults to
        ``datetime.now()``.

    Returns
    -------
    pandas.Series or None
        The chosen row of ``available_df`` (with an additional
        ``first_start`` entry), or ``None`` when no candidate is admissible.
    """
    # Normalise IDs to strings so that int and str IDs match each other,
    # consistent with the str(id_) comparisons used elsewhere in the notebook.
    cbk_id_set = {str(i) for i in cbk_ids}
    hs_id_set = {str(i) for i in hs_ids}

    # An empty frame may have no columns at all, so never index into it.
    has_picks = picked_df is not None and not picked_df.empty

    if available_df is None or available_df.empty:
        return None

    def is_on_valid_day(sessions):
        # weekday() is locale independent, unlike strftime('%A').
        return all(s['start'].weekday() not in excluded_weekdays for s in sessions)

    def overlaps_too_much(sessions_a, sessions_b):
        for a in sessions_a:
            for b in sessions_b:
                latest_start = max(a['start'], b['start'])
                earliest_end = min(a['end'], b['end'])
                # Negative for disjoint sessions, which never exceeds the limit.
                overlap = (earliest_end - latest_start).total_seconds() / 60
                if overlap > max_overlap_minutes:
                    return True
        return False

    def cbk_completed():
        if not has_picks:
            return False
        is_cbk = picked_df['planpunkte_ids'].apply(
            lambda ids: any(str(i) in cbk_id_set for i in ids)
        )
        cbk_courses = picked_df[is_cbk]
        if cbk_courses['ects'].sum() < min_cbk_ects:
            return False
        # NOTE(review): completion is judged against the reference time (wall
        # clock by default), not against the start of the candidate course.
        reference = datetime.now() if now is None else now
        return all(max(s['end'] for s in dates) < reference for dates in cbk_courses['dates'])

    # Work on a copy so the caller's frame does not gain the helper column.
    candidates = available_df.copy()
    candidates['first_start'] = candidates['dates'].apply(lambda d: min(s['start'] for s in d))

    hs_unlocked = None  # evaluated lazily and at most once; it is loop invariant
    valid_courses = []

    for _, course in candidates.iterrows():
        sessions = course['dates']
        if not is_on_valid_day(sessions):
            continue

        if any(str(i) in hs_id_set for i in course['planpunkte_ids']):
            if hs_unlocked is None:
                hs_unlocked = cbk_completed()
            if not hs_unlocked:
                continue

        if has_picks and any(overlaps_too_much(sessions, picked_dates)
                             for picked_dates in picked_df['dates']):
            continue

        valid_courses.append(course)

    if not valid_courses:
        return None

    if has_picks:
        last_end = max(s['end'] for dates in picked_df['dates'] for s in dates)
    else:
        last_end = pd.Timestamp("1970-01-01")

    # min() returns the first minimal element, exactly like sorted(...)[0].
    return min(
        valid_courses,
        key=lambda c: (-c['ects'], abs((c['first_start'] - last_end).total_seconds())),
    )

In [10]:
def get_greedy_pick(available_df=None, cbk_ids=None, hs_ids=None):
    """Greedily assemble a schedule by repeatedly taking the best next course.

    Starting from an empty schedule, ``select_best_course`` is asked for the
    next course until it returns ``None``. After every pick, all courses that
    share a Planpunkt with an already picked course are removed from the
    candidates, so each Planpunkt is covered at most once.

    Parameters
    ----------
    available_df : pandas.DataFrame, optional
        Candidate courses; defaults to the notebook-level ``relevantVvz``.
    cbk_ids : iterable, optional
        CBK Planpunkt IDs; defaults to the notebook-level ``winf_cbk``.
    hs_ids : iterable, optional
        Hauptstudium Planpunkt IDs; defaults to ``winf_hauptstudium``.

    Returns
    -------
    pandas.DataFrame
        One row per picked course, in pick order. When nothing could be
        picked, an empty frame with the columns of ``available_df``.
    """
    if available_df is None:
        available_df = relevantVvz
    if cbk_ids is None:
        cbk_ids = winf_cbk
    if hs_ids is None:
        hs_ids = winf_hauptstudium

    picked_rows = []
    # Stored as strings so the lookup below matches regardless of whether the
    # IDs in the data are ints or strings.
    covered_planpunkte = set()

    picked_df = None
    remaining = available_df

    while True:
        course = select_best_course(picked_df, remaining, cbk_ids, hs_ids)
        if course is None:
            break

        picked_rows.append(course)
        covered_planpunkte.update(str(id_) for id_ in course["planpunkte_ids"])

        # Rebuild from the row list instead of growing a frame with pd.concat.
        picked_df = pd.DataFrame(picked_rows)
        remaining = available_df[available_df["planpunkte_ids"].apply(
            lambda ids: not any(str(id_) in covered_planpunkte for id_ in ids)
        )]

    if picked_df is None:
        # Keep the columns so that e.g. result["ects"].sum() still works.
        return available_df.iloc[:0].copy()
    return picked_df


# Total ECTS of the courses in the greedily selected schedule.
get_greedy_pick()["ects"].sum()

31.0