In [16]:
import psycopg2

import os

# Open the PostgreSQL connection used by every later cell.
# Settings are taken from the standard libpq environment variables when they
# are set, so real credentials never have to be written into the notebook.
# The fallbacks reproduce the previous hard-coded local development setup.
conn = psycopg2.connect(
    host=os.environ.get("PGHOST", "localhost"),
    port=os.environ.get("PGPORT", "5433"),
    database=os.environ.get("PGDATABASE", "krankenhaus"),
    user=os.environ.get("PGUSER", "postgres"),
    password=os.environ.get("PGPASSWORD", "postgres"),
)

# Single shared cursor; all statements below run through it.
cur = conn.cursor()


In [17]:
# SQL script that removes every table of the hospital schema if it exists.
# Referencing tables are listed before the tables they point to, and each
# statement additionally uses CASCADE.
drop_tables = """
-- =========================
-- DROP TABLES
-- =========================

DROP TABLE IF EXISTS Event_Medikament CASCADE;
DROP TABLE IF EXISTS Arzt_Event CASCADE;
DROP TABLE IF EXISTS Event CASCADE;
DROP TABLE IF EXISTS Behandlung CASCADE;
DROP TABLE IF EXISTS Medikament CASCADE;
DROP TABLE IF EXISTS Bett CASCADE;
DROP TABLE IF EXISTS Reserve CASCADE;
DROP TABLE IF EXISTS Pflegekraft CASCADE;
DROP TABLE IF EXISTS Arzt CASCADE;
DROP TABLE IF EXISTS Patient CASCADE;
DROP TABLE IF EXISTS Raum CASCADE;
DROP TABLE IF EXISTS Station CASCADE;
DROP TABLE IF EXISTS Krankenhaus CASCADE;
"""



In [18]:
# Roll back first so the DROP script starts in a fresh transaction.
# NOTE(review): presumably this recovers from a transaction left open or
# aborted by an earlier failed cell run - confirm.
conn.rollback()

# Run the whole DROP script in one call and make the result permanent.
cur.execute(drop_tables)
conn.commit()



In [19]:
# DDL for the hospital schema. Tables are created in dependency order, so
# every FOREIGN KEY target already exists when it is referenced.
# Constraints visible in the script:
#   - Postleitzahl must be exactly four digits (nullable on Patient).
#   - Telefonnummer may only contain digits, '+' and spaces (nullable).
#   - Station.Kapazitaet must be positive.
#   - Bett.Status is either 'frei' or 'belegt'.
#   - Behandlung.End_Datum, when present, is not before Start_Datum.
#   - Arzt_Event and Event_Medikament are link tables with composite keys.
create_tables = """
CREATE TABLE Krankenhaus (
    KH_ID INT PRIMARY KEY,
    Strasse VARCHAR(100) NOT NULL,
    Hausnummer VARCHAR(10) NOT NULL,
    Postleitzahl VARCHAR(10) NOT NULL,
    Stadt VARCHAR(50) NOT NULL,
    Name VARCHAR(100) NOT NULL,
    CHECK (Postleitzahl ~ '^[0-9]{4}$')
);

CREATE TABLE Station (
    Station_ID INT PRIMARY KEY,
    Name VARCHAR(50) NOT NULL,
    Kapazitaet INT NOT NULL,
    KH_ID INT NOT NULL,
    CHECK (Kapazitaet > 0),
    FOREIGN KEY (KH_ID) REFERENCES Krankenhaus(KH_ID)
);

CREATE TABLE Raum (
    Raumnummer INT PRIMARY KEY,
    Station_ID INT NOT NULL,
    Art VARCHAR(50),
    FOREIGN KEY (Station_ID) REFERENCES Station(Station_ID)
);

CREATE TABLE Pflegekraft (
    Pflege_ID INT PRIMARY KEY,
    Telefonnummer VARCHAR(30),
    Vorname VARCHAR(50) NOT NULL,
    Nachname VARCHAR(50) NOT NULL,
    Station_ID INT NOT NULL,
    CHECK (Telefonnummer IS NULL OR Telefonnummer ~ '^[0-9+ ]+$'),
    FOREIGN KEY (Station_ID) REFERENCES Station(Station_ID)
);

CREATE TABLE Patient (
    Patient_ID INT PRIMARY KEY,
    Vorname VARCHAR(50) NOT NULL,
    Nachname VARCHAR(50) NOT NULL,
    Versicherungsnummer VARCHAR(30) UNIQUE NOT NULL,
    Geburtsdatum DATE NOT NULL,
    Strasse VARCHAR(100),
    Hausnummer VARCHAR(10),
    Postleitzahl VARCHAR(10),
    Stadt VARCHAR(50),
    Elektronisches_Patientendossier_ID VARCHAR(50) UNIQUE,
    Station_ID INT,
    CHECK (Postleitzahl IS NULL OR Postleitzahl ~ '^[0-9]{4}$'),
    FOREIGN KEY (Station_ID) REFERENCES Station(Station_ID)
);

CREATE TABLE Bett (
    Bett_ID INT PRIMARY KEY,
    Status VARCHAR(20) NOT NULL,
    Patient_ID INT,
    Station_ID INT NOT NULL,
    CHECK (Status IN ('frei', 'belegt')),
    FOREIGN KEY (Patient_ID) REFERENCES Patient(Patient_ID),
    FOREIGN KEY (Station_ID) REFERENCES Station(Station_ID)
);

CREATE TABLE Arzt (
    Arzt_ID INT PRIMARY KEY,
    Telefonnummer VARCHAR(30),
    Vorname VARCHAR(50) NOT NULL,
    Nachname VARCHAR(50) NOT NULL,
    ist_extern BOOLEAN NOT NULL,
    Station_ID INT,
    CHECK (Telefonnummer IS NULL OR Telefonnummer ~ '^[0-9+ ]+$'),
    FOREIGN KEY (Station_ID) REFERENCES Station(Station_ID)
);

CREATE TABLE Reserve (
    Reserve_ID INT PRIMARY KEY,
    Telefonnummer VARCHAR(30),
    Arzt_ID INT,
    Station_ID INT,
    Pflegekraft_ID INT,
    CHECK (Telefonnummer IS NULL OR Telefonnummer ~ '^[0-9+ ]+$'),
    FOREIGN KEY (Arzt_ID) REFERENCES Arzt(Arzt_ID),
    FOREIGN KEY (Station_ID) REFERENCES Station(Station_ID),
    FOREIGN KEY (Pflegekraft_ID) REFERENCES Pflegekraft(Pflege_ID)
);

CREATE TABLE Behandlung (
    Behandlungs_ID INT PRIMARY KEY,
    abgeschlossen BOOLEAN NOT NULL,
    Beschreibung VARCHAR(255),
    Start_Datum DATE NOT NULL,
    End_Datum DATE,
    Patient_ID INT NOT NULL,
    FOREIGN KEY (Patient_ID) REFERENCES Patient(Patient_ID),
    CHECK (End_Datum IS NULL OR End_Datum >= Start_Datum)
);

CREATE TABLE Medikament (
    Medikament_ID INT PRIMARY KEY,
    Wirkstoff VARCHAR(100),
    Name VARCHAR(100) NOT NULL   
);

CREATE TABLE Event (
    Event_ID INT PRIMARY KEY,
    Beschreibung VARCHAR(255),
    Uhrzeit TIME NOT NULL,
    Datum DATE NOT NULL,
    RaumID INT NOT NULL,
    Behandlung_ID INT NOT NULL,
    FOREIGN KEY (RaumID) REFERENCES Raum(Raumnummer),
    FOREIGN KEY (Behandlung_ID) REFERENCES Behandlung(Behandlungs_ID)
);


CREATE TABLE Arzt_Event (
    Arzt_ID INT NOT NULL,
    Event_ID INT NOT NULL,
    PRIMARY KEY (Arzt_ID, Event_ID),
    FOREIGN KEY (Arzt_ID) REFERENCES Arzt(Arzt_ID),
    FOREIGN KEY (Event_ID) REFERENCES Event(Event_ID)
);

CREATE TABLE Event_Medikament (
    Event_ID INT NOT NULL,
    Medikament_ID INT NOT NULL,
    PRIMARY KEY (Event_ID, Medikament_ID),
    FOREIGN KEY (Event_ID) REFERENCES Event(Event_ID),
    FOREIGN KEY (Medikament_ID) REFERENCES Medikament(Medikament_ID)
);
"""
# Create all tables in one call and commit so later cells see the schema.
cur.execute(create_tables)
conn.commit()


In [20]:
import random
import datetime
from faker import Faker

# Swiss-German locale for generated names, streets and cities.
fake = Faker("de_CH")

# Seed BOTH random sources. Faker draws from its own generator, so seeding
# only the `random` module would still give different names, addresses and
# dates on every run.
Faker.seed(1)
random.seed(1)

# Fixed reference date that separates past (completed) from future (open)
# treatments and events in the generated data.
TODAY = datetime.date(2025, 12, 12)

def phone(i):
    """Return a synthetic phone number for the integer ``i``.

    The result is the fixed prefix ``+41 79 `` followed by ``i`` zero-padded
    to at least six digits, e.g. ``phone(7) == "+41 79 000007"``.
    """
    return "+41 79 {:06d}".format(i)

# =========================
# HARDCODED CASES
# =========================

# Descriptions drawn at random for Behandlung.Beschreibung.
BEHANDLUNG_CASES = [
    "Appendektomie Nachkontrolle",
    "Blutdruckeinstellung",
    "Diabetes Verlaufskontrolle",
    "Atemwegsinfekt Behandlung",
    "Herz-Kreislauf Check",
    "Wundversorgung",
    "Schmerztherapie",
    "Magen-Darm Untersuchung",
    "Präoperative Abklärung",
    "Postoperative Nachsorge"
]

# Descriptions drawn at random for Event.Beschreibung.
EVENT_BESCHREIBUNGEN = [
    "Visite",
    "Operation",
    "Nachkontrolle",
    "Medikamentengabe",
    "Notfallbehandlung",
    "Diagnostische Untersuchung",
    "Pflegevisite",
    "Therapieanpassung",
    "Laborbesprechung",
    "Entlassungsgespräch"
]

# Station types; each hospital is given a random sample of five of them.
STATION_NAMES = [
    "Chirurgie",
    "Innere Medizin",
    "Pädiatrie",
    "Notfall",
    "Intensivstation",
    "Radiologie"
]

# Room kinds, in the order expected by the weights used when rooms are created.
RAUM_ARTEN = [
    "Operationssaal",
    "Untersuchungsraum"
]

# Fixed medication catalogue as (Medikament_ID, Name, Wirkstoff) tuples.
MEDIKAMENTE = [
    (1, "Paracetamol", "Paracetamol"),
    (2, "Ibuprofen", "Ibuprofen"),
    (3, "Aspirin", "Acetylsalicylsäure"),
    (4, "Amoxicillin", "Amoxicillin"),
    (5, "Insulin", "Insulin")
]

# =========================
# Krankenhaus (3)
# =========================
# Three hospitals with IDs 1..3, generated address data, a postal code of
# 1000 + ID and the name "Kantonsspital <ID>".
for kh_id in range(1, 4):
    krankenhaus_row = (
        kh_id,
        fake.street_name(),
        fake.building_number(),
        f"{1000 + kh_id}",
        fake.city(),
        f"Kantonsspital {kh_id}",
    )
    cur.execute("""
        INSERT INTO Krankenhaus (KH_ID, Strasse, Hausnummer, Postleitzahl, Stadt, Name)
        VALUES (%s,%s,%s,%s,%s,%s)
    """, krankenhaus_row)
conn.commit()

# =========================
# Station (15) -> 3 hospitals with 5 stations each (not identical sets, but plausible)
# =========================
station_id = 1
station_rows = []  # (station_id, kh_id)

# Capacity added on top of the random base value for specific station types.
kapazitaet_bonus = {"Intensivstation": 10, "Notfall": 5}

for kh_id in range(1, 4):
    # Every hospital gets five of the six station types, in random order.
    for name in random.sample(STATION_NAMES, 5):
        kap = random.randint(25, 55) + kapazitaet_bonus.get(name, 0)
        station_row = (station_id, name, kap, kh_id)
        cur.execute("""
            INSERT INTO Station (Station_ID, Name, Kapazitaet, KH_ID)
            VALUES (%s,%s,%s,%s)
        """, station_row)
        station_rows.append((station_id, kh_id))
        station_id += 1
conn.commit()

# station_id is one past the last ID that was handed out.
NUM_STATIONS = station_id - 1

# =========================
# Raum (60) -> 4 rooms per station, mixing operating theatres and
# examination rooms (examination rooms are drawn more often)
# =========================
raum_id = 1
for sid in range(1, station_id):
    for _ in range(4):
        # Weights 2:5 follow the order of RAUM_ARTEN.
        (art,) = random.choices(RAUM_ARTEN, weights=[2, 5], k=1)
        raum_row = (raum_id, sid, art)
        cur.execute("""
            INSERT INTO Raum (Raumnummer, Station_ID, Art)
            VALUES (%s,%s,%s)
        """, raum_row)
        raum_id += 1
conn.commit()

# raum_id is one past the last room number that was handed out.
NUM_RAEUME = raum_id - 1

# =========================
# Medikament (5) hard-coded
# =========================
for medikament in MEDIKAMENTE:
    # Catalogue tuples are (id, name, active ingredient); the INSERT lists
    # the active ingredient before the name, hence the reordering.
    mid, name, wirkstoff = medikament
    cur.execute("""
        INSERT INTO Medikament (Medikament_ID, Wirkstoff, Name)
        VALUES (%s,%s,%s)
    """, (mid, wirkstoff, name))
conn.commit()

# =========================
# Patient (180) -> spread across stations (Notfall/Innere/Chirurgie slightly more)
# =========================
# Map every Station_ID to its name so stations can be weighted by type.
cur.execute("SELECT station_id, name FROM station;")
station_name_by_id = {
    row_station_id: row_name for row_station_id, row_name in cur.fetchall()
}

def station_weight(sid: int) -> int:
    """Return the sampling weight of the station with ID ``sid``.

    The weight is determined by the station's name as recorded in
    ``station_name_by_id``. Unknown IDs and unlisted names get weight 2.
    """
    weight_by_name = {
        "Notfall": 6,
        "Innere Medizin": 5,
        "Chirurgie": 4,
        "Intensivstation": 3,
        "Pädiatrie": 3,
        "Radiologie": 2,
    }
    return weight_by_name.get(station_name_by_id.get(sid, ""), 2)

# All station IDs created so far, with one sampling weight per station.
station_ids = list(range(1, station_id))
station_weights = [station_weight(s) for s in station_ids]

NUM_PATIENTS = 180

# Birth dates are drawn relative to the fixed reference date TODAY rather than
# the wall clock. Otherwise the data would change with the day the notebook
# is run, and patients could be born after TODAY.
MAX_PATIENT_AGE_YEARS = 95
OLDEST_GEBURTSDATUM = TODAY - datetime.timedelta(days=MAX_PATIENT_AGE_YEARS * 365)

for pid in range(1, NUM_PATIENTS + 1):
    # Weighted draw of the station the patient is assigned to.
    sid = random.choices(station_ids, weights=station_weights, k=1)[0]
    cur.execute("""
        INSERT INTO Patient
        (Patient_ID, Vorname, Nachname, Versicherungsnummer, Geburtsdatum, Strasse, Hausnummer, Postleitzahl, Stadt, Elektronisches_Patientendossier_ID, Station_ID)
        VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
    """, (
        pid,
        fake.first_name(),
        fake.last_name(),
        f"CH{pid:08d}",                  # UNIQUE: derived from the patient ID
        fake.date_between(OLDEST_GEBURTSDATUM, TODAY),
        fake.street_name(),
        fake.building_number(),
        f"{1000 + (pid % 50):04d}",      # four digits, as the CHECK requires
        fake.city(),
        f"EPD-{pid}",                    # UNIQUE: derived from the patient ID
        sid
    ))
conn.commit()

# =========================
# Arzt (24) -> unevenly spread across stations (Notfall/Chirurgie/Innere get more)
# =========================
NUM_AERZTE = 24
for aid in range(1, NUM_AERZTE + 1):
    # Weighted draw of the doctor's home station.
    (sid,) = random.choices(station_ids, weights=station_weights, k=1)
    arzt_row = (
        aid,
        phone(aid),
        fake.first_name(),
        fake.last_name(),
        aid % 7 == 0,  # every seventh doctor is marked as external
        sid,
    )
    cur.execute("""
        INSERT INTO Arzt
        (Arzt_ID, Telefonnummer, Vorname, Nachname, ist_extern, Station_ID)
        VALUES (%s,%s,%s,%s,%s,%s)
    """, arzt_row)
conn.commit()

# =========================
# Pflegekraft (48) -> more nurses than doctors, also unevenly distributed
# =========================
NUM_PFLEGE = 48
for pk in range(1, NUM_PFLEGE + 1):
    # Weighted draw of the nurse's station.
    (sid,) = random.choices(station_ids, weights=station_weights, k=1)
    pflegekraft_row = (
        pk,
        phone(200 + pk),  # offset keeps the numbers apart from the doctors'
        fake.first_name(),
        fake.last_name(),
        sid,
    )
    cur.execute("""
        INSERT INTO Pflegekraft
        (Pflege_ID, Telefonnummer, Vorname, Nachname, Station_ID)
        VALUES (%s,%s,%s,%s,%s)
    """, pflegekraft_row)
conn.commit()

# =========================
# Reserve (15) -> mixed entries; some have only a doctor or only a nurse
# (both references are nullable)
# =========================
NUM_RESERVE = 15
for rid in range(1, NUM_RESERVE + 1):
    sid = random.choice(station_ids)

    # A doctor is attached with probability 0.7, otherwise the column stays NULL.
    arzt_id = None
    if random.random() < 0.7:
        arzt_id = random.choice(range(1, NUM_AERZTE + 1))

    # A nurse is attached with probability 0.8, otherwise the column stays NULL.
    pflege_id = None
    if random.random() < 0.8:
        pflege_id = random.choice(range(1, NUM_PFLEGE + 1))

    reserve_row = (rid, phone(500 + rid), arzt_id, sid, pflege_id)
    cur.execute("""
        INSERT INTO Reserve
        (Reserve_ID, Telefonnummer, Arzt_ID, Station_ID, Pflegekraft_ID)
        VALUES (%s,%s,%s,%s,%s)
    """, reserve_row)
conn.commit()

# =========================
# Bett (300) -> 20 beds per station
# Each bed is drawn as occupied with probability 0.65. An occupied bed is only
# given to a patient whose Station_ID matches the bed's station, so the bed
# and its patient never disagree about the station. Once a station has no
# unplaced patient left, its remaining beds stay 'frei'.
# =========================
bett_id = 1
occupied_ratio = 0.65

# Group patients by assigned station, lowest Patient_ID first, so the
# placement is deterministic.
cur.execute("SELECT patient_id, station_id FROM patient ORDER BY patient_id;")
patients_by_station = {}
for pid, patient_station_id in cur.fetchall():
    patients_by_station.setdefault(patient_station_id, []).append(pid)

for sid in range(1, station_id):
    # Patients of this station that do not have a bed yet.
    unplaced_patients = iter(patients_by_station.get(sid, ()))
    for _ in range(20):
        # Exactly one random draw per bed, whether or not a patient is left,
        # so the random sequence seen by later sections is unaffected.
        if random.random() < occupied_ratio:
            patient_id = next(unplaced_patients, None)
        else:
            patient_id = None
        status = "belegt" if patient_id is not None else "frei"

        cur.execute("""
            INSERT INTO Bett
            (Bett_ID, Status, Patient_ID, Station_ID)
            VALUES (%s,%s,%s,%s)
        """, (bett_id, status, patient_id, sid))
        bett_id += 1

conn.commit()

# =========================
# Behandlung (220) -> several treatments per patient, unevenly distributed
# Rules:
#   - abgeschlossen=False: start after TODAY, End_Datum NULL
#   - abgeschlossen=True: start between 2022-01-01 (or the patient's birth
#     date, whichever is later) and TODAY; Start <= End <= TODAY
# =========================
NUM_BEHANDLUNGEN = 220
EARLIEST_BEHANDLUNG_START = datetime.date(2022, 1, 1)

# Birth dates are needed so that no completed treatment starts before the
# patient was born.
cur.execute("SELECT patient_id, geburtsdatum FROM patient;")
geburtsdatum_by_patient = dict(cur.fetchall())

patient_pool = list(range(1, NUM_PATIENTS + 1))
# Imbalance: some patients appear more often (e.g. chronic cases).
heavy_patients = random.sample(patient_pool, 25)
weights_pat = [5 if p in heavy_patients else 1 for p in patient_pool]

for bid in range(1, NUM_BEHANDLUNGEN + 1):
    patient_id = random.choices(patient_pool, weights=weights_pat, k=1)[0]
    # Roughly 65% of the treatments are already completed.
    abgeschlossen = (random.random() > 0.35)

    if abgeschlossen:
        # Never start before birth, and keep the window valid even when a
        # birth date lies after TODAY.
        earliest_start = max(
            EARLIEST_BEHANDLUNG_START,
            geburtsdatum_by_patient.get(patient_id, EARLIEST_BEHANDLUNG_START),
        )
        earliest_start = min(earliest_start, TODAY)
        start = fake.date_between(earliest_start, TODAY)
        # Lasts 1-30 days but must not end after TODAY.
        end = min(start + datetime.timedelta(days=random.randint(1, 30)), TODAY)
    else:
        start = TODAY + datetime.timedelta(days=random.randint(1, 90))
        end = None

    cur.execute("""
        INSERT INTO Behandlung
        (Behandlungs_ID, abgeschlossen, Beschreibung, Start_Datum, End_Datum, Patient_ID)
        VALUES (%s,%s,%s,%s,%s,%s)
    """, (
        bid,
        abgeschlossen,
        random.choice(BEHANDLUNG_CASES),
        start,
        end,
        patient_id
    ))
conn.commit()

# =========================
# Event -> 1 to 3 events per treatment
# Date rules:
#   - treatment NOT completed: the event lies in the future (>= TODAY+1) and
#     never before the treatment's Start_Datum
#   - treatment completed: the event lies between Start_Datum and End_Datum
#     (TODAY is used when End_Datum is NULL)
# Every event carries a description.
# =========================
# Latest offset, in days after the start, for events of open treatments.
MAX_EVENT_OFFSET_DAYS = 30

# ORDER BY fixes the row order so the random draws below hit the same
# treatments on every run; without it the database may return the rows in
# any order.
cur.execute("SELECT behandlungs_id, abgeschlossen, start_datum, COALESCE(end_datum, %s) FROM behandlung ORDER BY behandlungs_id;", (TODAY,))
behandlungen = cur.fetchall()

event_id = 1
for behandlungs_id, abgeschlossen, start_datum, end_datum_fallback in behandlungen:
    n_events = random.randint(1, 3)

    for _ in range(n_events):
        if abgeschlossen:
            # Event within the treatment period; guard against an end that
            # precedes the start.
            ev_date = fake.date_between(start_datum, max(start_datum, end_datum_fallback))
        else:
            # Future event, anchored to the treatment start so it can never
            # be dated before the treatment begins.
            first_possible = max(start_datum, TODAY + datetime.timedelta(days=1))
            ev_date = first_possible + datetime.timedelta(
                days=random.randint(0, MAX_EVENT_OFFSET_DAYS)
            )

        cur.execute("""
            INSERT INTO Event
            (Event_ID, Beschreibung, Uhrzeit, Datum, RaumID, Behandlung_ID)
            VALUES (%s,%s,%s,%s,%s,%s)
        """, (
            event_id,
            random.choice(EVENT_BESCHREIBUNGEN),
            fake.time(),
            ev_date,
            random.randint(1, NUM_RAEUME),
            behandlungs_id
        ))
        event_id += 1

conn.commit()

# event_id is one past the last ID that was handed out.
NUM_EVENTS = event_id - 1

# =========================
# Arzt_Event -> exactly one doctor per event, drawn with uneven weights:
# six randomly chosen "busy" doctors are three times as likely as the rest.
# NOTE(review): nothing here enforces a minimum number of events per doctor.
# =========================
arzt_ids = range(1, NUM_AERZTE + 1)

# Running tally of events assigned to each doctor.
arzt_event_counts = dict.fromkeys(arzt_ids, 0)

# Weights: a few "busy doctors" get weight 6, everyone else weight 2.
busy = random.sample(arzt_ids, 6)
weights_arzt = [6 if candidate in busy else 2 for candidate in arzt_ids]

for eid in range(1, NUM_EVENTS + 1):
    (arzt_id,) = random.choices(arzt_ids, weights=weights_arzt, k=1)
    arzt_event_counts[arzt_id] += 1
    cur.execute("""
        INSERT INTO Arzt_Event (Arzt_ID, Event_ID)
        VALUES (%s,%s)
    """, (arzt_id, eid))

conn.commit()

# =========================
# Event_Medikament -> 1 to 2 medications per event; sampling without
# replacement keeps the composite primary key free of duplicates
# =========================
medikament_ids = [1, 2, 3, 4, 5]

for eid in range(1, NUM_EVENTS + 1):
    n_meds = random.randint(1, 2)
    for mid in random.sample(medikament_ids, n_meds):
        cur.execute("""
            INSERT INTO Event_Medikament (Event_ID, Medikament_ID)
            VALUES (%s,%s)
        """, (eid, mid))

conn.commit()

# Summary of the inserted row counts, one line per group of tables.
print(
    "Daten erfolgreich eingefügt:",
    f"Krankenhaus: 3 | Stationen: {NUM_STATIONS} | Räume: {NUM_RAEUME} | Betten: {bett_id-1}",
    f"Patienten: {NUM_PATIENTS} | Ärzte: {NUM_AERZTE} | Pflegekräfte: {NUM_PFLEGE} | Reserve: {NUM_RESERVE}",
    f"Behandlungen: {NUM_BEHANDLUNGEN} | Events: {NUM_EVENTS}",
    sep="\n",
)


Daten erfolgreich eingefügt:
Krankenhaus: 3 | Stationen: 15 | Räume: 60 | Betten: 300
Patienten: 180 | Ärzte: 24 | Pflegekräfte: 48 | Reserve: 15
Behandlungen: 220 | Events: 441


In [21]:
# Discard any uncommitted work on the shared connection.
# NOTE(review): every section above ends with a commit, so this presumably
# only matters after a failed statement left the transaction aborted - confirm.
conn.rollback()
