In [None]:
# This notebook is meant to be run only once: it migrates the schedule data from the format used in FS2025 (CSV) to the new format used in HS2025 (SQLite database).

In [1]:
# Main variables
# Path of the legacy persons CSV export; it is read with pandas in the conversion cell below.
old_csv_path = "data/INI Friday Aperos - Persons.csv"
# Path of the SQLite database file that the conversion cell below connects to and fills.
out_db_path = "data/FS2025.db"

In [2]:
# Transition from CSV to SQLite DB. This is needed only once and is thus done ad hoc.
# This part loads the legacy CSV and reshapes it into the columns the DB import expects.
import sqlite3
import pandas as pd


def _split_multiline(cell):
    """Split a multi-line CSV cell into a list of stripped, non-empty lines.

    Cells that are not strings (pandas reads empty cells as NaN) yield an empty list.
    """
    if not isinstance(cell, str):
        return []
    return [line.strip() for line in cell.splitlines() if line.strip()]


df = pd.read_csv(old_csv_path)
display(df)

# A person may have several e-mail addresses, one per line; the first one listed
# becomes the contact address (None when the cell is empty).
all_addresses = df["Email Addresses"].apply(_split_multiline)
df["ContactEmailAddress"] = [addresses[0] if addresses else None for addresses in all_addresses]
df["EmailAddresses"] = all_addresses

# The old format only tracks aperos that were done (one date per line); the two new
# status lists start out empty. Each row gets its own list object.
df["AperosDone"] = df["Previous Aperos"].apply(_split_multiline)
df["AperosFoundSub"] = [[] for _ in range(len(df))]
df["AperosAWOL"] = [[] for _ in range(len(df))]

df["ProfileURL"] = df["Profile URL"]

# Old position labels -> new position labels.
position_map = {
    "phd student": "PhD Student",
    "nsc master student": "NSC Master Student",
    "not with INI": "Not INI Student",
}
# Fail early with a readable message if the CSV contains a position (or a missing
# value) that has no mapping, instead of a bare KeyError raised from inside apply().
unknown_positions = set(df["Position"].unique()) - set(position_map)
if unknown_positions:
    raise KeyError(f"Positions without an entry in position_map: {unknown_positions}")
df["Position"] = df["Position"].map(position_map)

# Missing comments are read as NaN; NaN is truthy, so a plain truthiness test would
# keep it. Replace missing values explicitly with an empty string.
df["Comments"] = df["Comments"].apply(lambda x: "" if pd.isna(x) else x)

# Keep only the columns of the new format, in their final order. This also drops the
# old-format columns ("Person ID", "Email Addresses", "Previous Aperos", "Profile URL").
df = df[["Name", "Position", "ContactEmailAddress", "EmailAddresses", "AperosDone", "AperosFoundSub", "AperosAWOL", "Score", "Supervisor", "Comments", "ProfileURL"]]

display(df)

# Write the transformed DataFrame `df` into a normalized SQLite database:
# one row per person in Persons, one row per e-mail address in EmailAddresses and
# one row per (person, date, status) in AperoAssignments.

# Connect (creates the DB file if it does not exist)
conn = sqlite3.connect(out_db_path)
try:
    cursor = conn.cursor()

    # Create tables.
    # The script opens an explicit transaction (BEGIN) so that the schema and all
    # inserts below are committed together; if anything fails, the rollback leaves
    # no half-initialized tables behind. The tables are created without
    # IF NOT EXISTS on purpose: running the import against an already filled
    # database fails here instead of inserting every person twice.
    cursor.executescript("""
BEGIN;

CREATE TABLE Persons (
    PersonID INTEGER PRIMARY KEY AUTOINCREMENT,
    Name TEXT NOT NULL,
    Position TEXT,
    ContactEmailAddress TEXT,
    Score REAL,
    Supervisor TEXT,
    Comments TEXT,
    ProfileURL TEXT
);

CREATE TABLE EmailAddresses (
    EmailAddressID INTEGER PRIMARY KEY AUTOINCREMENT,
    PersonID INTEGER NOT NULL,
    EmailAddress TEXT,
    FOREIGN KEY (PersonID) REFERENCES Persons(PersonID)
);

CREATE TABLE AperoAssignments (
    AperoAssignmentID INTEGER PRIMARY KEY AUTOINCREMENT,
    PersonID INTEGER NOT NULL,
    Date TEXT NOT NULL,
    Status TEXT CHECK(Status IN ('done','found_sub','awol')),
    FOREIGN KEY (PersonID) REFERENCES Persons(PersonID)
);
""")

    # DataFrame column holding a list of dates -> Status value stored for those dates.
    # The order matches the original insertion order (done, found_sub, awol).
    status_by_column = (
        ("AperosDone", "done"),
        ("AperosFoundSub", "found_sub"),
        ("AperosAWOL", "awol"),
    )

    # Insert rows
    for _, row in df.iterrows():
        # Insert into Persons (names are normalized to title case)
        cursor.execute("""
            INSERT INTO Persons (Name, Position, ContactEmailAddress, Score, Supervisor, Comments, ProfileURL)
            VALUES (?, ?, ?, ?, ?, ?, ?)
        """, (
            row["Name"].title(),
            row["Position"],
            row["ContactEmailAddress"],
            row["Score"],
            row["Supervisor"],
            row["Comments"],
            row["ProfileURL"]
        ))
        # Capture the generated PersonID before further inserts reuse the cursor.
        person_id = cursor.lastrowid

        # Insert into related tables
        cursor.executemany(
            "INSERT INTO EmailAddresses (PersonID, EmailAddress) VALUES (?, ?)",
            [(person_id, email) for email in row["EmailAddresses"]],
        )

        for column, status in status_by_column:
            cursor.executemany(
                "INSERT INTO AperoAssignments (PersonID, Date, Status) VALUES (?, ?, ?)",
                [(person_id, date, status) for date in row[column]],
            )

    conn.commit()
except Exception:
    # Undo the schema creation and any partial inserts, then surface the error.
    conn.rollback()
    raise
finally:
    # Always release the database file, even when the import failed.
    conn.close()

print("CSV successfully imported into normalized SQLite database.")

Unnamed: 0,Person ID,Name,Email Addresses,Position,Supervisor,Previous Aperos,Score,Comments,Profile URL
0,66,Gizem Aydemir,aydemir@ethz.ch\ngizem@ini.ethz.ch,phd student,Mehmet Fatih Yanik,2024-03-22,1.000000,Score adjusted based on personal communications,https://services.ini.uzh.ch/admin/modules/uzh/...
1,159,KONSTANTINA VASILAKOU,konstantina.vasilakou@uzh.ch,nsc master student,Richard Hahnloser,2025-03-14,0.975595,,https://services.ini.uzh.ch/admin/modules/uzh/...
2,145,Ekaterina Panchenko,ekaterinamaksimovna.panchenko@uzh.ch,nsc master student,Mehmet Fatih Yanik,2025-02-21,0.899819,,https://services.ini.uzh.ch/admin/modules/uzh/...
3,121,Constanze Albrecht,constanzesophie.albrecht@uzh.ch,nsc master student,Mehmet Fatih Yanik,,0.841745,,https://services.ini.uzh.ch/admin/modules/uzh/...
4,153,Maximilian Schmieschek,maximilianhuberttheodor.schmieschek@uzh.ch,nsc master student,Klaas Stephan,2025-02-28,0.718835,,https://services.ini.uzh.ch/admin/modules/uzh/...
...,...,...,...,...,...,...,...,...,...
86,99,Mohammadali Sharifshazileh,mohamad-ali.sharif@ini.uzh.ch,not with INI,Giacomo Indiveri,,0.000000,,https://services.ini.uzh.ch/admin/modules/uzh/...
87,105,Tristan Torchet,ttorchet@student.ethz.ch,phd student,Melika Payvand,2024-10-25\n2023-10-20,0.000000,,https://services.ini.uzh.ch/admin/modules/uzh/...
88,108,Zuowen Wang,zuowen@ini.uzh.ch,not with INI,Shih-Chii Liu,2024-04-12,0.000000,,https://services.ini.uzh.ch/admin/modules/uzh/...
89,135,Maximilian Miguel Kalcher,maximilianmiguel.kalcher@uzh.ch,nsc master student,Mehmet Fatih Yanik,2025-02-28\n2024-11-08,0.000000,,https://services.ini.uzh.ch/admin/modules/uzh/...


Unnamed: 0,Name,Position,ContactEmailAddress,EmailAddresses,AperosDone,AperosFoundSub,AperosAWOL,Score,Supervisor,Comments,ProfileURL
0,Gizem Aydemir,PhD Student,aydemir@ethz.ch,"[aydemir@ethz.ch, gizem@ini.ethz.ch]",[2024-03-22],[],[],1.000000,Mehmet Fatih Yanik,Score adjusted based on personal communications,https://services.ini.uzh.ch/admin/modules/uzh/...
1,KONSTANTINA VASILAKOU,NSC Master Student,konstantina.vasilakou@uzh.ch,[konstantina.vasilakou@uzh.ch],[2025-03-14],[],[],0.975595,Richard Hahnloser,,https://services.ini.uzh.ch/admin/modules/uzh/...
2,Ekaterina Panchenko,NSC Master Student,ekaterinamaksimovna.panchenko@uzh.ch,[ekaterinamaksimovna.panchenko@uzh.ch],[2025-02-21],[],[],0.899819,Mehmet Fatih Yanik,,https://services.ini.uzh.ch/admin/modules/uzh/...
3,Constanze Albrecht,NSC Master Student,constanzesophie.albrecht@uzh.ch,[constanzesophie.albrecht@uzh.ch],[],[],[],0.841745,Mehmet Fatih Yanik,,https://services.ini.uzh.ch/admin/modules/uzh/...
4,Maximilian Schmieschek,NSC Master Student,maximilianhuberttheodor.schmieschek@uzh.ch,[maximilianhuberttheodor.schmieschek@uzh.ch],[2025-02-28],[],[],0.718835,Klaas Stephan,,https://services.ini.uzh.ch/admin/modules/uzh/...
...,...,...,...,...,...,...,...,...,...,...,...
86,Mohammadali Sharifshazileh,Not INI Student,mohamad-ali.sharif@ini.uzh.ch,[mohamad-ali.sharif@ini.uzh.ch],[],[],[],0.000000,Giacomo Indiveri,,https://services.ini.uzh.ch/admin/modules/uzh/...
87,Tristan Torchet,PhD Student,ttorchet@student.ethz.ch,[ttorchet@student.ethz.ch],"[2024-10-25, 2023-10-20]",[],[],0.000000,Melika Payvand,,https://services.ini.uzh.ch/admin/modules/uzh/...
88,Zuowen Wang,Not INI Student,zuowen@ini.uzh.ch,[zuowen@ini.uzh.ch],[2024-04-12],[],[],0.000000,Shih-Chii Liu,,https://services.ini.uzh.ch/admin/modules/uzh/...
89,Maximilian Miguel Kalcher,NSC Master Student,maximilianmiguel.kalcher@uzh.ch,[maximilianmiguel.kalcher@uzh.ch],"[2025-02-28, 2024-11-08]",[],[],0.000000,Mehmet Fatih Yanik,,https://services.ini.uzh.ch/admin/modules/uzh/...


CSV successfully imported into normalized SQLite database.
