# Section Schedule

Schedule students to sections based on their preferences. This notebook also contains scripts for generating section control codes for a fully-integrated solution.

## Setup

Import the required packages into the namespace.

In [None]:
import numpy as np
import pandas as pd

import itertools
from typing import NamedTuple

In [None]:
# Fixed seed derived from the code points of the organization's name.
SEED = sum(map(ord, 'Computer Science Mentors'))

## Generate control codes

In [None]:
def generate_control_code(row, length=6):
    """
    Return a control code of the desired length, zero-padded as necessary.

    The code is derived from a SHA-256 digest of the row's fields rather than
    the built-in ``hash``: string hashes are salted per interpreter process,
    so ``hash``-based codes would change every time the kernel restarts and
    would no longer match codes that were already exported.

    Parameters:
        row: iterable of field values; each is converted with ``str``.
        length: number of characters in the returned code.

    Returns:
        A string of ``length`` decimal digits.
    """
    import hashlib  # local import keeps this cell self-contained

    # Join with the ASCII unit separator so ('ab', 'c') and ('a', 'bc')
    # do not serialize to the same payload.
    payload = '\x1f'.join(str(field) for field in row).encode('utf-8')
    number = int.from_bytes(hashlib.sha256(payload).digest(), 'big')
    return str(number)[:length].zfill(length)

In [None]:
# room_schedule = pd.read_csv('room_schedule.csv')
# Inline sample rows standing in for the CSV above.
room_schedule = pd.DataFrame.from_records(
    [
        ('kevinlin1@berkeley.edu', 'CS 61A', 'Soda 283F', '4', 'Mon 11:00 AM'),
        ('kevinlin1@berkeley.edu', 'CS 61A', 'Soda 283F', '4', 'Mon 10:00 AM'),
    ],
    columns=['Email Address', 'Course', 'Room', 'Capacity', 'Time']
)

# Derive a control code from every row, then key the table by that code.
room_schedule = room_schedule.assign(
    Code=room_schedule.apply(generate_control_code, axis=1, raw=True)
).set_index('Code')
room_schedule.head()

### Export schedule

In [None]:
# The codes were moved into the index by set_index('Code'), so there is no
# 'Code' column to select; restore it as a column before exporting.
room_schedule.reset_index()['Code'].to_csv('control_codes.csv', index=False)

## Input data

Load student preferences from a Google Form.

In [None]:
# Column headers of the preferences table.
EMAIL = 'Email Address'
FIRST = 'First choice'
SECOND = 'Second choice'
THIRD = 'Third choice'
BACKUP = 'Backup'
# Ranked choices, best first; the backup column is not a rank.
RANKS = (FIRST, SECOND, THIRD)

# preferences = pd.read_csv('preferences.csv', dtype=str)
# Inline sample rows standing in for the CSV above; choices are control codes
# taken from the room schedule's index, and unused choices are empty strings.
preferences = pd.DataFrame.from_records(
    [
        ('kvnln@berkeley.edu', 'CS 61A', room_schedule.index[0], room_schedule.index[1], '', ''),
        ('kvnln@berkeley.edu', 'CS 61A', room_schedule.index[0], room_schedule.index[1], '', ''),
    ],
    columns=[EMAIL, 'Course', FIRST, SECOND, THIRD, BACKUP]
)

# Replace the single comma-separated backup column with one column per backup
# choice (missing entries become ''), rename the email column to 'Email', and
# make every column label a string (the split columns are labelled 0, 1, ...).
preferences = pd.concat([
    preferences.drop(columns=BACKUP).rename(columns={EMAIL: 'Email'}),
    preferences[BACKUP].str.split(',', expand=True).fillna('').astype(str)
], axis=1).rename(columns=str)

## Greedy algorithm

Solve the problem using a simple greedy algorithm with randomized restarts.

In [None]:
class Solution(NamedTuple):
    """Solution to an assignment problem."""
    # Maps each assigned request to the section it received.
    assignments: dict
    # Histogram mapping a rank label to the number of requests met at that rank.
    stats: dict

    def metric(solution, weights=None):
        """
        Given a Solution with a stats histogram, assign weights to each rank to
        evaluate the overall quality of the solution based on the weights.

        Parameters:
            solution: the Solution to evaluate.
            weights: mapping from rank label to its weight. Defaults to
                {FIRST: 3, SECOND: 2, THIRD: 1}.

        Returns:
            The sum over all ranks of count * weight.

        Raises:
            KeyError: if the histogram contains a rank that has no weight.
        """
        # Build the default per call rather than sharing one dict object
        # across every invocation through a mutable default argument.
        if weights is None:
            weights = {FIRST: 3, SECOND: 2, THIRD: 1}
        return sum(count * weights[rank] for rank, count in solution.stats.items())

    @property
    def score(self):
        """Quality of this solution under the default rank weights."""
        return Solution.metric(self)

class Assignment(NamedTuple):
    """Hashable (email, course) pair identifying one request to be scheduled."""
    # Email address of the person making the request.
    email: str
    # Course the request is for.
    course: str

def greedy(preferences, schedule, ranks=RANKS, preference_slice=None):
    """
    Greedily give each request the first still-free section it listed.

    Rows of ``preferences`` are visited in order. Each distinct
    (email, course) pair is handled once, on its first row, and receives the
    first of its preferences that no earlier request has taken. A section is
    handed out at most once. Requests whose preferences are all taken are
    left out of the result.

    Parameters:
        preferences: DataFrame with 'email' and 'course' columns (matched
            case-insensitively) followed by the preference columns.
        schedule: DataFrame indexed by section code with a 'course' column
            (matched case-insensitively).
        ranks: labels for the leading preference columns, best first. Any
            preference beyond these (e.g. backups) is not counted in stats.
        preference_slice: slice selecting the preference fields of each row.
            Defaults to every column from the first-choice column onward.

    Returns:
        A Solution whose ``assignments`` maps Assignment(email, course) to a
        section code and whose ``stats`` counts assignments per rank.

    Raises:
        AssertionError: if a non-blank preference is missing from the
            schedule index or belongs to a different course.
    """
    preferences = preferences.rename(columns=str.lower)
    schedule = schedule.rename(columns=str.lower)
    if preference_slice is None:
        preference_slice = slice(pd.Index(preferences.columns).get_loc(FIRST.lower()),
                                 len(preferences.columns))
    assigned = set()
    assignments = {}
    stats = {rank: 0 for rank in ranks}
    for row in preferences.itertuples(index=False):
        assignment = Assignment(row.email, row.course)
        if assignment in assignments:
            continue
        for rank, preference in itertools.zip_longest(ranks, row[preference_slice]):
            # Codes split out of a comma-separated answer may carry
            # surrounding whitespace that would never match the index.
            if isinstance(preference, str):
                preference = preference.strip()
            # Unused choices arrive as '', NaN, or the None padding added by
            # zip_longest; they are "no preference", not invalid codes.
            if preference is None or preference == '' or pd.isna(preference):
                continue
            assert preference in schedule.index, 'preference not found in schedule'
            assert row.course == schedule.loc[preference].course, 'desired course not found'
            if preference not in assigned:
                assignments[assignment] = preference
                if rank in stats:
                    stats[rank] += 1
                assigned.add(preference)
                break
    return Solution(assignments, stats)

In [None]:
# Number of randomized runs of the greedy algorithm.
LIMIT = 1000
# Seeded random state, so the shuffles drawn from it are reproducible.
rand = np.random.RandomState(SEED)

In [None]:
# Run the greedy algorithm LIMIT times, each time on a freshly shuffled copy
# of the preferences, and keep the solution that maximizes Solution.metric.
best = max((
    greedy(preferences.sample(frac=1, random_state=rand), room_schedule)
    for _ in range(LIMIT)
), key=Solution.metric)
# Display how many requests were met at each rank.
best.stats

In [None]:
# Display the section code chosen for each (email, course) request.
best.assignments

### Export schedule

In [None]:
# One output row per assigned request: who, which course, which section code.
greedy_schedule = pd.DataFrame.from_records(
    [
        (request.email, request.course, code)
        for request, code in best.assignments.items()
    ],
    columns=['Email Address', 'Course', 'Assignment'],
)

In [None]:
# Write the greedy assignments to disk without the positional index column.
greedy_schedule.to_csv('greedy_schedule.csv', index=False)

## Mathematical algorithm

Solve the problem with a mathematical approach using zero-one (binary) linear programming.

In [None]:
# TODO: Determine the best way to formulate the problem as a zero-one linear program.

### Export schedule

In [None]:
# NOTE(review): `section_schedule` is not defined anywhere in the visible
# notebook, so this cell raises NameError until the linear-programming
# solution is implemented — confirm before running.
section_schedule.to_csv('section_schedule.csv', index=False)