In [88]:
"""Build the combined data dictionary and the student placement dictionary"""
import json

# Remaining capacity per project, keyed by project id. The handle is closed as soon as the JSON is parsed.
with open("./mentors_size_data.json", "r", encoding="utf-8") as size_file:
    project_size_data = json.load(size_file)
project_size_dict = {project["proj_id"]: project["proj_size_remaining"] for project in project_size_data}

all_project_data = {}
student_placements = {}
# The dump is read as one JSON document per line.
with open("./mentor_index_dump_2", "r", encoding="utf-8") as project_docs:
    for line in project_docs:
        if not line.strip():
            # Blank lines (e.g. a stray trailing newline) are not JSON documents.
            continue
        project_doc = json.loads(line)
        id_ = project_doc["id"]
        # Hard-coded exclusion of a single project record.
        # NOTE(review): why this record is skipped is not documented here - confirm it is still needed.
        if id_ == 'recRJGXhTr9lEq1tO':
            continue
        proj_size = project_size_dict[id_]
        if proj_size == 0:
            # No seats left, so the project takes no part in matching.
            continue
        # Normalise a missing/null selection list to [] so later cells can index it unconditionally.
        selections = project_doc.get("listStudentsSelected") or []
        project_doc["listStudentsSelected"] = selections
        project_doc["proj_size_remaining"] = proj_size
        project_doc["num_first_choice"] = sum(1 for selection in selections if selection["choice"] == 1)
        all_project_data[id_] = project_doc

        # "proj_capacity" keeps the starting size so the final placements can be double checked.
        student_placements[id_] = {"students": [], "proj_capacity": proj_size}
# NOTE(review): this prints every project record in full, mentor names and bios included.
print(all_project_data)

{'recVXa5uqnQ8MpDzJ': {'numStudentsSelected': 5, 'backgroundRural': True, 'timezone': -4, 'proj_tags': ['Frontend', 'Data'], 'bio': 'I am a passionate software developer with extensive practical experience with Windows app development using C#, macOS app development using Swift, and in more programming languages that I can even count. I have worked at eFlex Systems and Oakland University. I grew up in the Greater Detroit Area in the United States of America. Learn more about me on my LinkedIn: https://www.linkedin.com/in/william-kent-13847711a/ (pronouns: he/his)', 'proj_description': 'Create a spreadsheet program for Windows 10 using C#, Windows Forms, and .NET 5.', 'preferStudentUnderRep': 0, 'okExtended': True, 'okTimezoneDifference': False, 'preferToolExistingKnowledge': True, 'name': 'William Kent', 'company': 'Sunburst Solutions', 'id': 'recVXa5uqnQ8MpDzJ', 'track': 'Beginner', 'listStudentsSelected': [{'student_id': 'recJHT6W7cvgVB3PI', 'choice': 2}, {'student_id': 'recAEjPjzBpO

In [69]:
# Build analysis data
# # first choice votes for every project
# import numpy as np
# import matplotlib.pyplot as plt
# from matplotlib import colors
#
#
# first_choices = {}
# for proj_id, proj_data in all_project_data.items():
#     try:
#         picks = [choice for choice in proj_data.get('listStudentsSelected') if choice["choice"] == 1]
#         first_choices[proj_id] = {"first_choices": picks, "num": len(picks)}
#     except:
#         pass
# print(first_choices)
#
#
# num_zero = 0
# num_one = 0
# num_two = 0
# num_three = 0
# num_first_choice = 0
#
# for k, proj in first_choices.items():
#     num_first_choice += proj["num"]
#     if proj["num"] == 0:
#         num_zero += 1
#     if proj["num"] == 1:
#         num_one += 1
#     if proj["num"] == 2:
#         num_two += 2
#     if proj["num"] > 3:
#         num_three += 3
#
# fig, axs = plt.subplots()
# N, bins, patches = axs.hist([choice["num"] for k, choice in first_choices.items()])
# fracs = N / N.max()
# norm = colors.Normalize(fracs.min(), fracs.max())
# for thisfrac, thispatch in zip(fracs, patches):
#     color = plt.cm.viridis(norm(thisfrac))
#     thispatch.set_facecolor(color)
# Pair each project's open seats with its first-choice vote count. Projects with exactly 3 seats
# remaining are left out of all three tallies.
# NOTE(review): the reason for excluding 3 is not stated in this cell - confirm.
seat_vote_pairs = [
    (entry.get('proj_size_remaining'), entry.get('num_first_choice'))
    for entry in all_project_data.values()
    if entry.get('proj_size_remaining') != 3
]
# Seats exactly matched by first-choice votes.
perfect_first_choice_project_count = sum(1 for seats, firsts in seat_vote_pairs if seats == firsts)
# More seats than first-choice votes.
less_first_choice_project_count = sum(1 for seats, firsts in seat_vote_pairs if seats > firsts)
# Fewer seats than first-choice votes.
greater_first_choice_project_count = sum(1 for seats, firsts in seat_vote_pairs if seats < firsts)

# Every vote's student id, in project order (one entry per vote, so ids repeat).
students = [
    vote["student_id"]
    for entry in all_project_data.values()
    for vote in entry["listStudentsSelected"]
]

# Despite the name this is the set of distinct student ids, not a number.
num_students = set(students)

In [89]:
""" Create final matches dict

Ok so here's the plan:
1. Start with projects that are 'perfect' - the same number of first choice votes as students needed. Assign students
    to those by adding their information to the saved project dictionary. Remove those students' votes from all projects
    to avoid duplicates. Projects can also be removed from the list for simplicity - Maybe not?.
2. Then, assign first choice votes to students on projects with less first choice votes than the projects need. Also
    remove those students votes from other projects. Decrement `proj_size_remaining`.
3. Then, assign second, third, and more choice votes as needed until `proj_size_remaining` = 0, then remove the project.
    Do this in order, all second place votes, third place votes, and so on so that students get their lowest possible
    choice. If multiple students are tied, be sure to assign based on which student has the fewest votes left in other
    projects. Also remember to remove the student from all other projects when their vote is saved.
4. Once all projects with less first choice votes than needed are dealt with, we are left with only projects that
    started with more than enough first choice votes. These should have exactly the correct number of first choice votes
    left due to how students have been removed. Assign these students, and complain loudly if something is wrong.

The data used is a dict called `all_project_data` that contains all projects with the project data, student choices, `num_first_choice` (for convenience), and `proj_size_remaining`:
```json
all_project_data = {
  "<proj_id>": {
    <project information from elastic>,
    "listStudentsSelected": [{"student_id": <student_id>, "choice": 1}, ...],
    "proj_size_remaining": 5 ,
    "num_first_choice": 3,
  },
  ...
}
```

as well as a dict called `student_placements`  that will be used to store the final placements:
```json
student_placements = {
  "<proj_id>": {
    "students": ["<student_id>", ...],
    "proj_capacity": 5  # Allows for double checking later
  },
  ...
}
```
"""
from typing import Tuple, Union
from copy import deepcopy


def count_student_votes(project_data_dict: dict, student_id: str) -> int:
    """Count the votes `student_id` still holds across every project in `project_data_dict`.

    Each entry of a project's "listStudentsSelected" whose "student_id" matches counts once.
    """
    return sum(
        1
        for entry in project_data_dict.values()
        for vote in entry['listStudentsSelected']
        if vote["student_id"] == student_id
    )


def remove_student(project_data_dict: dict, student_id: str) -> dict:
    """Strip every vote cast by `student_id` from every project.

    Each project's "listStudentsSelected" is rebound to a new list without the student's entries;
    the previous list objects are not modified. The dict passed in is updated and also returned.
    """
    for entry in project_data_dict.values():
        kept_votes = []
        for vote in entry["listStudentsSelected"]:
            if vote["student_id"] == student_id:
                continue
            kept_votes.append(vote)
        entry["listStudentsSelected"] = kept_votes
    return project_data_dict


def place_student(
    project_data_dict: dict, placement_dict: dict, project_id: str, student_id: str
) -> Tuple[dict, dict]:
    """Assign `student_id` to `project_id` and update the bookkeeping in both dicts.

    The student is appended to the project's entry in `placement_dict`, all of their votes are stripped
    from `project_data_dict`, and the project's "proj_size_remaining" drops by one. "num_first_choice"
    drops by one as well when the vote being honoured was a first choice. A project with no seats left
    is deleted from `project_data_dict`.

    :return project_data_dict, placement_dict
    """
    # Rank the student gave this project; stays 0 if they hold no vote for it. The last matching vote wins.
    ranks = [
        vote["choice"]
        for vote in project_data_dict[project_id]["listStudentsSelected"]
        if vote["student_id"] == student_id
    ]
    choice = ranks[-1] if ranks else 0

    placement_dict[project_id]["students"].append(student_id)
    project_data_dict = remove_student(project_data_dict, student_id)

    target = project_data_dict[project_id]
    target["proj_size_remaining"] -= 1
    if choice == 1:
        target["num_first_choice"] -= 1

    # A filled project leaves the pool so later passes never look at it again.
    if target["proj_size_remaining"] <= 0:
        del project_data_dict[project_id]

    return project_data_dict, placement_dict


def place_students_of_choice(
    project_data_dict: dict,
    placement_dict: dict,
    project_id: str,
    choice: int,
    num: int,
) -> Tuple[dict, dict]:
    """Place up to `num` students who ranked `project_id` as their `choice` onto that project.

    Candidates are taken in the order they appear in the project's "listStudentsSelected"; no balancing
    between candidates is attempted (see `place_students_of_choice_balanced` for that). Placement stops
    early once `num` students are placed, the candidates run out, or the project fills up.

    `project_data_dict` itself is left untouched: placements are applied to a deep copy, and that copy is
    what is returned. `placement_dict` is updated in place.

    :param project_data_dict: project id -> project record, as described at the top of this cell
    :param placement_dict: project id -> {"students": [...], "proj_capacity": ...}
    :param project_id: project receiving the students
    :param choice: vote rank to match (1 = first choice)
    :param num: maximum number of students to place
    :return modified_project_data_dict, placement_dict
    """

    counter = 0
    # Work on a copy: `place_student` deletes filled projects, and the callers in this cell iterate over
    # the very dict they pass in, which must not change size mid-iteration.
    modified_project_data_dict = deepcopy(project_data_dict)
    for student in project_data_dict[project_id]["listStudentsSelected"]:
        if counter >= num:
            break
        if project_id not in modified_project_data_dict:
            # The project filled up and was removed from the copy; another placement would raise KeyError.
            break
        if student["choice"] == choice:
            place_student(modified_project_data_dict, placement_dict, project_id, student["student_id"])
            counter += 1
            print(f"added to {project_id} for a total of {counter}")
    # Return the dict that was passed in, not whatever global happens to be named `student_placements`.
    return modified_project_data_dict, placement_dict

def place_students_of_choice_balanced(
    project_data_dict: dict,
    placement_dict: dict,
    project_id: str,
    choice: list,
    num: int,
) -> Tuple[dict, dict]:
    """Place up to `num` students whose vote for `project_id` has a rank contained in `choice`.

    Candidates are ordered by how many votes they currently hold across all projects (this one included),
    fewest first; ties keep their order from "listStudentsSelected". The counts are taken once, before any
    placement in this call. Placement stops early if the candidates run out or the project fills up.

    Unlike `place_students_of_choice`, no copy is made: `project_data_dict` and `placement_dict` are both
    updated in place through `place_student`.

    :param project_data_dict: project id -> project record, as described at the top of this cell
    :param placement_dict: project id -> {"students": [...], "proj_capacity": ...}
    :param project_id: project receiving the students
    :param choice: collection of vote ranks to match; a vote matches when its "choice" is `in` it
    :param num: maximum number of students to place
    :return project_data_dict, placement_dict
    """

    # Keyed by student id, so a student listed twice on this project is only considered once.
    votes_by_student = {
        student["student_id"]: count_student_votes(project_data_dict, student["student_id"])
        for student in project_data_dict[project_id]["listStudentsSelected"]
        if student["choice"] in choice
    }
    matching_student_frequency = sorted(votes_by_student.items(), key=lambda x: x[1])
    print(matching_student_frequency)

    # max(..., 0) keeps a negative `num` meaning "place nobody" rather than slicing from the end.
    for student_id, _ in matching_student_frequency[:max(num, 0)]:
        if project_id not in project_data_dict:
            # `place_student` removed the project because it is full; another placement would raise KeyError.
            break
        project_data_dict, placement_dict = place_student(project_data_dict, placement_dict, project_id, student_id)

    return project_data_dict, placement_dict


# Snapshot, before any placement, of the student id on every vote (one entry per vote, so ids repeat).
starting_students = [
    vote["student_id"]
    for entry in all_project_data.values()
    for vote in entry["listStudentsSelected"]
]
# Despite the name this is the set of distinct student ids; its length is reported at the end of the cell.
num_starting_students = set(starting_students)


# Do step 1
# Projects whose open seats equal their first-choice vote count get exactly those first-choice students.
# The loop iterates the dict object bound to `all_project_data` when the loop starts, and `project` is read
# from that object. `place_students_of_choice` returns a deep copy, so rebinding `all_project_data` inside
# the loop does not disturb the iteration.
for id, project in all_project_data.items():
    if project["proj_size_remaining"] == project["num_first_choice"]:
        all_project_data, student_placements = place_students_of_choice(all_project_data, student_placements, id, 1, project["proj_size_remaining"])

# Do step 2
# Place first-choice students on projects that have at least as many open seats as first-choice votes.
# NOTE(review): the printed message says the project is "smaller" than its first-choice count, but the
# condition is the opposite (seats >= first-choice votes) - confirm which was intended.
for id, project in all_project_data.items():
    if project["proj_size_remaining"] >= project["num_first_choice"]:
        print("Project is smaller than first choice")
        all_project_data, student_placements = place_students_of_choice(all_project_data, student_placements, id, 1, project["proj_size_remaining"])

# Do step 3
# From here on the balanced helper is used. It updates `all_project_data` in place (including deleting
# filled projects), so each pass iterates over a deep-copied snapshot instead of the live dict.
# NOTE(review): `[2,15]` is a two-element list and the helper tests `choice in [2,15]`, so only votes ranked
# exactly 2 or 15 match - not ranks 2 through 15. The plan above talks about second, third and further
# choices; confirm whether a range was intended.
# NOTE(review): "num_first_choice" is only decremented on the project that receives a student, so it may be
# stale here for projects whose first-choice voters were placed elsewhere - verify.
_all_project_data = deepcopy(all_project_data)
for id, project in _all_project_data.items():
    if project["proj_size_remaining"] >= project["num_first_choice"]:
        all_project_data, student_placements = place_students_of_choice_balanced(all_project_data, student_placements, id, [2,15], project["proj_size_remaining"])


# Do step 4
# Every project still in the pool takes first-choice students, fewest-remaining-votes first.
_all_project_data = deepcopy(all_project_data)
for id, project in _all_project_data.items():
    all_project_data, student_placements = place_students_of_choice_balanced(all_project_data, student_placements, id, [1], project["proj_size_remaining"])


# Final sweep: repeat the `[2,15]` pass for every project still in the pool, this time without the
# seats-vs-first-choice condition used in step 3.
_all_project_data = deepcopy(all_project_data)
for id, project in _all_project_data.items():
    all_project_data, student_placements = place_students_of_choice_balanced(all_project_data, student_placements, id, [2,15], project["proj_size_remaining"])


# Check for issues
# Running total of seats that the votes still on file cannot fill. A line is printed for every project whose
# open seats are at least as many as its remaining votes (so a shortfall of zero is reported too).
count = 0
for entry in all_project_data.values():
    seats_left = entry["proj_size_remaining"]
    votes_left = len(entry["listStudentsSelected"])
    if seats_left >= votes_left:
        count += seats_left - votes_left
        print("Uh oh!" + str(count))

# Flatten the placements into one list of student ids, in project order.
students = [
    student_id
    for placement in student_placements.values()
    for student_id in placement["students"]
]
# Distinct placed students; a gap between the two lengths printed below means someone was placed twice.
num_students = set(students)
print(f"Number of Students assigned: {len(students)}/{len(num_students)}. Starting students: {len(num_starting_students)}")
print("lol")

added to recwHylW2G0horHUu for a total of 1
added to recwHylW2G0horHUu for a total of 2
added to recwHylW2G0horHUu for a total of 3
added to rec8VbKfxjLtIQnal for a total of 1
added to rec8VbKfxjLtIQnal for a total of 2
added to rec8VbKfxjLtIQnal for a total of 3
added to recj1nYmJ5Lfjoi1f for a total of 1
added to recj1nYmJ5Lfjoi1f for a total of 2
added to recj1nYmJ5Lfjoi1f for a total of 3
added to rec03wlXtIFNFy8lR for a total of 1
added to rec03wlXtIFNFy8lR for a total of 2
added to rec03wlXtIFNFy8lR for a total of 3
added to rec6nhvumluQP4zHF for a total of 1
added to rec6nhvumluQP4zHF for a total of 2
added to rec6nhvumluQP4zHF for a total of 3
added to recSdpegXLOpcbgOz for a total of 1
added to recSdpegXLOpcbgOz for a total of 2
added to recSdpegXLOpcbgOz for a total of 3
added to recyR7GZ1mFBKt7cd for a total of 1
added to recyR7GZ1mFBKt7cd for a total of 2
added to recyR7GZ1mFBKt7cd for a total of 3
added to recfRVcYvECgJ0BlY for a total of 1
added to recfRVcYvECgJ0BlY for a

In [1]:
%load_ext blackcellmagic
