## Imports ##

In [140]:
#imports
import math, random, copy, datetime

In [141]:
#Define Student structure
class Student:
    """Plain record holding the details of one student."""

    def __init__(self, tutorial_group: str, id: int, school: str, name: str, gender: str, cgpa: float) -> None:
        """Store the given details on the new student record.

        Args:
            tutorial_group (str): Class (tutorial group) the student is in
            id (int): Student identifier
            school (str): School that the student belongs to
            name (str): Name of the student
            gender (str): Either Male or Female
            cgpa (float): GPA of the student
        """
        #Where the student belongs
        self.tutorial_group, self.id, self.school = tutorial_group, id, school

        #Who the student is
        self.name, self.gender, self.cgpa = name, gender, cgpa

In [142]:
#Class of functions to split students
class Split_Student:
    """Collection of functions that partition a list of students by one attribute.

    Every splitter accepts **kwargs so that all of them can be called with the
    same keyword arguments; only split_students_by_cgpa reads one ("size").
    None of the splitters modify the list they are given.
    """

    def __init__(self):
        pass


    #Functions to split students into different categories
    def split_students_by_school(self, students, **kwargs) -> dict:
        """Splits students by school

        Args:
            students (list): List of Student objects to be split

        Returns:
            dict: where key is the school, and the value is a list of students in the school
        """
        sorted_students = {}

        for student in students:
            #Create the school's list on first sight, then append to it
            sorted_students.setdefault(student.school, []).append(student)

        return sorted_students


    def split_students_by_gender(self, students, **kwargs) -> dict:
        """Splits students by gender

        Any student whose gender is not exactly "Male" is placed under "Female".

        Args:
            students (list): List of Student objects to be split

        Returns:
            dict: where key is the gender, and the value is a list of students split by gender.
                A gender with no students is left out of the dict.
        """
        #"Male" is listed first so it stays the first key when both are present
        sorted_students = {"Male": [], "Female": []}

        for student in students:
            key = "Male" if student.gender == "Male" else "Female"
            sorted_students[key].append(student)

        #If no male or female, leave it out of the dict
        return {gender: members for gender, members in sorted_students.items() if members}


    def split_students_by_cgpa(self, students, **kwargs) -> dict:
        """Splits students by cgpa

        Students are ordered by ascending cgpa and cut into consecutive bands of
        ceil(len(students) / size) students each, so at most `size` bands are made
        and the last band may be smaller than the others.

        Args:
            students (list): List of Student objects to be split (left unmodified)
            size (int): size of groups to sort into, expected to be input from kwargs

        Returns:
            dict: dict of where values are students split by cgpa and key their bands
                (0 is the lowest band). An empty input gives {0: []}.

        Raises:
            KeyError: if size is not supplied in kwargs
            ValueError: if size is smaller than 1
        """
        #Retrieve size input
        size = kwargs["size"]
        if size < 1:
            raise ValueError("size must be at least 1")

        #Work on a sorted copy so the caller's list is neither reordered nor emptied
        ordered = sorted(students, key=lambda student: student.cgpa)

        #Students per band, computed directly to avoid rounding drift from a percentage round trip
        cutoff = math.ceil(len(ordered) / size)

        #Slice the ordered list band by band
        sorted_students = {}
        start = 0
        for band_number in range(size):
            sorted_students[band_number] = ordered[start:start + cutoff]
            start += cutoff

            #Stop as soon as every student has been placed in a band
            if start >= len(ordered):
                break

        return sorted_students

In [143]:
#Grouping students into their respective teams 
def choose_students(students, schools, factor_order, number_of_groups, size, number_of_males, total_students) -> list:
    """Splits students into number of groups specified, with balanced teams based on school, gender and cgpa

    Teams are filled one student at a time. For every pick a random profile (one
    value per factor) is drawn from the values not yet used in the current team,
    so a team mixes schools, genders and cgpa bands where possible. When the
    drawn profile has no student left, the profile is narrowed and drawn again.

    Args:
        students (dict): students splited and sorted by factor order, as a three-level nested dict
            whose innermost values are lists of students. Chosen students are removed from it.
        schools (list): list of schools that students are from
        factor_order (list): list of which the factors were considered, with first factor considered at index 0
        number_of_groups (int): number of groups to split into
        size (int): Size of groups to sort into
        number_of_males (int): Minimum number of males in each team
        total_students (int): Total number of students

    Returns:
        list: Contains number_of_groups lists, where each list is a group of students.
            Trailing groups are smaller (or empty) once the students run out.
    """
    #Initialise the final grouping list
    all_grouping = []

    #Possible profiles
    school_choices = list(schools) #Schools (copied so the caller's list is never shared)
    gender_choices = ["Male"]*number_of_males + ["Female"]*(size-number_of_males)
    cgpa_choices = list(range(size)) #GPA bands

    #A gender that has students but no slot above (e.g. fewer males than teams, so
    #number_of_males is 0) could never be drawn and the loop below would never end.
    #Make every gender found in the data selectable at least once.
    considered_factors = list(factor_order[:3])
    if "gender" in considered_factors:
        for gender in _keys_at_depth(students, considered_factors.index("gender")):
            if gender not in gender_choices:
                gender_choices.append(gender)


    #Convert the choices in the same order, based on factor order
    choices = {"school": school_choices, "gender": gender_choices, "cgpa": cgpa_choices}

    first_choices = choices[factor_order[0]]
    second_choices = choices[factor_order[1]]
    third_choices = choices[factor_order[2]]


    #Never wait for more students than the profiles can actually reach,
    #otherwise an overstated total_students would loop forever
    students_to_assign = min(total_students, _count_reachable(students, first_choices, second_choices, third_choices))

    #Value of Students already assigned
    students_assigned = 0

    for grouping in range(number_of_groups):
        #Duplicate the choices so they can be edited without affecting the originals
        first_choices_not_chosen = list(first_choices)
        second_choices_not_chosen = list(second_choices)
        third_choices_not_chosen = list(third_choices)

        #For each group iteration
        current_group = []
        students_selected = 0
        while students_selected < size and students_assigned < students_to_assign: #Extra check to ensure that once all students are allocated it ends.

            #Refresh choices if exhausted
            if len(first_choices_not_chosen) == 0:
                first_choices_not_chosen = list(first_choices)

            if len(second_choices_not_chosen) == 0:
                second_choices_not_chosen = list(second_choices)

            if len(third_choices_not_chosen) == 0:
                third_choices_not_chosen = list(third_choices)


            #Draw one value per factor from those not used yet in this team.
            #The generator is deliberately not reseeded here: reseeding from the clock
            #before each draw correlates draws made in the same clock tick and would
            #override a seed set by the caller.
            first_choice = random.choice(first_choices_not_chosen)
            second_choice = random.choice(second_choices_not_chosen)
            third_choice = random.choice(third_choices_not_chosen)


            try:
                #Extract the first student of the chosen profile and remove it from future choices
                #KeyError: profile does not exist, IndexError: profile has no student left
                chosen_one = students[first_choice][second_choice][third_choice].pop(0)

            except (IndexError, KeyError):
                #In the event of failure
                #Remove current one only from third layer if it is not the last one left
                if len(third_choices_not_chosen) > 1:
                    third_choices_not_chosen.remove(third_choice)

                else:
                    #If it was the last one, refresh the list and attempt to remove current second choice
                    third_choices_not_chosen = list(third_choices)

                    #Attempt to remove second choice
                    if len(second_choices_not_chosen) > 1:
                        second_choices_not_chosen.remove(second_choice)

                    else:
                        #If it was the last one, refresh the list and attempt to remove current first choice
                        second_choices_not_chosen = list(second_choices)

                        #Attempt to remove current first choice
                        if len(first_choices_not_chosen) > 1:
                            first_choices_not_chosen.remove(first_choice)

                        else:
                            #If it was the last one, start generation afresh
                            first_choices_not_chosen = list(first_choices)

            else:
                #Prevent repeat of same choice when there are other options
                first_choices_not_chosen.remove(first_choice)
                second_choices_not_chosen.remove(second_choice)
                third_choices_not_chosen.remove(third_choice)

                #Add the chosen student to the group
                current_group.append(chosen_one)

                #Increment the counters
                students_selected += 1
                students_assigned += 1


        #Add current group to all groups
        all_grouping.append(current_group)


    return all_grouping


def _keys_at_depth(nested, depth) -> list:
    """Return, in first-seen order, the keys of the dicts found `depth` levels below `nested`."""
    level = [nested]
    for _ in range(depth):
        level = [child for node in level for child in node.values()]

    keys = []
    for node in level:
        for key in node:
            if key not in keys:
                keys.append(key)

    return keys


def _count_reachable(students, first_choices, second_choices, third_choices) -> int:
    """Count the students stored under profiles that can be built from the given choices."""
    reachable = 0
    for first in set(first_choices):
        for second in set(second_choices):
            for third in set(third_choices):
                try:
                    reachable += len(students[first][second][third])
                except KeyError:
                    #This combination of values does not exist in the data
                    pass

    return reachable

In [144]:
#Parent function to execute subfunctions for organising students and grouping into teams
def sort_students(students, size) -> list:
    """Sort students based on School, Gender, CGPA in groups of given size

    The three factors are applied from the one with the fewest distinct values to
    the one with the most (ties keep the order school, gender, cgpa), producing a
    three-level nested dict that is then handed to choose_students.

    Args:
        students (list): List of Student objects (left unmodified)
        size (int): Size of groups to sort into

    Returns
        list: list of students sorted into groups of given size, empty if there are no students

    Raises:
        ValueError: if size is smaller than 1
    """
    if size < 1:
        raise ValueError("size must be at least 1")

    #Nothing to group, and the group count below would be 0 (division by zero)
    if not students:
        return []

    #Take every figure that depends on the full list before any splitting happens
    total_students = len(students)
    unique_schools = list(dict.fromkeys(student.school for student in students)) #first-seen order
    unique_genders = {student.gender for student in students}

    #Number of groups
    no_of_groups = math.ceil(total_students/size) #Rounded up

    #Get minimum number of males for each team to ensure gender balance
    number_of_males = sum(1 for student in students if student.gender == "Male")//no_of_groups

    #Decide order of factors based on unique values, fewest first
    #sorted() is stable, so ties keep the insertion order of this dict
    factors = {"school": len(unique_schools), "gender": len(unique_genders), "cgpa": size} #one gpa band per team member
    factor_order = sorted(factors, key=factors.get)


    #Look up the splitting function of each factor
    spliting_functions = Split_Student()
    split_first, split_second, split_third = [
        getattr(spliting_functions, "split_students_by_" + factor) for factor in factor_order
    ]

    #First factor, on a copy because a splitter may reorder or empty the list it is given
    sorted_students = split_first(list(students), size=size)

    #Second factor
    for first_key, first_value in sorted_students.items(): #Access Second layer
        sorted_students[first_key] = split_second(first_value, size=size)

    #Third factor
    for first_value in sorted_students.values(): #Access Second layer
        for second_key, second_value in first_value.items(): #Access Third layer
            first_value[second_key] = split_third(second_value, size=size)


    #Group the students
    final_grouping = choose_students(sorted_students, unique_schools, factor_order, no_of_groups, size, number_of_males, total_students)

    return final_grouping

In [145]:
def main():
    """Read records.csv, group every tutorial group into teams and write final_grouping.csv.

    Input columns are expected in the order: tutorial group, id (int), school,
    name, gender, cgpa (float), with one header row. The team size is asked from
    the user. Teams are numbered consecutively across all tutorial groups,
    starting at 1.
    """
    import csv #Local import so this function does not depend on the file's import cell

    #Extract and split the data
    with open("records.csv", "r") as records:
        #csv handles quoted fields (e.g. a name containing a comma); lines are stripped as before
        rows = csv.reader(line.strip() for line in records)
        next(rows, None) #Removing first row headers
        student_data = [record for record in rows if record] #Blank lines are skipped

    #Ask the group size from the user
    while True:
        try:
            size = int(input("How many students in each team: ").strip())

        except ValueError:
            print("Please enter a whole number.")
            continue

        #A team needs at least one student; 0 or less would break the grouping later
        if size >= 1:
            break

        print("Please enter a number of at least 1.")

    #Initialize dictionary for all students
    all_tutorial_groups = {}

    #Apply the student class for easier access & move students into their tutorial groups
    for record in student_data:
        #Set up student object, attributes order: tutorial_group (str), id (int), school (str), name (str), gender (str), cgpa (float)
        student = Student(record[0],int(record[1]),record[2],record[3],record[4],float(record[5]))

        #Create the tutorial group's list on first sight, then append to it
        all_tutorial_groups.setdefault(record[0], []).append(student)


    #Perform grouping for all students
    final_grouping = {}
    for tutorial_group, students in all_tutorial_groups.items():
        #For us to know progress
        print(f"Currently sorting tutorial group: {tutorial_group}")

        #Sort into their groups and store them
        final_grouping[tutorial_group] = sort_students(students, size)


    #Output the final grouping (Parse it back into the orginal csv format)
    group_number = 1
    with open("final_grouping.csv", "w+") as output:
        #"\n" terminator keeps the line endings the same as plain write() calls would give
        writer = csv.writer(output, lineterminator="\n")

        #Write headers
        writer.writerow(["Tutorial_Group", "Student ID", "School", "Name", "Gender", "CGPA", "Team Assigned"])
        for tutorial_group, students in final_grouping.items():
            for group in students:
                for student in group:
                    writer.writerow([tutorial_group, student.id, student.school, student.name, student.gender, student.cgpa, group_number])

                #Increment group number by 1 after current group is complete
                group_number += 1

In [146]:
#Run the grouping only when this file is executed as a script, not when it is imported
if __name__ == "__main__":
    main()

Currently sorting tutorial group: G-1
Currently sorting tutorial group: G-10
Currently sorting tutorial group: G-100
Currently sorting tutorial group: G-101
Currently sorting tutorial group: G-102
Currently sorting tutorial group: G-103
Currently sorting tutorial group: G-104
Currently sorting tutorial group: G-105
Currently sorting tutorial group: G-106
Currently sorting tutorial group: G-107
Currently sorting tutorial group: G-108
Currently sorting tutorial group: G-109
Currently sorting tutorial group: G-11
Currently sorting tutorial group: G-110
Currently sorting tutorial group: G-111
Currently sorting tutorial group: G-112
Currently sorting tutorial group: G-113
Currently sorting tutorial group: G-114
Currently sorting tutorial group: G-115
Currently sorting tutorial group: G-116
Currently sorting tutorial group: G-117
Currently sorting tutorial group: G-118
Currently sorting tutorial group: G-119
Currently sorting tutorial group: G-12
Currently sorting tutorial group: G-120
Curre