Outstanding Questions:
1. Do we want the pools distinct when we assign, or lump everyone into a single pool for assignment? (If so, cannot treat each pool independently or someone could end up with the "extra" applications in both pools, therefore 2 more than someone else)
2. What features are helpful to have in the spreadsheet used during review? (Highlight entries? Each reviewer a different color?)

In [96]:
import pandas as pd
import numpy as np
import random
import xlsxwriter
import time

In [97]:
#Define reviewer groups
rev_groups = {'StudentReps':['Amin', 'Lambert'],
              'SysCog':['Nassar','Chirila','Truccolo','Sheinberg','Berson','Paradiso','Moore'],
              'CellMolec':['Jaworski','Fallon','Abdelfattah','Amin','Lambert','Aizenman','Mayoral']}
rev_groups['Reviewers'] = list(set([item for sublist in rev_groups.values() for item in sublist])) #Unique list of reviewers

In [98]:
def calc_reviewer_workload(num_applicants,rev_groups,n):
    # Calculate TOTAL workload for each reviewer across all applicants
    # Args:
    #    applicants: complete list of applicant names 
    #    rev_groups: dictionary categorizing reviewers into 'StudentReps', 'SysCog', 'CellMolec','Reviewers'
    #    n: single value, number of reviewers assigned to each application
    #
    # Returns:
    #    A dictionary containing TOTAL workload
    reviewers = rev_groups['Reviewers'] #Unique list of reviewers
    
    num_reviewers = len(reviewers)
    #num_applicants = len(applicants)
    num_reviews = num_applicants*n
    
    base_reviews = num_reviews // num_reviewers #// gives floor division, will always be integer rounded down
    remainder = num_reviews - (base_reviews*num_reviewers) # number of extra applications
  
    workload_counts = {rev: base_reviews for rev in reviewers}

    #Add remainder reviews to random subset of faculty reviewers (never student reviewers)
    fac_reviewers = list(set.difference(set(rev_groups['Reviewers']),set(rev_groups['StudentReps'])))
    random.shuffle(fac_reviewers)   
    if remainder != 0:
        for i, reviewer in enumerate(fac_reviewers):
            if i<remainder:
                workload_counts[reviewer] += 1 #for remainder faculty, their workload is 1 extra

    return workload_counts

In [136]:
#VERSION WITH SEPARATE APPLICANT POOLS
def assign_reviewers_multipool(applicant_pools,rev_groups,n):
    # Args:
    #    applicant_pools: dictionary containing list of applicant names in each pool key
    #    rev_groups: dictionary categorizing reviewers into 'StudentReps', 'SysCog', 'CellMolec','Reviewers'
    #    n: single value, number of reviewers assigned to each application
    #
    # Returns:
    #    assignment_list_df: dataframe of each applicant and list of their reviewers
    #    reviewer_workload_df: dataframe of each reviewer and list of their assigned applicants

    reviewers = rev_groups['Reviewers'] #Unique list of reviewers
    pools = list(applicant_pools.keys())

    #Calculate workload, keeping total evenly distributed and with pool evenly distributed (ish)
    num_applicants = sum(len(value) for value in applicant_pools.values()) #Calculate total number of applicants
    workload_counts_total = calc_reviewer_workload(num_applicants,rev_groups,n) #Total reviews per reviewer   
    
    workload = {}
    for pool in pools[:-1]: #Calculate workloads independently for each pool except final pool
        workload[pool] = calc_reviewer_workload(len(applicant_pools[pool]),rev_groups,n_rev_per_app)
            
    workload_pool_total = {}
    for inner_dict in workload.values(): #sum of workloads across pools, exculding final pool
        for key, value in inner_dict.items():
            workload_pool_total[key] = workload_pool_total.get(key,0) + value
       
    workload[pools[-1]] = {key: workload_counts_total[key] - workload_pool_total[key] for key in workload_counts_total}

#TO-DO: Put everything in loop separate and then combine across pools for final output

    assignment_list = {}

    for pool in applicant_pools:
        workload_counts = workload[pool]
        applicants = applicant_pools[pool]
       
        #Create assignment list
        random.shuffle(applicants)
        assignment_list[pool] = {applicant:[] for applicant in applicants}  
        for applicant in applicants:
            #Pull list of possible reviewers based on if their workload is already full or not
            available_reviewers = [candidates for candidates, count in workload_counts.items() if count > 0]
            if len(available_reviewers) < n:
                raise ValueError(
                    f"Cannot find {n} unique reviewers for '{applicant}'. The remaining pool "
                    f"of reviewers with available slots ({len(available_reviewers)}) is too small."
                )
            #Check if possible to select subset from available reviewers that will not break rules, else raise ValueError
            #From available reviewers, choose n at random    
            if sorted(available_reviewers) == sorted(rev_groups['StudentReps']):
                raise ValueError('Break Rule 1: Student only reviewers')
            if not any(item in available_reviewers for item in rev_groups['SysCog']) and any(item in available_reviewers for item in rev_groups['CellMolec']):
                raise ValueError('Break Rule 2: Distribution of fields')
            #Draw random set of reviewers from those with workload available
            chosen_reviewers = random.sample(available_reviewers, n)          
            
            #Check reviewers chosen follows rule, or else pick another sample
            start_time = time.time()
            time_limit = 1
            while sorted(chosen_reviewers) == sorted(rev_groups['StudentReps']) or not (any(item in chosen_reviewers for item in rev_groups['SysCog']) and any(item in chosen_reviewers for item in rev_groups['CellMolec'])):
                chosen_reviewers = random.sample(available_reviewers, n)    
    
                #Sometimes code gets hung. Introduce timeout to restart.
                current_time = time.time()
                if current_time - start_time >= time_limit:
                    print("Assign Reviewers Timeout")
                    break
    
                
            assignment_list[pool][applicant] = chosen_reviewers
            for reviewer in chosen_reviewers:
                workload_counts[reviewer] -= 1

    merged_assignment_list = {key:value for sub_dict in assignment_list.values() for key,value in sub_dict.items()}

    #Reverse map assignments
    reviewer_workload = {reviewer:[] for reviewer in reviewers}   
    for app, revs in merged_assignment_list.items():
        for rev in revs:
            reviewer_workload[rev].append(app)

    total = 0
    for key,value in reviewer_workload.items():
        total = total + len(value)
        print(f"Number of apps for {key} is {len(value)}")
    print(f"Total reviews is {total}")
        
    reviewer_workload_df = pd.DataFrame(reviewer_workload.items(),columns = ['Reviewer','Applicants'])
    assignment_list_df = pd.DataFrame(merged_assignment_list.items(),columns = ['Applicant','Reviewers'])
            
    return assignment_list_df, reviewer_workload_df

In [None]:
#VERSION WITH ALL APPLICANTS GROUPED TOGETHER
def assign_reviewers(applicants,rev_groups,n):
    # Args:
    #    applicants: list of applicant names
    #    rev_groups: dictionary categorizing reviewers into 'StudentReps', 'SysCog', 'CellMolec','Reviewers'
    #    n: single value, number of reviewers assigned to each application
    #
    # Returns:
    #    assignment_list_df: dataframe of each applicant and list of their reviewers
    #    reviewer_workload_df: dataframe of each reviewer and list of their assigned applicants

    reviewers = rev_groups['Reviewers'] #Unique list of reviewers
    num_applicants = len(applicants)
    workload_counts = calc_reviewer_workload(num_applicants,rev_groups,n) 

    
        
    #Create assignment list
    random.shuffle(applicants)
    assignment_list = {applicant:[] for applicant in applicants}  
    for applicant in applicants:
        #Pull list of possible reviewers based on if their workload is already full or not
        available_reviewers = [candidates for candidates, count in workload_counts.items() if count > 0]
        if len(available_reviewers) < n:
            raise ValueError(
                f"Cannot find {n} unique reviewers for '{applicant}'. The remaining pool "
                f"of reviewers with available slots ({len(available_reviewers)}) is too small."
            )
        #Check if possible to select subset from available reviewers that will not break rules, else raise ValueError
        #From available reviewers, choose n at random    
        if sorted(available_reviewers) == sorted(rev_groups['StudentReps']):
            raise ValueError('Break Rule 1: Student only reviewers')
        if not any(item in available_reviewers for item in rev_groups['SysCog']) and any(item in available_reviewers for item in rev_groups['CellMolec']):
            raise ValueError('Break Rule 2: Distribution of fields')
        #Draw random set of reviewers from those with workload available
        chosen_reviewers = random.sample(available_reviewers, n)          
        
        #Check reviewers chosen follows rule, or else pick another sample
        start_time = time.time()
        time_limit = 1
        while sorted(chosen_reviewers) == sorted(rev_groups['StudentReps']) or not (any(item in chosen_reviewers for item in rev_groups['SysCog']) and any(item in chosen_reviewers for item in rev_groups['CellMolec'])):
            chosen_reviewers = random.sample(available_reviewers, n)    

            #Sometimes code gets hung. Introduce timeout to restart.
            current_time = time.time()
            if current_time - start_time >= time_limit:
                print("Assign Reviewers Timeout")
                break

            
        assignment_list[applicant] = chosen_reviewers
        for reviewer in chosen_reviewers:
            workload_counts[reviewer] -= 1
    
    #Reverse map assignments
    reviewer_workload = {reviewer:[] for reviewer in reviewers}   
    for app, revs in assignment_list.items():
        for rev in revs:
            reviewer_workload[rev].append(app)

    total = 0
    for key,value in reviewer_workload.items():
        total = total + len(value)
        print(f"Number of apps for {key} is {len(value)}")
    print(f"Total reviews is {total}")
        
    reviewer_workload_df = pd.DataFrame(reviewer_workload.items(),columns = ['Reviewer','Applicants'])
    assignment_list_df = pd.DataFrame(assignment_list.items(),columns = ['Applicant','Reviewers'])
            
    return assignment_list_df, reviewer_workload_df

In [5]:
#VERSION WITH ALL APPLICANTS GROUPED TOGETHER
def assign_reviewers(applicants,rev_groups,n):
    
    # Args:
    #    applicants: list of applicant names
    #    rev_groups: dictionary categorizing reviewers into 'StudentReps', 'SysCog', 'CellMolec','Reviewers'
    #    n: single value, number of reviewers assigned to each application
    #
    # Returns:
    #    assignment_list_df: dataframe of each applicant and list of their reviewers
    #    reviewer_workload_df: dataframe of each reviewer and list of their assigned applicants


    reviewers = rev_groups['Reviewers'] #Unique list of reviewers
    
    num_reviewers = len(reviewers)
    num_applicants = len(applicants)
    num_reviews = num_applicants*n
    
    base_reviews = num_reviews // num_reviewers #// gives floor division, will always be integer rounded down
    remainder = num_reviews - (base_reviews*num_reviewers) # number of extra applications
    
    #make a list of reviewers with base_reviews repeats of each name, + 1 for remainder number of reviews, then shuffle
    random.shuffle(reviewers)
    
    #Create a random list of faculty with repeats the number of applications each needs to read
    workload_counts = {rev: base_reviews for rev in reviewers}
    if remainder != 0:
        for i, reviewer in enumerate(reviewers):
            if i<remainder:
                workload_counts[reviewer] += 1 #for remainder faculty, their workload is 1 extra


    #Create assignment list
    random.shuffle(applicants)
    assignment_list = {applicant:[] for applicant in applicants}  
    for applicant in applicants:
        #Pull list of possible reviewers based on if their workload is already full or not
        available_reviewers = [candidates for candidates, count in workload_counts.items() if count > 0]
        if len(available_reviewers) < n:
            raise ValueError(
                f"Cannot find {n} unique reviewers for '{applicant}'. The remaining pool "
                f"of reviewers with available slots ({len(available_reviewers)}) is too small."
            )
        #Check if possible to select subset from available reviewers that will not break rules, else raise ValueError
        #From available reviewers, choose n at random    
        if sorted(available_reviewers) == sorted(rev_groups['StudentReps']):
            raise ValueError('Break Rule 1: Student only reviewers')
        if not any(item in available_reviewers for item in rev_groups['SysCog']) and any(item in available_reviewers for item in rev_groups['CellMolec']):
            raise ValueError('Break Rule 2: Distribution of fields')
        #Draw random set of reviewers from those with workload available
        chosen_reviewers = random.sample(available_reviewers, n)          
        
        #Check reviewers chosen follows rule, or else pick another sample
        while sorted(chosen_reviewers) == sorted(rev_groups['StudentReps']) or not (any(item in chosen_reviewers for item in rev_groups['SysCog']) and any(item in chosen_reviewers for item in rev_groups['CellMolec'])):
            chosen_reviewers = random.sample(available_reviewers, n)    

            
        assignment_list[applicant] = chosen_reviewers
        for reviewer in chosen_reviewers:
            workload_counts[reviewer] -= 1
    
    #Reverse map assignments
    reviewer_workload = {reviewer:[] for reviewer in reviewers}   
    for app, revs in assignment_list.items():
        for rev in revs:
            reviewer_workload[rev].append(app)

    total = 0
    for key,value in reviewer_workload.items():
        total = total + len(value)
        print(f"Number of apps for {key} is {len(value)}")
    print(f"Total reviews is {total}, should be {num_reviews}")
        
    reviewer_workload_df = pd.DataFrame(reviewer_workload.items(),columns = ['Reviewer','Applicants'])
    assignment_list_df = pd.DataFrame(assignment_list.items(),columns = ['Applicant','Reviewers'])
            
    return assignment_list_df, reviewer_workload_df

# ROUND 1

## Section 1A: Import Data (Round 1)

In [101]:
review_round = 1

applicant_info = pd.read_excel('AdmissionsTest.xlsx') #Read Data Spreadsheet exported from Slate
n_rev_per_app = 2 #number of reviewers assigned to each application

# All applicants in single pool
applicants = applicant_info['Person Name'].values

# Separate TG-Eligible and International applicant pools
applicant_pools = {'International':applicant_info.loc[applicant_info['Citizenship Status']=='Foreign Citizen','Person Name'].values,
                  'TG-Eligible':applicant_info.loc[applicant_info['Citizenship Status']!='Foreign Citizen','Person Name'].values}
#applicants_int = applicant_info.loc[applicant_info['Citizenship Status']=='Foreign Citizen','Person Name'].values
#applicant_tg = applicant_info.loc[applicant_info['Citizenship Status']!='Foreign Citizen','Person Name'].values
print(len(applicant_info))

print(f"--Imported Data for Round {review_round}, {len(applicant_info)} applications to review--")


376
--Imported Data for Round 1, 376 applications to review--


## Section 1B: Assign Reviewers

Randomly assign reviewers to applications based on the following rules:
1. Workload should be distributed evenly
2. Application assignments should be as random as possible (two reviewers should not have identical pools of applications)
3. An individual application should never be reviewed by ONLY the student reps
4. Application should be reviewed by a sys/cog/comp and cell/molec person


In [138]:

#Run until it finds proper assignment with no errors. For some reason sometimes will get hung and need to interrupt Kernel and try again.
start_time = time.time()
timeout_seconds = 3

while True:
    current_time = time.time()
    if current_time - start_time >= timeout_seconds:
        print("Timeout reached, try again")
        break
    try:
        assignment_list, reviewer_workload = assign_reviewers_multipool(applicant_pools,rev_groups,n_rev_per_app)
        #assignment_list, reviewer_workload = assign_reviewers(applicants,rev_groups,n_rev_per_app)
        #print(workload_counts)
        break
    except ValueError as e:
        print(f"Error: {e}")

print(f"--Assigned Reviewers for Round {review_round}--")
        


Number of apps for Abdelfattah is 54
Number of apps for Truccolo is 53
Number of apps for Mayoral is 54
Number of apps for Amin is 53
Number of apps for Fallon is 54
Number of apps for Berson is 54
Number of apps for Lambert is 53
Number of apps for Aizenman is 54
Number of apps for Chirila is 53
Number of apps for Moore is 54
Number of apps for Paradiso is 54
Number of apps for Sheinberg is 54
Number of apps for Jaworski is 54
Number of apps for Nassar is 54
Total reviews is 752
--Assigned Reviewers for Round 1--


In [54]:
#Analyze distribution of fields for each application (not required, just to do a manual verification while debugging)
assignment_list.head()
for applicant in assignment_list['Applicant'].values:
    chosen_reviewers = assignment_list.loc[assignment_list['Applicant']==applicant,'Reviewers'].values[0]
    if any(item in chosen_reviewers for item in rev_groups['SysCog']) and any(item in chosen_reviewers for item in rev_groups['CellMolec']):
        assignment_list.loc[assignment_list['Applicant']==applicant,'Check'] = True

test = assignment_list['Check'].value_counts()
print(test)
assignment_list.to_excel('Assignments for review.xlsx')

Check
True    376
Name: count, dtype: int64


## Section 1C: Generate score cards

In [140]:
print(f"--Generating Scorecards for Round {review_round}--")

reviewers = rev_groups['Reviewers']
for reviewer in reviewers:
    reviewer_df = applicant_info[applicant_info['Person Name'].isin(reviewer_workload['Applicants'].loc[reviewer_workload['Reviewer']==reviewer].values[0])]
    reviewer_df = reviewer_df.sort_values(by='Person Name')
    reviewer_df.insert(0, 'Score', '') #Add blank column for Score (position, name, value)
    reviewer_df.insert(1, 'Notes', '') #Add blank column for Score (position, name, value)
    #reviewer_df = scorecard_base_df[scorecard_base_df['Reviewers'].isin([reviewer])].copy()
    #reviewer_df = reviewer_df.drop(columns = 'Reviewers')

    filename = f"{reviewer}_Scorecard - Round {review_round}.xlsx"
    writer = pd.ExcelWriter(filename,engine = 'xlsxwriter')
    reviewer_df.to_excel(writer,index = False,header=False) #header = false, Insert header below with xlsxwriter in order to format
    
    #Format score card
    workbook = writer.book
    worksheet = writer.sheets['Sheet1']
    header_format = workbook.add_format({
        'bold': True,
        'font_size': 13,
        'align':'center',
        'font_color': '#ffffff',
        'fg_color':'#000000'       
        
    })
    for col_num, value in enumerate(reviewer_df.columns.values):
        worksheet.write(0, col_num, value, header_format) #(row,column,value,format)
    worksheet.set_column(0,25,15) #Set minimum column width
    worksheet.autofit() #Autofit (finicky, which is why I do minimum width first)

    writer.close() #Without this line the file will be corrupt

    num_int = len(reviewer_df.loc[reviewer_df['Citizenship Status']=='Foreign Citizen'])
    num_tg = len(reviewer_df.loc[reviewer_df['Citizenship Status']!='Foreign Citizen'])
    print(f"{reviewer} has {num_int} International and {num_tg} TG-Eligible, {num_int + num_tg} total applications")    
    
    

--Generating Scorecards for Round 1--
Abdelfattah has 26 International and 28 TG-Eligible, 54 total applications
Truccolo has 25 International and 28 TG-Eligible, 53 total applications
Mayoral has 26 International and 28 TG-Eligible, 54 total applications
Amin has 25 International and 28 TG-Eligible, 53 total applications
Fallon has 26 International and 28 TG-Eligible, 54 total applications
Berson has 26 International and 28 TG-Eligible, 54 total applications
Lambert has 25 International and 28 TG-Eligible, 53 total applications
Aizenman has 26 International and 28 TG-Eligible, 54 total applications
Chirila has 26 International and 27 TG-Eligible, 53 total applications
Moore has 25 International and 29 TG-Eligible, 54 total applications
Paradiso has 26 International and 28 TG-Eligible, 54 total applications
Sheinberg has 26 International and 28 TG-Eligible, 54 total applications
Jaworski has 26 International and 28 TG-Eligible, 54 total applications
Nassar has 26 International and 28 T

In [None]:
# To play with formatting

## Section 1D: Merge completed score cards (Round 1)

In [144]:
#Concatenate all reviewer score cards 

print(f"--Merging Scorecards for Round {review_round}--")
reviewers = rev_groups['Reviewers']
#reviewers = ['Amin','Jaworski'] #IF DGS SUBMITS A SCORESHEET NEED TO ADD NAME TO REVIEWERS FOR MERGE

score_df = applicant_info.copy()

for reviewer in reviewers:
    #First review all Person Names and make sure they are in applicants
    df = pd.read_excel(f"{reviewer}_Scorecard - Round {review_round}.xlsx")
    df.rename(columns={'Score': f"{reviewer} Score"}, inplace=True) #rename Score column to include reviewer name
    score_df = pd.merge(score_df,df[[f"{reviewer} Score",'Person Name']], how = 'left',on = 'Person Name')
    #If any errors in merge (like wrong name) should flag for me to correct
score_df.head()

#Reorder columns so scores are first
num_reviewers = len(reviewers)
cols = score_df.columns.tolist()
cols = cols[-num_reviewers:]+cols[:-num_reviewers]
score_df = score_df[cols]
score_df.head()

#Add mean score (should this be a formula instead in case things change??? maybe)
#mean_score = score_df.iloc[:,:num_reviewers].mean(axis = 1)
#sd_score = score_df.iloc[:,:num_reviewers].std(axis = 1)
score_df.insert(num_reviewers,'Mean','')
score_df.insert(num_reviewers+1,'SD','')
score_df.insert(num_reviewers+2,'Next Round? (y/n)','')
score_df.head()

#Separate tabs for International and TG-Eligible

score_int = score_df.loc[score_df['Citizenship Status'] == 'Foreign Citizen']
#score_int = score_int.sort_values(by='Mean')

score_tg = score_df.loc[score_df['Citizenship Status'] != 'Foreign Citizen']
#score_tg = score_tg.sort_values(by='Mean')


--Merging Scorecards for Round 1--


In [145]:
print(f"--Formatting Merged Score Spreadsheet for Round {review_round}--")

#Create file, separate tabs for International and TG-Eligible pools
filename = f"Round {review_round} Scores.xlsx"
writer = pd.ExcelWriter(filename,engine = 'xlsxwriter')
#score_df.to_excel(writer,sheet_name = index = False,header=False) #header = false, Insert header below with xlsxwriter in order to format
score_int.to_excel(writer,sheet_name = 'International', index = False,header=False) #header = false, Insert header below with xlsxwriter in order to format
score_tg.to_excel(writer,sheet_name = 'TG-Eligible', index = False,header=False) #header = false, Insert header below with xlsxwriter in order to format

# --Format score card headers--
workbook = writer.book
worksheet1 = writer.sheets['International']
worksheet2 = writer.sheets['TG-Eligible']

header_format = workbook.add_format({
    'bold': True,
    'font_size': 13,
    'align':'center',
    'font_color': '#ffffff',
    'fg_color':'#000000',
    'text_wrap':True
    
})
for col_num, value in enumerate(score_int.columns.values):
    worksheet1.write(0, col_num, value, header_format) #(row,column,value,format)


for col_num, value in enumerate(score_tg.columns.values):
    worksheet2.write(0, col_num, value, header_format) #(row,column,value,format)


# --Additional formatting--
first_row = 1
last_row = 100 #number of applicants in pool
first_col = score_int.columns.get_loc('Next Round? (y/n)') #column ID of Next Round?
last_col = score_int.columns.get_loc('Next Round? (y/n)') #column ID of Next Round?

#center align all of the scores
format0 = workbook.add_format({'align':'center'})
worksheet1.set_column(0,last_col,10,format0)
#worksheet1.set_column(last_col+1,25,25,10)
worksheet1.autofit() #Autofit (finicky, which is why I do minimum width first)


worksheet2.set_column(0,last_col,10,format0)
#worksheet2.set_column(last_col+1,25,25,10)
worksheet2.autofit() #Autofit (finicky, which is why I do minimum width first)
                      

# --Conditional formatting--

# Format Next 'Round? (y/n)'' column, green for y, red for n, yellow for m

nr_format1 = workbook.add_format({
    'bg_color':'#9BDE87'
})
nr_format2 = workbook.add_format({
    'bg_color':'#DE8E8E'
})
nr_format3 = workbook.add_format({
    'bg_color':'#EBE3AO'
})


worksheet1.conditional_format(first_row, first_col, last_row, last_col,
                             {'type':'text',
                             'criteria':'containing',
                              'value':'m',
                             'format':nr_format3})
worksheet1.conditional_format(first_row, first_col, last_row, last_col,
                             {'type':'text',
                             'criteria':'containing',
                              'value':'y',
                             'format':nr_format1})
worksheet1.conditional_format(first_row, first_col, last_row, last_col,
                             {'type':'text',
                             'criteria':'containing',
                              'value':'n',
                             'format':nr_format2})

worksheet2.conditional_format(first_row, first_col, last_row, last_col,
                             {'type':'text',
                             'criteria':'containing',
                              'value':'m',
                             'format':nr_format3})
worksheet2.conditional_format(first_row, first_col, last_row, last_col,
                             {'type':'text',
                             'criteria':'containing',
                              'value':'y',
                             'format':nr_format1})
worksheet2.conditional_format(first_row, first_col, last_row, last_col,
                             {'type':'text',
                             'criteria':'containing',
                              'value':'n',
                             'format':nr_format2})

# Color faculty scores (for prettiness! Definitely not necessary)
reviewer_colorspace = ['#F28FD4','#ffa3a2','#fcf6e7','#ff8e58','#da6175','#B05199','#446AB2',
                       '#3296A6','#24b8a0','#8ee6a1','#edd54f','#6bae36','#7da497','#c8cdbb']
for idx, value in enumerate(reviewers):
    rev_format = workbook.add_format({'bg_color':reviewer_colorspace[idx],
                                     'bold':True,
                                     'align':'center'})
    worksheet1.conditional_format(first_row,idx,last_row,idx,
                                 {'type':'no_blanks',
                                 'format':rev_format})
    worksheet2.conditional_format(first_row,idx,last_row,idx,
                                 {'type':'no_blanks',
                                 'format':rev_format})
    


writer.close() #Without this line the file will be corrupt



--Formatting Merged Score Spreadsheet for Round 1--


### Manual edits to spreadsheet
Open the spreadsheet and add the following by hand:
1. Count number of interviews (=COUNTIF(range,"=y") to bottom of 'Next Round (y/n)' column) for both sheets (probably a way to add this, but would likely not be any more efficient because need to hard code cell location)
2. Mean score for each reviewer (likely a way to add this in the code, but right now I'm lazy!)
3. Mean and SD score for each applicant (better to make it a formula in case corrections are made during the meeting?)

# ROUND 2

## Section 2A: Import Data

In [132]:
#Import completed spreadsheet from above DOES IT COLLAPSE ACROSS TABS??? CHECK! (likely separate tabs for different pools)
review_round = 2
applicant_info_r2 = pd.read_excel(f"Round {review_round-1} Scores.xlsx") #Load merged score spreadsheet from previous round

#Edit to get similar dataframe as for round 1
applicant_info_r2.drop(applicant_info_r2.iloc[:,0:len(reviewers)+2],axis = 1, inplace = True) #Drop reviewer scores, mean, and sd
applicant_info_r2 = applicant_info_r2.loc[applicant_info_r2['Next Round? (y/n)']=='y'] #Filter out those not going to next round
applicant_info_r2.drop(columns = 'Next Round? (y/n)',axis = 1, inplace = True) 
applicant_info_r2.head() #First column should be Person Name

n_rev_per_app2 = 3 #number of reviewers assigned to each application

# All applicants in single pool
applicants_r2 = applicant_info_r2['Person Name'].values

# Separate TG-Eligible and International applicant pools
applicant_pools_r2 = {'International':applicant_info_r2.loc[applicant_info_r2['Citizenship Status']=='Foreign Citizen','Person Name'].values,
                  'TG-Eligible':applicant_info_r2.loc[applicant_info_r2['Citizenship Status']!='Foreign Citizen','Person Name'].values}


print(f"--Imported Data for Round {review_round}, {len(applicant_info_r2)} applications to review--")


--Imported Data for Round 2, 2 applications to review--


In [120]:
n_rev_per_app2 = 2 #number of reviewers assigned to each application

# All applicants in single pool
applicants_r2 = applicant_info_r2['Person Name'].values

# Separate TG-Eligible and International applicant pools
applicant_pools_r2 = {'International':applicant_info_r2.loc[applicant_info_r2['Citizenship Status']=='Foreign Citizen','Person Name'].values,
                  'TG-Eligible':applicant_info_r2.loc[applicant_info_r2['Citizenship Status']!='Foreign Citizen','Person Name'].values}

print(len(applicant_info_r2))



2


## Section 2B: Assign Reviewers

In [None]:
#Run until it finds proper assignment with no errors. For some reason sometimes will get hung and need to interrupt Kernel and try again.
start_time = time.time()
timeout_seconds = 3
print(f"--Assigning Reviewers for Round {review_round}--")
while True:
    current_time = time.time()
    if current_time - start_time >= timeout_seconds:
        print("Timeout reached, try again")
        break
    try:
        assignment_list, reviewer_workload = assign_reviewers_multipool(applicants_r2,rev_groups,n_rev_per_app2) #If multiple applicant pools
        #assignment_list, reviewer_workload = assign_reviewers(applicants,rev_groups,n_rev_per_app) #If single pool
        break
    except ValueError as e:
        print(f"Error: {e}")

## Section 2C: Generate Scorecards

In [None]:
#Generate reviewer score cards WITH formatting
print(f"--Generating Scorecards for Round {review_round}--")
reviewers = rev_groups['Reviewers']
for reviewer in reviewers:
    reviewer_df = applicant_info_r2[applicant_info_r2['Person Name'].isin(reviewer_workload['Applicants'].loc[reviewer_workload['Reviewer']==reviewer].values[0])]
    reviewer_df = reviewer_df.sort_values(by='Person Name')
    reviewer_df.insert(0, 'Score', '') #Add blank column for Score (position, name, value)
    reviewer_df.insert(1, 'Notes', '') #Add blank column for Score (position, name, value)
    #reviewer_df = scorecard_base_df[scorecard_base_df['Reviewers'].isin([reviewer])].copy()
    #reviewer_df = reviewer_df.drop(columns = 'Reviewers')

    filename = f"{reviewer}_Scorecard - Round {review_round}.xlsx"
    writer = pd.ExcelWriter(filename,engine = 'xlsxwriter')
    reviewer_df.to_excel(writer,index = False,header=False) #header = false, Insert header below with xlsxwriter in order to format
    
    #Format score card
    workbook = writer.book
    worksheet = writer.sheets['Sheet1']
    header_format = workbook.add_format({
        'bold': True,
        'font_size': 13,
        'align':'center',
        'font_color': '#ffffff',
        'fg_color':'#000000'       
        
    })
    for col_num, value in enumerate(reviewer_df.columns.values):
        worksheet.write(0, col_num, value, header_format) #(row,column,value,format)
    worksheet.set_column(0,25,15) #Set minimum column width
    worksheet.autofit() #Autofit (finicky, which is why I do minimum width first)

    writer.close() #Without this line the file will be corrupt