In [1]:
import pandas as pd
import numpy as np
import random
import xlsxwriter

## Section 1: Assign reviewers

In [68]:
# Load the applicant spreadsheet and define the reviewer roster / configuration

applicant_info = pd.read_excel('AdmissionsTest.xlsx')
n = 2 #number of reviewers assigned to each application
review_round = 1

reviewers = ['Nassar','Chirila','Truccolo','Sheinberg','Berson','Paradiso','Moore','Jaworski','Fallon','Abdelfattah','Amin','Lambert','Aizenman','Mayoral']
# Field groupings used by the assignment rules (the student reps are also listed under CellMolec)
rev_groups = {'StudentReps':['Amin', 'Lambert'],
              'SysCog':['Nassar','Chirila','Truccolo','Sheinberg','Berson','Paradiso','Moore'],
              'CellMolec':['Jaworski','Fallon','Abdelfattah','Amin','Lambert','Aizenman','Mayoral']}
# .tolist() gives an independent Python list. The raw `.values` array may share memory
# with applicant_info, so any in-place shuffle of it downstream could reorder the
# 'Person Name' column relative to the rest of each applicant's row.
applicants = applicant_info['Person Name'].tolist()
print(rev_groups)
print(rev_groups['StudentReps'])

{'StudentReps': ['Amin', 'Lambert'], 'SysCog': ['Nassar', 'Chirila', 'Truccolo', 'Sheinberg', 'Berson', 'Paradiso', 'Moore'], 'CellMolec': ['Jaworski', 'Fallon', 'Abdelfattah', 'Amin', 'Lambert', 'Aizenman', 'Mayoral']}
['Amin', 'Lambert']


In [69]:
#ADDITIONAL RULES: 
#1. Application should never be reviewed by ONLY the student reps
#2. Application should be reviewed by both a sys/cog and a cell/molec person

#VERSION WITH ALL APPLICANTS GROUPED TOGETHER
def assign_reviewers(reviewers,applicants,rev_groups,n):
    """Randomly assign n reviewers to every applicant with balanced workloads.

    Rules enforced for every applicant's panel:
      1. The panel is never made up of student reps only.
      2. The panel contains at least one 'SysCog' and one 'CellMolec' reviewer.

    Args:
        reviewers: list of reviewer names (not modified)
        applicants: list/array of applicant names (not modified)
        rev_groups: dictionary categorizing reviewers into 'StudentReps', 'SysCog', 'CellMolec'
        n: single value, number of reviewers assigned to each application

    Returns:
        (assignment_list_df, reviewer_workload_df)
        assignment_list_df: DataFrame with columns 'Applicant', 'Reviewers' (list of n names)
        reviewer_workload_df: DataFrame with columns 'Reviewer', 'Applicants' (list of names)

    Raises:
        ValueError: if the random greedy assignment reaches an applicant for whom no
            rule-abiding panel of n reviewers with open slots exists. Callers may retry.
    """
    from itertools import combinations

    # Shuffle private copies so the caller's list/array keeps its order
    reviewer_order = list(reviewers)
    applicant_order = list(applicants)
    random.shuffle(reviewer_order)
    random.shuffle(applicant_order)

    num_reviewers = len(reviewer_order)
    num_reviews = len(applicant_order)*n

    # Everyone reads base_reviews applications; the first `remainder` reviewers
    # (in shuffled order) read one extra so the totals add up exactly
    base_reviews, remainder = divmod(num_reviews, num_reviewers)
    workload_counts = {rev: base_reviews for rev in reviewer_order}
    for rev in reviewer_order[:remainder]:
        workload_counts[rev] += 1

    student_reps = set(rev_groups['StudentReps'])
    sys_cog = set(rev_groups['SysCog'])
    cell_molec = set(rev_groups['CellMolec'])

    def follows_rules(panel):
        # Rule 1: not student reps only; Rule 2: both fields represented
        only_students = all(rev in student_reps for rev in panel)
        has_sys_cog = any(rev in sys_cog for rev in panel)
        has_cell_molec = any(rev in cell_molec for rev in panel)
        return has_sys_cog and has_cell_molec and not only_students

    #Create assignment list
    assignment_list = {applicant:[] for applicant in applicant_order}
    for applicant in applicant_order:
        #Pull list of possible reviewers based on if their workload is already full or not
        available_reviewers = [rev for rev, count in workload_counts.items() if count > 0]
        if len(available_reviewers) < n:
            raise ValueError(
                f"Cannot find {n} unique reviewers for '{applicant}'. The remaining pool "
                f"of reviewers with available slots ({len(available_reviewers)}) is too small."
            )

        # Enumerate the rule-abiding panels up front instead of re-sampling until one
        # appears: re-sampling never terminates when no valid panel exists
        valid_panels = [panel for panel in combinations(available_reviewers, n) if follows_rules(panel)]
        if not valid_panels:
            if all(rev in student_reps for rev in available_reviewers):
                raise ValueError('Break Rule 1: Student only reviewers')
            raise ValueError('Break Rule 2: Distribution of fields')

        # Uniform pick among valid panels; sample() also randomizes the order within the panel
        chosen_reviewers = random.sample(random.choice(valid_panels), n)

        assignment_list[applicant] = chosen_reviewers
        for reviewer in chosen_reviewers:
            workload_counts[reviewer] -= 1

    #Reverse map assignments: reviewer -> applicants they read
    reviewer_workload = {reviewer:[] for reviewer in reviewer_order}
    for app, revs in assignment_list.items():
        for rev in revs:
            reviewer_workload[rev].append(app)

    # Sanity report: per-reviewer load and overall total
    total = 0
    for key,value in reviewer_workload.items():
        total = total + len(value)
        print(f"Number of apps for {key} is {len(value)}")
    print(f"Total reviews is {total}, should be {num_reviews}")

    reviewer_workload_df = pd.DataFrame(list(reviewer_workload.items()),columns = ['Reviewer','Applicants'])
    assignment_list_df = pd.DataFrame(list(assignment_list.items()),columns = ['Applicant','Reviewers'])

    return assignment_list_df, reviewer_workload_df

In [70]:
# The random greedy assignment can dead-end (raises ValueError), so retry with a fresh
# shuffle, but give up after a bounded number of attempts so an infeasible setup
# cannot hang the notebook
MAX_ATTEMPTS = 100
for attempt in range(1, MAX_ATTEMPTS + 1):
    try:
        assignment_list, reviewer_workload = assign_reviewers(reviewers,applicants,rev_groups,n)
        break
    except ValueError as e:
        print(f"Error: {e}")
else:
    raise RuntimeError(f"No valid reviewer assignment found after {MAX_ATTEMPTS} attempts")

#assignment_list.to_excel('test.xlsx')

Number of apps for Paradiso is 53
Number of apps for Moore is 53
Number of apps for Amin is 53
Number of apps for Sheinberg is 53
Number of apps for Truccolo is 53
Number of apps for Berson is 53
Number of apps for Aizenman is 53
Number of apps for Chirila is 53
Number of apps for Abdelfattah is 53
Number of apps for Nassar is 53
Number of apps for Fallon is 53
Number of apps for Jaworski is 53
Number of apps for Lambert is 53
Number of apps for Mayoral is 53
Total reviews is 742, should be 742


In [67]:
#Analyze distribution of fields for each application
# 'Check' is True when the panel has at least one SysCog AND one CellMolec reviewer,
# False otherwise (explicit False so failing rows show up in the counts and the export)
sys_cog = set(rev_groups['SysCog'])
cell_molec = set(rev_groups['CellMolec'])
assignment_list['Check'] = [
    any(rev in sys_cog for rev in chosen_reviewers) and any(rev in cell_molec for rev in chosen_reviewers)
    for chosen_reviewers in assignment_list['Reviewers']
]

check_counts = assignment_list['Check'].value_counts()
print(check_counts)
assignment_list.to_excel('test.xlsx')

Check
True    374
Name: count, dtype: int64


In [46]:
# Scratch cell: base number of reviews per reviewer, and a quota table that
# gives every reviewer base_reviews + 2 slots
num_reviewers, num_applicants = len(reviewers), len(applicants)
num_reviews = n * num_applicants
base_reviews = num_reviews // num_reviewers  # floor division
print(base_reviews)
workload_counts = dict.fromkeys(reviewers, base_reviews + 2)
print(workload_counts)

53
{'Nassar': 55, 'Chirila': 55, 'Amin': 55, 'Mayoral': 55, 'Paradiso': 55, 'Truccolo': 55, 'Moore': 55, 'Fallon': 55, 'Abdelfattah': 55, 'Aizenman': 55, 'Sheinberg': 55, 'Lambert': 55, 'Berson': 55, 'Jaworski': 55}


In [4]:



#VERSION WITH ALL APPLICANTS GROUPED TOGETHER
def assign_reviewers(reviewers,applicants,n):
    """Randomly assign n distinct reviewers to every applicant with balanced workloads.

    No field / student-rep rules are applied in this version.
    NOTE(review): this shares its name with the 4-argument, rule-aware definition
    earlier in the notebook; whichever cell ran last wins.

    Args:
        reviewers: list of reviewer names (not modified)
        applicants: list/array of applicant names (not modified)
        n: single value, number of reviewers assigned to each application

    Returns:
        (assignment_list, reviewer_workload_df)
        assignment_list: dict mapping applicant -> list of n reviewer names
        reviewer_workload_df: DataFrame with columns 'Reviewer', 'Applicants' (list of names)

    Raises:
        ValueError: if fewer than n reviewers still have open slots when an applicant
            is reached (the random greedy assignment dead-ended). Callers may retry.
    """
    # Shuffle private copies so the caller's list/array keeps its order
    reviewer_order = list(reviewers)
    applicant_order = list(applicants)
    random.shuffle(reviewer_order)
    random.shuffle(applicant_order)

    num_reviewers = len(reviewer_order)
    num_reviews = len(applicant_order)*n

    # Everyone reads base_reviews applications; the first `remainder` reviewers
    # (in shuffled order) read one extra so the totals add up exactly
    base_reviews, remainder = divmod(num_reviews, num_reviewers)
    workload_counts = {rev: base_reviews for rev in reviewer_order}
    for rev in reviewer_order[:remainder]:
        workload_counts[rev] += 1

    #Create assignment list
    assignment_list = {applicant:[] for applicant in applicant_order}
    for applicant in applicant_order:
        #Pull list of possible reviewers based on if their workload is already full or not
        available_reviewers = [rev for rev, count in workload_counts.items() if count > 0]
        if len(available_reviewers) < n:
            raise ValueError(
                f"Cannot find {n} unique reviewers for '{applicant}'. The remaining pool "
                f"of reviewers with available slots ({len(available_reviewers)}) is too small."
            )
        #From available reviewers, choose n at random
        chosen_reviewers = random.sample(available_reviewers, n)
        assignment_list[applicant] = chosen_reviewers
        for reviewer in chosen_reviewers:
            workload_counts[reviewer] -= 1

    #Reverse map assignments: reviewer -> applicants they read
    reviewer_workload = {reviewer:[] for reviewer in reviewer_order}
    for app, revs in assignment_list.items():
        for rev in revs:
            reviewer_workload[rev].append(app)

    for key,value in reviewer_workload.items():
        print(f"Number of apps for {key} is {len(value)}")

    reviewer_workload_df = pd.DataFrame(list(reviewer_workload.items()),columns = ['Reviewer','Applicants'])

    return assignment_list, reviewer_workload_df

In [None]:
#VERSION WITH INTERNATIONAL AND TG-ELIGIBLE SEPARATE
def assign_reviewers_multipool(reviewers,applicants,n):
    """Placeholder for the variant that keeps international and TG-eligible applicants separate.

    Not implemented yet. The explicit body keeps this cell valid Python (a bare
    `def` line with no body is a SyntaxError) and makes accidental calls fail loudly.

    Args:
        reviewers: list of reviewer names
        applicants: list of applicant names
        n: single value, number of reviewers assigned to each application

    Raises:
        NotImplementedError: always.
    """
    raise NotImplementedError("assign_reviewers_multipool is not implemented yet")

In [60]:
# The random greedy assignment can dead-end (raises ValueError), so retry with a fresh
# shuffle, but give up after a bounded number of attempts so an infeasible setup
# cannot hang the notebook
MAX_ATTEMPTS = 100
for attempt in range(1, MAX_ATTEMPTS + 1):
    try:
        assignment_list, reviewer_workload = assign_reviewers(reviewers,applicants,n)
        break
    except ValueError as e:
        print(f"Error: {e}")
else:
    raise RuntimeError(f"No valid reviewer assignment found after {MAX_ATTEMPTS} attempts")

TypeError: assign_reviewers() missing 1 required positional argument: 'n'

## Section 2: Generate score cards

In [None]:
# Write one plain (unformatted) score card workbook per reviewer
for reviewer in reviewers:
    # Names of the applicants this reviewer was assigned
    is_this_reviewer = reviewer_workload['Reviewer'] == reviewer
    assigned_names = reviewer_workload.loc[is_this_reviewer, 'Applicants'].values[0]

    # Keep only those applicants' rows from the full info table
    assigned_rows = applicant_info['Person Name'].isin(assigned_names)
    reviewer_df = applicant_info[assigned_rows]

    # Blank columns for the reviewer to fill in, placed first: (position, name, value)
    reviewer_df.insert(0, 'Score', '')
    reviewer_df.insert(1, 'Notes', '')

    filename = f"{reviewer}_Scorecard.xlsx"
    reviewer_df.to_excel(filename,index = False)


In [35]:
#Generate reviewer score cards WITH formatting

for reviewer in reviewers:
    # Rows of applicant_info for the applicants assigned to this reviewer, sorted by name
    reviewer_df = applicant_info[applicant_info['Person Name'].isin(reviewer_workload['Applicants'].loc[reviewer_workload['Reviewer']==reviewer].values[0])]
    reviewer_df = reviewer_df.sort_values(by='Person Name')
    reviewer_df.insert(0, 'Score', '') #Add blank column for Score (position, name, value)
    reviewer_df.insert(1, 'Notes', '') #Add blank column for Notes (position, name, value)

    filename = f"{reviewer}_Scorecard.xlsx"
    # The context manager saves and closes the workbook even if formatting raises
    # (an unclosed writer leaves a corrupt file)
    with pd.ExcelWriter(filename,engine = 'xlsxwriter') as writer:
        # header=False because the styled header is written by hand below;
        # startrow=1 keeps row 0 free for it. Without startrow the data begins in
        # row 0 and the header would overwrite the first applicant.
        reviewer_df.to_excel(writer,index = False,header=False,startrow=1)

        #Format score card
        workbook = writer.book
        worksheet = writer.sheets['Sheet1']
        header_format = workbook.add_format({
            'bold': True,
            'font_size': 13,
            'align':'center',
            'font_color': '#ffffff',
            'fg_color':'#000000'
        })
        for col_num, value in enumerate(reviewer_df.columns.values):
            worksheet.write(0, col_num, value, header_format) #(row,column,value,format)
        worksheet.set_column('A:T',15) #Set minimum column width
        worksheet.autofit() #Autofit (finicky, which is why I do minimum width first)
    
    
    
    

In [None]:
# To play with formatting

## Section 3: Merge completed score cards

In [51]:
#Concatenate all reviewer score cards: one "<reviewer> Score" column per reviewer,
#left-joined onto the applicant table by 'Person Name'


#reviewers = ['Amin','Jaworski'] #IF DGS SUBMITS A SCORESHEET NEED TO ADD NAME TO REVIEWERS FOR MERGE

score_df = applicant_info.copy()
known_names = set(applicant_info['Person Name'])

for reviewer in reviewers:
    score_col = f"{reviewer} Score"
    df = pd.read_excel(f"{reviewer}_Scorecard.xlsx")
    df = df.rename(columns={'Score': score_col}) #rename Score column to include reviewer name

    # A left merge silently drops rows whose name does not match an applicant
    # (e.g. a typo in the score card), so flag them for manual correction
    unknown_names = df.loc[~df['Person Name'].isin(known_names), 'Person Name'].dropna().tolist()
    if unknown_names:
        print(f"WARNING: {reviewer}_Scorecard.xlsx has names not found in applicant_info (their scores are dropped): {unknown_names}")

    # A name listed twice on one card would duplicate that applicant's row in score_df
    repeated_names = df.loc[df['Person Name'].duplicated(keep=False), 'Person Name'].dropna().unique().tolist()
    if repeated_names:
        print(f"WARNING: {reviewer}_Scorecard.xlsx lists these applicants more than once: {repeated_names}")

    score_df = pd.merge(score_df,df[[score_col,'Person Name']], how = 'left',on = 'Person Name')

#Reorder columns so scores are first (the merges appended them as the last num_reviewers columns)
num_reviewers = len(reviewers)
cols = score_df.columns.tolist()
cols = cols[-num_reviewers:]+cols[:-num_reviewers]
score_df = score_df[cols]

#Add mean score (NaN scores are skipped; applicants with no scores get NaN)
mean_score = score_df.iloc[:,:num_reviewers].mean(axis = 1)
score_df.insert(num_reviewers,'Mean',mean_score)
score_df.insert(num_reviewers+1,'Next Round?','')

score_df = score_df.sort_values(by='Mean')
score_df.head()


Unnamed: 0,Nassar Score,Amin Score,Truccolo Score,Jaworski Score,Mean,Next Round?,Person Name,School,GPA
0,,,,,,,App 1,Wash U,1.2
9,,,,,,,App 10,Brown,15.76
10,,,,,,,App 11,UMD,17.4
11,,,,,,,App 12,Upenn,19.04
12,,,,,,,App 13,Howard,20.68


In [52]:
# Export the merged scores for this round to a formatted workbook
filename = f"Round {review_round} Scores.xlsx"
# The context manager saves and closes the workbook even if formatting raises
# (an unclosed writer leaves a corrupt file)
with pd.ExcelWriter(filename,engine = 'xlsxwriter') as writer:
    # header=False because the styled header is written by hand below;
    # startrow=1 keeps row 0 free for it. Without startrow the data begins in
    # row 0 and the header would overwrite the first row of scores.
    score_df.to_excel(writer,index = False,header=False,startrow=1)

    #Format score card
    workbook = writer.book
    worksheet = writer.sheets['Sheet1']
    header_format = workbook.add_format({
        'bold': True,
        'font_size': 13,
        'align':'center',
        'font_color': '#ffffff',
        'fg_color':'#000000'
    })
    for col_num, value in enumerate(score_df.columns.values):
        worksheet.write(0, col_num, value, header_format) #(row,column,value,format)
    worksheet.set_column('A:T',15) #Set minimum column width
    worksheet.autofit() #Autofit (finicky, which is why I do minimum width first)