In [2]:
import pandas as pd
import numpy as np

In [3]:
# Read the three VSRP exports from the working directory, in this order:
# mentee applications, mentor registrations, and rubric scores.
mentees, mentors, rubric = (
    pd.read_csv(csv_path)
    for csv_path in ("VSRP_Mentees.csv", "VSRP_Mentors.csv", "VSRP_Rubric_Scores.csv")
)

In [4]:
def get_name(first, middle, last):
    """Build a single display name from the three name parts.

    Any part that is missing (NaN/None, which is how pandas reports a blank
    CSV cell) is left out, so a blank middle name yields "First Last" and a
    blank first or last name no longer raises ``TypeError``.

    Parameters
    ----------
    first, middle, last : str or missing value
        The applicant's name parts.

    Returns
    -------
    str
        The present parts joined by single spaces, in first/middle/last order
        (an empty string when every part is missing).
    """
    parts = (first, middle, last)
    # str() is a no-op for strings and keeps a stray non-string cell from crashing the join.
    return " ".join(str(part) for part in parts if not pd.isna(part))

In [5]:
# Combine the three name columns into one "Applicant Name" per mentee row.
mentees["Applicant Name"] = [
    get_name(first_name, middle_name, last_name)
    for first_name, middle_name, last_name in zip(
        mentees["First Name"], mentees["Middle Name"], mentees["Last Name"]
    )
]

In [6]:
# Despite its name, `duplicates` ends up holding the *keep* mask: True for the first
# row carrying a given applicant name, False for every later repeat of that name.
duplicates = (~mentees["Applicant Name"].duplicated()).tolist()

In [7]:
# Keep only the rows flagged True in `duplicates` (the first row per applicant name).
# The explicit copy makes the result an independent frame, so the column assignment
# and in-place sort that follow do not raise SettingWithCopyWarning.
mentees = mentees[duplicates].copy()

In [8]:
# Same clean-up for the rubric: keep the first row per applicant name.
# `duplicates` is rebound to the rubric's keep mask (True = row is kept).
duplicates = (~rubric["Applicant Name"].duplicated()).tolist()
rubric = rubric.loc[duplicates]

In [9]:
# Attach each applicant's rubric score by matching on "Applicant Name".
# A plain column assignment aligns on row labels, which only gives the right score
# when both CSVs list applicants in exactly the same row order.
# NOTE(review): assumes rubric names are formatted like the names built by get_name;
# applicants without a matching rubric row get NaN and therefore sort last.
score_by_name = (
    rubric.drop_duplicates(subset="Applicant Name")  # Series.map needs a unique lookup index
    .set_index("Applicant Name")["Final Score"]
)
mentees["Final Score"] = mentees["Applicant Name"].map(score_by_name)

In [10]:
# Order applicants from highest to lowest "Final Score" and renumber the rows 0..n-1
# in that order; the previous row labels are kept in a new "index" column.
# Later cells rely on this fresh 0..n-1 numbering to map score-matrix rows to mentees.
mentees = mentees.sort_values(by=['Final Score'], ascending=False).reset_index()
mentees.head(n=1)

Unnamed: 0,index,Submission #,Date/Time Submitted,Last Modified,Archived?,Approved?,Member ID,Admin Comments,APSA's VSRP 2021 Mentee Application,First Name,...,I understand that my resume will be sent out to potential mentors,"I understand that potential mentors may contact me, but this does NOT guarantee a position",What is your motivation for applying to VSRP?,Explain your interest in combining medicine and research in your future career,"Have you experienced any barriers to obtaining research experience, and if so, please describe.",What did you learn from the 'Day in The Life of a Physician Scientist' Interactive Session?,Are you interested in receiving resources supporting applicants to MD/DO-PhD programs?,I acknowledge that my anonymized responses may be used in research to improve VSRP,Applicant Name,Final Score
0,169,24710708,2021-04-08 19:38:43,2021-04-08 19:38:43,No,No,,,,Sally,...,Yes,Yes,I want to learn more about what a physician-sc...,I am interested in combining medicine and rese...,I have experienced barriers in obtaining resea...,"From the ""Day in The Life of a Physician Scien...",Yes,Yes,Sally Mei,12.0


In [11]:
# Column headers of the survey exports, kept as constants so the long question
# text is spelled out only once. The strings must match the CSV headers exactly.

# Mentor column read by the scoring code: skills the mentor would like mentees to have.
TAG_SKILLS_PI = "What skills would be helpful for mentees to have?  Note that we cannot guarantee this."

# Mentee column read by the scoring code: skills the mentee already has.
TAG_STUDENT_SKILLS = "What skills do you have that you could offer a lab? (not necessary for being matched)"

# Mentee column read by the scoring code: skills the mentee wants to learn.
TAG_STUDENT_SKILLS_TO_LEARN = "What skills would you like to learn?"

# Not referenced by the cells shown in this notebook.
TAG_GRAD = "When do you plan to apply to a professional school (e.g., MD, MD/PhD, DO/PhD, graduate, or masters)?"

# Not referenced by the cells shown in this notebook — presumably a mentor-form column; verify.
TAG_CONTACT = "Contact email for receiving applicant resumes"

# Not referenced by the cells shown in this notebook — presumably a mentor-form column; verify.
TAG_MENTOR_EMAIL = "Email of direct mentor if not yourself (otherwise N/A):"

# Mentee column holding the applicant's email address.
TAG_STUDENT_EMAIL = "Email Address"

# Mentee column read by the scoring code: research fields the mentee is interested in.
TAG_STUDENT_FIELD = "What research field(s) are you interested in?"

# Mentor column read by the scoring code: the lab's research field(s).
TAG_PI_FIELD = "Lab Research field"

In [12]:
# Points added to a mentee/mentor compatibility score by calculate_score_mentee_mentor.
# Added once for every mentor research field that the mentee also listed.
WEIGHT_FIELD = 6
# Added for every skill the mentor asks for that the mentee already has.
WEIGHT_CURRENT_SKILLS = 2
# Added for every skill the mentor asks for that the mentee lacks but wants to learn.
WEIGHT_FUTURE_SKILLS = 0.2

In [13]:
def calculate_score_mentee_mentor(PI_field, PI_skills, mentee_field, mentee_skills, mentee_future_skills,
                                  *, weight_field=None, weight_current_skills=None, weight_future_skills=None):
    """Score how well one mentee fits one mentor.

    Every answer is a "|"-separated list of choices. The score starts at 0 and grows by:

    * ``weight_field`` for each mentor field the mentee also listed;
    * ``weight_current_skills`` for each mentor-requested skill the mentee already has;
    * ``weight_future_skills`` for each mentor-requested skill the mentee does not have
      but wants to learn.

    Parameters
    ----------
    PI_field, PI_skills : str or missing value
        The mentor's research fields and requested skills.
    mentee_field, mentee_skills, mentee_future_skills : str or missing value
        The mentee's fields of interest, current skills and skills to learn.
    weight_field, weight_current_skills, weight_future_skills : number, optional
        Keyword-only overrides. When left as None they fall back to the notebook-level
        WEIGHT_FIELD, WEIGHT_CURRENT_SKILLS and WEIGHT_FUTURE_SKILLS constants.

    Returns
    -------
    int or float
        The accumulated score (0 when nothing overlaps).

    Notes
    -----
    A missing answer (NaN/None from a blank CSV cell) counts as "no choices" instead of
    raising ``AttributeError``, and empty tokens (e.g. from a trailing "|") never match.
    """
    def split_choices(answer):
        # Anything that is not a string (NaN, None) is an unanswered question.
        if not isinstance(answer, str):
            return []
        return [choice for choice in answer.split("|") if choice]

    if weight_field is None:
        weight_field = WEIGHT_FIELD
    if weight_current_skills is None:
        weight_current_skills = WEIGHT_CURRENT_SKILLS
    if weight_future_skills is None:
        weight_future_skills = WEIGHT_FUTURE_SKILLS

    # Mentor answers stay lists so a choice repeated by the mentor is counted each time;
    # mentee answers only serve membership tests, so sets are sufficient.
    fields_PI = split_choices(PI_field)
    skills_PI = split_choices(PI_skills)
    fields_mentee = set(split_choices(mentee_field))
    skills_mentee = set(split_choices(mentee_skills))
    new_skills_mentee = set(split_choices(mentee_future_skills))

    score = 0

    for field in fields_PI:
        if field in fields_mentee:
            score = score + weight_field

    for skill in skills_PI:
        if skill in skills_mentee:
            score = score + weight_current_skills
        elif skill in new_skills_mentee:
            # Only counted when the mentee does not already have the skill.
            score = score + weight_future_skills

    return score

In [14]:
def calculate_score_of_mentee_w_all_mentor(field, skills, future_skills):
    """Score one mentee against every row of the notebook-level `mentors` frame.

    `field`, `skills` and `future_skills` are the mentee's "|"-separated answers.
    Each mentor's research-field and requested-skills columns are passed, together
    with those answers, to calculate_score_mentee_mentor. The result holds one score
    per mentor, in `mentors` row order.
    """
    mentor_profiles = mentors[[TAG_PI_FIELD, TAG_SKILLS_PI]]

    def score_against(mentor_row):
        pi_field, pi_skills = mentor_row
        return calculate_score_mentee_mentor(pi_field, pi_skills, field, skills, future_skills)

    return mentor_profiles.apply(score_against, axis=1, result_type='expand')

In [15]:
# Build the mentee-by-mentor score table: one row per mentee (in `mentees` order),
# one column per mentor, each cell being that pair's compatibility score.
entire2Ddf = mentees.loc[
    :, [TAG_STUDENT_FIELD, TAG_STUDENT_SKILLS, TAG_STUDENT_SKILLS_TO_LEARN]
].apply(
    lambda answers: calculate_score_of_mentee_w_all_mentor(*answers),
    axis="columns",
    result_type="expand",
)

In [16]:
# Convert the score table to a plain NumPy array (rows follow `mentees`, columns
# follow `mentors`) and display it.
entire2Darray = entire2Ddf.to_numpy()
entire2Darray

array([[ 6.8,  8. , 20.4, ..., 14.8, 14.8, 18.2],
       [10.6,  2. ,  0.4, ...,  4.6,  6.4,  0.2],
       [14.6,  0. , 14.4, ..., 14.4, 14.4, 12. ],
       ...,
       [20.4,  0. , 24.2, ..., 20. , 18.2, 14. ],
       [18.6,  0. ,  2.6, ..., 16.4, 18.2, 12.2],
       [12.6,  2. ,  6.4, ..., 14.2, 14.4, 12. ]])

In [17]:
# Matching policy: each mentee is offered to their best-scoring mentors, and a mentor
# stops receiving mentees once they reach capacity.
MENTORS_PER_MENTEE = 3
MAX_MENTEES_PER_MENTOR = 8
# Real scores are sums of positive weights (>= 0), so -1 ranks a full mentor below
# every mentor that is still available.
FULL_MENTOR_SCORE = -1

# Work on a copy: the score matrix itself stays untouched, so re-running this cell
# gives the same matches, and the writes below also work when the array obtained from
# the DataFrame is read-only.
available_scores = entire2Darray.copy()

# matches[i, j] is True when mentee row i is assigned to mentor row j.
matches = np.zeros((len(mentees), len(mentors)), dtype=bool)
mentees_per_mentor = np.zeros(len(mentors), dtype=int)

# Rows are visited in `mentees` order, so mentees earlier in that frame pick first.
for row in range(len(available_scores)):
    mentor_list = available_scores[row, :].argsort()[-MENTORS_PER_MENTEE:][::-1]
    for mentor in mentor_list:
        # A full mentor can still surface here when fewer than MENTORS_PER_MENTEE
        # mentors remain open; skip it so the capacity is never exceeded.
        if mentees_per_mentor[mentor] >= MAX_MENTEES_PER_MENTOR:
            continue
        matches[row, mentor] = True
        mentees_per_mentor[mentor] += 1
        if mentees_per_mentor[mentor] >= MAX_MENTEES_PER_MENTOR:
            available_scores[:, mentor] = FULL_MENTOR_SCORE

In [22]:
# For every mentor (one column of `matches`), collect the matched mentees as
# "Applicant Name (email)" strings. `mentee_emails[j]` belongs to mentor row j.
mentee_emails = []
for col in range(matches.shape[1]):
    matched_rows = np.flatnonzero(matches[:, col])
    # The row numbers of `matches` are positions, so look mentees up with .iloc rather
    # than by index label; the two only coincide while the index is exactly 0..n-1.
    email = [
        mentees["Applicant Name"].iloc[match] + " (" + mentees[TAG_STUDENT_EMAIL].iloc[match] + ")"
        for match in matched_rows
    ]
    mentee_emails.append(email)

In [24]:
# Store each mentor's list of matched mentees ("Name (email)" strings) in the
# "Mentee Name" column, then preview the first two mentors.
mentors["Mentee Name"] = pd.Series(mentee_emails, index=mentors.index, dtype=object)
mentors.head(n=2)

Unnamed: 0,Submission #,Date/Time Submitted,Last Modified,Archived?,Approved?,Member ID,Admin Comments,Virtual Summer Research Program (VSRP): Register to be a Mentor!,First Name,Last Name,...,I agree that the PI (if not myself) will meet with the mentee at least twice during the program,I agree to welcome a mentee into your lab for a minimum of 8 weeks?,I understand that students may not begin work until they are officially matched with me on 6/01/21,"I have the flexibility for this summer research opportunity to extend past August 6th, 2021.",Do you have avenues to fund this summer research opportunity? (Full funding: $2000/8 weeks),How did you hear about VSRP?,"If you selected other, please elaborate.",What is your motivation for applying to be a VSRP mentor?,Are you interested in informally mentoring a student who shares an aspect of the identity?,Mentee Name
0,24350986,2021-03-03 22:48:25,2021-03-03 22:48:25,No,No,,,,Hongxia,Ren,...,Yes,Yes,Yes,"Yes, months.",Cannot provide any funding,Email listserv,,I had a very good experience last year with th...,No,"[Shrika Ravichandran (shrikar@g.clemson.edu), ..."
1,24352456,2021-03-04 07:50:26,2021-03-04 07:50:26,No,No,,,,test,test,...,Yes,Yes,Yes,"Yes, weeks.",Maybe,Other,,test,Maybe,"[Elizabeth Donovan (donovane@lawrence.edu), Sh..."


In [25]:
# Write the mentor table, including the new "Mentee Name" column, to disk.
# The row index is written too (pandas' default), as the first, unnamed CSV column.
mentors.to_csv(path_or_buf="Mentors_matches.csv", index=True)