In [None]:
import numpy as np
import pandas as pd
import regex as re
import time

# Load the input tables. Missing cells are replaced with '' because the
# functions below call string methods (.split(), .lower()) directly on cells.
coaches = pd.read_csv("../DATA/coaches.csv").fillna('')
students = pd.read_csv("../DATA/students.csv").fillna('')
# Column names are supplied here rather than read from the file. This table gets
# the same '' fill as the others: MentorPreferences calls .split() on its
# 'project' cells, which would fail on a float NaN.
studentProjectNames = pd.read_csv(
    '../DATA/studentProjectNames.csv', names=['student', 'project']
).fillna('')

In [None]:
def JaccardSim(listA, listB):
    """Return the Jaccard similarity of two collections, treated as sets.

    The score is the size of the intersection divided by the size of the
    union. If either input has length zero the result is 0.0.
    """
    if not len(listA) or not len(listB):
        return 0.0

    left = set(listA)
    right = set(listB)
    shared = left & right
    combined = left | right
    return len(shared) / len(combined)

In [None]:
def StudentSimilarityMentor(allstudents, allcoaches, t, tupleBinary):
    """Rank every coach for every student by Jaccard similarity.

    Both frames are addressed by column position:

    * students: column 1 is split on whitespace and its first token labels
      the student; columns 3-7 hold ';'-separated language entries; column 8
      holds ';'-separated skill entries; column 9 holds ';'-separated text
      that is lowercased and split into words.
    * coaches: column 1 is split at '@' and the part before it identifies
      the coach; columns 24-60 are language cells; columns 61-66 are skill
      cells; column 23 is text that is lowercased and split into words.

    Parameters
    ----------
    allstudents, allcoaches : pandas.DataFrame
        Tables laid out as described above, with '' for missing cells.
    t : {'sl', 's', 'l'}
        Which score ranks the coaches: skill + language, skill only, or
        language only.
    tupleBinary : {0, 1}
        0 returns bare coach handles; 1 returns 'handle#score' strings where
        score is int(similarity * 1000).

    Returns
    -------
    pandas.DataFrame
        One column per student (labelled by the first token of column 1),
        one row per rank, best-scoring coach in row 0.

    Raises
    ------
    ValueError
        If `t` or `tupleBinary` is not one of the accepted values.
    """
    # Validate first so a bad flag fails before the students x coaches loop.
    if t not in ('sl', 's', 'l'):
        raise ValueError('Wrong tuning parameter')
    if tupleBinary not in (0, 1):
        raise ValueError('Please Enter a tuple parameter of 1 or 0')

    # Coach features do not depend on the student, so build them only once.
    coachProfiles = []
    for j in range(len(allcoaches)):
        coachLanguages = [x for x in list(allcoaches.iloc[j, 24:61]) if x != '']
        coachSkills = [x.lower() for x in list(allcoaches.iloc[j, 61:67]) if x != '']
        coachSkills += allcoaches.iloc[j, 23].lower().split()
        handle = allcoaches.iloc[j, 1].split('@')[0]
        coachProfiles.append((handle, coachLanguages, coachSkills))

    rankings = []
    for i in range(len(allstudents)):
        studentLanguages = []
        for k in range(3, 8):
            studentLanguages += [x for x in allstudents.iloc[i, k].split(';') if x != '']

        # Coach skills are lowercased, so the listed student skills must be
        # lowercased too or mixed-case entries could never match.
        listedSkills = [x.lower() for x in allstudents.iloc[i, 8].split(';')]
        textWords = []
        for phrase in allstudents.iloc[i, 9].split(';'):
            textWords += phrase.lower().split()
        studentSkills = [x for x in listedSkills + textWords if x != '']

        scored = []
        for handle, coachLanguages, coachSkills in coachProfiles:
            jaccardLang = JaccardSim(studentLanguages, coachLanguages)
            jaccardSkill = JaccardSim(studentSkills, coachSkills)
            if t == 'sl':
                score = jaccardSkill + jaccardLang
            elif t == 's':
                score = jaccardSkill
            else:
                score = jaccardLang
            scored.append((score, handle))

        # Ascending stable sort, then reverse: among equal scores the coach
        # that appears later in `allcoaches` is ranked first. This is NOT the
        # same as sorted(..., reverse=True), which would flip the tie order.
        ranked = sorted(scored, key=lambda pair: pair[0])[::-1]

        if tupleBinary == 1:
            rankings.append([handle + '#' + str(int(score * 10**3)) for score, handle in ranked])
        else:
            rankings.append([handle for _, handle in ranked])

    studentLabels = [name.split()[0] for name in allstudents.iloc[:, 1]]
    return pd.DataFrame(rankings, index=studentLabels).transpose()

In [None]:
def MentorPreferences(allcoaches, allstudents, projectNames=None):
    """List, for every coach, the students whose project resembles one the coach named.

    Double-quoted titles are extracted from coach columns 13-21. A student is
    added to a coach's list each time the student's project shares more than
    one whitespace-separated word with one of those titles, so a student can
    appear more than once.

    Parameters
    ----------
    allcoaches : pandas.DataFrame
        Coach table; column 1 is split at '@' to label the coach and columns
        13-21 are scanned for quoted titles.
    allstudents : pandas.DataFrame
        Not used by this function; kept so existing calls keep working.
    projectNames : pandas.DataFrame, optional
        Two-column table whose first column identifies the student and whose
        second column is the project text. Defaults to the notebook-level
        `studentProjectNames`.

    Returns
    -------
    pandas.DataFrame
        One column per coach (labelled by the part of column 1 before '@')
        holding that coach's students; shorter columns are padded with
        missing values.
    """
    if projectNames is None:
        projectNames = studentProjectNames

    # Tokenise each student project once instead of once per coach title.
    studentProjects = [
        (projectNames.iloc[k, 0], set(projectNames.iloc[k, 1].split()))
        for k in range(len(projectNames))
    ]

    preferences = []
    for i in range(len(allcoaches)):
        mentorProjectInterest = []
        for j in range(13, 22):
            # Read from the frame that was passed in, not the global `coaches`,
            # so rows stay aligned with the labels built from `allcoaches` below.
            mentorProjectInterest += re.findall(r'"(.*?)"', allcoaches.iloc[i, j])

        mentorStudentInterest = []
        for title in mentorProjectInterest:
            titleWords = set(title.split())
            for student, projectWords in studentProjects:
                if len(titleWords & projectWords) > 1:
                    mentorStudentInterest.append(student)

        preferences.append(mentorStudentInterest)

    handles = [email.split('@')[0] for email in allcoaches.iloc[:, 1]]
    return pd.DataFrame(preferences, index=handles).transpose()

In [None]:
def FeasibleMatching(studentPreferences, mentorPreferences):
    """Blank out every ranked mentor who did not list the student.

    Parameters
    ----------
    studentPreferences : pandas.DataFrame
        One column per student; each cell is a mentor label that must be a
        column of `mentorPreferences`.
    mentorPreferences : pandas.DataFrame
        One column per mentor holding the students that mentor listed.

    Returns
    -------
    pandas.DataFrame
        A copy of `studentPreferences` in which a cell is replaced by '' when
        the column's student does not appear in that mentor's column.

    Raises
    ------
    KeyError
        If a cell names a mentor that is not a column of `mentorPreferences`.

    Notes
    -----
    The input frame is left untouched. Writing the '' markers into the
    caller's frame made a second call on the same frame fail, because '' is
    then looked up as a mentor column.
    """
    feasible = studentPreferences.copy()
    # mentor label -> set of students in that mentor's column, built lazily so
    # each column is converted once instead of once per cell.
    listedBy = {}

    for col, student in enumerate(feasible.columns):
        for row in range(len(feasible.index)):
            mentor = feasible.iloc[row, col]
            if mentor not in listedBy:
                listedBy[mentor] = set(mentorPreferences.loc[:, mentor])
            if student not in listedBy[mentor]:
                feasible.iloc[row, col] = ''

    return feasible

In [None]:
def match1(studentpreferences, allcoaches, t):
    """Greedily give each student the highest-ranked mentor that still has room.

    Students are handled in column order. For each student the rows are
    scanned from the top and the first mentor with fewer than `t` mentees is
    taken.

    Parameters
    ----------
    studentpreferences : pandas.DataFrame
        One column per student, one row per rank; cells are mentor handles.
        Cells equal to '' (the marker FeasibleMatching writes) are skipped.
    allcoaches : pandas.DataFrame
        Coach table; the part of column 1 before '@' is the mentor handle.
    t : int
        Maximum number of mentees per mentor.

    Returns
    -------
    pandas.DataFrame
        Columns 'mentor' and 'student', one row per match. Students for whom
        no listed mentor has room are absent.

    Raises
    ------
    KeyError
        If a non-blank cell names a mentor that is not in `allcoaches`.
    """
    handles = [x.split('@')[0] for x in list(allcoaches.iloc[:, 1])]
    menteeCount = dict((x, 0) for x in handles)
    matching = []

    for i in range(len(studentpreferences.columns)):
        student = studentpreferences.columns[i]

        for j in range(len(studentpreferences.index)):
            mentor = studentpreferences.iloc[j, i]

            # Skip blanked-out cells unless '' really is a coach handle.
            if mentor == '' and mentor not in menteeCount:
                continue

            # The earlier version branched on whether other students also
            # listed this mentor, but both branches did the same thing, and
            # building that list indexed a column by the row number, which
            # fails once the row number reaches the number of students.
            if menteeCount[mentor] < t:
                menteeCount[mentor] += 1
                matching.append([mentor, student])
                break

    return pd.DataFrame(matching, columns=['mentor', 'student'])

In [None]:
def match2(studentpreferences, allcoaches, t, MenteeNumConstraint):
    """Match students to mentors rank by rank, highest score first.

    Each row of `studentpreferences` is one rank. Within a row the cells are
    visited in descending score order (equal scores in column order); a
    student is matched to the cell's mentor if the mentor has fewer than `t`
    mentees and the student is still unmatched. Cells with a score of 0 or
    less are never matched.

    Parameters
    ----------
    studentpreferences : pandas.DataFrame
        One column per student; cells are 'handle#score' strings with an
        integer score (the format StudentSimilarityMentor emits when
        tupleBinary is 1).
    allcoaches : pandas.DataFrame
        Coach table; the part of column 1 before '@' is the mentor handle and
        column 4 is compared with 'Yes' when `MenteeNumConstraint` is 1.
    t : int
        Mentee count at which a mentor is considered full.
    MenteeNumConstraint : {0, 1}
        0: every mentor starts with a count of 0. 1: mentors whose column 4 is
        'Yes' start at t - 1, so they can take a single mentee; all others
        start at 0. NOTE(review): the meaning of the column 4 answer is not
        visible here - confirm that 'Yes' is meant to limit a mentor to one.

    Returns
    -------
    pandas.DataFrame
        Columns 'mentor' and 'mentee', one row per match, in match order.

    Raises
    ------
    ValueError
        If `MenteeNumConstraint` is neither 0 nor 1.
    """
    names = [x.split('@')[0] for x in list(allcoaches.iloc[:, 1])]
    if MenteeNumConstraint == 0:
        d = dict((x, 0) for x in names)
    elif MenteeNumConstraint == 1:
        d = dict()
        for name, answer in zip(names, list(allcoaches.iloc[:, 4])):
            d[name] = t - 1 if answer == 'Yes' else 0
    else:
        raise ValueError('last parameter must be 0 or 1')

    matching = []
    paired = set()
    studentLabels = list(studentpreferences.columns)

    for j in range(len(studentpreferences.index)):
        entries = []
        for cell in studentpreferences.iloc[j, :]:
            # Split at the last '#': the score never contains '#', but a
            # handle might.
            handle, score = cell.rsplit('#', 1)
            entries.append((handle, int(score)))

        # A stable descending sort visits cells in the same order as picking
        # the first maximum over and over, without rescanning the row, and it
        # copes with a frame that has no student columns.
        order = sorted(range(len(entries)), key=lambda k: -entries[k][1])

        for ind in order:
            mentor, score = entries[ind]
            if score <= 0:
                break  # everything after this is 0 or less as well
            student = studentLabels[ind]
            if d[mentor] < t and student not in paired:
                d[mentor] += 1
                matching.append([mentor, student])
                paired.add(student)

    return pd.DataFrame(matching, columns=['mentor', 'mentee'])