**Author**: Dee

**Email**: deetungsten@gmail.com

**Date**: December 12th, 2018

**Version**: 0.9

**Purpose**: For the PyTorch Facebook Challenge Student-Mentor Program. Automatically pairs students with mentors, optimizing the matches according to their experience and language.


1.   Importing the necessary files from Google Drive



In [0]:
!pip install deap
!pip install pydrive
import os
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

# 1. Authenticate and create the PyDrive client.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

# Choose a local (Colab) directory to store the data.
local_download_path = os.path.expanduser('~/data')
# exist_ok=True tolerates an already-existing directory (e.g. when this cell
# is re-run) while still raising on real failures such as permission errors,
# instead of silently swallowing every exception.
os.makedirs(local_download_path, exist_ok=True)

# 2. Auto-iterate using the query syntax
#    https://developers.google.com/drive/v2/web/search-parameters
# Replace the placeholder text with the Drive folder ID. The ID must stay
# wrapped in the single quotes inside the query string (the placeholder says
# "parenthesis", but the query contains only quotes).
file_list = drive.ListFile(
    {'q': "'REPLACE FOLDER ID HERE (KEEP PARENTHESIS)' in parents"}).GetList()

for f in file_list:
  # 3. Create & download by id.
  print('title: %s, id: %s' % (f['title'], f['id']))
  fname = os.path.join(local_download_path, f['title'])
  print('downloading to {}'.format(fname))
  f_ = drive.CreateFile({'id': f['id']})
  f_.GetContentFile(fname)

print("Files successfully imported from Google Drive")

1.   Imports the CSV files and loads them into pandas DataFrames
2.   Sets the weights of importance of the traits (500 for language, 200 for skill difference)

1.   Goes through every student-mentor pair and assigns the relationship weights








In [6]:
import random
import numpy as np

from deap import base, creator, tools
import pandas as pd
# Scoring weights: a shared language is worth more than a favourable skill gap.
language_weight = 500
skill_weights = 200

# Ordinal encoding of the skill labels found in the CSV files.
skill = {'Beginner':0, 'Intermediate':1, 'Advanced':2}

students = pd.read_csv("/root/data/students.csv")
mentors = pd.read_csv("/root/data/mentors.csv")

print("List of Students")
display(students)

print("List of Mentors")
display(mentors)

number_of_students = len(students)
number_of_mentors = len(mentors)

# relationship_m[s, m] holds the pairing score of student s with mentor m.
relationship_m = np.zeros((number_of_students, number_of_mentors))

for index_s, student in students.iterrows():
    student_level = skill[student['Skill']]
    for index_m, mentor in mentors.iterrows():
        same_language = student['Language'] == mentor['Language']
        mentor_level = skill[mentor['Skill']]
        # The skill bonus is granted only when the mentor is strictly more
        # advanced than the student.
        mentor_is_ahead = mentor_level - student_level > 0
        relationship_m[index_s, index_m] = (
            (language_weight if same_language else 0)
            + (skill_weights if mentor_is_ahead else 0)
        )

print("Relationship Matrix")
print(relationship_m)

List of Students


Unnamed: 0,Name,Skill,Language
0,Test1,Beginner,Chinese
1,Test2,Intermediate,Italian
2,Test3,Beginner,English
3,Test4,Beginner,Vietnamese
4,Test5,Intermediate,English


List of Mentors


Unnamed: 0,Name,Skill,Language
0,Test6,Advanced,Chinese
1,Test7,Advanced,Italian
2,Test8,Intermediate,English
3,Test9,Intermediate,Vietnamese
4,Test10,Advanced,English


[[700. 200. 200. 200. 200.]
 [200. 700.   0.   0. 200.]
 [200. 200. 700. 200. 700.]
 [200. 200. 200. 700. 200.]
 [200. 200. 500.   0. 700.]]




1.   Creates two lists to match the students and mentors. Each list contains the possible choice indices. In the current version, the number of students and mentors needs to be balanced (equal), but this may be changed in future versions
2.   Defines a mutation function which just shuffles the order randomly



In [0]:
import array
import random
import json

import numpy

from deap import algorithms
from deap import base
from deap import creator
from deap import tools
from random import shuffle

# distance_map is another name for the relationship score matrix.
distance_map = relationship_m
# Each permutation in an individual has one entry per student.
IND_SIZE = len(students)

toolbox = base.Toolbox()

# NOTE: despite the "FitnessMin" name, the positive weight makes DEAP
# maximize this single objective.
creator.create("FitnessMin", base.Fitness, weights=(1.0,))
creator.create("Individual", list, typecode='i', fitness=creator.FitnessMin)

# Two generators, each producing a random permutation of 0..IND_SIZE-1.
for generator_name in ("indices1", "indices2"):
    toolbox.register(generator_name, random.sample, range(IND_SIZE), IND_SIZE)

# An individual is a list of two permutations: one from each generator.
toolbox.register(
    "individual",
    tools.initCycle,
    creator.Individual,
    (toolbox.indices1, toolbox.indices2),
)

def myMutation(individual):
    """Mutate *individual* in place by reshuffling both of its permutations.

    The lists at positions 0 and 1 of *individual* are each shuffled
    randomly. Returns a one-element tuple containing the same (mutated)
    individual.
    """
    for position in (0, 1):
        shuffle(individual[position])
    return (individual,)








1.   Defines a cost function. In this case it is just the sum of the relationship cost.
2.   Runs the genetic algorithm to maximize the cost function (which maximizes the relationship)



In [4]:
# Register a "population" factory: it builds a plain list by calling
# toolbox.individual repeatedly; the count is supplied at call time (n=...).
toolbox.register("population", tools.initRepeat, list, toolbox.individual)
                            

                 
def evalrel(individual):
    """Score an individual as the sum of its pairwise relationship values.

    Entries of individual[0] and individual[1] are paired position by
    position; each pair indexes relationship_m as [row][column]. Returns the
    total as a one-element tuple.
    """
    pairs = zip(individual[0], individual[1])
    return (sum(relationship_m[row][col] for row, col in pairs),)
                 


# Genetic operators used by the evolutionary loop.
for operator_name, operator_fn in (
    ("mate", tools.cxTwoPoint),
    ("select", tools.selBest),
    ("evaluate", evalrel),
    ("mutate", myMutation),
):
    toolbox.register(operator_name, operator_fn)


# Fixed seed so the run is reproducible.
random.seed(169)


# Generations, parents kept per generation, offspring produced per generation.
NGEN, MU, LAMBDA = 50, 50, 100
# Crossover and mutation probabilities.
CXPB, MUTPB = 0.7, 0.2

pop = toolbox.population(n=MU)
hof = tools.ParetoFront()
stats = tools.Statistics(lambda ind: ind.fitness.values)
for stat_name, reducer in (
    ("avg", numpy.mean),
    ("std", numpy.std),
    ("min", numpy.min),
    ("max", numpy.max),
):
    stats.register(stat_name, reducer, axis=0)

# Left as a bare trailing expression so the notebook cell still displays the
# returned (population, logbook) pair.
algorithms.eaMuPlusLambda(
    pop,
    toolbox,
    mu=MU,
    lambda_=LAMBDA,
    cxpb=CXPB,
    mutpb=MUTPB,
    ngen=NGEN,
    stats=stats,
    halloffame=hof,
)




gen	nevals	avg    	std           	min   	max    
0  	50    	[1498.]	[564.08864552]	[600.]	[2500.]
1  	89    	[2160.]	[328.6335345] 	[1800.]	[3500.]
2  	87    	[2428.]	[249.03011866]	[2100.]	[3500.]
3  	95    	[2604.]	[369.3020444] 	[2300.]	[3500.]
4  	90    	[2732.]	[394.68468427]	[2500.]	[3500.]
5  	92    	[2852.]	[435.54104284]	[2500.]	[3500.]
6  	89    	[3024.]	[434.30864601]	[2500.]	[3500.]
7  	88    	[3372.]	[96.]         	[3300.]	[3500.]
8  	95    	[3396.]	[99.91996797] 	[3300.]	[3500.]
9  	91    	[3456.]	[82.84926071] 	[3300.]	[3500.]
10 	86    	[3500.]	[0.]          	[3500.]	[3500.]
11 	90    	[3500.]	[0.]          	[3500.]	[3500.]
12 	88    	[3500.]	[0.]          	[3500.]	[3500.]
13 	94    	[3500.]	[0.]          	[3500.]	[3500.]
14 	85    	[3500.]	[0.]          	[3500.]	[3500.]
15 	96    	[3500.]	[0.]          	[3500.]	[3500.]
16 	94    	[3500.]	[0.]          	[3500.]	[3500.]
17 	89    	[3500.]	[0.]          	[3500.]	[3500.]
18 	87    	[3500.]	[0.]          	[3500.]	[3500.]
19

([[[1, 0, 4, 2, 3], [1, 0, 4, 2, 3]],
  [[4, 3, 1, 2, 0], [4, 3, 1, 2, 0]],
  [[0, 2, 3, 4, 1], [0, 2, 3, 4, 1]],
  [[4, 3, 1, 2, 0], [4, 3, 1, 2, 0]],
  [[4, 3, 1, 2, 0], [4, 3, 1, 2, 0]],
  [[4, 3, 1, 2, 0], [4, 3, 1, 2, 0]],
  [[4, 3, 1, 0, 2], [4, 3, 1, 0, 2]],
  [[4, 3, 1, 2, 0], [4, 3, 1, 2, 0]],
  [[4, 3, 1, 0, 2], [4, 3, 1, 0, 2]],
  [[4, 3, 1, 2, 0], [4, 3, 1, 2, 0]],
  [[1, 0, 4, 2, 3], [1, 0, 4, 2, 3]],
  [[4, 3, 1, 2, 0], [4, 3, 1, 2, 0]],
  [[4, 3, 1, 2, 0], [4, 3, 1, 2, 0]],
  [[1, 0, 4, 2, 3], [1, 0, 4, 2, 3]],
  [[4, 3, 1, 2, 0], [4, 3, 1, 2, 0]],
  [[4, 3, 1, 2, 0], [4, 3, 1, 2, 0]],
  [[4, 3, 1, 2, 0], [4, 3, 1, 2, 0]],
  [[4, 3, 1, 2, 0], [4, 3, 1, 2, 0]],
  [[4, 3, 1, 2, 0], [4, 3, 1, 2, 0]],
  [[4, 3, 1, 2, 0], [4, 3, 1, 2, 0]],
  [[4, 3, 1, 2, 0], [4, 3, 1, 2, 0]],
  [[4, 3, 1, 2, 0], [4, 3, 1, 2, 0]],
  [[1, 0, 4, 2, 3], [1, 0, 4, 2, 3]],
  [[4, 3, 1, 2, 0], [4, 3, 1, 2, 0]],
  [[4, 3, 1, 2, 0], [4, 3, 1, 2, 0]],
  [[4, 3, 1, 2, 0], [4, 3, 1, 2, 0]],
  [[4, 3, 1,



1.   Double-checks that each student and their matched mentor share the same language and that the mentor's skill level is at least the student's
2.   Outputs the names and the language of each matched pair



In [7]:
# Validate the last individual in the hall of fame and tabulate its pairs.
print("MATCHED RESULTS")
student_list = []
mentors_list = []
language_list = []
for student_idx, mentor_idx in zip(hof.items[-1][0], hof.items[-1][1]):
    # A pair is only reported when both checks pass; each failed check prints
    # its own error message.
    flag_language = bool(
        students['Language'][student_idx] == mentors['Language'][mentor_idx])
    if not flag_language:
        print("Language Error")

    student_skill = skill[students['Skill'][student_idx]]
    # Look the mentor's skill up with the mentor's own index; using the
    # student's index here would compare against the wrong mentor.
    mentor_skill = skill[mentors['Skill'][mentor_idx]]
    flag_skill = mentor_skill >= student_skill
    if not flag_skill:
        print("Skill Error")

    if flag_language and flag_skill:
        student_list.append(students['Name'][student_idx])
        mentors_list.append(mentors['Name'][mentor_idx])
        language_list.append(mentors['Language'][mentor_idx])

# NOTE: the "Langugage" column name is misspelled; it is kept unchanged so
# anything that reads this column by name keeps working.
result = pd.DataFrame({'Students':student_list, "Mentors":mentors_list, "Langugage":language_list})

display(result)

MATCHED RESULTS


Unnamed: 0,Langugage,Mentors,Students
0,Italian,Test7,Test2
1,Chinese,Test6,Test1
2,English,Test10,Test5
3,English,Test8,Test3
4,Vietnamese,Test9,Test4
