# Group Formation

## Data

In [5]:
#features = ["clickrate_active"] # change features according to your data
#columns = ["code_module","code_presentation","id_student"] + features
#subdata = data[columns]

In [6]:
def pool_student(studentID_dict,code_module,code_presentation):
    """Return the student pool stored for one course presentation.

    The pool is read from the ``"unique"`` entry of ``studentID_dict`` and
    looked up with the ``(code_module, code_presentation)`` pair as key.
    """
    pools_by_course = studentID_dict["unique"]
    course_key = (code_module, code_presentation)
    return pools_by_course[course_key]

## Genetic algorithm

In [7]:
def population(initial_individual, nbr_population):
    """Build the starting population for the genetic algorithm.

    Each of the ``nbr_population`` individuals is a list holding every
    element of ``initial_individual`` exactly once, in a random order drawn
    with ``random.sample``.
    """
    def shuffled_copy():
        # Sampling as many items as there are yields a random permutation.
        return random.sample(list(initial_individual), initial_individual.shape[0])

    return [shuffled_copy() for _ in range(nbr_population)]

In [8]:
def fitness(subdata_norm,norm_column,individual,nbr_groups,nbr_not_affected):
    """Score how far the groups encoded by ``individual`` stray from the overall mean.

    ``individual`` is read as ``nbr_groups`` consecutive slices of equal
    size followed by one leftover slice of ``nbr_not_affected`` ids.  For
    every column in ``norm_column`` the squared difference between each
    slice's mean and the mean of the whole column is accumulated; the result
    is ``sqrt(sum / (len(norm_column) * nbr_groups)) * 100``.  Lower is
    better.

    Args:
        subdata_norm: DataFrame with an ``id_student`` column and the
            columns named in ``norm_column``.
        norm_column: names of the feature columns to evaluate.
        individual: sequence of student ids (one ordering of the pool).
        nbr_groups: number of full-size groups.
        nbr_not_affected: number of trailing ids that form the leftover group.

    Returns:
        The error as a float.

    Raises:
        ZeroDivisionError: if ``norm_column`` is empty.
    """
    # The group size follows from the arguments, so it is derived here
    # instead of being read from a module-level ``nbr_members`` variable.
    if nbr_groups > 0:
        group_size = (len(individual) - nbr_not_affected) // nbr_groups
    else:
        group_size = 0

    groups = [
        individual[g * group_size:(g + 1) * group_size]
        for g in range(nbr_groups)
    ]
    if nbr_not_affected > 0:
        # An empty leftover slice would have a NaN mean and poison the
        # whole score, so it is only evaluated when it has members.
        start = nbr_groups * group_size
        groups.append(individual[start:start + nbr_not_affected])

    # Row membership does not depend on the column: compute it once.
    masks = [subdata_norm.id_student.isin(group) for group in groups]

    diff = 0
    for column in norm_column:
        values = subdata_norm[column]
        overall_mean = values.mean()
        for mask in masks:
            group_mean = np.mean(values[mask])
            diff += np.power(group_mean - overall_mean, 2)

    # max(..., 1) keeps the scale factor finite when the pool is too small
    # for a single full group (only the leftover group exists then).
    scale = (1 / len(norm_column)) * (1 / max(nbr_groups, 1))
    return np.sqrt(scale * diff) * 100

In [9]:
def selection(population_list, scores,k=3):
    """Pick one individual by tournament selection.

    One random index is drawn as the initial winner, then ``k - 1`` further
    random indices challenge it.  The index with the lowest score wins; on a
    tie the earlier draw is kept.  The matching entry of ``population_list``
    is returned.
    """
    pool_size = len(population_list)
    first_pick = randint(0, pool_size)
    challengers = randint(0, pool_size, k - 1)
    # min() keeps the first of equally scored candidates, like a strict "<".
    winner = min([first_pick, *challengers], key=lambda ix: scores[ix])
    return population_list[winner]

In [10]:
def crossover(p1, p2, r_cross):
    """Create two children from two parents with one-point crossover.

    With probability ``r_cross`` a cut point is drawn and the tails of the
    parents are exchanged; otherwise the children are plain copies.  The
    children are not guaranteed to be permutations: a tail exchange can
    duplicate ids, so callers have to validate them.

    Args:
        p1, p2: parent lists of equal length.
        r_cross: crossover probability in ``[0, 1]``.

    Returns:
        ``[c1, c2]``, two new lists.
    """
    c1, c2 = p1.copy(), p2.copy()
    # randint(1, len(p1) - 2) has an empty range for fewer than four genes
    # and would raise ValueError; such short parents are simply copied.
    if rand() < r_cross and len(p1) > 3:
        pt = randint(1, len(p1) - 2)
        c1 = p1[:pt] + p2[pt:]
        c2 = p2[:pt] + p1[pt:]
    return [c1, c2]

In [11]:
def mutation(bitstring, r_mut):
    """Swap-mutate ``bitstring`` in place.

    Every position is selected with probability ``r_mut``; a selected
    position exchanges its value with another, different position chosen
    uniformly at random.  Because values are only swapped, the set of values
    in ``bitstring`` never changes.

    Args:
        bitstring: mutable sequence to mutate.
        r_mut: per-position mutation probability in ``[0, 1]``.

    Returns:
        None; the sequence is modified in place.
    """
    size = len(bitstring)
    if size < 2:
        # There is no other position to swap with.
        return
    for i in range(size):
        if rand() < r_mut:
            # Draw among the size - 1 other positions and skip over i, so a
            # selected position is always actually swapped (previously a
            # draw equal to i was re-drawn but the swap was then skipped).
            partner = randint(0, size - 1)
            if partner >= i:
                partner += 1
            bitstring[i], bitstring[partner] = bitstring[partner], bitstring[i]

In [12]:
def genetic_algorithm(subdata_norm, norm_column, initial_individual, n_bits, nbr_iteration, nbr_population, nbr_groups,nbr_not_affected, r_cross, r_mut):
    """Search for an ordering of the student pool with a low ``fitness`` score.

    Each generation scores the current population, remembers the best
    individual seen so far, picks parents by tournament ``selection`` and
    breeds a new population with ``crossover`` and ``mutation``.  Children
    that are not permutations of the pool (crossover may duplicate ids) are
    discarded and bred again.

    Args:
        subdata_norm, norm_column, nbr_groups, nbr_not_affected: forwarded
            unchanged to ``fitness``.
        initial_individual: the pool of student ids; must not contain
            duplicates.
        n_bits: accepted for interface compatibility; not used.
        nbr_iteration: number of generations to evaluate.
        nbr_population: number of individuals per generation.
        r_cross: crossover probability.
        r_mut: per-position mutation probability.

    Returns:
        ``[best, best_eval]``: the best individual and its score.

    Raises:
        ValueError: if ``initial_individual`` contains duplicate ids.
    """
    expected_size = len(initial_individual)
    if len(set(initial_individual)) != expected_size:
        # No child could ever pass the permutation check below, so the
        # breeding loop would never terminate.
        raise ValueError("initial_individual must not contain duplicate ids")

    population_list = population(initial_individual, nbr_population)
    best = population_list[0]
    best_eval = fitness(subdata_norm, norm_column, best, nbr_groups, nbr_not_affected)

    for _ in range(nbr_iteration):
        scores = [
            fitness(subdata_norm, norm_column, candidate, nbr_groups, nbr_not_affected)
            for candidate in population_list
        ]
        for candidate, score in zip(population_list, scores):
            if score < best_eval:
                best, best_eval = candidate, score

        selected = [selection(population_list, scores) for _ in range(nbr_population)]

        # Breed until the new generation is exactly nbr_population strong, so
        # the next round of scoring and selection sees a full population.
        # Parents are always valid permutations (the initial population is,
        # and only validated children are kept), so only children are checked.
        children = []
        j = 0
        while len(children) < nbr_population:
            p1 = selected[j % nbr_population]
            p2 = selected[(j + 1) % nbr_population]
            for c in crossover(p1, p2, r_cross):
                mutation(c, r_mut)
                if len(set(c)) == expected_size and len(children) < nbr_population:
                    children.append(c)
                    # Advance the parent window only on success; a failed
                    # pair is retried with fresh random draws.
                    j += 1

        # NOTE: the children of the last generation are not scored; the
        # returned best comes from the populations evaluated above.
        population_list = children
    return [best, best_eval]

## Assigning groups

In [13]:
def assign_group(best,nbr_members,nbr_groups,nbr_not_affected):
    """Map each id in ``best`` to a 1-based group number.

    The first ``nbr_members`` ids get group 1, the next ``nbr_members`` ids
    group 2, and so on for ``nbr_groups`` groups; the following
    ``nbr_not_affected`` ids all get group ``nbr_groups + 1``.
    """
    labels = []
    for group_no in range(1, nbr_groups + 1):
        labels.extend([group_no] * nbr_members)
    labels.extend([nbr_groups + 1] * nbr_not_affected)
    return {student: label for student, label in zip(best, labels)}

In [14]:
def grouping(subdata,nbr_members,columns,nbr_iteration=1,nbr_population=20,r_cross=0.9):
    """Form student groups for every course presentation found in ``subdata``.

    The feature columns (everything in ``columns`` except ``code_module``,
    ``code_presentation`` and ``id_student``) are min-max normalised over the
    whole frame.  For each (module, presentation) pair the pool of student
    ids is handed to ``genetic_algorithm`` and the best ordering found is
    turned into group numbers with ``assign_group``.

    Args:
        subdata: DataFrame holding the key columns and the feature columns.
        nbr_members: target number of students per group (at least 1).
        columns: names of the columns of ``subdata`` to consider.
        nbr_iteration: number of generations for the genetic algorithm.
        nbr_population: population size for the genetic algorithm.
        r_cross: crossover probability for the genetic algorithm.

    Returns:
        A dict mapping ``id_student`` to a group number.  The per-presentation
        results are merged with ``ChainMap``, so when the same id occurs in
        several presentations the entry of the first one processed is kept.

    Raises:
        ValueError: if ``nbr_members`` is smaller than 1.
    """
    if nbr_members < 1:
        raise ValueError("nbr_members must be at least 1")

    key_columns = {"code_module","code_presentation","id_student"}
    norm_column = [c for c in columns if c not in key_columns]

    subdata_norm = subdata.copy()
    for column in norm_column:
        # min/max are computed once per column instead of once per row.
        lowest = subdata[column].min()
        span = subdata[column].max() - lowest
        if span == 0:
            # A constant column carries no information; avoid 0/0 -> NaN.
            subdata_norm[column] = 0.0
        else:
            subdata_norm[column] = (subdata[column] - lowest) / span

    # Built from SeriesGroupBy.unique() so the layout does not depend on how
    # ``agg`` with a list of functions shapes its result under
    # ``as_index=False``; the {"unique": {...}} wrapper keeps the structure
    # pool_student expects.
    studentID_dict = {
        "unique": subdata.groupby(["code_module","code_presentation"])["id_student"].unique().to_dict()
    }
    course_dict = {
        "unique": subdata.groupby("code_module")["code_presentation"].unique().to_dict()
    }

    dict_groups_list = []
    for code_module, presentations in course_dict["unique"].items():
        for code_prez in presentations:
            initial_individual = pool_student(studentID_dict,code_module,code_prez)

            nbr_bits = initial_individual.shape[0]
            nbr_groups = nbr_bits // nbr_members
            nbr_not_affected = nbr_bits - nbr_members*nbr_groups

            if nbr_groups == 0:
                # Fewer students than one full group: nothing to optimise,
                # everybody ends up in the single leftover group.
                best_individual = list(initial_individual)
            else:
                r_mut = 1.0 / float(nbr_bits)
                best_individual = genetic_algorithm(
                    subdata_norm, norm_column, initial_individual, nbr_bits,
                    nbr_iteration, nbr_population, nbr_groups,
                    nbr_not_affected, r_cross, r_mut,
                )[0]

            dict_groups_list.append(
                assign_group(best_individual,nbr_members,nbr_groups,nbr_not_affected)
            )

    return dict(ChainMap(*dict_groups_list))

## Global function

In [15]:
def fonction_globale(learningstyle,nbr_members,subdata,columns):
    """Top-level wrapper that delegates group formation to ``grouping``.

    ``learningstyle`` is accepted but not used here; the remaining arguments
    are passed on to ``grouping`` and its result is returned unchanged.
    """
    group_by_student = grouping(subdata, nbr_members, columns)
    return group_by_student

✅ MAIN

In [16]:
# Module-level settings: a feature column name and a group size.
# NOTE(review): presumably meant as the arguments for FormGr below - confirm.
learningstyle="clickrate_active"
nbr_members=5

def FormGr(learningstyle,nbr_members):
    """Form groups on one feature column and save them to Google Drive.

    Mounts Google Drive, reads the click-rate CSV, runs ``fonction_globale``
    on the key columns plus the ``learningstyle`` column, attaches the
    resulting group number to every row and writes the result to
    ``Grouping_on_<learningstyle>.csv``.

    Args:
        learningstyle: name of the feature column of the CSV to group on.
        nbr_members: target number of students per group.

    Returns:
        None; the result is written to disk.
    """
    # NOTE: these imports bind names only inside this function; the
    # module-level helpers need their own module-level imports.
    import pandas as pd
    import numpy as np
    import random
    from numpy.random import randint
    from numpy.random import rand

    import warnings
    warnings.filterwarnings('ignore')
    from collections import ChainMap
    from google.colab import drive
    drive.mount('/content/drive')
    data = pd.read_csv("/content/drive/MyDrive/Colab Notebooks (1)/clickls_per_module.csv")

    features = [learningstyle]
    columns = ["code_module","code_presentation","id_student"] + features
    # Work on an explicit copy so that adding the "grouping" column below is
    # not an assignment onto a slice of ``data``.
    subdata = data[columns].copy()

    dict_grouping = fonction_globale(learningstyle,nbr_members,subdata,columns)
    subdata["grouping"] = subdata["id_student"].map(lambda x: dict_grouping[x])

    subdata = subdata[['code_module','code_presentation','id_student','grouping']]

    path = '/content/drive/MyDrive/Colab Notebooks/Grouping_on_'+learningstyle+'.csv'
    # Let pandas open the file itself: it applies the encoding and the
    # newline handling the CSV writer expects.
    subdata.to_csv(path, encoding='utf-8-sig')

## Results

In [17]:
#nbr_members = 5

#dict_grouping = grouping(subdata,nbr_members,columns)

#subdata["grouping"] = subdata["id_student"].map(lambda x: dict_grouping[x])

In [18]:
#subdata= subdata[['code_module','code_presentation','id_student','grouping']]
#subdata

In [19]:
#path = '/content/drive/MyDrive/Colab Notebooks/Grouping.csv'
#with open(path, 'w', encoding = 'utf-8-sig') as f:
#  subdata.to_csv(f)

In [20]:
#subdata=pd.merge(subdata,data,on=['code_module','code_presentation','id_student'])
#subdata

In [21]:
#path = '/content/drive/MyDrive/Colab Notebooks/GroupingLS.csv'
#with open(path, 'w', encoding = 'utf-8-sig') as f:
#  subdata.to_csv(f)