In [1]:
import pandas as pd
import numpy as np 
import random

In [2]:
# Pool of team names; one is consumed per team, in shuffled order.
team_names = [
    'Team Alan Turing',
    'Team Donald Knuth',
    'Team Ada Lovelace',
    'Team Reverend Thomas Bayes',
    'Team Andrey Markov',
    'Team Grace Hopper',
    'Team Marvin Minsky',
    'Team John McCarthy',
    'Team Claude Shannon',
    'Team Maryam Mirzakhani',
    'Team Karl Pearson',
    'Team Karl Friedrich Gauss',
    'Team Johan Bernoulli',
    'Team Florence Nightingale',
    'Team Andrey Kolmogorov',
    'Team Francis Galton',
    'Team Augustin-Louis Cauchy',
    'Team Ronald Fisher',
    'Team Annie Easley',
    'Team Noam Chomsky',
    'Team Pierre-Simon Laplace',
    'Team Isaac Newton',
    'Team Saturday @6',
    'Team quiz week',
]

# Fixed seed so the shuffled order (and hence the name each team gets)
# is the same on every fresh run of the notebook.
random.seed(44)
random.shuffle(team_names)

In [3]:
# Load the student roster; per the original note, this CSV lists the
# students together with the section each one belongs to.
df = pd.read_csv(
    filepath_or_buffer="../../../common/GitHubLMS/data/v_students2019_blocks-5-6.csv"
)

In [4]:
# Bucket the roster by the `lab` (section) column.
grps = df.groupby(by='lab')

# Number of students per section, shown as this cell's output.
df['lab'].value_counts()

L01    47
L02    47
Name: lab, dtype: int64

In [5]:
# Number of teams to form in each section (L01, L02).
L01_NTEAMS, L02_NTEAMS = 12, 12

In [6]:
# Pull one dataframe per section out of the grouped roster.
L01_df, L02_df = (grps.get_group(section) for section in ('L01', 'L02'))

In [7]:
def shuffle_and_make_teams(df, nteams=12, random_state=44):
    """
    Given a pandas dataframe df and the number of expected teams nteams, 
    this function shuffles the rows in the dataframe and assigns a team id 
    and a team name to each row.
    
    Team ids are taken from the module-level counter `team_id`, which is 
    advanced by one per team created, so consecutive calls keep numbering 
    where the previous call stopped. The name of team k is `team_names[k-1]`.
    The input dataframe is not modified.
    
    Arguments
    
    df -- (pandas DataFrame) a dataframe from a section where you want to form teams 
    nteams -- (int) the number of teams you want to create (default=12)
    random_state -- seed passed to DataFrame.sample so the shuffle is 
                    reproducible (default=44)
    
    Return 
    df -- (pandas DataFrame) the shuffled rows with two extra columns, 
          'team_id' and 'team_name'
    
    Raises
    IndexError -- if `team_names` does not hold enough names for the ids 
                  this call would hand out; the counter is left untouched.
    """
    global team_id

    # Check up front that there are enough names, so a failure cannot leave
    # the global counter advanced part-way through a section.
    last_id = team_id + nteams - 1
    if last_id > len(team_names):
        raise IndexError(
            "not enough team names: need ids up to %d but only %d names are available"
            % (last_id, len(team_names))
        )

    # passing random state just to make it reproducible. 
    df_shuffled = df.sample(frac=1.0, random_state=random_state)

    # Split row *positions* rather than the dataframe itself: the chunk sizes
    # are the same as np.array_split(df_shuffled, nteams), but this avoids the
    # warning newer pandas emits when a DataFrame is passed to np.array_split.
    position_chunks = np.array_split(np.arange(len(df_shuffled)), nteams)

    dfs = []
    for positions in position_chunks:
        # .assign returns a new frame, so nothing is written onto a slice of
        # df_shuffled (the original pattern can raise SettingWithCopyWarning).
        gdf = df_shuffled.iloc[positions].assign(
            team_id=team_id,
            team_name=team_names[team_id - 1],
        )
        team_id += 1
        dfs.append(gdf)
    return pd.concat(dfs)

In [8]:
# Start the shared team counter at 1 each time this cell runs.
team_id = 1

# Order matters: both calls advance the shared counter, so L01 receives
# the first run of ids and L02 continues from where L01 stopped.
L01_teams_df = shuffle_and_make_teams(L01_df, L01_NTEAMS)
L02_teams_df = shuffle_and_make_teams(L02_df, L02_NTEAMS)

# Stack the two sections into a single roster.
final_teams_df = pd.concat((L01_teams_df, L02_teams_df))

In [9]:
# Write the team assignments to disk, keeping only the listed columns
# and leaving out the dataframe index.
final_teams_df.to_csv(
    'teams.csv',
    index=False,
    columns=['surname', 'given_name', 'lab', 'cwl', 'team_id', 'team_name'],
)