### Learning Engagement Random Assignment

In [15]:
import numpy as np
import random
import pandas as pd 
import datetime

In [16]:
def read_file(file_name, index_col = 'id'):
    '''
    This method accepts a comma sepreated text file and loads it into a list/ or a csv file and loads it into a df 
    '''
    if file_name.split('.')[1] == "txt":
        my_file = open(file_name, 'r')
        subjects = my_file.readline()
        subjects = subjects.split(',')
        return subjects
    else: #csv
        subjects = pd.read_csv(file_name, index_col = index_col)
        return subjects

In [17]:
def random_treatment_assignment(subjects, questions, time_points = [10, 20, 30, 40, 50, 60],
                                class_time = (2020, 11, 28, 18, 30), total_treatments = 2, experimenters = ['NoOne'],
                                subjects_split = 0.5, swap = True, subjects_omit = [], save_file_name = "random_assignment.csv"):
    '''
    This method randomize treatment assignment as well as treatment administration times.
    
    subjects          -- seq of subjects' names/ids
    questions         -- the questions to ask the subjects
    time_points       -- seq of times past the beginning of the class to administrate the treatment
    class_time        -- a list with the following format (year, month, day, hours, min) / can also be a datetime
    total_treatments  -- number of planned treatments
    subjects_split    -- fruction of subjects to assign to treatment
    swap              -- if the flag is set we generate 2 treatments and 
                         the assignment to treatment and control swaps for the second treatment
    file_name         -- the name of the file to save the results
    '''
    
    # drop duplicate and omitted subjects
    subjects_set = list(set(subjects))
    sub_omit_set = list(set(subjects_omit))
    subjects_ = [s for s in subjects_set if s not in sub_omit_set]
    
    class_time = datetime.datetime(*class_time)
    n_subjects = len(subjects_)  
    # choose the time slots when to administrate the treatment without replacement
    time_slots = np.sort(random.sample(time_points, k = total_treatments))
    treatment = []  
    treat_slot = []
    
    # if we use swap strategy, we randomize only for the first time slot and swap the assignment for the second
    if swap:
        if len(time_slots) < 2:
            raise IllegalArgumentError("In order to use the swap strategy total_treatments must be 2 (or greater) and at least 2 time_points")
        treat_slot = [time_slots[0]] * n_subjects + [time_slots[1]] * n_subjects
        # randomize subjects without replacement
        treat_subjects = random.sample(subjects_, k = int(subjects_split * n_subjects))
        treatment = [1 if x in treat_subjects else 0 for x in subjects_]
        treatment.extend(list(1 - np.asarray(treatment))) # swap assignment
    else:
        for i in range(total_treatments):     
            treat_slot.extend([time_slots[i]] * n_subjects)
            treat_subjects = random.sample(subjects_, k = int(subjects_split * n_subjects))
            treatment.extend([1 if x in treat_subjects else 0 for x in subjects_])
            
    # randomize questions - with replacement
    questions = [random.choices(questions)[0] if x == 1 else " " for x in treatment]
    experimenter = [random.choices(experimenters)[0] if x == 1 else " " for x in treatment]
    dates = [class_time + datetime.timedelta(minutes = int(x)) for x in treat_slot]
    
    df = pd.DataFrame({'date': dates, 'time_slot': treat_slot, 'subject': subjects_ * total_treatments, 
                       'treatment': treatment,'question': questions, 'experimenter': experimenter})
    df.to_csv(save_file_name)
    return df


In [18]:
def row_style(row, min_):
    if row.date <= min_:
        return pd.Series('background-color: lightblue', row.index)
    else:
        return pd.Series('background-color: lightpink', row.index)

In [19]:
def display_assignment(df_, researchers):
    min_ = min(df_.date)
    for r in researchers:
        print("\n\n\n* Experimenter: ", r)
        df_r = df_[df_.experimenter == r][['date', 'subject', 'question', 'experimenter']]
        display(df_r.style.apply(row_style, axis=1, args = [min_]))

    print("\n\n*****  The full treatment assignment:  ******")    
    display(df_.style.apply(row_style, axis=1, args = [min_]))

## Section 266, 4:00 pm November 3rd 

In [6]:
#### section 4: 4:30 pm October 29th #####

# load roster
subjects_430 = read_file("roster_section_266.csv")

# load researchers list of omitted
researchers = read_file("researchers_266.txt")
omit_students = read_file("omitted_subjects_266_11032020.txt") + researchers

# load questions
questions = read_file("questions.txt")

date_exp1 = (2020, 10, 29, 16, 30)
results_file = "oct_29_sec_430_assignment.csv"

df = random_treatment_assignment(subjects_430.name, questions, experimenters = researchers,
                                 subjects_omit = omit_students, class_time = date_exp1, 
                                 save_file_name = results_file)

display_assignment(df, researchers)





* Experimenter:  Vineetha Nalini


Unnamed: 0,date,subject,question,experimenter
0,2020-10-29 16:40:00,Rajiv Nair,what are we discussing? I missed it.,Vineetha Nalini
2,2020-10-29 16:40:00,Wade Holmes,what are we discussing? I missed it.,Vineetha Nalini
4,2020-10-29 16:40:00,Lester Yang,what are we discussing? I missed it.,Vineetha Nalini
6,2020-10-29 16:40:00,Lingyao Meng,what are we discussing? I missed it.,Vineetha Nalini
9,2020-10-29 17:10:00,Tim Chen,what are we discussing? I missed it.,Vineetha Nalini
11,2020-10-29 17:10:00,Poonam Parhar,what are we discussing? I missed it.,Vineetha Nalini
13,2020-10-29 17:10:00,Hailey Wu,what are we discussing? I missed it.,Vineetha Nalini
15,2020-10-29 17:10:00,Kenneth Pong,what are we discussing? I missed it.,Vineetha Nalini




*****  The full treatment assignment:  ******


Unnamed: 0,date,time_slot,subject,treatment,question,experimenter
0,2020-10-29 16:40:00,10,Rajiv Nair,1,what are we discussing? I missed it.,Vineetha Nalini
1,2020-10-29 16:40:00,10,Tim Chen,0,,
2,2020-10-29 16:40:00,10,Wade Holmes,1,what are we discussing? I missed it.,Vineetha Nalini
3,2020-10-29 16:40:00,10,Poonam Parhar,0,,
4,2020-10-29 16:40:00,10,Lester Yang,1,what are we discussing? I missed it.,Vineetha Nalini
5,2020-10-29 16:40:00,10,Hailey Wu,0,,
6,2020-10-29 16:40:00,10,Lingyao Meng,1,what are we discussing? I missed it.,Vineetha Nalini
7,2020-10-29 16:40:00,10,Kenneth Pong,0,,
8,2020-10-29 17:10:00,40,Rajiv Nair,0,,
9,2020-10-29 17:10:00,40,Tim Chen,1,what are we discussing? I missed it.,Vineetha Nalini


## Section 5, 6:30 pm November 19th

In [25]:
#### section 5: 6:30 pm October 29th #####

# load roster
subjects_630 = read_file("roster_section_630.csv")

# load researchers list of omitted
researchers = read_file("researchers_630.txt")
omit_students = read_file("omitted_subjects.txt") + researchers

# load questions
questions = read_file("questions.txt")

date_exp2 = (2020, 11, 19, 18, 30)
results_file = "nov_19_sec_630_assignment.csv"

df = random_treatment_assignment(subjects_630.name, questions, experimenters = researchers,
                                 subjects_omit = omit_students, class_time = date_exp2, 
                                 save_file_name = results_file)

display_assignment(df, researchers)




* Experimenter:  Dana Kaban


Unnamed: 0,date,subject,question,experimenter
5,2020-11-19 18:40:00,BK (Baokui) Yang,what are we discussing? I missed it.,Dana Kaban
11,2020-11-19 19:10:00,MENG-HSIEN (Steven),what are we discussing? I missed it.,Dana Kaban
14,2020-11-19 19:10:00,Koohong Chung,what are we discussing? I missed it.,Dana Kaban
15,2020-11-19 19:10:00,Nobu Yamaguchi,what are we discussing? I missed it.,Dana Kaban





* Experimenter:  Casey King


Unnamed: 0,date,subject,question,experimenter
1,2020-11-19 18:40:00,Thomas Hamnett,what are we discussing? I missed it.,Casey King
2,2020-11-19 18:40:00,LIN Brian Schoenleber,what are we discussing? I missed it.,Casey King
4,2020-11-19 18:40:00,Menglu He,what are we discussing? I missed it.,Casey King
8,2020-11-19 19:10:00,Kevin Kory,what are we discussing? I missed it.,Casey King




*****  The full treatment assignment:  ******


Unnamed: 0,date,time_slot,subject,treatment,question,experimenter
0,2020-11-19 18:40:00,10,Kevin Kory,0,,
1,2020-11-19 18:40:00,10,Thomas Hamnett,1,what are we discussing? I missed it.,Casey King
2,2020-11-19 18:40:00,10,LIN Brian Schoenleber,1,what are we discussing? I missed it.,Casey King
3,2020-11-19 18:40:00,10,MENG-HSIEN (Steven),0,,
4,2020-11-19 18:40:00,10,Menglu He,1,what are we discussing? I missed it.,Casey King
5,2020-11-19 18:40:00,10,BK (Baokui) Yang,1,what are we discussing? I missed it.,Dana Kaban
6,2020-11-19 18:40:00,10,Koohong Chung,0,,
7,2020-11-19 18:40:00,10,Nobu Yamaguchi,0,,
8,2020-11-19 19:10:00,40,Kevin Kory,1,what are we discussing? I missed it.,Casey King
9,2020-11-19 19:10:00,40,Thomas Hamnett,0,,


## Section 3, 6:30 pm Wednesday November 18th

In [8]:
#### Section 3, 6:30 pm Wednesday November 18th #####

# load roster
subjects_630 = read_file("roster_section_241_Wed_630.csv")

# load researchers list of omitted
researchers = read_file("researchers_section_241_Wed_630.txt")
omit_students = read_file("omitted_subject_section_241_Wed_630.txt") + researchers

# load questions
questions = read_file("questions_section_241_Wed_630.txt")

date_exp2 = (2020, 10, 29, 18, 30)
results_file = "nov_18_sec_630_assignment.csv"

df = random_treatment_assignment(subjects_630.name, questions, experimenters = researchers,
                                 subjects_omit = omit_students, class_time = date_exp2, 
                                 save_file_name = results_file)

display_assignment(df, researchers)




* Experimenter:  Alexandra Savelieva


Unnamed: 0,date,subject,question,experimenter
1,2020-10-29 19:10:00,Jacob,what are we discussing? I missed it.,Alexandra Savelieva
3,2020-10-29 19:10:00,Hao Wu,what are we discussing? I missed it.,Alexandra Savelieva
5,2020-10-29 19:30:00,Jorge Hernandez,what are we discussing? I missed it.,Alexandra Savelieva
7,2020-10-29 19:30:00,Derrick Hee,what are we discussing? I missed it.,Alexandra Savelieva
9,2020-10-29 19:30:00,Clayton Monis,what are we discussing? I missed it.,Alexandra Savelieva




*****  The full treatment assignment:  ******


Unnamed: 0,date,time_slot,subject,treatment,question,experimenter
0,2020-10-29 19:10:00,40,Jorge Hernandez,0,,
1,2020-10-29 19:10:00,40,Jacob,1,what are we discussing? I missed it.,Alexandra Savelieva
2,2020-10-29 19:10:00,40,Derrick Hee,0,,
3,2020-10-29 19:10:00,40,Hao Wu,1,what are we discussing? I missed it.,Alexandra Savelieva
4,2020-10-29 19:10:00,40,Clayton Monis,0,,
5,2020-10-29 19:30:00,60,Jorge Hernandez,1,what are we discussing? I missed it.,Alexandra Savelieva
6,2020-10-29 19:30:00,60,Jacob,0,,
7,2020-10-29 19:30:00,60,Derrick Hee,1,what are we discussing? I missed it.,Alexandra Savelieva
8,2020-10-29 19:30:00,60,Hao Wu,0,,
9,2020-10-29 19:30:00,60,Clayton Monis,1,what are we discussing? I missed it.,Alexandra Savelieva


## Testing

In [71]:
# reload assignment
assignment = pd.read_csv("oct_29_sec_630_assignment.csv")
assignment

Unnamed: 0.1,Unnamed: 0,date,time_slot,subject,treatment,question
0,0,2020-10-29 18:50:00,20,Menglu He,0,
1,1,2020-10-29 18:50:00,20,Joanna Yu,1,I spaced out. what are we debating?
2,2,2020-10-29 18:50:00,20,Nobu Yamaguchi,1,I spaced out. what are we debating?
3,3,2020-10-29 18:50:00,20,BK (Baokui) Yang,0,
4,4,2020-10-29 18:50:00,20,MENG-HSIEN (Steven),0,
5,5,2020-10-29 18:50:00,20,Koohong Chung,0,
6,6,2020-10-29 18:50:00,20,Kevin Kory,0,
7,7,2020-10-29 18:50:00,20,Thomas Hamnett,1,I spaced out. what are we debating?
8,8,2020-10-29 18:50:00,20,Justin Trobec,1,I spaced out. what are we debating?
9,9,2020-10-29 18:50:00,20,LIN Brian Schoenleber,1,I spaced out. what are we debating?


In [155]:
dummy_questions = ["How is your day?", "What is he talking about?"]
dummy_subjects = ["DK", "VR", "BK", "AS", "CK"]
time_starting_points = [10, 20, 30, 40, 50, 60] # omit first and last 10 min

In [158]:
# example
df = random_treatment_assignment(dummy_subjects, dummy_questions, total_treatments = 2, swap = False)
df

Unnamed: 0,date,time_slot,subject,treatment,question,experimenter
0,2020-11-28 18:50:00,20,VR,0,,
1,2020-11-28 18:50:00,20,DK,0,,
2,2020-11-28 18:50:00,20,AS,1,What is he talking about?,NoOne
3,2020-11-28 18:50:00,20,BK,1,What is he talking about?,NoOne
4,2020-11-28 18:50:00,20,CK,0,,
5,2020-11-28 19:10:00,40,VR,1,How is your day?,NoOne
6,2020-11-28 19:10:00,40,DK,0,,
7,2020-11-28 19:10:00,40,AS,1,How is your day?,NoOne
8,2020-11-28 19:10:00,40,BK,0,,
9,2020-11-28 19:10:00,40,CK,0,,
