This notebook extracts and exports measures of learning outcomes from the pre-test and post-test responses in the JUSThink Dialogue and Actions Corpus.

In later notebooks, we focus on one feature to estimate the learning outcomes: relative learning gain.

In [1]:
import pathlib as pl
import pandas as pd
import numpy as np

## Define paths.

In [2]:
# Inputs: the raw pre-test and post-test response tables.
data_dir = pl.Path('../data')
test_responses_dir = data_dir / 'test_responses'
pretest_file = test_responses_dir / 'justhink19_pretest.csv'
posttest_file = test_responses_dir / 'justhink19_posttest.csv'

# Outputs: the learning features table written at the end of the notebook.
processed_data_dir = pl.Path('../processed_data')
output_dir = processed_data_dir / 'learning_features'
learning_file = output_dir / 'justhink19_learning_features.csv'

# Create output directories if they do not exist.
# exist_ok=True makes this safe to re-run and avoids the gap between a
# separate exists() check and the mkdir() call; it still raises if the path
# exists but is not a directory.
for folder in [output_dir]:
    folder.mkdir(parents=True, exist_ok=True)

## Load data.

In [3]:
# Read the pre-test responses, using the team number column as the row index.
pretest_df = pd.read_csv(pretest_file, index_col='team_no')

# Report the number of rows; the count includes the answer-key row.
print('Pretest table contains {} teams (including key)'.format(
    pretest_df.shape[0]))

Pretest table contains 40 teams (including key)


In [4]:
# Read the post-test responses, using the team number column as the row index.
posttest_df = pd.read_csv(posttest_file, index_col='team_no')

# Report the size of the post-test table itself (the count includes the
# answer-key row), so a mismatch with the pre-test table is visible here.
print('Posttest table contains {} teams (including key)'.format(len(posttest_df)))

Pretest table contains 40 teams (including key)


In [5]:
def compute_participant_score(df, participant, 
                              key_index='key', inplace=True):
    """Add a '<participant>_score' column with the fraction of correct answers.

    The columns belonging to the participant are those whose name contains
    '_<participant>'. Each row's responses in those columns are compared with
    the responses in the row labelled `key_index`, and the score is the number
    of matching responses divided by the number of compared columns.

    Args:
        df: table of responses that contains the answer-key row.
        participant: participant label used in the column names (e.g. 'A').
        key_index: index label of the row holding the correct responses.
        inplace: if True, the column is added to `df` itself; otherwise a
            copy of `df` is modified and `df` is left untouched.

    Returns:
        The table carrying the new score column (`df` itself when `inplace`
        is True, the modified copy otherwise).
    """
    target = df if inplace else df.copy()

    # Select this participant's response columns.
    marker = '_{}'.format(participant)
    responses = target[[name for name in target.columns if marker in name]].copy()

    # The correct responses, one per selected column.
    answer_key = responses.loc[key_index]

    # Compare every row against the key column by column, then count matches.
    is_correct = responses.eq(answer_key, axis='columns')
    n_correct = is_correct.sum(axis='columns')

    # Assign by position (plain array) rather than by index label.
    target['{}_score'.format(participant)] = (n_correct / len(answer_key)).to_numpy()

    return target


# Score each participant on both tests. The call relies on the default
# in-place behaviour, so the '<participant>_score' columns are added directly
# to pretest_df and posttest_df.
for participant in ('A', 'B'):
    for test_df in (pretest_df, posttest_df):
        compute_participant_score(test_df, participant, key_index='key')

## Compute relative learning gain (RLG) [1].
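$$ 
RLG = \begin{cases} 
      \frac{posttest - pretest}{Max - pretest} & posttest \geq pretest \\
      \frac{posttest - pretest}{pretest} & posttest < pretest.
\end{cases}
$$

Here, $pretest$ and $posttest$ are a participant's scores in the two tests, and $Max$ is the maximum attainable score (1, since the scores are fractions of correct responses). Note that the code below uses the first case only for a strict improvement ($posttest > pretest$), which avoids dividing by zero when a participant already scored $Max$ in the pre-test.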


[1] M. Sangin, G. Molinari, M.-A. Nüssli, and P. Dillenbourg, “Facilitating peer knowledge modeling: Effects of a knowledge awareness tool on collaborative learning outcomes and processes,” Computers in Human Behavior, vol. 27, no. 3, pp. 1059–1067, May 2011, doi: 10.1016/j.chb.2010.05.032.

In [6]:
def relative_learning_gain(pre, post, max_score=1.0):
    """Return the relative learning gain from a pre-test to a post-test score.

    An improvement is normalised by the room left to improve
    (`max_score - pre`), and a decline by the room left to decline (`pre`).

    Args:
        pre: pre-test score.
        post: post-test score.
        max_score: maximum attainable score (1.0 for fractional scores).

    Returns:
        The relative learning gain; 0.0 when the two scores are equal.
    """
    # Equal scores mean no gain. Handling the tie explicitly avoids 0/0
    # (NaN) when both scores are 0, or when both equal max_score.
    if post == pre:
        return 0.0
    if post > pre:
        return (post - pre) / (max_score - pre)
    return (post - pre) / pre


# Start from an empty table that shares the pre-test index.
learning_df = pretest_df[[]].copy()

# Compute the relative learning gain per participant.
for participant in ['A', 'B']:
    score_col = '{}_score'.format(participant)
    learning_df['{}_RLG'.format(participant)] = [
        relative_learning_gain(pretest_df.loc[team_no, score_col],
                               posttest_df.loc[team_no, score_col])
        for team_no in pretest_df.index
    ]

# Compute the averaged team relative learning gain.
learning_df['RLG'] = learning_df[['A_RLG', 'B_RLG']].mean(axis=1)

# Add pretest and posttest scores.
for participant in ['A', 'B']:
    for test_name, test_df in [('pretest', pretest_df), ('posttest', posttest_df)]:
        score_col = '{}_score'.format(participant)
        learning_df['{}_{}'.format(participant, test_name)] = test_df[score_col]

# Remove the answer-key row, which is not a team, then use integer team numbers.
learning_df = learning_df.drop(['key'])
learning_df.index = learning_df.index.map(int)

# Round the values to 6 decimal places.
learning_df = learning_df.round(6)

learning_df

Unnamed: 0_level_0,A_RLG,B_RLG,RLG,A_pretest,A_posttest,B_pretest,B_posttest
team_no,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
6,-0.2,0.0,-0.1,0.5,0.4,0.6,0.6
7,0.0,0.285714,0.142857,0.7,0.7,0.3,0.5
8,0.2,-0.375,-0.0875,0.5,0.6,0.8,0.5
9,0.2,0.25,0.225,0.5,0.6,0.6,0.7
10,0.333333,0.4,0.366667,0.4,0.6,0.5,0.7
11,0.333333,0.4,0.366667,0.7,0.8,0.5,0.7
12,0.0,0.5,0.25,0.5,0.5,0.6,0.8
14,0.0,-0.285714,-0.142857,0.6,0.6,0.7,0.5
15,0.333333,-0.375,-0.020833,0.4,0.6,0.8,0.5
16,0.0,0.2,0.1,0.6,0.6,0.5,0.6


## Export the learning outcomes table to file.

In [7]:
# Write the learning features table (indexed by team number) as CSV,
# then confirm where it was written.
learning_df.to_csv(path_or_buf=learning_file)
print(
    'Written to {}'.format(learning_file)
)

Written to ../processed_data/learning_features/justhink19_learning_features.csv
