In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import itertools
import pandas as pd
from collections import defaultdict, Counter
from scipy.stats import chisquare,chi2_contingency
from statsmodels.stats.proportion import proportions_ztest, proportions_chisquare
from IPython.display import display, HTML
from analysis_utils import get_data

sns.set_style('white')
sns.set_context('notebook', font_scale=1.3)
sns.set_palette('tab10')
pd.options.mode.chained_assignment = None

### Getting pids of required participants

In [None]:
# Map each experiment number (1-4) to the list of participant ids kept for analysis.
# Every pids/<n>.csv is read without a header row; column 0 holds the ids.
exp_pids = {
    exp_num: pd.read_csv(f"pids/{exp_num}.csv", header = None, index_col = False)[0].tolist()
    for exp_num in range(1, 5)
}
# Report how many participant ids were read for experiment 4.
print(len(exp_pids[4]))

### Experimental condition data

In [None]:
# Display names of the experimental conditions, keyed by condition index,
# one dict per experiment (list position = experiment number - 1).
exp_condition_names = [
    {0: 'No FB', 1: 'MCFB', 2: 'Action FB'},
    {0: 'No FB', 1: 'MCFB'},
    {0: 'No FB', 1: 'MCFB'},
    {
        0: 'Info without reward',
        1: 'Without info and without reward',
        2: 'Info with reward',
        3: 'Without info with reward',
    },
]
# Number of conditions per experiment, i.e. [3, 2, 2, 4].
num_conditions = [len(names) for names in exp_condition_names]

### Experiment 1

In [None]:
from joblib import load

# Participants retained for experiment 1.
pids = exp_pids[1]
# NOTE: joblib.load unpickles the file, so only load strategy files from a trusted source.
strategy_sequences = load("../data/1/strategy_sequences.pkl")
# Keep only the sequences of the retained participants (a missing pid raises a KeyError).
strategy_sequences = dict((pid, strategy_sequences[pid]) for pid in pids)

In [None]:
### Plotting frequencies of strategies under MCFB over time
exp_data = get_data(1)
pdf = exp_data['participants']
# For each feedback label, take the pids recorded with that label and keep only
# those that are also in the retained participant list `pids`.
mcfb_pids, nofb_pids, actionfb_pids = (
    [pid for pid in pdf.loc[pdf.feedback == feedback, 'pid'].tolist() if pid in pids]
    for feedback in ('meta', 'none', 'action')
)

In [None]:
def get_strategy_counts(S, num_strategies=38):
    """Count how many participants used each strategy on each trial.

    Parameters
    ----------
    S : array-like of int, shape (num_participants, num_trials)
        S[i][k] is the (0-indexed) strategy of participant i on trial k.
    num_strategies : int, optional
        Number of columns of the result, i.e. the size of the strategy space.
        Defaults to 38, the value that was previously hard-coded.

    Returns
    -------
    numpy.ndarray of float, shape (num_trials, num_strategies)
        Entry [k, s] is the number of participants whose strategy on trial k was s.
    """
    S = np.asarray(S)
    num_trials = S.shape[1]
    strategy_count = np.zeros((num_trials, num_strategies))
    # Row index (trial number) for every entry of S, so that each
    # (trial, strategy) pair can be accumulated in a single call.
    trial_index = np.broadcast_to(np.arange(num_trials), S.shape)
    # np.add.at accumulates repeated (trial, strategy) pairs, unlike `+=` with fancy indexing.
    np.add.at(strategy_count, (trial_index, S), 1)
    return strategy_count

In [None]:
# Stack every condition's strategy sequences into one array per condition
# (rows are participants, columns are trials, as consumed by get_strategy_counts).
mcfb_sequences, nofb_sequences, actionfb_sequences = (
    np.array([strategy_sequences[pid] for pid in condition_pids])
    for condition_pids in (mcfb_pids, nofb_pids, actionfb_pids)
)

# Per-trial strategy counts, then relative frequencies (counts / number of participants).
mcfb_counts, nofb_counts, actionfb_counts = map(
    get_strategy_counts, (mcfb_sequences, nofb_sequences, actionfb_sequences)
)
mcfb_frequencies = mcfb_counts / len(mcfb_sequences)
nofb_frequencies = nofb_counts / len(nofb_sequences)
actionfb_frequencies = actionfb_counts / len(actionfb_sequences)

# Strategy numbers are indexed at 0
forward_strategies = [21, 22, 27]
# Per-trial frequency summed over the forward planning strategies.
mcfb_forward = mcfb_frequencies[:, forward_strategies].sum(axis = 1)
nofb_forward = nofb_frequencies[:, forward_strategies].sum(axis = 1)
actionfb_forward = actionfb_frequencies[:, forward_strategies].sum(axis = 1)
print("Relative frequency of participants who used present bias strategies in the first trial:", nofb_forward[0])
print("Relative frequency of participants who continued to exhibit present bias:", nofb_forward[-1])

# Optimal strategy is 20
mcfb_optimal, nofb_optimal, actionfb_optimal = (
    frequencies[:, 20]
    for frequencies in (mcfb_frequencies, nofb_frequencies, actionfb_frequencies)
)

# Averages over the trials from index 10 onwards (reported as the test trials).
mcfb_test_mean = mcfb_forward[10:].mean()
nofb_test_mean = nofb_forward[10:].mean()
actionfb_test_mean = actionfb_forward[10:].mean()
print(f"Average forward frequency in test trials in MCFB condition, {mcfb_test_mean}")
print(f"Average forward frequency in test trials in No FB condition, {nofb_test_mean}")
print(f"Average forward frequency in test trials in Action FB condition, {actionfb_test_mean}")

### Chi2 tests

In [None]:
# Perform chi2 test for difference in frequency of forward planning strategies.
# Both the counts and the number of observations are restricted to the test trials
# (trial index 10 onwards), so every proportion is
#   forward-strategy uses in test trials / (participants x test trials).
first_test_trial = 10
num_test_trials = mcfb_counts[first_test_trial:].shape[0]

# Number of forward-planning strategy uses in the test trials, per condition.
forward_test_counts = {
    'mcfb': np.sum(mcfb_counts[first_test_trial:, forward_strategies]),
    'nofb': np.sum(nofb_counts[first_test_trial:, forward_strategies]),
    'actionfb': np.sum(actionfb_counts[first_test_trial:, forward_strategies]),
}
# Number of (participant, test trial) observations, per condition.
forward_test_nobs = {
    'mcfb': mcfb_sequences.shape[0] * num_test_trials,
    'nofb': nofb_sequences.shape[0] * num_test_trials,
    'actionfb': actionfb_sequences.shape[0] * num_test_trials,
}

# MCFB is compared against each other condition, using that condition's own
# counts and observations (the ActionFB test previously reused the NoFB counts).
for heading, other in (("\nMCFB vs NoFB", 'nofb'), ("MCFB vs ActionFB", 'actionfb')):
    print(heading)
    count = [forward_test_counts['mcfb'], forward_test_counts[other]]
    res = proportions_chisquare(count = count, nobs = [forward_test_nobs['mcfb'], forward_test_nobs[other]])
    print("chi2 =",'{:.2f}'.format(res[0]), ", p =", '{:.4f}'.format(res[1]))

### Transitions to optimal strategy

In [None]:
# Statistics on transition to optimal strategy
optimal_strategy = 20
def get_frequency_transition(S):
    """Count the rows of S that contain `optimal_strategy` after their first entry.

    S is indexed as S[i][k] with S.shape[0] rows; the first entry of every row
    is ignored, and each row contributes at most 1 to the returned count.
    """
    return sum(1 for i in range(S.shape[0]) if optimal_strategy in S[i][1:])

# Per condition (MCFB, NoFB, Action FB): number of participants counted by
# get_frequency_transition and total number of participants.
compared_sequences = [mcfb_sequences, nofb_sequences, actionfb_sequences]
optimal_f = np.array(list(map(get_frequency_transition, compared_sequences)))
n_participants = np.array([len(S) for S in compared_sequences])
print(["MCFB", "NoFB", "Action FB"])
print(optimal_f / n_participants)

# Chi2 tests of proportions: MCFB (index 0) against NoFB (1), then against ActionFB (2).
for heading, selected in (
    ("\nMCFB vs NoFB on transitions to the optimal strategy", [0, 1]),
    ("MCFB vs ActionFB on transitions to the optimal strategy", [0, 2]),
):
    print(heading)
    res = proportions_chisquare(count = optimal_f[selected], nobs = n_participants[selected])
    print("chi2 =", f"{res[0]:.2f}", ", p =", f"{res[1]:.4f}")

### Getting strategy transitions across conditions

In [None]:
def get_transitions(S):
    """Return every consecutive (strategy, next strategy) pair found in S.

    S is indexed as S[participant][trial]. Pairs are listed participant by
    participant, in trial order, so the result has
    S.shape[0] * (S.shape[1] - 1) entries.
    """
    num_participants, num_trials = S.shape[0], S.shape[1]
    return [
        (S[i][j], S[i][j + 1])
        for i in range(num_participants)
        for j in range(num_trials - 1)
    ]

from collections import Counter
# Conditions in the order MCFB, NoFB, Action FB.
sequences = [mcfb_sequences, nofb_sequences, actionfb_sequences]
# All consecutive strategy pairs per condition, and how often each pair occurs.
condition_transitions = [get_transitions(S) for S in sequences]
condition_transition_counts = [Counter(T) for T in condition_transitions]
# Total number of transitions per condition, taken from the extracted transitions
# themselves instead of assuming 30 trials (29 transitions) per participant.
total_transitions = [len(T) for T in condition_transitions]

### Getting significantly different transitions between MCFB and NoFB conditions

In [None]:
# Transition counters and totals of the MCFB (index 0) and NoFB (index 1) conditions.
mcfb_transitions, nofb_transitions = condition_transition_counts[0], condition_transition_counts[1]
mcfb_total_transitions, nofb_total_transitions = total_transitions[0], total_transitions[1]
# Every transition observed in at least one of the two conditions.
all_transitions_set = set(mcfb_transitions.keys()).union(set(nofb_transitions.keys()))
# Sidak-corrected significance threshold for len(all_transitions_set) tests at a 0.05 family-wise level.
p_sidak = 1 - 0.95 ** (1 / len(all_transitions_set))
print(r"alpha_sidak is ", p_sidak)

# Keep (transition, chi2, p) for every transition whose proportion differs significantly.
significant_transitions = []
for transition in all_transitions_set:
    # A Counter returns 0 for a transition that never occurred in a condition.
    pair_counts = [mcfb_transitions[transition], nofb_transitions[transition]]
    res = proportions_chisquare(pair_counts, nobs = [mcfb_total_transitions, nofb_total_transitions])
    if res[1] < p_sidak:
        significant_transitions.append((transition, res[0], res[1]))

# In the transitions, strategies are indexed at 0
print("Number of significant transitions:", len(significant_transitions))
for transition, _chi2, _pval in significant_transitions:
    print("Transition: ", transition)
    print(mcfb_transitions[transition]/mcfb_total_transitions)
    print(nofb_transitions[transition]/nofb_total_transitions)

### Plotting frequencies across conditions

In [None]:
# Figure of per-trial strategy frequencies, saved to figs/1/strategy_frequency.pdf.
plt.close()
plt.figure(figsize=(40,21))
plt.xlabel("Trial Number", fontsize = 80)
plt.ylabel("Frequency", fontsize = 80)
plt.ylim(top = 1.19, bottom = -0.02)
plt.tick_params(axis='both', which='major', labelsize=60)

# One entry per curve, in drawing (and legend) order:
# (values, legend label, marker, colour, marker size).
trial_numbers = range(1,31)
curves = [
    (mcfb_optimal, 'Goal Setting (Optimal FB)', '*', 'darkorange', 35),
    (nofb_optimal, 'Goal Setting (No FB)', '*', 'grey', 35),
    (nofb_forward, 'Forward Planning (No FB)', 'o', 'grey', 25),
    (mcfb_forward, 'Forward Planning (Optimal FB)', 'o', 'darkorange', 25),
]
for curve_values, curve_label, curve_marker, curve_color, curve_markersize in curves:
    plt.plot(
        trial_numbers,
        curve_values,
        label = curve_label,
        marker = curve_marker,
        color = curve_color,
        markersize = curve_markersize,
        linewidth = 6,
    )

plt.legend(loc='upper center',ncol = 2, fontsize = 65.2)
plt.savefig("figs/1/strategy_frequency.pdf", bbox_inches='tight')
plt.close()

### Verbal responses

In [None]:
# Experiment numbers whose survey and participant files are analysed.
exps = list(range(1, 5))
# Lesson codes that the category filters treat as "optimal" and as "beneficial".
optimal_lessons = [1]
beneficial_lessons = [1, 3, 5, 7, 8, 9]

In [None]:
def preprocess(df, exp_num):
    """Select and clean the lesson columns of one experiment's survey table.

    Parameters
    ----------
    df : pandas.DataFrame
        Survey table with columns 'pid', 'l1', 'g1', 'l2', 'g2', 'l3', 'g3'.
    exp_num : int
        Key into the module-level `exp_pids` dict; only rows whose pid is listed
        there are returned.

    Returns
    -------
    pandas.DataFrame
        A new frame with the seven columns above, restricted to rows that have
        at least 3 non-missing values, whose 'l1' is not 99, and whose pid is
        retained for the experiment. 'pid' is cast to int. The input frame is
        not modified.
    """
    lesson_columns = ['pid', 'l1', 'g1', 'l2', 'g2', 'l3', 'g3']
    # Work on new frames throughout instead of mutating a slice in place, so the
    # result does not depend on pandas' chained-assignment handling.
    cleaned = df[lesson_columns].dropna(thresh = 3)
    # l1 == 99 rows are excluded (presumably a sentinel code -- confirm against the codebook).
    cleaned = cleaned[cleaned['l1'] != 99].copy()
    cleaned['pid'] = cleaned['pid'].astype(int)
    return cleaned[cleaned['pid'].isin(exp_pids[exp_num])]

def get_lesson_pairs(df):
    """Collect the (lesson, generality) answers of every participant.

    For each of the three answer slots i = 1..3, rows where both `l{i}` and
    `g{i}` are missing are skipped; the lesson is cast to int and the generality
    string is lower-cased.

    Returns a plain dict mapping pid -> list of (lesson, generality) tuples, in
    slot order. Participants with no answer in any slot do not appear.
    """
    indexed = df.set_index('pid')
    pid_lesson_pairs = defaultdict(list)
    for i in range(1,4):
        lesson_col, generality_col = f'l{i}', f'g{i}'
        # thresh = 1 keeps a row as soon as one of the two columns is filled in.
        answered = indexed[[lesson_col, generality_col]].dropna(thresh = 1)
        answered[lesson_col] = answered[lesson_col].astype(int)
        # Transposing puts pids in the columns, so each dict value is [lesson, generality].
        answers_by_pid = answered.T.to_dict(orient = 'list')
        for pid, answer in answers_by_pid.items():
            answer[1] = answer[1].lower()
            pid_lesson_pairs[pid].append(tuple(answer))
    return dict(pid_lesson_pairs)

def get_condition_pairs(pid_lesson_pairs, pid_conditions, pid = -1, condition = -1):
    """Gather the lesson pairs of a selection of participants.

    pid = -1 selects every key of `pid_lesson_pairs`, otherwise only `pid`.
    condition = -1 applies no filter, otherwise only participants whose entry in
    `pid_conditions` equals `condition` are kept.

    Returns (all_pairs, pid_list): the selected participants' pairs concatenated
    in selection order, and the list of selected pids.
    """
    selected = list(pid_lesson_pairs.keys()) if pid == -1 else [pid]
    if condition != -1:
        selected = [p for p in selected if pid_conditions[p] == condition]
    collected = [pair for p in selected for pair in pid_lesson_pairs[p]]
    return collected, selected

def get_category_count(condition_pairs, lesson_category = "all", generality_category = "all"):
    """Count the lesson pairs that fall into a lesson / generality category.

    Parameters
    ----------
    condition_pairs : iterable of (lesson, generality)
        Lesson pairs, e.g. the first element returned by get_condition_pairs.
    lesson_category : {"all", "beneficial", "optimal"}
        "beneficial" / "optimal" restrict to the module-level `beneficial_lessons` /
        `optimal_lessons`; "all" accepts lesson codes 0..10, matching
        get_participant_category_count.
    generality_category : str
        "all" accepts both 'g' and 's'; any other value must match exactly.

    Returns
    -------
    int
        Number of pairs whose lesson and generality are both accepted.

    Raises
    ------
    ValueError
        If `lesson_category` is not one of the three supported names.
    """
    if lesson_category == "beneficial":
        considered_lessons = beneficial_lessons
    elif lesson_category == "optimal":
        considered_lessons = optimal_lessons
    elif lesson_category == "all":
        # Same lesson range as get_participant_category_count uses for "all".
        considered_lessons = list(range(11))
    else:
        raise ValueError(f"Unknown lesson_category: {lesson_category!r}")
    if generality_category != "all":
        considered_generality = [generality_category]
    else:
        considered_generality = ['g','s']
    return sum(
        1
        for pair in condition_pairs
        if pair[0] in considered_lessons and pair[1] in considered_generality
    )

def get_participant_category_count(pid_lesson_pairs, pid_list, lesson_category = "all", generality_category = "all"):
    """Count the participants who reported at least one lesson in a category.

    Parameters
    ----------
    pid_lesson_pairs : mapping pid -> list of (lesson, generality)
        Lesson pairs per participant, as built by get_lesson_pairs.
    pid_list : iterable
        The participants to consider.
    lesson_category : {"all", "beneficial", "optimal"}
        "beneficial" / "optimal" restrict to the module-level `beneficial_lessons` /
        `optimal_lessons`; "all" accepts lesson codes 0..10.
    generality_category : str
        "all" accepts both 'g' and 's'; any other value must match exactly.

    Returns
    -------
    int
        Number of participants with at least one matching pair. Every
        participant is counted at most once.

    Raises
    ------
    ValueError
        If `lesson_category` is not one of the three supported names.
    """
    if lesson_category == "beneficial":
        considered_lessons = beneficial_lessons
    elif lesson_category == "optimal":
        considered_lessons = optimal_lessons
    elif lesson_category == "all":
        considered_lessons = list(range(11))
    else:
        raise ValueError(f"Unknown lesson_category: {lesson_category!r}")
    if generality_category != "all":
        considered_generality = [generality_category]
    else:
        considered_generality = ['g','s']
    # All of a participant's pairs are inspected; any() stops at the first match,
    # so a participant contributes at most 1. (Previously an unconditional `break`
    # meant only the first pair of each participant was ever checked.)
    return sum(
        1
        for pid in pid_list
        if any(
            pair[0] in considered_lessons and pair[1] in considered_generality
            for pair in pid_lesson_pairs[pid]
        )
    )

### Getting experimental condition by pid

In [None]:
# Filled per experiment by the loop below: experiment number -> {pid: condition index}.
pid_conditions = defaultdict(dict)
# Condition index assigned to each value of the 'feedback' column.
conditions = dict(none = 0, meta = 1, action = 2)
def condition_column(reward, info):
    """Map the (reward, info) flags to a condition index.

    info only -> 0, neither -> 1, both -> 2, reward only -> 3.
    Both arguments are evaluated for truthiness.
    """
    if reward:
        return 2 if info else 3
    return 0 if info else 1
# Build {pid: condition index} for every experiment from its participants file.
for exp in exps:
    participants = pd.read_csv(f"../data/{exp}/participants.csv")
    if exp == 3:
        # Experiment 3: only the rows with stage == 1 are used.
        participants = participants[participants.stage == 1]
    if exp == 4:
        # Experiment 4 derives its condition from the with_reward / with_info flags.
        condition_values = participants.apply(
            lambda row: condition_column(row['with_reward'], row['with_info']), axis = 1
        )
    else:
        # Experiments 1-3 derive it from the 'feedback' label.
        condition_values = participants['feedback'].map(conditions)
    pid_conditions[exp] = (
        participants.assign(condition = condition_values)
        .set_index('pid')['condition']
        .to_dict()
    )
# Freeze into a plain dict so that unknown experiment keys raise instead of creating entries.
pid_conditions = dict(pid_conditions)

### Getting verbal responses by experiment number

In [None]:
# One preprocessed survey table per experiment, in the order of `exps`.
exp_dfs = [
    preprocess(pd.read_csv(f"../data/{exp}/survey.csv"), exp)
    for exp in exps
]

### Getting frequency and proportion of different combination of categories of lessons

In [None]:
# The four (lesson category, generality) combinations that are tallied.
modes = list(itertools.product(["beneficial", "optimal"], ["g","all"]))
print(modes)
total_results = []
total_sum = 0
total_participants = 0
for exp, exp_df in zip(exps, exp_dfs):
    lp = get_lesson_pairs(exp_df)
    # Running totals of reported lessons and of participants with at least one lesson.
    total_sum += sum(len(participant_pairs) for participant_pairs in lp.values())
    total_participants += len(lp)
    # No condition filter: all lesson pairs of the experiment.
    ccp,_ = get_condition_pairs(lp, pid_conditions[exp])
    total_results.append([
        get_category_count(ccp, lesson_category = lesson, generality_category = generality)
        for lesson, generality in modes
    ])
# Rows are experiments, columns follow `modes`; sum over experiments.
total_results = np.array(total_results)
total_results_sum = total_results.sum(axis = 0)
print(f"Frequency of lessons in each category {total_results_sum}")
print(f"Total lessons learnt {total_sum}")
print(f"Total number of participants {total_participants}")
print(f"Proportion of people who belong to the lesson category: \n {total_results_sum/total_participants}")

### Plotting proportion of participants who learnt lessons belonging to a category

In [None]:
modes = list(itertools.product(["beneficial", "optimal"], ["g","all"]))
# Bar-group labels, in the same order as `modes`.
category_labels = ["General Beneficial", "All Beneficial", "General Optimal", "All Optimal"]
for exp_num in range(1,5):
    condition_names = exp_condition_names[exp_num - 1]
    lp = get_lesson_pairs(exp_dfs[exp_num - 1])
    plt.figure(figsize = (10,6))
    # The fourth data set is titled "Experiment 5".
    shown_experiment = exp_num if exp_num != 4 else 5
    plt.title(f"Experiment {shown_experiment}", fontsize = 16)
    # One [proportion, category, condition name] row per category and condition.
    data = []
    for i in range(num_conditions[exp_num - 1]):
        ccp,p = get_condition_pairs(lp, pid_conditions[exps[exp_num - 1]], condition = i)
        results = [
            get_participant_category_count(lp, p, lesson_category = lesson, generality_category = generality)
            for lesson, generality in modes
        ]
        # Share of the condition's participants counted in each category.
        proportion_results = np.array(results)/len(p)
        data += [
            [proportion, category, condition_names[i]]
            for proportion, category in zip(proportion_results, category_labels)
        ]
    df = pd.DataFrame(data, columns = ['Proportion', 'Category', 'condition'])
    sns.barplot(x='Category', y='Proportion', hue = 'condition', data=df)
    plt.xlabel('Category', fontsize = 16)
    plt.ylabel('Proportion', fontsize = 16)
    plt.tick_params(axis = 'both', labelsize = 16)
    plt.legend(title = 'condition',fontsize = 16, title_fontsize = 16)
    plt.ylim(top = 1.0)
    plt.savefig(f"figs/{exp_num}/lesson_category.png")
    plt.close()

### Plotting relative frequency of participants that learnt a particular lesson

In [None]:
modes = list(itertools.product(["beneficial", "optimal"], ["g","all"]))
for exp_num in range(1,5):
    conds = num_conditions[exp_num - 1]
    condition_names = exp_condition_names[exp_num - 1]
    lp = get_lesson_pairs(exp_dfs[exp_num - 1])
    plt.figure(figsize = (10,6))
    # The fourth data set is titled "Experiment 5".
    shown_experiment = exp_num if exp_num != 4 else 5
    plt.title(f"Experiment {shown_experiment}", fontsize = 16)
    condition_wise_lesson_counts = []
    for i in range(conds):
        ccp,p = get_condition_pairs(lp, pid_conditions[exps[exp_num - 1]], condition = i)
        # 12 bins; lesson code L is stored in bin L + 1.
        total_participant_lesson_count = np.zeros(12)
        for pid in p:
            for pair in lp[pid]:
                total_participant_lesson_count[pair[0]+1] += 1
        # Normalise to the share of all lessons reported in this condition.
        total_participant_lesson_count = total_participant_lesson_count/ np.sum(total_participant_lesson_count)
        condition_wise_lesson_counts.append(total_participant_lesson_count)
    # One [proportion, lesson code, condition name] row per bin and condition.
    data = [
        [condition_wise_lesson_counts[j][i], i-1, condition_names[j]]
        for i in range(12)
        for j in range(conds)
    ]
    df = pd.DataFrame(data, columns = ['Proportion', 'Lesson', 'Condition'])
    sns.barplot(x='Lesson', y='Proportion', hue = 'Condition', data=df)
    plt.xlabel('Lesson', fontsize = 16)
    plt.ylabel('Proportion', fontsize = 16)
    plt.tick_params(axis = 'both', labelsize = 16)
    plt.legend(title = 'Condition',fontsize = 16, title_fontsize = 16)
    plt.savefig(f"figs/{exp_num}/lesson_individual.png")
    plt.close()

### Analysis for Experiment 4

#### Test for beneficial lessons

In [None]:
modes = list(itertools.product(["beneficial", "optimal"], ["g","all"]))
exp_num = 4
conds = num_conditions[exp_num - 1]
lp = get_lesson_pairs(exp_dfs[exp_num - 1])
# Per condition: participants counted for beneficial lessons of any generality
# (the ("beneficial", "all") mode), and the condition's participant count.
total_results = []
p_counts = []
for i in range(conds):
    ccp, p = get_condition_pairs(lp, pid_conditions[exps[exp_num - 1]], condition = i)
    total_results.append(
        get_participant_category_count(lp, p, lesson_category = "beneficial", generality_category = "all")
    )
    p_counts.append(len(p))
total_results = np.array(total_results)

# Reward conditions are 2,3 and info conditions are 0,2
factor_splits = [("reward", (2, 3), (0, 1)), ("info", (0, 2), (1, 3))]
for factor, (with_a, with_b), (without_a, without_b) in factor_splits:
    print(f"Test for difference in proportion of beneficial lessons learnt between participants who got {factor} and who didn't")
    # Pool the two conditions on each side of the split.
    counts = [total_results[with_a] + total_results[with_b], total_results[without_a] + total_results[without_b]]
    nobs = [p_counts[with_a] + p_counts[with_b], p_counts[without_a] + p_counts[without_b]]
    print(counts[0]/nobs[0], counts[1]/nobs[1])
    stat, pval = proportions_ztest(counts, nobs)
    print("z =", f"{stat:.2f}", ", p =", f"{pval:.4f}")

#### Test for learning of optimal lessons

In [None]:
modes = list(itertools.product(["beneficial", "optimal"], ["g","all"]))
exp_num = 4
conds = num_conditions[exp_num - 1]
lp = get_lesson_pairs(exp_dfs[exp_num - 1])
# Per condition: participants counted for optimal lessons of any generality
# (the ("optimal", "all") mode), and the condition's participant count.
total_results = []
p_counts = []
for i in range(conds):
    ccp, p = get_condition_pairs(lp, pid_conditions[exps[exp_num - 1]], condition = i)
    total_results.append(
        get_participant_category_count(lp, p, lesson_category = "optimal", generality_category = "all")
    )
    p_counts.append(len(p))
total_results = np.array(total_results)

# Reward conditions are 2,3 and info conditions are 0,2
factor_splits = [("reward", (2, 3), (0, 1)), ("info", (0, 2), (1, 3))]
for factor, (with_a, with_b), (without_a, without_b) in factor_splits:
    print(f"Test for difference in proportion of optimal lessons learnt between participants who got {factor} and who didn't")
    # Pool the two conditions on each side of the split.
    counts = [total_results[with_a] + total_results[with_b], total_results[without_a] + total_results[without_b]]
    nobs = [p_counts[with_a] + p_counts[with_b], p_counts[without_a] + p_counts[without_b]]
    print(counts[0]/nobs[0], counts[1]/nobs[1])
    stat, pval = proportions_ztest(counts, nobs)
    print("z =", f"{stat:.2f}", ", p =", f"{pval:.4f}")

### Proportion of participants in each lesson category across all experiments

In [None]:
modes = list(itertools.product(["beneficial", "optimal"], ["g","all"]))
# Condition indices per experiment that form the feedback (FB) group and the
# no-feedback (No FB) group. A condition listed in neither (index 2, 'Action FB',
# of experiment 1) belongs to neither group and is skipped; it was previously
# swept into the No FB group by a bare `else`.
exp_fb_conditions = [[1],[1],[1],[0,2,3]]
exp_nfb_conditions = [[0],[0],[0],[1]]
print(modes)
fb_pairs = []
nfb_pairs = []
fb_count, nfb_count = 0,0
total_fb_results = []
total_nfb_results = []
for exp_num in range(1,5):
    conds = num_conditions[exp_num - 1]
    lp = get_lesson_pairs(exp_dfs[exp_num - 1])
    for i in range(conds):
        in_fb_group = i in exp_fb_conditions[exp_num - 1]
        in_nfb_group = i in exp_nfb_conditions[exp_num - 1]
        if not (in_fb_group or in_nfb_group):
            continue
        temp, group_p = get_condition_pairs(lp, pid_conditions[exps[exp_num - 1]], condition = i)
        # Participants of this condition counted in each of the four modes.
        a = [
            get_participant_category_count(lp, group_p, lesson_category = j[0], generality_category = j[1])
            for j in modes
        ]
        if in_fb_group:
            total_fb_results.append(a)
            fb_pairs += temp
            fb_count += len(group_p)
        else:
            total_nfb_results.append(a)
            nfb_pairs += temp
            nfb_count += len(group_p)
print(total_fb_results)
print(total_nfb_results)
print("FB results:")
# Sum over the contributing conditions; one entry per mode.
fb_results = np.sum(total_fb_results, axis = 0)
print(fb_results)
print("Total participants in MCFB:", fb_count)
print("Proportion of participants:")
print(np.array(fb_results)/fb_count)
print("\nNo FB results:")
nfb_results = np.sum(total_nfb_results, axis = 0)
print(nfb_results)
print("Total participants in No FB:", nfb_count)
print("Proportion of participants:")
print(np.array(nfb_results)/nfb_count)

### Two-proportion z-tests

In [None]:
# Group sizes of the FB and No FB groups, shared by all four tests.
nobs = [fb_count, nfb_count]
print(nobs)
categories = ['Beneficial and general lessons', 'All beneficial lessons', 'Optimal and general', 'All optimal lessons']
# fb_results / nfb_results hold one participant count per category, in this order.
for category, fb_category_count, nfb_category_count in zip(categories, fb_results[:4], nfb_results[:4]):
    print(f"Test for difference between FB and No FB conditions in proportions of {category}")
    counts = [fb_category_count, nfb_category_count]
    print(counts)
    stat, pval = proportions_ztest(counts, nobs)
    print("z =", f"{stat:.2f}", ", p =", f"{pval:.4f}")

### Getting general lessons

In [None]:
modes = list(itertools.product(["beneficial", "optimal"], ["g","all"]))
# Condition indices per experiment that form the feedback (FB) group and the
# no-feedback (No FB) group. A condition listed in neither (index 2, 'Action FB',
# of experiment 1) belongs to neither group and is skipped; it was previously
# swept into the No FB group by a bare `else`.
exp_fb_conditions = [[1],[1],[1],[0,2,3]]
exp_nfb_conditions = [[0],[0],[0],[1]]
print(modes)
fb_pairs = []
nfb_pairs = []
fb_count, nfb_count = 0,0
total_fb_results = []
total_nfb_results = []
for exp_num in range(1,5):
    conds = num_conditions[exp_num - 1]
    lp = get_lesson_pairs(exp_dfs[exp_num - 1])
    for i in range(conds):
        in_fb_group = i in exp_fb_conditions[exp_num - 1]
        in_nfb_group = i in exp_nfb_conditions[exp_num - 1]
        if not (in_fb_group or in_nfb_group):
            continue
        temp, group_p = get_condition_pairs(lp, pid_conditions[exps[exp_num - 1]], condition = i)
        # Participants of this condition counted for general ('g') lessons of any lesson code.
        a = [get_participant_category_count(lp, group_p, lesson_category = "all", generality_category = 'g')]
        if in_fb_group:
            total_fb_results.append(a)
            fb_pairs += temp
            fb_count += len(group_p)
        else:
            total_nfb_results.append(a)
            nfb_pairs += temp
            nfb_count += len(group_p)
print(total_fb_results)
print(total_nfb_results)
print("FB results:")
# Sum over the contributing conditions; a single-entry array.
fb_results = np.sum(total_fb_results, axis = 0)
print(fb_results)
print("Total participants in MCFB:", fb_count)
print("Proportion of participants:")
print(np.array(fb_results)/fb_count)
print("\nNo FB results:")
nfb_results = np.sum(total_nfb_results, axis = 0)
print(nfb_results)
print("Total participants in No FB:", nfb_count)
print("Proportion of participants:")
print(np.array(nfb_results)/nfb_count)

### Two-proportion z-test for general lessons

In [None]:
# Two-proportion z-test on the share of participants who reported a general lesson,
# FB group against No FB group.
print("Test for difference in proportions of general lessons between FB and No FB conditions")
counts = [fb_results[0], nfb_results[0]]
# Take the group sizes from the counts computed alongside fb_results / nfb_results
# instead of relying on `nobs` left over from an earlier cell.
nobs = [fb_count, nfb_count]
stat, pval = proportions_ztest(counts, nobs)
print("z =",'{:.2f}'.format(stat), ", p =", '{:.4f}'.format(pval))