In [1]:
# libs
import os
import pandas as pd
import numpy as np
import statsmodels.formula.api as smf
from scipy.stats import chi2_contingency
from scipy.stats import mannwhitneyu
import numpy as np

### Load RCT Results
1. Load RCT Result
2. Prepare data for accuracy

In [2]:
INPUT_DIR = os.path.join(os.getcwd(), "rct_results")

df_dict = {}
pre_fix = "10_concepts"
target_questions = ['concept', 'prediction', 'limitation']
for group in ["non_native", "native"]:
    if group not in df_dict.keys():
        df_dict[group] = dict()
    for condition in ['definition', 'story']:
        if condition not in df_dict.keys():
            df_dict[group][condition] = dict()
        for study in ["scores", "scores_followup"]:
            file_path = os.path.join(INPUT_DIR, '_'.join([pre_fix, group, condition, study + ".tsv"]))
            study_df = pd.read_csv(file_path, sep='\t')
            df_dict[group][condition][study] = study_df.copy()

for group in df_dict.keys():
    for condition in df_dict[group].keys():
        for study in ["scores", "scores_followup"]:
            raw_df = df_dict[group][condition][study]
            for q in target_questions:
                # definition
                real_answer = np.array(raw_df[q + "_question_answer"].values)
                user_answer = np.array(raw_df[q + "Q"].values)
                correctness = np.multiply((real_answer == user_answer), 1)
                raw_df[q + "_rst"] = correctness
            df_dict[group][condition][study] = raw_df

### Statistical Analysis
1. Chi-Squared for the question accuracy in the test and the follow-up test
2. Mann-Whitney test for the relevance score and interest in law

In [3]:
# Mann Whitney
def mannwhitneyu_test(data1, data2):
    u1, p = mannwhitneyu(data1, data2, method="asymptotic")
    threshold = 0.05
    if p < threshold:
        # Statistically different
        n = True
    else:
        n = False
    return n, u1, p

# Chi-Squared test of accuracy
print("Chi Squared for Accuracy")
for group in ["native", "non_native"]:
    print("For Group ", group)
    for question in [q + "_rst" for q in target_questions]:
        print("*************")
        print("Question: ", question)
        contingency_table = []
        for condition in ["story", "definition"]:  
            data = df_dict[group][condition]["scores"]
            incorrect = data[question].value_counts()[0]
            correct = data[question].value_counts()[1]
            contingency_table.append([correct, incorrect])
        stat, p, dof, expected = chi2_contingency(contingency_table)
        print(stat, p)

# Mann-Whitney
print("\n")
print("Mann-Whitney U Test")
for group in ["native", "non_native"]:
    print("For Group ", group)
    for metric in ["relevance", "law_interest"]:  
        data1 = df_dict[group]['story']['scores'][metric]
        data2 = df_dict[group]['definition']['scores'][metric]
        print("Metric: ", metric)
        print("Story Condition MD(STD): ", np.mean(data1), np.std(data1))
        print("Definition Condition MD(STD): ", np.mean(data2), np.std(data2))
        n, u, p = mannwhitneyu_test(data1, data2)
        print(u, p)
        print("\n")

Chi Squared for Accuracy
For Group  native
*************
Question:  concept_rst
0.48339200428194906 0.4868898821253125
*************
Question:  prediction_rst
0.6537854593213362 0.41876252154968585
*************
Question:  limitation_rst
2.0120481800016 0.1560545129081714
For Group  non_native
*************
Question:  concept_rst
0.20182537119577626 0.6532514758125724
*************
Question:  prediction_rst
4.2850363890097 0.0384492595157595
*************
Question:  limitation_rst
11.772340686798307 0.0006011733562701144


Mann-Whitney U Test
For Group  native
Metric:  relevance
Story Condition MD(STD):  3.20625 1.3233332677371943
Definition Condition MD(STD):  2.6303030303030304 1.3037207473076875
16421.0 0.00010137547692223972


Metric:  law_interest
Story Condition MD(STD):  3.78125 0.9916644782889019
Definition Condition MD(STD):  3.6666666666666665 1.119162746219357
13800.0 0.45934243459962987


For Group  non_native
Metric:  relevance
Story Condition MD(STD):  3.1882352941176473 

### Followup Assessment result

In [4]:
total_respondant = 0
total_participant = 0
wrong_answer_in_pre = dict()
for group in df_dict.keys():
    for condition in df_dict[group].keys():
        _type = group + "_" + condition
        original = df_dict[group][condition]['scores']
        followup = df_dict[group][condition]['scores_followup']
        merged_df = pd.merge(original, followup, on=['PROLIFIC_PID', 'q_id'])
        for question in ['concept_rst', 'prediction_rst', 'limitation_rst']:
            print("Question: ", question)
            retention_population = merged_df[merged_df[question + "_x"] == True]
            print(round(len(retention_population[retention_population[question + "_y"] == True])/len(retention_population), 4)*100)

            # save the data:
            if question not in wrong_answer_in_pre.keys():
                wrong_answer_in_pre[question] = dict()
            retention_population[question + "_y"] = retention_population[question + "_y"].replace({True: 1, False: 0})
            wrong_answer_in_pre[question][_type] = list(retention_population[question + "_y"].values)
            

        total_respondant += len(followup)
        total_participant += len(original)
        print("\n")
print("Total Responding Rate: ", round(total_respondant/total_participant, 2))

Question:  concept_rst
86.32
Question:  prediction_rst
82.8
Question:  limitation_rst
91.01


Question:  concept_rst
98.56
Question:  prediction_rst
89.60000000000001
Question:  limitation_rst
92.31


Question:  concept_rst
92.55
Question:  prediction_rst
88.89
Question:  limitation_rst
91.03


Question:  concept_rst
91.58
Question:  prediction_rst
86.83999999999999
Question:  limitation_rst
91.01


Total Responding Rate:  0.71


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  retention_population[question + "_y"] = retention_population[question + "_y"].replace({True: 1, False: 0})
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  retention_population[question + "_y"] = retention_population[question + "_y"].replace({True: 1, False: 0})
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-

In [5]:
comparison = [
    ["non_native_definition", "non_native_story"],
    ["native_definition", "native_story"]
]
for question in ['concept_rst', 'prediction_rst', 'limitation_rst']:
    print("For Question ", question)
    for case1, case2 in comparison:
        print("Chi Squared for ", case1, case2)
        list1 = wrong_answer_in_pre[question][case1]
        list2 = wrong_answer_in_pre[question][case2]
        table = []
        total = 0
        for _list in [list1, list2]:  
            incorrect = _list.count(0)
            correct = _list.count(1)
            table.append([correct, incorrect])
        stat, p, dof, expected = chi2_contingency(table)
        print(stat, p)
    print("\n")

For Question  concept_rst
Chi Squared for  non_native_definition non_native_story
12.739406463768152 0.000358031295726987
Chi Squared for  native_definition native_story
0.0 1.0


For Question  prediction_rst
Chi Squared for  non_native_definition non_native_story
1.591374809949523 0.20712982176873498
Chi Squared for  native_definition native_story
0.021946010019681522 0.882230816154632


For Question  limitation_rst
Chi Squared for  non_native_definition non_native_story
0.008581283298288127 0.9261933654671165
Chi Squared for  native_definition native_story
0.0 1.0


