In [1]:
import firebase_admin
from firebase_admin import credentials, firestore, db
import json
import pandas as pd
import numpy as np
from pykalman.pykalman import *
import matplotlib.pyplot as plt
import seaborn as sns
from statannotations.Annotator import Annotator
import statsmodels.formula.api as smf
import scipy
import control
from prep_emotioncon import PrepEmotioncon
# Preprocessing helper; later cells call its methods to parse attention checks,
# stimulus order, mood ratings and questionnaire responses.
# NOTE(review): `prep` is instantiated again in the next cell, so this line looks redundant.
prep = PrepEmotioncon()
from emotioncon_modelling import Modelling
from emotioncon_stats import Statistics

In [2]:
# Shared helper objects used throughout the notebook (constructed in the same
# order as before: modelling, statistics, preprocessing).
emo, emostats, prep = Modelling(), Statistics(), PrepEmotioncon()

# Reference pattern for the ten attention checks; each subject's extracted
# checks are compared element-wise against it.
# NOTE(review): presumably these are the correct responses -- confirm.
attention_cr = np.array([1, 0, 0, 1, 1, 1, 1, 0, 0, 0])

# Mood labels; their order defines the row order of every ratings matrix.
mood_categories = [
    'disgusted',
    'amused',
    'calm',
    'anxious',
    'sad',
]

In [3]:
# load data from firebase
# Authenticate with the service-account key file and list every subject
# document id in the experiment collection.
cred = credentials.Certificate('emotioncon2-firebase-adminsdk-kmpni-88fe22d6a2.json')
try:
    # Reuse the default app if this cell was already run in the current kernel;
    # calling initialize_app() a second time raises ValueError.
    default_app = firebase_admin.get_app()
except ValueError:
    default_app = firebase_admin.initialize_app(cred)
collname = 'emotionconexp'
client = firestore.client()
subjList = []
for subj in client.collection(collname).stream():
    subjList.append(subj.id)
    print(subj.id)

03Qa7Vn3GpW7oHyFGlnIJIOv9Uo1
0f37llahneZuzVNeOn24LZw1ukq1
0jmuq5dxlHRKff218eH4Zkkl0Jb2
1MSzpScha7QUanJUgHkrsGfL2Zu1
1PccG5V7UpZ21zCzkTyVI6GJEba2
1ctyNByC4kfTb9GqDvjM2WzNerX2
1twtTg29gzNkhh8uHLLOOKS87Qr1
3VVx42fFdPfeqsBjXxmSioEbSvC2
3tH9330T6ddgp2RZ6K9vE3YDpvJ3
4V9oOwR9pzbw2CVxYQSehXZKXuF3
5zY88vbxwYef42TyMuCWQ2l8jdh1
6QAP227aj4MZqvmLvVJvPsFKc6Z2
6u4PW7Q06WZfLn9FgPfrw1IdFVE2
6wZsr3ALaVSlaGFDG1yzK2VxLLv2
7VaMpZ75ZVPttZPN1qjBVmuwtxQ2
7mzS42DUbUSI9mmfM6POBPw6mC73
8KbM5adjXccj4vkEQnA7T1HwEFr2
9i9d4q9b2jM0j1TT1KASXNpk6WT2
AELvntCQRoU8m5YhxdKAe6S6iC33
AY7DERZKipScSBSrRQbRhUww32w1
AaMDCKRD3Fb8DlBEvzkhBHNAAS63
BX5DoNTOaDUS0GVlbQFIniHTYwE3
CgvkefieE5UGcD0q4t3thGcoeRw2
CimsOfNLwjdKQ0VlvKhymbTpHx52
CqWHQDlksPSi8wl62N6r8dYabOT2
D3aeFZmKN6NKj1xSISrQ9K6qwYO2
Da7TQldvpmN5hZfdWulialnp81b2
Dl6bxuxSu4ZmXsTadVYCs9BgTMw1
EGCxdCN1utOW76wZDjeqOSruz4S2
ETHcvfaBCyb63dPbE3H7X7LtMiy1
EpmQoF6201YwdygP2GV75KBzOOI3
HUek6dXNA6TYXdF1OM5qSzusvz13
HsHgiNdvXKgVIbAZ9BSWgdqkmAf1
J2CCa54Rd5SXW6SfDabha2UAJQ33
JJxHkxxt4vMfCi

In [4]:
# Download the stored documents of every subject who completed the experiment.
# Each kept subject contributes one dict with keys 'id', 'setupdoc', 'datadoc'
# and 'iddoc'; subjects without a truthy-or-nonzero 'completed' flag are only counted.
alldata = []
notcompleted = 0
for subj in subjList:
    iddoc = client.document(collname + '/{0}'.format(subj)).get().to_dict()
    subjid = str(iddoc['subjectID_prolific'])
    fsid = str(iddoc['uid_firebase'])
    try:
        # Replace the stored start time by the value of its .timestamp() method.
        # NOTE(review): presumably done so the record holds a plain number -- confirm.
        iddoc['start_time_jspsych'] = iddoc['start_time_jspsych'].timestamp()
    except (KeyError, AttributeError):
        # Field is absent or has no .timestamp() method: keep the record as stored.
        # Only these two cases are tolerated so that real failures (and
        # KeyboardInterrupt) are no longer silently swallowed.
        pass
    # A missing flag and an explicit 0 both mean the subject did not finish.
    if 'completed' not in iddoc or iddoc['completed'] == 0:
        notcompleted += 1
        continue
    setupdoc = client.document(collname + '/{0}/setup/variables'.format(subj)).get().to_dict()
    datadoc = client.document(collname + '/{0}/taskdata/data'.format(subj)).get().to_dict()
    # Nest the questionnaire document inside the task data.
    datadoc['questionnaire'] = client.document(collname + '/{0}/questionnaire/data'.format(subj)).get().to_dict()
    subjectdata = {'id': fsid, 'setupdoc': setupdoc, 'datadoc': datadoc, 'iddoc': iddoc}
    alldata.append(subjectdata)

In [5]:
# Report how many subjects opened the web app, how many dropped out, and how
# many remain with a completed experiment.
summary_lines = [
    'number of subjects that opened webbapp: ' + str(len(subjList)),
    'number of subjects that did not complete: ' + str(notcompleted),
    'number of subjects that performed experiment:  ' + str(len(alldata)),
]
print('\n'.join(summary_lines))

number of subjects that opened webbapp: 137
number of subjects that did not complete: 28
number of subjects that performed experiment:  109


Prolific automatically opens a slot for a new participant whenever a subject does not complete the experiment. We have no information about the people who started the experiment but did not complete it. Therefore, they are not mentioned in the paper.

In [6]:
# Build the analysis data set from the downloaded records.
#
# For every subject in `alldata` this cell
#   1. extracts the attention checks and excludes the subject if fewer than
#      `min_attention_ratio` of them match `attention_cr`,
#   2. assembles a (len(mood_categories), nTrials + 2) matrix of mood ratings,
#      with rows ordered as in `mood_categories`,
#   3. extracts the questionnaire responses.
# A subject is added to `data`, `mood_rating_list` and `symptoms_list` only
# after ALL three steps succeeded, so the three lists always stay aligned.
# (Previously the record was appended before the questionnaire was parsed; a
# failure there left a half-filled entry in `data` and shifted every later
# subject's ratings onto the wrong record.)
data = []
mood_rating_list = []
symptoms_list = []
min_attention_ratio = 0.7  # minimum share of matching attention checks for inclusion
for i, subject in enumerate(alldata):
    setupdoc = subject['setupdoc']
    datadoc = subject['datadoc']
    iddoc = subject['iddoc']
    subjid = subject['id']
    nTrials = int(setupdoc['nTrials'])
    try:
        # The first and last entry of 'check_response' are dropped before parsing.
        attention_checks = prep.extract_attention_checks(datadoc['check_response'][1:-1])
        attention_check_ratio = np.sum(attention_checks == attention_cr) / len(attention_cr)
        if attention_check_ratio < min_attention_ratio:
            print('exclusion due to failed attention checks subject ' + str(i+1) + ': ' + subjid)
            continue

        ratings = np.empty([len(mood_categories), nTrials+2])
        for j in range(nTrials+2):
            # `idx` reorders the responses of rating occasion j+1 so that they
            # follow the order of `mood_categories`.
            idx = np.array(prep.stimulus_numbering(datadoc['stimulus' + str(j+1)], mood_categories))
            rating = np.array(prep.extract_reponse(datadoc['response' + str(j+1)]))
            ratings[:, j] = rating[idx]

        ratings_normalized = prep.normalize_2d(ratings)
        symptoms = prep.extract_questionnaire_reponse(datadoc['questionnaire']['responses'])
    except Exception as err:
        # Best effort: skip subjects whose record is incomplete or malformed,
        # but report the cause instead of hiding it.
        print('no data ' + str(i+1) + ': ' + subjid + ' (' + repr(err) + ')')
        continue

    # Everything was extracted successfully: commit the subject atomically.
    # As before, the derived fields are stored on the same dict object that
    # lives in `alldata`.
    subject['ratings'] = ratings
    subject['ratings_normalized'] = ratings_normalized
    subject['attention_checks'] = attention_checks
    subject['symptoms'] = symptoms
    data.append(subject)
    mood_rating_list.append(ratings)
    symptoms_list.append(symptoms)
    print('successfully transformed data for subject ' + str(i+1) + ': ' + subjid)

# Stacking gives (subjects, moods, occasions); .T reverses the axes to
# (occasions, moods, subjects) when all subjects share the same nTrials.
mood_ratings = np.array(mood_rating_list).T
symptoms_ratings = np.array(symptoms_list)
# Number of included subjects; taken from the list so that an empty result
# does not raise an IndexError.
Nsj = len(mood_rating_list)

successfully transformed data for subject 1: 03Qa7Vn3GpW7oHyFGlnIJIOv9Uo1
successfully transformed data for subject 2: 0f37llahneZuzVNeOn24LZw1ukq1
successfully transformed data for subject 3: 0jmuq5dxlHRKff218eH4Zkkl0Jb2
successfully transformed data for subject 4: 1PccG5V7UpZ21zCzkTyVI6GJEba2
successfully transformed data for subject 5: 1ctyNByC4kfTb9GqDvjM2WzNerX2
successfully transformed data for subject 6: 1twtTg29gzNkhh8uHLLOOKS87Qr1
successfully transformed data for subject 7: 3VVx42fFdPfeqsBjXxmSioEbSvC2
successfully transformed data for subject 8: 3tH9330T6ddgp2RZ6K9vE3YDpvJ3
successfully transformed data for subject 9: 4V9oOwR9pzbw2CVxYQSehXZKXuF3
successfully transformed data for subject 10: 6QAP227aj4MZqvmLvVJvPsFKc6Z2
successfully transformed data for subject 11: 6u4PW7Q06WZfLn9FgPfrw1IdFVE2
successfully transformed data for subject 12: 6wZsr3ALaVSlaGFDG1yzK2VxLLv2
successfully transformed data for subject 13: 7VaMpZ75ZVPttZPN1qjBVmuwtxQ2
successfully transformed data for 

In [7]:
# Report the number of included subjects and write the list of per-subject
# records to disk with np.save.
output_path = '../data/data_experiment.npy'
print('N = {0}'.format(len(data)))
np.save(output_path, data)

N = 108
