In [1]:
# Import necessary libraries
import firebase_admin
from firebase_admin import credentials, firestore, db
import json
import pandas as pd
import numpy as np

# Import custom modules.
# The helper classes are only imported here; the notebook instantiates them
# (emo, emostats, prep) in the following cell, so no object is created as a
# side effect in the middle of the import block.
from prep_emotioncon import PrepEmotioncon
from emotioncon_modelling import Modelling
from emotioncon_stats import Statistics

In [2]:
# Helper objects provided by the custom emotioncon modules
emo = Modelling()
emostats = Statistics()
prep = PrepEmotioncon()

# Reference pattern for the ten attention checks; each subject's extracted
# attention-check responses are compared element-wise against this array.
attention_cr = np.array([
    1, 0, 0, 1, 1,
    1, 1, 0, 0, 0,
])

# Mood labels rated by the subjects; the number of labels sets the number of
# rows of every per-subject ratings array.
mood_categories = [
    'disgusted',
    'amused',
    'calm',
    'anxious',
    'sad',
]

In [3]:
# Load data from Firebase
# Service-account key file; the path is resolved relative to the working directory.
cred = credentials.Certificate('emotioncon2-firebase-adminsdk-kmpni-88fe22d6a2.json')

# initialize_app() raises ValueError when the default app already exists, which
# made this cell fail when it was run a second time in the same kernel.
# Reuse the existing default app if there is one so the cell can be re-run.
try:
    default_app = firebase_admin.get_app()
except ValueError:
    default_app = firebase_admin.initialize_app(cred)

collname = 'emotionconexp'  # Name of the Firebase collection
client = firestore.client()  # Firestore client bound to the default app

# Retrieve the list of subjects from the Firebase collection:
# every document ID in the collection is treated as one subject.
# NOTE(review): the printed IDs identify individual participants' documents;
# consider clearing this output before sharing the notebook.
subjList = []  # Subject (document) IDs in the order returned by the stream
for subj in client.collection(collname).stream():
    subjList.append(subj.id)  # Add the subject ID to the subjList
    print(subj.id)  # Print the subject ID

03Qa7Vn3GpW7oHyFGlnIJIOv9Uo1
0f37llahneZuzVNeOn24LZw1ukq1
0jmuq5dxlHRKff218eH4Zkkl0Jb2
1MSzpScha7QUanJUgHkrsGfL2Zu1
1PccG5V7UpZ21zCzkTyVI6GJEba2
1ctyNByC4kfTb9GqDvjM2WzNerX2
1twtTg29gzNkhh8uHLLOOKS87Qr1
3VVx42fFdPfeqsBjXxmSioEbSvC2
3tH9330T6ddgp2RZ6K9vE3YDpvJ3
4V9oOwR9pzbw2CVxYQSehXZKXuF3
5zY88vbxwYef42TyMuCWQ2l8jdh1
6QAP227aj4MZqvmLvVJvPsFKc6Z2
6u4PW7Q06WZfLn9FgPfrw1IdFVE2
6wZsr3ALaVSlaGFDG1yzK2VxLLv2
7VaMpZ75ZVPttZPN1qjBVmuwtxQ2
7mzS42DUbUSI9mmfM6POBPw6mC73
8KbM5adjXccj4vkEQnA7T1HwEFr2
9i9d4q9b2jM0j1TT1KASXNpk6WT2
AELvntCQRoU8m5YhxdKAe6S6iC33
AY7DERZKipScSBSrRQbRhUww32w1
AaMDCKRD3Fb8DlBEvzkhBHNAAS63
BX5DoNTOaDUS0GVlbQFIniHTYwE3
CgvkefieE5UGcD0q4t3thGcoeRw2
CimsOfNLwjdKQ0VlvKhymbTpHx52
CqWHQDlksPSi8wl62N6r8dYabOT2
D3aeFZmKN6NKj1xSISrQ9K6qwYO2
Da7TQldvpmN5hZfdWulialnp81b2
Dl6bxuxSu4ZmXsTadVYCs9BgTMw1
EGCxdCN1utOW76wZDjeqOSruz4S2
ETHcvfaBCyb63dPbE3H7X7LtMiy1
EpmQoF6201YwdygP2GV75KBzOOI3
HUek6dXNA6TYXdF1OM5qSzusvz13
HsHgiNdvXKgVIbAZ9BSWgdqkmAf1
J2CCa54Rd5SXW6SfDabha2UAJQ33
JJxHkxxt4vMfCi

In [4]:
alldata = []  # One dict of raw Firestore documents per subject that completed the task
notcompleted = 0  # Counter for subjects who haven't completed the task

# Iterate over each subject in subjList
for subj in subjList:
    subj_path = collname + '/{0}'.format(subj)  # Path of the subject's top-level document

    iddoc = client.document(subj_path).get().to_dict()  # Retrieve the ID document of the subject
    subjid = str(iddoc['subjectID_prolific'])  # Prolific ID (extracted here but not stored in subjectdata)
    fsid = str(iddoc['uid_firebase'])  # Firebase ID, stored as the subject's 'id' below

    # Replace the start time by the float returned by its .timestamp() method.
    # This is best effort: a missing field (KeyError) or a value without a
    # .timestamp() method (AttributeError) is left untouched. Only these two
    # cases are ignored, so interrupts and unrelated errors are no longer
    # silently swallowed.
    try:
        iddoc['start_time_jspsych'] = iddoc['start_time_jspsych'].timestamp()
    except (KeyError, AttributeError):
        pass

    # Skip (and count) subjects without a 'completed' flag or with the flag equal to 0
    if 'completed' not in iddoc or iddoc['completed'] == 0:
        notcompleted += 1
        continue

    # Retrieve setup and task data for the subject
    setupdoc = client.document(subj_path + '/setup/variables').get().to_dict()
    datadoc = client.document(subj_path + '/taskdata/data').get().to_dict()

    # Retrieve questionnaire data for the subject
    questionnaire = client.document(subj_path + '/questionnaire/data').get().to_dict()

    # Create a dictionary to store subject data
    subjectdata = {
        'id': fsid,
        'setupdoc': setupdoc,
        'datadoc': datadoc,
        'iddoc': iddoc,
        'questionnaire': questionnaire,
    }

    # Append the subject data to the alldata list
    alldata.append(subjectdata)

In [5]:
# Summarise recruitment: how many subjects opened the web app, how many did not
# complete the experiment, and how many completed it.
summary_parts = [
    'number of subjects that opened web app: ', str(len(subjList)),
    '\nnumber of subjects that did not complete: ', str(notcompleted),
    '\nnumber of subjects that performed the experiment: ', str(len(alldata)),
]
print(''.join(summary_parts))

number of subjects that opened web app: 138
number of subjects that did not complete: 29
number of subjects that performed the experiment: 109


Prolific automatically opens a slot for a new participant whenever a subject does not complete the experiment. We have no information about the people who started the experiment but did not complete it. Therefore, they are not mentioned in the paper.

In [6]:
data = []  # Processed records, one per included subject (the alldata dicts, extended in place)
mood_rating_list = []  # Ratings array of each included subject, aligned with data
symptoms_list = []  # Questionnaire responses of each included subject, aligned with data
MIN_ATTENTION_RATIO = 0.7  # Subjects matching a smaller fraction of attention checks are excluded

# Loop through the data for each subject.
# All per-subject results are computed first and only stored once every step
# has succeeded. Previously a failure after the first append left a partially
# processed subject in data/mood_rating_list without advancing the counter, so
# the next subject's results were written into the failed subject's entry.
for i, subject in enumerate(alldata):
    subjid = subject['id']
    setupdoc = subject['setupdoc']
    datadoc = subject['datadoc']
    iddoc = subject['iddoc']

    try:
        nTrials = int(setupdoc['nTrials'])

        # Extract attention checks and calculate the fraction that matches the
        # reference pattern attention_cr. [1:-1] drops the first and last
        # character of the stored string (presumably enclosing brackets).
        attention_checks = prep.extract_attention_checks(datadoc['check_response'][1:-1])
        attention_check_ratio = np.sum(attention_checks == attention_cr) / len(attention_cr)

        # Exclude subjects with a low attention check ratio
        if attention_check_ratio < MIN_ATTENTION_RATIO:
            print('exclusion due to failed attention checks subject ' + str(i+1) + ': ' + subjid)
            continue

        # Mood ratings: one row per mood category, one column per rating
        # occasion; the task data holds nTrials+2 stimulus/response pairs.
        ratings = np.empty([len(mood_categories), nTrials+2])
        for j in range(nTrials+2):
            # idx is used to reorder this occasion's responses before they are
            # stored (presumably into the order of mood_categories; see
            # prep.stimulus_numbering).
            idx = np.array(prep.stimulus_numbering(datadoc['stimulus' + str(j+1)], mood_categories))
            rating = np.array(prep.extract_reponse(datadoc['response' + str(j+1)]))
            ratings[:, j] = rating[idx]
        ratings_normalized = prep.normalize_2d(ratings)

        # Slider reaction times are stored as one comma-separated string.
        # 'null' entries are overwritten with the text 'nan' (assigning np.nan
        # to a string array stores its string form), which astype(float) then
        # parses as NaN.
        slider_rt = np.asarray(datadoc['slider_rt'][1:-1].split(','))
        slider_rt[slider_rt=='null'] = np.nan
        slider_rt = slider_rt.astype(float)

        symptoms = prep.extract_questionnaire_reponse(subject['questionnaire']['responses'])
    except Exception as err:
        # Missing or malformed documents end up here. Exception (not a bare
        # except) keeps KeyboardInterrupt/SystemExit working, and the reason is
        # reported so that genuine bugs are not mistaken for missing data.
        print('no data ' + str(i+1) + ': ' + subjid)
        print('    reason: ' + repr(err))
        continue

    # Commit: every step succeeded, so the three lists stay aligned.
    subject['ratings'] = ratings
    subject['ratings_normalized'] = ratings_normalized
    subject['attention_checks'] = attention_checks
    subject['slider_rt'] = slider_rt
    subject['symptoms'] = symptoms
    data.append(subject)
    mood_rating_list.append(ratings)
    symptoms_list.append(symptoms)
    print('successfully transformed data for subject ' + str(i+1) + ': ' + subjid)

k = len(data)  # Number of processed subjects (kept for code that still reads the old counter)

# Stacking the per-subject arrays puts subjects on the first axis; .T reverses
# the axis order, so subjects end up on the third axis.
# NOTE(review): this assumes every included subject has the same nTrials and
# that at least one subject was included - confirm.
mood_ratings = np.array(mood_rating_list).T  # Transpose mood ratings array
symptoms_ratings = np.array(symptoms_list)  # Convert symptoms list to array
Nsj = np.shape(mood_ratings)[2]  # Get the number of subjects (third dimension of mood ratings)

successfully transformed data for subject 1: 03Qa7Vn3GpW7oHyFGlnIJIOv9Uo1
successfully transformed data for subject 2: 0f37llahneZuzVNeOn24LZw1ukq1
successfully transformed data for subject 3: 0jmuq5dxlHRKff218eH4Zkkl0Jb2
successfully transformed data for subject 4: 1PccG5V7UpZ21zCzkTyVI6GJEba2
successfully transformed data for subject 5: 1ctyNByC4kfTb9GqDvjM2WzNerX2
successfully transformed data for subject 6: 1twtTg29gzNkhh8uHLLOOKS87Qr1
successfully transformed data for subject 7: 3VVx42fFdPfeqsBjXxmSioEbSvC2
successfully transformed data for subject 8: 3tH9330T6ddgp2RZ6K9vE3YDpvJ3
successfully transformed data for subject 9: 4V9oOwR9pzbw2CVxYQSehXZKXuF3
successfully transformed data for subject 10: 6QAP227aj4MZqvmLvVJvPsFKc6Z2
successfully transformed data for subject 11: 6u4PW7Q06WZfLn9FgPfrw1IdFVE2
successfully transformed data for subject 12: 6wZsr3ALaVSlaGFDG1yzK2VxLLv2
successfully transformed data for subject 13: 7VaMpZ75ZVPttZPN1qjBVmuwtxQ2
successfully transformed data for 

In [7]:
# Report the final sample size and store the preprocessed records for the analyses.
# data is a list of dicts, so np.save has to pickle it; reading the file back
# presumably requires np.load(..., allow_pickle=True) - confirm in the analysis notebooks.
n_included = len(data)
print('N = ' + str(n_included))
np.save('../results/data_experiment.npy', data)

N = 108
