# FindingFive results analysis
Note: session results can only be downloaded once the session is finished. Therefore, all results are in the same file.

In [48]:
import numpy as np
import pandas as pd
import os
import difflib
import re

**Participant data from Calendly**

In [56]:
ff5_folder = "../data/ff5"
if os.path.exists(f"{ff5_folder}/participants.csv"):
    participants_path = f"{ff5_folder}/participants.csv"
    rewrite_participants = False
else:
    participants_path = f"{ff5_folder}/events-export.csv"
    rewrite_participants = True
part = pd.read_csv(participants_path)
print(f'Reading participants: {part.shape[0]} participants.')

Reading participants: 58 participants.


In [41]:
def parse_name(s:str, email:str):
    s = s.strip() # removing trailing space
    ss = s.split()
    email = re.sub(r'[0-9]+', '', email.split('@')[0]) # removing digits
    semail = email.split('.')

    if len(ss) == 2:
        # attempting finding correct ordering
        if (ss[0] == ss[0].upper()):
            return ss[1].capitalize(), ss[0].capitalize()
        elif ss[1] == ss[1].upper():
            return ss[0].capitalize(), ss[1].capitalize()
        m = difflib.get_close_matches(ss[0], semail, n=1)
        if (len(email) == 2) and len(m) >=1:
            # email usually is firstname lastname
            s_0_position = semail.index(m[0])
            return ss[s_0_position].capitalize(), ss[1-s_0_position].capitalize()
        # failing: returning as firstname lastname
        return ss[0].capitalize(), ss[1].capitalize()
    
    # otherwise: attempting to find correct name from email
    if ss[0].lower() in email:
        return ss[0].capitalize(), email.replace(ss[0].lower(),'').replace('.','').capitalize()
    elif len(email.split('.')) == 2:
        email = email.split('.')
        return email[0].capitalize(), email[1].capitalize()
    # failing
    return s, ''

In [42]:
if rewrite_participants:
    part = part[~part['Canceled']]
    part = part[['Invitee Name', 'Invitee Email', 'Text Reminder Number', 'Start Date & Time']].sort_values('Start Date & Time')
    # ideal header: idx,participant_lastname,participant_firstname,email,session_idx,session_date
    part.columns = ['participant_name', 'email', 'phonenumber','session_date']
    # reorder names (will need checking)
    t = part.apply(lambda x: parse_name(x.participant_name, x.email), axis=1)
    part['participant_firstname'] = t.apply(lambda x: x[0])
    part['participant_lastname'] = t.apply(lambda x: x[1]) 
    part['participant_name'] = part.participant_name.apply(lambda x: x.strip())
    # add session identifiers
    #part['session_idx'] = None
    #part['participant_role'] = None
    part['idx'] = part.apply(lambda x: None if x.participant_lastname == '' else f'{x.participant_firstname[0]}{x.participant_lastname[0]}', axis=1)
    # write participants file
    part = part[['idx', 'participant_name', 'participant_firstname', 'participant_lastname', 'email', 'phonenumber', \
            'session_date', 'session_idx', 'participant_role']]
    part.to_csv(participants_path, index=False)

sessions_pupdate = (part.session_idx.isna().sum() == part.shape[0])

In [None]:
updateonly_participants = False
if updateonly_participants:
    part['idx'] = part.apply(lambda x: None if x.participant_lastname == '' else f'{x.participant_firstname[0]}{x.participant_lastname[0]}', axis=1)
    part['participant_name'] = part.apply(lambda x: "{x.participant_firstname} {x.participant_lastname}")
    part.to_csv(participants_path, index=False)

In [78]:
part.head(3)

Unnamed: 0,idx,participant_name,participant_firstname,participant_lastname,email,phonenumber,session_date,session_idx,participant_role
0,LB,BOUILLOT Lola,Lola,Bouillot,lola.bouillot@gmail.com,,2022-11-16 09:00,,
1,MC,Montenon camille,Camille,Montenon,camille.montenon1@laposte.net,,2022-11-16 09:00,,
2,NB,Balmes,Noemie,Balmes,noemie.blms@gmail.com,'+33 6 25 55 68 34,2022-11-16 13:00,,


**Reading participant data from Finding Five**

In [47]:
results_path = f"{ff5_folder}/results.csv"
res = pd.read_csv(results_path)
print(f"Results - shape: {res.shape[0]}\t number of participants: {len(res.participant_id.unique())}")
res.head()

Results - shape: 114	 number of participants: 3


Unnamed: 0,expt_id,group_id,network_error_repeat,participant_id,participation_duration,response_correct,response_mode,response_name,response_rt,response_target,response_type,response_value,session_id,session_start_time,stimuli_presented,trial_duration,trial_num,trial_template
0,634535ee30f231f5c20a2862,default,0,2f384e56290bb8c551cd86d5a305d896184c585b8746a2...,193.478848,,keypress,GG1,3119,,text-response,Gabriel,63621bc77e0f34c157708f73,2022-11-02 07:27:03+00:00,,13459,1,G1
1,634535ee30f231f5c20a2862,default,0,2f384e56290bb8c551cd86d5a305d896184c585b8746a2...,193.478848,,keypress,GG2,6306,,text-response,23,63621bc77e0f34c157708f73,2022-11-02 07:27:03+00:00,,13459,1,G1
2,634535ee30f231f5c20a2862,default,0,2f384e56290bb8c551cd86d5a305d896184c585b8746a2...,193.478848,False,click,GG3,3717,[],choice-response,autre,63621bc77e0f34c157708f73,2022-11-02 07:27:03+00:00,,13459,1,G1
3,634535ee30f231f5c20a2862,default,0,2f384e56290bb8c551cd86d5a305d896184c585b8746a2...,193.478848,,keypress,GG4,6620,,rating-response,4,63621bc77e0f34c157708f73,2022-11-02 07:27:03+00:00,,12658,2,G1
4,634535ee30f231f5c20a2862,default,0,2f384e56290bb8c551cd86d5a305d896184c585b8746a2...,193.478848,,keypress,GG5,3980,,rating-response,1,63621bc77e0f34c157708f73,2022-11-02 07:27:03+00:00,,12658,2,G1


In [4]:
res.columns

Index(['expt_id', 'group_id', 'network_error_repeat', 'participant_id',
       'participation_duration', 'response_correct', 'response_mode',
       'response_name', 'response_rt', 'response_target', 'response_type',
       'response_value', 'session_id', 'session_start_time',
       'stimuli_presented', 'trial_duration', 'trial_num', 'trial_template'],
      dtype='object')

The goals are:
1. To extract the name and create a table of `names - patterns - session` for each participant (which will be checked align with the rest of the data)
2. To create a table associating patterns with responses - one line for each participant (also that can be obtained by pivoting the table)
3. To save in the directory where the rest of the data is.
4. To update the participant data with participant roles

Results directory will also contain questionnaires data (`responses.csv, stimuli.csv, procedure.json, trial-template.json`)

Note: 
* Identifying information: 'GG1' (name),'GG2' (age), 'GG3' (gender)
* Free responses: 'KE2' (KTaNE), 'DCQ2' (discussion), 'GE3' (attention during conversation)
* Task relevant information: 'GG4' (game experience), 'GG5' (ktane knowledge), 'GG6' (participant role)
* The rest of the answers are labelled

In [5]:
res.session_start_time.unique()

array(['2022-11-02 07:27:03+00:00'], dtype=object)

In [59]:
id_questions = ['GG1','GG2','GG3']
session_questions = ['GG6']
detail_questions = ['KE2', 'DCQ2', 'GE3']

**Creating table with identifying information**

In [50]:
id_data_path = f'{ff5_folder}/ff5-id-data.csv'

id_data = res[res.response_name.isin(id_questions+session_questions)][['participant_id','response_name','response_value']]
id_data = id_data.pivot(index='participant_id', columns='response_name').droplevel(0, axis=1)
#id_data = id_data.pivot_table(values='response_value', index='participant_id', columns='response_name', aggfunc=lambda x: list(x)[0])
id_data.columns = ['name','age','gender', 'participant_role']
#id_data['name_pattern'] = id_data.name.apply(lambda x: ''.join([y[0] for y in x.capitalize().split()]))
# name
id_data

Unnamed: 0_level_0,name,age,gender,name_pattern
participant_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2f384e56290bb8c551cd86d5a305d896184c585b8746a2eb0139a49eb06362b7,Gabriel,23,autre,G
987b06e0cc69b7db6ddb2ef30c0af9ce57e9d35b2e05f0af0530118f36a31395,Philippe,15,M,P
f23940307cf59525aaca29e10732500c8776f86eda19682588d6571a356dd92f,Sebastián,29,M,S


The goal is to match the name to that of a participant in the participants table.

In [None]:
# getting participant name - pattern matches
participants = part.set_index('participant_name')['idx'].to_dict()
# prepping for querying
id_data.reset_index(drop=False).to_csv(id_data_path, index=False)

In [None]:
# querying - might need more than 1 exec
id_data = pd.read_csv(id_data_path, index_col=0)
id_data['name_pattern'] = id_data.name.apply(lambda x: difflib.get_close_matches(x, participants, 1))
id_data['name_pattern'] = id_data['name_pattern'].apply(lambda x: None if len(x) == 0 else x[0])

if id_data['name_pattern'].isna().sum() > 0:
    raise ValueError('Cannot match - Please check participants names in the table.')
else:
    id_data['name_pattern'] = id_data['name_pattern'].apply(lambda x: participants[x])

In [None]:
#### tests
#id_data['part_nb'] = ['manuel', 'ordinateur', 'ordinateur']
#id_data['session_datetime'] = [1,0,1]

Merge participant data with finding five:

In [None]:
part.merge(id_data.reset_index(drop=False), how='left', left_on='idx', right_on='name_pattern')
part.drop(columns=['name'], inplace=True)

In [52]:
def create_name_session(l:list):
    if len(l) != 2:
        #raise ValueError('Should be only two participants in the session. Currently:', len(l))
        return None
    return f"{l[0]}_{l[1]}"

part['participant_role'] = part['participant_role'].apply(lambda x: 1 if 'manuel' in x else 0)
# Create dictionary first, then apply to associate groups
d_name_sessions = part.sort_values(['session_date', 'participant_role'], ascending=True).groupby('session_date').agg({
    'idx': lambda x: create_name_session(list(x))
}).to_dict()['idx']
part['session_idx'] = part['session_date'].apply(lambda x: d_name_sessions[x])
part

Unnamed: 0_level_0,name,age,gender,name_pattern,part_nb,session_datetime,name_session
participant_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2f384e56290bb8c551cd86d5a305d896184c585b8746a2eb0139a49eb06362b7,Gabriel,23,autre,G,1,1,S_G
987b06e0cc69b7db6ddb2ef30c0af9ce57e9d35b2e05f0af0530118f36a31395,Philippe,15,M,P,0,0,
f23940307cf59525aaca29e10732500c8776f86eda19682588d6571a356dd92f,Sebastián,29,M,S,0,1,S_G


Update / Cleanup files

In [None]:
# update participants file
part.to_csv(participants_path, index=False)
# remove id_data (temporary file)
os.remove(id_data_path)

**Creating table with text zones information and anonymizing**

In [60]:
text_data = res[res.response_name.isin(detail_questions)][['participant_id','response_name','response_value']]
text_zones = {'KE2': 'KTaNE', 'DCQ2':'Discussion', 'GE3': 'General_Attention'}
text_data.response_name.apply(lambda x: text_zones[x])
text_data.head(3)

Unnamed: 0,participant_id,response_name,response_value
24,2f384e56290bb8c551cd86d5a305d896184c585b8746a2...,KE2,
30,2f384e56290bb8c551cd86d5a305d896184c585b8746a2...,DCQ2,
37,2f384e56290bb8c551cd86d5a305d896184c585b8746a2...,GE3,


In [None]:
text_data.to_csv(f'{ff5_folder}/q-expe-details.csv', index=False)

**Creating table with question information and anonymizing**

In [75]:
q_data = res[~res.response_name.isin(id_questions+detail_questions)][['participant_id','response_name','response_value']]
# still need participant role
#q_data = q_data.pivot(index='participant_id', columns='response_name').droplevel(0, axis=1)
questions = {
    'GG6': 'General|Game Role',
    'GG4': 'General|Gaming Activity', 
    'GG5': 'General|KTane Connaissance',
    'GT1': 'General Trust',
    'GE1': 'General Self Engagement',
    'GE2': 'General Partner Engagement',
    'DCQ1': 'Discussion CommunicationEfficiency', 
    'KCQ1': 'KTaNe CommunicationEfficiency', 
    'KE1': 'KTaNe TeamEfficiency'
}
def split_qr(x:str):
    topic = x.split('|')[0]
    x = x.replace(topic, questions[topic])
    # some question don't have | at first, but others still need name replacing
    topic = x.split('|')[0] 
    if len(topic.split()) > 1:
        topic = topic.split()
        subtopic = topic[-1]
        topic = ' '.join(topic[:-1])
    else:
        subtopic = topic
    question = x.split('|')[1]
    return topic, subtopic, question

q_data['response_name'] = q_data.response_name.apply(split_qr)
q_data['questionnaire_topic'] = q_data.response_name.apply(lambda x: x[0])
q_data['questionnaire_subtopic'] = q_data.response_name.apply(lambda x: x[1])
q_data['questionnaire_question'] = q_data.response_name.apply(lambda x: x[2])
q_data = q_data[['participant_id', 'questionnaire_topic','questionnaire_subtopic','questionnaire_question','response_value']]

In [77]:
q_data_p = q_data.pivot_table(values='response_value', index=['participant_id'], 
        columns=['questionnaire_topic','questionnaire_subtopic','questionnaire_question'], aggfunc=lambda x: list(x)[0])
q_data_p.head()    

questionnaire_topic,Discussion,Discussion,Discussion,Discussion,Discussion,General,General,General,General,General,...,KTaNe,KTaNe,KTaNe,KTaNe,KTaNe,KTaNe,KTaNe,KTaNe,KTaNe,KTaNe
questionnaire_subtopic,CommunicationEfficiency,CommunicationEfficiency,CommunicationEfficiency,CommunicationEfficiency,CommunicationEfficiency,General,General,Trust,Trust,Trust,...,CommunicationEfficiency,CommunicationEfficiency,CommunicationEfficiency,CommunicationEfficiency,CommunicationEfficiency,TeamEfficiency,TeamEfficiency,TeamEfficiency,TeamEfficiency,TeamEfficiency
questionnaire_question,claire,complete,dans les temps ?,efficace,fluide,Gaming Activity,KTane Connaissance,Il est mieux de se méfier des autres jusqu’à ce qu’ils gagnent ma confiance,Je crois les gens sur parole,Je me fais des amis facliement,...,claire,complete,dans les temps ?,efficace,fluide,Cette équipe a besoin de s'améliorer,Cette équipe a commis peu d'erreurs,Cette équipe a constamment fourni un résultat de qualité,Cette équipe a fourni un travail de qualité,Cette équipe n'a eu besoin d'aucun effort pour performer
participant_id,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3
2f384e56290bb8c551cd86d5a305d896184c585b8746a2eb0139a49eb06362b7,5,5,5,5,5,4,1,4,3,5,...,4,4,5,5,4,4,2,4,4,3
987b06e0cc69b7db6ddb2ef30c0af9ce57e9d35b2e05f0af0530118f36a31395,3,3,3,3,3,0,2,6,1,7,...,3,1,1,3,5,2,2,1,4,5
f23940307cf59525aaca29e10732500c8776f86eda19682588d6571a356dd92f,5,5,5,5,5,3,0,6,5,5,...,5,5,5,5,5,3,5,5,5,5
