# Imports

In [1]:
import warnings
warnings.filterwarnings("ignore")

import pandas as pd
import os
import shutil
import scipy.stats as stats
import scikit_posthocs as sp

Open all of the relevant data files/directories

Emily downloaded the final data and Qualtrics surveys on 4/16/24

Qualtrics export settings:

<img src="qualtrics_export_settings.png" alt="Qualtrics Export Settings" width="500px">

In [2]:
data_dir = 'C:/Users/Emily Jensen/OneDrive - UCB-O365/Drone Feedback Data/data/'

conditions_file = data_dir + 'conditions.csv'
conditions = pd.read_csv(conditions_file)

exit_survey_file = data_dir + 'exit_survey.csv'
exit_survey = pd.read_csv(exit_survey_file)

# Data Cleaning

## Set participant info data types and remove test data

In [3]:
# list each column and its data type
print(conditions.dtypes)

time         object
user_id      object
condition    object
dtype: object


In [3]:
# make time column a datetime object
conditions['time'] = pd.to_datetime(conditions['time'])

# make condition column a category
conditions['condition'] = conditions['condition'].astype('category')

print(conditions.dtypes)

time         datetime64[ns]
user_id              object
condition          category
dtype: object


In [4]:
# remove entries that happened before 11:15am on 2024-04-10
# this is when we launched the first experiment condition
conditions = conditions[conditions['time'] >= '2024-04-10 11:15:00']

# remove any entries where user_id includes 'emily'
conditions = conditions[~conditions['user_id'].str.contains('emily')]

print(len(conditions))

218


In [5]:
# looks like we retain just prolific ids
# this includes people who dropped out
print(conditions['user_id'].unique())

['5f84d512acba571a2bdda680' '5760a995f371330006a47cb3'
 '5eb4955dab41e130f24d08c9' '640156a5f2395bf80ca0c451'
 '6413599c09e145dd93c7aeac' '5f2dc46d6fa1250ee8a1a15d'
 '60fde5e29e585481874a9d16' '6526a14c3f9823cccedb7687'
 '62d1228fcd446896ce7c9ec0' '63e688adbc8788de6d596c93'
 '61036bbd791964fafe65236a' '63d79e5ecdcf4d0dbd646bf6'
 '5dccb82a76eab294aa4837ff' '5d34d17089232600011ade3a'
 '6113a1bd2592fc45dff695a2' '6108614d6f2cdb85bd396d6e'
 '56bae08f30d6b30005f8537a' '612e41fb25de530ea83df0bc'
 '5f888877136ad50208b48b47' '65cba99c92b362b45e414da7'
 '62c50bf9b7587ff5073cd7fb' '63b6dfb29118fec2d923f8c3'
 '654d0bd1f4ba143e0503a02f' '5f5fa5d24b9f98028f4090e7'
 '5dce3ccc32ccbf0cd54263db' '611291090e626fdfde536f38'
 '62e185484154c451882a8a3d' '60d76140337e60ae26f7fce9'
 '6333c1f5756acfabfde457ed' '60b7bd75af8c92afa748324f'
 '60255901704fd208ecdcf32b' '63626a68cf44b4184483c8e8'
 '614e664d1657383cbf801e52' '610803b9ce5f71efbd6e1722'
 '5700be5c8a49c7000e0c768f' '5d9b866189c03c001540eff1'
 '626966a1

In [6]:
qualtrics_columns = {
    'StartDate': 'start_date',
    'EndDate': 'end_date',
    'Duration (in seconds)': 'duration',
    'Finished': 'is_finished', # 1 is finished
    'Q9': 'prolific_id', # user inputted prolific id
    'Q1': 'gender', # mapping below
    'Q8': 'age', # number input
    'Q3': 'drone_experience', # mapping below
    'Q4': 'video_game_experience', # mapping below
    'Q5': 'feedback_helped', # likert mapping below
    'Q6': 'change_from_feedback', # open text response
    'Q7': 'comments' # open text response, optional
}

# rename columns
exit_survey = exit_survey.rename(columns=qualtrics_columns)

# remove extra columns and rows
exit_survey = exit_survey.drop(columns=['Status', 'Progress', 'RecordedDate', 'ResponseId', 'DistributionChannel', 'UserLanguage'])
exit_survey = exit_survey.drop([0,1])

# set data types
exit_survey['start_date'] = pd.to_datetime(exit_survey['start_date'])
exit_survey['end_date'] = pd.to_datetime(exit_survey['end_date'])
exit_survey['duration'] = pd.to_numeric(exit_survey['duration'])
exit_survey['is_finished'] = exit_survey['is_finished'].astype(bool)
exit_survey['age'] = pd.to_numeric(exit_survey['age'])

In [7]:
# remove entries that happened before 11:15am on 2024-04-10
# this is when we launched the first experiment condition
exit_survey = exit_survey[exit_survey['start_date'] >= '2024-04-10 11:15:00']

# remove any entries where prolific_id includes 'emily'
exit_survey = exit_survey[~exit_survey['prolific_id'].str.contains('emily')]

print(len(exit_survey))

148


In [8]:
# looks like we retain just prolific ids
# this does not include people who dropped out
print(exit_survey['prolific_id'].unique())

['5eb4955dab41e130f24d08c9' '640156a5f2395bf80ca0c451'
 '5760a995f371330006a47cb3' '5f2dc46d6fa1250ee8a1a15d'
 '60fde5e29e585481874a9d16' '62d1228fcd446896ce7c9ec0'
 '6526a14c3f9823cccedb7687' '63e688adbc8788de6d596c93'
 '5f888877136ad50208b48b47' '5d34d17089232600011ade3a'
 '62c50bf9b7587ff5073cd7fb' '63d79e5ecdcf4d0dbd646bf6'
 '60d76140337e60ae26f7fce9' '6333c1f5756acfabfde457ed'
 '610803b9ce5f71efbd6e1722' '60b7bd75af8c92afa748324f'
 '63626a68cf44b4184483c8e8' '63b6dfb29118fec2d923f8c3'
 '5dccb82a76eab294aa4837ff' '5efcaaee74e56207d16db007'
 '5dce3ccc32ccbf0cd54263db' '5d9b866189c03c001540eff1'
 '5f513339b2c26c338771f1d0' '614e664d1657383cbf801e52'
 '60255901704fd208ecdcf32b' '5de6eca0a91be366cbb5ffe2'
 '643b53b6aeaabf186d24e099' '5b75b2ebc5e14d00013669fc'
 '62d9e46fded6a6209a518499' '63ed0ed001893f9028e06e41'
 '5a6cc406d5d4cb0001d664aa' '629658baad2881aba974c6c3'
 '5c4684826a7dbc00017c0a87' '56bae08f30d6b30005f8537a'
 '65cba99c92b362b45e414da7' '5b824fb6cc06660001a302d4'
 '60580179

In [9]:
# process categorical data
exit_survey['gender'] = pd.Categorical(exit_survey['gender'])

drone_map = {'I have never flown a drone': "None", 
             'I have tried flying a drone a few times': "Some", 
             'I regularly fly drones': "Regularly", 
             'I am an expert or professional drone pilot': "Professional"}
exit_survey['drone_experience'] = exit_survey['drone_experience'].replace(drone_map)
exit_survey['drone_experience'] = pd.Categorical(exit_survey['drone_experience'], categories=['None', 'Some', 'Regularly', 'Professional'], ordered=True)

game_map = {'I do not play video games': "None", 
            'I play video games at least once per month': "Monthly", 
            'I play video games at least once per week': "Weekly", 
            'I play video games almost every day': "Daily"}
exit_survey['video_game_experience'] = exit_survey['video_game_experience'].replace(game_map)
exit_survey['video_game_experience'] = pd.Categorical(exit_survey['video_game_experience'], categories=['None', 'Monthly', 'Weekly', 'Daily'], ordered=True)

likert_map = {'1 - Strongly Disagree': "Strongly Disagree", 
              '2': "Disagree", 
              '3 - Neither agree nor disagree': "Neutral", 
              '4': "Agree", 
              '5 - Strongly Agree': "Strongly Agree"}
exit_survey['feedback_helped'] = exit_survey['feedback_helped'].replace(likert_map)
exit_survey['feedback_helped'] = pd.Categorical(exit_survey['feedback_helped'], categories=['Strongly Disagree', 'Disagree', 'Neutral', 'Agree', 'Strongly Agree'], ordered=True)

likert_map_collapsed = {'Strongly Disagree': "Disagree", 
                        'Disagree': "Disagree", 
                        'Neutral': "Neutral", 
                        'Agree': "Agree", 
                        'Strongly Agree': "Agree"}
exit_survey['feedback_helped_collapsed'] = exit_survey['feedback_helped'].replace(likert_map_collapsed)
exit_survey['feedback_helped_collapsed'] = pd.Categorical(exit_survey['feedback_helped_collapsed'], categories=['Disagree', 'Neutral', 'Agree'], ordered=True)

exit_survey.head(10)

Unnamed: 0,start_date,end_date,duration,is_finished,prolific_id,gender,age,drone_experience,video_game_experience,feedback_helped,change_from_feedback,comments,feedback_helped_collapsed
3,2024-04-10 11:35:59,2024-04-10 11:59:47,1428,True,5eb4955dab41e130f24d08c9,Woman,48,,Weekly,Strongly Agree,"I did change my technique, I started going up ...",,Agree
4,2024-04-10 11:39:03,2024-04-10 12:11:03,1919,True,640156a5f2395bf80ca0c451,Man,33,,Daily,Disagree,I tried to take into account what it was sayin...,,Disagree
5,2024-04-10 11:21:24,2024-04-10 12:15:07,3222,True,5760a995f371330006a47cb3,Woman,33,,Daily,Neutral,Yes. I began to tap buttons instead of holding...,,Neutral
6,2024-04-10 11:58:31,2024-04-10 12:18:50,1218,True,5f2dc46d6fa1250ee8a1a15d,Woman,37,,,Neutral,It was easier for me to adjust based on my con...,,Neutral
7,2024-04-10 11:59:20,2024-04-10 12:21:20,1320,True,60fde5e29e585481874a9d16,Non-binary,41,,Monthly,Agree,i figured out how to slow the drone down befor...,,Agree
8,2024-04-10 12:03:02,2024-04-10 12:24:52,1310,True,62d1228fcd446896ce7c9ec0,Man,31,,Weekly,Neutral,No,no,Neutral
9,2024-04-10 12:00:10,2024-04-10 12:34:04,2034,True,6526a14c3f9823cccedb7687,Woman,46,,,Strongly Agree,Showing where I overcompensated and suggesting...,"This was fun, thanks. If this will actually be...",Agree
10,2024-04-10 12:07:18,2024-04-10 12:38:48,1889,True,63e688adbc8788de6d596c93,Woman,45,,Monthly,Agree,The feedback helped me know what to work on to...,,Agree
11,2024-04-10 12:20:59,2024-04-10 12:43:45,1366,True,5f888877136ad50208b48b47,Man,23,,Monthly,Agree,Feedbacks were very similar,,Agree
12,2024-04-10 12:12:43,2024-04-10 12:44:32,1909,True,5d34d17089232600011ade3a,Man,35,,Weekly,Strongly Agree,"Towards the end, i started getting too confide...",No technical issues with this survey,Agree


In [10]:
# merge conditions and exit_survey on user_id and prolific_id
# only keep rows that have a match in both dataframes
merged = pd.merge(conditions, exit_survey, left_on='user_id', right_on='prolific_id', how='inner')

# drop extra user_id column
merged = merged.drop(columns=['user_id'])

print(len(merged)) # why are there more rows than exit_survey?

168


In [11]:
# participants who show up multiple times in the merged dataframe
# this is because they restarted the experiment and have multiple entries in the conditions file
duplicate_ids = merged[merged.duplicated(subset='prolific_id', keep=False)]['prolific_id'].unique()
duplicate_ids.sort()
print(duplicate_ids)

['5bd49bcc25db7b0001794063' '5c90094e71f3100016181ea9'
 '5ef9f528c7ae587afa25fe9b' '60fcc292d13ae9614d4a77a7'
 '6105c41aa4fe602501d5a8cc' '610796f1301fccdca446af57'
 '629658baad2881aba974c6c3' '63026a8fd8429b224cd2a134'
 '631f1b608af38f654d2a3b1f' '637d4196c70a66e28ecede34'
 '6388b6c86e47b08e1eded1fd' '63ba10de73415d047e1d6731'
 '6400dc9f84ed19aeedc1a2cc' '643c6175d46d41e74033994f'
 '652ab7948cb59f4c50c7972a' '6596a5cad60ef105b6c18897'
 '65cba99c92b362b45e414da7']


In [12]:
# drop duplicate entries with duplicate ids
merged = merged.drop_duplicates(subset='prolific_id', keep='first')
print(len(merged)) # better number!

147


In [13]:
# save merged dataframe to file
participant_file = data_dir + 'participant_info.csv'
merged.to_csv(participant_file, index=False)

Notes on participants that seem to have restarted the experiment:
- `5bd49bcc25db7b0001794063` restarted after trial 1 - KEEP
- `5c90094e71f3100016181ea9` restarted after trial 3 and again after trial 1 - KEEP
- `5ef9f528c7ae587afa25fe9b` restarted after trial 6 - REMOVE
- `60fcc292d13ae9614d4a77a7` restarted after trial 3 - KEEP
- `6105c41aa4fe602501d5a8cc` restarted after trial 8 - REMOVE
- `610796f1301fccdca446af57` restarted after trial 7 - REMOVE
- `629658baad2881aba974c6c3` restarted after trial 2 - KEEP
- `63026a8fd8429b224cd2a134` restarted before completing trial 1 - KEEP
- `631f1b608af38f654d2a3b1f` restarted after trial 15 - REMOVE
- `637d4196c70a66e28ecede34` restarted before completing trial 1 - KEEP
- `6388b6c86e47b08e1eded1fd` restarted before completing trial 1 - KEEP
- `63ba10de73415d047e1d6731` restarted after trial 5 and again after trial 1 - REMOVE
- `643c6175d46d41e74033994f` restarted after trial 8 and again after trial 1 - REMOVE
- `652ab7948cb59f4c50c7972a` restarted after trial 1 - KEEP
- `6596a5cad60ef105b6c18897` restarted after trial 1 - KEEP
- `65cba99c92b362b45e414da7` restarted after trial 17 - REMOVE

We will keep the participants who complete less than 5 trials before completing a complete run.

Participant `6400dc9f84ed19aeedc1a2cc` seems to have filled out the qualtrics survey twice. The numerical data are the same and the free response questions have very similar content so I feel fine only keeping the first entry.

In [14]:
# list of participants to remove from all analysis
# we will retain participants that restarted within the first 5 trials
remove_ids = ['5ef9f528c7ae587afa25fe9b', '6105c41aa4fe602501d5a8cc', 
              '610796f1301fccdca446af57', '631f1b608af38f654d2a3b1f', 
              '63ba10de73415d047e1d6731', '643c6175d46d41e74033994f', 
              '65cba99c92b362b45e414da7']

In [15]:
# remove participants from merged dataframe
merged_filtered = merged[~merged['prolific_id'].isin(remove_ids)]
print(len(merged_filtered))

140


In [33]:
# calculate total time spent on the experiment
def calculate_total_time(user_id):
    log = pd.read_csv(data_dir + user_id + '/log.txt', sep=': ', names=['time','message'], parse_dates=['time'])
    start_time = log[log['message'].str.contains('Received user ID')].iloc[-1]['time'] # choose the last one in case they restarted
    end_time = log[log['message'].str.contains('qualtrics')].iloc[-1]['time'] # choose last one just to be consistent
    return end_time - start_time

In [35]:
merged_filtered['total_time'] = merged_filtered['prolific_id'].apply(calculate_total_time)

In [36]:
filtered_participant_file = data_dir + 'participant_info_filtered.csv'
merged_filtered.to_csv(filtered_participant_file, index=False)

# Distributions of demographic data

This is after filtering out participants that restarted the experiment

In [37]:
# number of participants in each condition
print(merged_filtered['condition'].value_counts())

condition
score    47
text     47
full     46
demo      0
Name: count, dtype: int64


In [38]:
# gender distribution
print(merged_filtered['gender'].value_counts())
print((merged_filtered['gender'].value_counts()/len(merged_filtered)).round(2))


gender
Woman         71
Man           62
Non-binary     7
Name: count, dtype: int64
gender
Woman         0.51
Man           0.44
Non-binary    0.05
Name: count, dtype: float64


In [39]:
# age distribution
print(merged_filtered['age'].describe().round(1))

count    140.0
mean      38.3
std       12.2
min       18.0
25%       30.0
50%       35.5
75%       47.0
max       74.0
Name: age, dtype: float64


In [40]:
# previous experience with flying drones
print(merged_filtered['drone_experience'].value_counts().sort_index())
print((merged_filtered['drone_experience'].value_counts().sort_index()/len(merged_filtered)).round(2))

drone_experience
None            114
Some             20
Regularly         5
Professional      1
Name: count, dtype: int64
drone_experience
None            0.81
Some            0.14
Regularly       0.04
Professional    0.01
Name: count, dtype: float64


In [41]:
# video game experience
print(merged_filtered['video_game_experience'].value_counts().sort_index())
print((merged_filtered['video_game_experience'].value_counts().sort_index()/len(merged_filtered)).round(2))

video_game_experience
None       24
Monthly    39
Weekly     36
Daily      41
Name: count, dtype: int64
video_game_experience
None       0.17
Monthly    0.28
Weekly     0.26
Daily      0.29
Name: count, dtype: float64


In [42]:
# overall view of feedback perception
print(merged_filtered['feedback_helped'].value_counts().sort_index())
print((merged_filtered['feedback_helped'].value_counts().sort_index()/len(merged_filtered)).round(2))

feedback_helped
Strongly Disagree    22
Disagree             17
Neutral              22
Agree                59
Strongly Agree       20
Name: count, dtype: int64
feedback_helped
Strongly Disagree    0.16
Disagree             0.12
Neutral              0.16
Agree                0.42
Strongly Agree       0.14
Name: count, dtype: float64


In [43]:
# overall view of feedback perception (collapsed)
print(merged_filtered['feedback_helped_collapsed'].value_counts().sort_index())
print((merged_filtered['feedback_helped_collapsed'].value_counts().sort_index()/len(merged_filtered)).round(2))

feedback_helped_collapsed
Disagree    39
Neutral     22
Agree       79
Name: count, dtype: int64
feedback_helped_collapsed
Disagree    0.28
Neutral     0.16
Agree       0.56
Name: count, dtype: float64


In [44]:
# time taken to complete the trials
print(merged_filtered['total_time'].describe())

count                          140
mean        0 days 00:29:58.052765
std      0 days 00:10:37.740892576
min         0 days 00:13:25.435327
25%      0 days 00:22:26.670653750
50%      0 days 00:27:20.474471500
75%      0 days 00:35:18.946242250
max         0 days 01:26:32.209476
Name: total_time, dtype: object


# Research Themes

## Perception of Feedback

### Combine all survey responses

In [28]:
# combine survey responses from trials into one dataframe
responses = []

for participant in merged_filtered['prolific_id']:
    surveys = pd.read_csv(data_dir + participant + '/survey_responses.csv')
    surveys['prolific_id'] = participant
    responses.append(surveys)

all_responses = pd.concat(responses, ignore_index=True)
all_responses.head()

Unnamed: 0,trial,motivation,manageable,actionable,timely,reflection,outcome,prolific_id
0,1,3,3,4,3,3,Crash,5760a995f371330006a47cb3
1,2,3,3,4,3,4,Crash,5760a995f371330006a47cb3
2,3,3,3,4,3,3,Unsafe,5760a995f371330006a47cb3
3,4,3,3,4,3,3,Unsafe,5760a995f371330006a47cb3
4,5,3,3,4,3,3,Crash,5760a995f371330006a47cb3


In [29]:
# make survey responses categorical
survey_likert_map = {'1': "Strongly Disagree", 
                     '2': "Disagree", 
                     '3': "Neutral", 
                     '4': "Agree", 
                     '5': "Strongly Agree"}
likert_categories = ['Strongly Disagree', 'Disagree', 'Neutral', 'Agree', 'Strongly Agree']


all_responses['motivation'] = all_responses['motivation'].apply(str)
all_responses['motivation'] = all_responses['motivation'].replace(survey_likert_map)
all_responses['motivation'] = pd.Categorical(all_responses['motivation'], categories=likert_categories, ordered=True)
all_responses['motivation_collapsed'] = all_responses['motivation'].replace(likert_map_collapsed)
all_responses['motivation_collapsed'] = pd.Categorical(all_responses['motivation_collapsed'], categories=['Disagree', 'Neutral', 'Agree'], ordered=True)

all_responses['actionable'] = all_responses['actionable'].apply(str)
all_responses['actionable'] = all_responses['actionable'].replace(survey_likert_map)
all_responses['actionable'] = pd.Categorical(all_responses['actionable'], categories=likert_categories, ordered=True)
all_responses['actionable_collapsed'] = all_responses['actionable'].replace(likert_map_collapsed)
all_responses['actionable_collapsed'] = pd.Categorical(all_responses['actionable_collapsed'], categories=['Disagree', 'Neutral', 'Agree'], ordered=True)

all_responses['reflection'] = all_responses['reflection'].apply(str)
all_responses['reflection'] = all_responses['reflection'].replace(survey_likert_map)
all_responses['reflection'] = pd.Categorical(all_responses['reflection'], categories=likert_categories, ordered=True)
all_responses['reflection_collapsed'] = all_responses['reflection'].replace(likert_map_collapsed)
all_responses['reflection_collapsed'] = pd.Categorical(all_responses['reflection_collapsed'], categories=['Disagree', 'Neutral', 'Agree'], ordered=True)

manageable_likert_map = {'1': "Much too little",
                         '2': "Too little",
                         '3': "Just right",
                         '4': "Too much",
                         '5': "Much too much"}
manageable_categories = ['Much too little', 'Too little', 'Just right', 'Too much', 'Much too much']
manageable_likert_map_collapsed = {'Much too little': "Too little",
                                   'Too little': "Too little",
                                   'Just right': "Just right",
                                   'Too much': "Too much",
                                   'Much too much': "Too much"}

all_responses['manageable'] = all_responses['manageable'].apply(str)
all_responses['manageable'] = all_responses['manageable'].replace(manageable_likert_map)
all_responses['manageable'] = pd.Categorical(all_responses['manageable'], categories=manageable_categories, ordered=True)
all_responses['manageable_collapsed'] = all_responses['manageable'].replace(manageable_likert_map_collapsed)
all_responses['manageable_collapsed'] = pd.Categorical(all_responses['manageable_collapsed'], categories=['Too little', 'Just right', 'Too much'], ordered=True)

timely_likert_map = {'1': "Much too infrequent",
                     '2': "Too infrequent",
                     '3': "Just enough",
                     '4': "Too often",
                     '5': "Much too often"}
timely_categories = ['Much too infrequent', 'Too infrequent', 'Just enough', 'Too often', 'Much too often']
timely_likert_map_collapsed = {'Much too infrequent': "Too infrequent",
                               'Too infrequent': "Too infrequent",
                               'Just enough': "Just enough",
                               'Too often': "Too often",
                               'Much too often': "Too often"}

all_responses['timely'] = all_responses['timely'].apply(str)
all_responses['timely'] = all_responses['timely'].replace(timely_likert_map)
all_responses['timely'] = pd.Categorical(all_responses['timely'], categories=timely_categories, ordered=True)
all_responses['timely_collapsed'] = all_responses['timely'].replace(timely_likert_map_collapsed)
all_responses['timely_collapsed'] = pd.Categorical(all_responses['timely_collapsed'], categories=['Too infrequent', 'Just enough', 'Too often'], ordered=True)

all_responses['outcome'] = pd.Categorical(all_responses['outcome'], categories=['Crash', 'Unsafe', 'Safe'], ordered=True)

all_responses.head()

Unnamed: 0,trial,motivation,manageable,actionable,timely,reflection,outcome,prolific_id,motivation_collapsed,actionable_collapsed,reflection_collapsed,manageable_collapsed,timely_collapsed
0,1,Neutral,Just right,Agree,Just enough,Neutral,Crash,5760a995f371330006a47cb3,Neutral,Agree,Neutral,Just right,Just enough
1,2,Neutral,Just right,Agree,Just enough,Agree,Crash,5760a995f371330006a47cb3,Neutral,Agree,Agree,Just right,Just enough
2,3,Neutral,Just right,Agree,Just enough,Neutral,Unsafe,5760a995f371330006a47cb3,Neutral,Agree,Neutral,Just right,Just enough
3,4,Neutral,Just right,Agree,Just enough,Neutral,Unsafe,5760a995f371330006a47cb3,Neutral,Agree,Neutral,Just right,Just enough
4,5,Neutral,Just right,Agree,Just enough,Neutral,Crash,5760a995f371330006a47cb3,Neutral,Agree,Neutral,Just right,Just enough


In [26]:
print(len(all_responses))

2800


In [30]:
all_responses.dtypes

trial                      int64
motivation              category
manageable              category
actionable              category
timely                  category
reflection              category
outcome                 category
prolific_id               object
motivation_collapsed    category
actionable_collapsed    category
reflection_collapsed    category
manageable_collapsed    category
timely_collapsed        category
dtype: object

In [31]:
# add column for feedback condition
all_responses['condition'] = all_responses['prolific_id'].map(merged_filtered.set_index('prolific_id')['condition'])
all_responses['condition'] = pd.Categorical(all_responses['condition'], categories=['score', 'text', 'full'], ordered=True)
print(all_responses.head())
print(all_responses.tail())

   trial motivation  manageable actionable       timely reflection outcome  \
0      1    Neutral  Just right      Agree  Just enough    Neutral   Crash   
1      2    Neutral  Just right      Agree  Just enough      Agree   Crash   
2      3    Neutral  Just right      Agree  Just enough    Neutral  Unsafe   
3      4    Neutral  Just right      Agree  Just enough    Neutral  Unsafe   
4      5    Neutral  Just right      Agree  Just enough    Neutral   Crash   

                prolific_id motivation_collapsed actionable_collapsed  \
0  5760a995f371330006a47cb3              Neutral                Agree   
1  5760a995f371330006a47cb3              Neutral                Agree   
2  5760a995f371330006a47cb3              Neutral                Agree   
3  5760a995f371330006a47cb3              Neutral                Agree   
4  5760a995f371330006a47cb3              Neutral                Agree   

  reflection_collapsed manageable_collapsed timely_collapsed condition  
0              Neut

In [45]:
def calculate_trial_time(user_id, trial_num):
    log = pd.read_csv(data_dir + user_id + '/log.txt', sep=': ', names=['time','message'], parse_dates=['time'])
    trial_start_idx = log[log['message'].str.contains(f'Trial {trial_num} started')].iloc[-1].name
    start_time = log.loc[trial_start_idx]['time']
    end_time = log.loc[trial_start_idx+1]['time'] # next line is when trajectory data is sent to the server
    return end_time - start_time

In [47]:
all_responses['trial_time'] = all_responses.apply(lambda x: calculate_trial_time(x['prolific_id'], x['trial']), axis=1)

In [48]:
all_responses['trial_time'].describe()

count                         2800
mean     0 days 00:00:29.385628283
std      0 days 00:00:33.595747335
min       -1 days +23:59:59.999928
25%      0 days 00:00:16.338158750
50%      0 days 00:00:24.289769500
75%         0 days 00:00:36.310053
max         0 days 00:17:52.321791
Name: trial_time, dtype: object

In [49]:
def calculate_feedback_time(user_id, trial_num):
    log = pd.read_csv(data_dir + user_id + '/log.txt', sep=': ', names=['time','message'], parse_dates=['time'])
    trial_start_idx = log[log['message'].str.contains(f'Trial {trial_num} started')].iloc[-1].name
    start_time = log.loc[trial_start_idx+3]['time'] # saved feedback to file
    end_time = log.loc[trial_start_idx+4]['time'] # received survey responses
    return end_time - start_time

In [50]:
all_responses['feedback_time'] = all_responses.apply(lambda x: calculate_feedback_time(x['prolific_id'], x['trial']), axis=1)

In [51]:
all_responses['feedback_time'].describe()

count                         2800
mean     0 days 00:00:34.071604695
std      0 days 00:01:31.318088845
min         0 days 00:00:00.001766
25%         0 days 00:00:14.383753
50%      0 days 00:00:24.099650500
75%      0 days 00:00:36.450295500
max         0 days 01:07:44.941621
Name: feedback_time, dtype: object

In [52]:
survey_file = data_dir + 'survey_responses_combined.csv'
all_responses.to_csv(survey_file, index=False)

In [55]:
# calculate average trial and feedback times for each participant
avg_trial_time = all_responses.groupby('prolific_id')['trial_time'].mean()
avg_feedback_time = all_responses.groupby('prolific_id')['feedback_time'].mean()

# merge with merged_filtered dataframe
merged_filtered = pd.merge(merged_filtered, avg_trial_time, left_on='prolific_id', right_index=True)
merged_filtered = pd.merge(merged_filtered, avg_feedback_time, left_on='prolific_id', right_index=True)

merged_filtered.head()

Unnamed: 0,time,condition,start_date,end_date,duration,is_finished,prolific_id,gender,age,drone_experience,video_game_experience,feedback_helped,change_from_feedback,comments,feedback_helped_collapsed,total_time,trial_time_x,trial_time_y,feedback_time
0,2024-04-10 11:21:44.981234,full,2024-04-10 11:21:24,2024-04-10 12:15:07,3222,True,5760a995f371330006a47cb3,Woman,33,,Daily,Neutral,Yes. I began to tap buttons instead of holding...,,Neutral,0 days 00:51:22.367016,0 days 00:00:57.915392900,0 days 00:00:57.915392900,0 days 00:01:11.068398650
1,2024-04-10 11:36:12.740984,full,2024-04-10 11:35:59,2024-04-10 11:59:47,1428,True,5eb4955dab41e130f24d08c9,Woman,48,,Weekly,Strongly Agree,"I did change my technique, I started going up ...",,Agree,0 days 00:22:30.360526,0 days 00:00:31.817062550,0 days 00:00:31.817062550,0 days 00:00:14.629016350
2,2024-04-10 11:39:27.099011,full,2024-04-10 11:39:03,2024-04-10 12:11:03,1919,True,640156a5f2395bf80ca0c451,Man,33,,Daily,Disagree,I tried to take into account what it was sayin...,,Disagree,0 days 00:29:46.060749,0 days 00:00:24.128714850,0 days 00:00:24.128714850,0 days 00:00:28.866800950
3,2024-04-10 11:58:52.113563,full,2024-04-10 11:58:31,2024-04-10 12:18:50,1218,True,5f2dc46d6fa1250ee8a1a15d,Woman,37,,,Neutral,It was easier for me to adjust based on my con...,,Neutral,0 days 00:19:21.925813,0 days 00:00:21.398428350,0 days 00:00:21.398428350,0 days 00:00:20.921589200
4,2024-04-10 11:59:59.500492,full,2024-04-10 11:59:20,2024-04-10 12:21:20,1320,True,60fde5e29e585481874a9d16,Non-binary,41,,Monthly,Agree,i figured out how to slow the drone down befor...,,Agree,0 days 00:20:45.190436,0 days 00:00:22.604352150,0 days 00:00:22.604352150,0 days 00:00:00.862269400


In [56]:
merged_filtered.to_csv(filtered_participant_file, index=False)

### What is associated with overall feedback helpfulness rating?

### Aggregate trial survey responses across conditions

In [30]:
# choose the most common answer per participant per column
# this is so each participant only appears once
most_common = all_responses.groupby('prolific_id').agg(lambda x: x.value_counts().idxmax())
most_common.head()

Unnamed: 0_level_0,trial,motivation,manageable,actionable,timely,reflection,outcome,condition
prolific_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
569b36e3af9ad70006640dd9,1,Strongly Disagree,Much too little,Strongly Disagree,Just enough,Strongly Disagree,Crash,score
56bae08f30d6b30005f8537a,1,Strongly Disagree,Much too much,Disagree,Much too often,Strongly Disagree,Safe,full
5760a995f371330006a47cb3,1,Neutral,Just right,Agree,Just enough,Neutral,Unsafe,full
5834a80a106ee1000120abd3,1,Strongly Agree,Just right,Strongly Agree,Just enough,Strongly Agree,Crash,text
587674406e2f8d00015c6148,1,Agree,Just right,Agree,Just enough,Agree,Safe,score


In [31]:
most_common.shape

(140, 8)

**MOTIVATION**

In [32]:
motivation_responses = most_common.groupby('condition')['motivation'].value_counts(normalize=True).unstack().round(2).sort_index()
motivation_responses

motivation,Strongly Disagree,Disagree,Neutral,Agree,Strongly Agree
condition,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
score,0.02,0.13,0.32,0.32,0.21
text,0.13,0.04,0.17,0.43,0.23
full,0.04,0.09,0.28,0.41,0.17


In [33]:
motivation_freq = most_common.groupby('condition')['motivation'].value_counts(normalize=False).unstack().round(2).sort_index()
motivation_freq # need to do a non-parametric test

motivation,Strongly Disagree,Disagree,Neutral,Agree,Strongly Agree
condition,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
score,1,6,15,15,10
text,6,2,8,20,11
full,2,4,13,19,8


In [34]:
motivation_grouped = most_common.groupby('condition')['motivation'].apply(list)
motivation_grouped

condition
score    [Strongly Disagree, Agree, Disagree, Agree, St...
text     [Strongly Agree, Strongly Disagree, Agree, Agr...
full     [Strongly Disagree, Neutral, Neutral, Neutral,...
Name: motivation, dtype: object

In [35]:
data_for_test = [group for group in motivation_grouped]
stats.kruskal(*data_for_test)

KruskalResult(statistic=0.7077953154939997, pvalue=0.7019468024966999)

**MANAGEABLE**

In [36]:
manageable_responses = most_common.groupby('condition')['manageable'].value_counts(normalize=True).unstack().round(2).sort_index()
manageable_responses

manageable,Much too little,Too little,Just right,Too much,Much too much
condition,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
score,0.06,0.21,0.57,0.06,0.09
text,0.06,0.04,0.57,0.19,0.13
full,0.02,0.09,0.63,0.15,0.11


In [37]:
manageable_freq = most_common.groupby('condition')['manageable'].value_counts(normalize=False).unstack().round(2).sort_index()
manageable_freq

manageable,Much too little,Too little,Just right,Too much,Much too much
condition,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
score,3,10,27,3,4
text,3,2,27,9,6
full,1,4,29,7,5


In [38]:
manageable_grouped = most_common.groupby('condition')['manageable'].apply(list)
manageable_grouped

condition
score    [Much too little, Just right, Too little, Too ...
text     [Just right, Much too much, Just right, Just r...
full     [Much too much, Just right, Just right, Just r...
Name: manageable, dtype: object

In [39]:
data_for_test = [group for group in manageable_grouped]
stats.kruskal(*data_for_test)

KruskalResult(statistic=0.19344660157345597, pvalue=0.9078071609117324)

**ACTIONABLE**

In [40]:
actionable_responses = most_common.groupby('condition')['actionable'].value_counts(normalize=True).unstack().round(2).sort_index()
actionable_responses

actionable,Strongly Disagree,Disagree,Neutral,Agree,Strongly Agree
condition,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
score,0.15,0.26,0.28,0.17,0.15
text,0.06,0.09,0.15,0.49,0.21
full,0.02,0.09,0.24,0.52,0.13


In [41]:
actionable_freq = most_common.groupby('condition')['actionable'].value_counts(normalize=False).unstack().round(2).sort_index()
actionable_freq

actionable,Strongly Disagree,Disagree,Neutral,Agree,Strongly Agree
condition,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
score,7,12,13,8,7
text,3,4,7,23,10
full,1,4,11,24,6


In [42]:
actionable_grouped = most_common.groupby('condition')['actionable'].apply(list)
actionable_grouped

condition
score    [Strongly Disagree, Agree, Disagree, Strongly ...
text     [Strongly Agree, Strongly Disagree, Neutral, N...
full     [Disagree, Agree, Neutral, Neutral, Agree, Agr...
Name: actionable, dtype: object

In [43]:
data_for_test = [group for group in actionable_grouped]
stats.kruskal(*data_for_test)

KruskalResult(statistic=9.531209073870448, pvalue=0.00851773739875785)

In [44]:
# do Dunn's Test to check which groups are different from each other
sp.posthoc_dunn(most_common, val_col='actionable', group_col='condition', p_adjust='bonferroni')

Unnamed: 0,score,text,full
score,1.0,0.002544,0.0126
text,0.002544,1.0,1.0
full,0.0126,1.0,1.0


**TIMELY**

In [45]:
timely_responses = most_common.groupby('condition')['timely'].value_counts(normalize=True).unstack().round(2).sort_index()
timely_responses

timely,Much too infrequent,Too infrequent,Just enough,Too often,Much too often
condition,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
score,0.02,0.06,0.62,0.19,0.11
text,0.02,0.0,0.6,0.17,0.21
full,0.0,0.0,0.74,0.13,0.13


In [46]:
timely_freq = most_common.groupby('condition')['timely'].value_counts(normalize=False).unstack().round(2).sort_index()
timely_freq

timely,Much too infrequent,Too infrequent,Just enough,Too often,Much too often
condition,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
score,1,3,29,9,5
text,1,0,28,8,10
full,0,0,34,6,6


In [47]:
timely_grouped = most_common.groupby('condition')['timely'].apply(list)
timely_grouped

condition
score    [Just enough, Just enough, Too often, Just eno...
text     [Just enough, Much too often, Just enough, Jus...
full     [Much too often, Just enough, Just enough, Muc...
Name: timely, dtype: object

In [48]:
data_for_test = [group for group in timely_grouped]
stats.kruskal(*data_for_test)

KruskalResult(statistic=2.1658535335540203, pvalue=0.33860306180640587)

**REFLECTION**

In [49]:
reflection_responses = most_common.groupby('condition')['reflection'].value_counts(normalize=True).unstack().round(2).sort_index()
reflection_responses

reflection,Strongly Disagree,Disagree,Neutral,Agree,Strongly Agree
condition,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
score,0.09,0.11,0.17,0.38,0.26
text,0.06,0.06,0.21,0.43,0.23
full,0.04,0.04,0.22,0.46,0.24


In [50]:
reflection_freq = most_common.groupby('condition')['reflection'].value_counts(normalize=False).unstack().round(2).sort_index()
reflection_freq

reflection,Strongly Disagree,Disagree,Neutral,Agree,Strongly Agree
condition,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
score,4,5,8,18,12
text,3,3,10,20,11
full,2,2,10,21,11


In [51]:
reflection_grouped = most_common.groupby('condition')['reflection'].apply(list)
reflection_grouped

condition
score    [Strongly Disagree, Agree, Disagree, Agree, St...
text     [Strongly Agree, Disagree, Agree, Agree, Agree...
full     [Strongly Disagree, Neutral, Neutral, Neutral,...
Name: reflection, dtype: object

In [52]:
data_for_test = [group for group in reflection_grouped]
stats.kruskal(*data_for_test)

KruskalResult(statistic=0.4678752710323547, pvalue=0.7914111703057143)

## Did they think the feedback helped?

In [57]:
overall_responses = merged.groupby('condition')['feedback_helped'].value_counts(normalize=True).unstack().round(2).sort_index()
overall_responses.drop(index=['demo'])

feedback_helped,Strongly Disagree,Disagree,Neutral,Agree,Strongly Agree
condition,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
full,0.09,0.13,0.23,0.43,0.13
score,0.22,0.14,0.18,0.28,0.18
text,0.16,0.08,0.06,0.54,0.16


In [58]:
overall_freq = merged.groupby('condition')['feedback_helped'].value_counts(normalize=False).unstack().round(2).sort_index()
overall_freq.drop(index=['demo'])

feedback_helped,Strongly Disagree,Disagree,Neutral,Agree,Strongly Agree
condition,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
full,4,6,11,20,6
score,11,7,9,14,9
text,8,4,3,27,8


In [61]:
overall_grouped = merged.groupby('condition')['feedback_helped'].apply(list)
overall_grouped = overall_grouped.drop(index=['demo'])
overall_grouped

condition
full     [Neutral, Strongly Agree, Disagree, Neutral, A...
score    [Disagree, Neutral, Disagree, Strongly Agree, ...
text     [Agree, Agree, Agree, Strongly Disagree, Agree...
Name: feedback_helped, dtype: object

In [62]:
data_for_test = [group for group in overall_grouped]
stats.kruskal(*data_for_test)

KruskalResult(statistic=5.501401384070903, pvalue=0.0638830831531614)

## Which feedback modality leads to higher performance improvements?

# Start looking at trajectory data

Grab all the images

In [None]:
# make images folder if it doesn't exist
if not os.path.exists(data_dir + 'images'):
    os.makedirs(data_dir + 'images')
    os.makedirs(data_dir + 'images/raw')
    os.makedirs(data_dir + 'images/processed')

# for each participant, save image from each trial
user_dirs = os.listdir(data_dir)
for user in conditions['user_id']:
    print(user)
    if user not in user_dirs:
        continue
    for trials in os.listdir(data_dir + user):
        if not os.path.isdir(data_dir + user + '/' + trials):
            continue
        raw_image = data_dir + user + '/' + trials + '/trajectory.png'
        processed_image = data_dir + user + '/' + trials + '/trajectory_with_feedback.png'
        # copy images to images folder
        if not os.path.exists(raw_image) or not os.path.exists(processed_image):
            continue
        shutil.copy(raw_image, data_dir + 'images/raw/' + user + '_' + trials + '_raw.png')
        shutil.copy(processed_image, data_dir + 'images/processed/' + user + '_' + trials + '_processed.png')