# Run this on startup

In [1]:
import pandas as pd

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


Open all of the relevant data files/directories

In [9]:
data_dir = '../static/data/'

# Data recorded by the webpage.
conditions_file = f'{data_dir}conditions.csv'
conditions = pd.read_csv(conditions_file)

# Qualtrics exit survey export. The rows labelled 0 and 1 are extra
# Qualtrics rows, so they are dropped right after reading.
exit_survey_file = f'{data_dir}exit_survey.csv'
exit_survey = pd.read_csv(exit_survey_file).drop(index=[0, 1])

Define a mapping from human-readable names to the Qualtrics question identifiers (column names) used in the exit survey export. Only the columns relevant to this analysis are defined here.

In [32]:
# Human-readable name -> column name in the Qualtrics export.
qualtrics_columns = dict(
    start_date='StartDate',
    end_date='EndDate',
    duration='Duration (in seconds)',
    finished='Finished',  # 1 is finished
    prolific_id='Q9',  # user inputted prolific id
    gender='Q1',  # coded, see gender_map
    age='Q8',  # number input
    drone_experience='Q3',  # coded, see drone_map
    video_game_experience='Q4',  # coded, see game_map
    feedback_helped='Q5',  # likert, see likert_map
    change_from_feedback='Q6',  # open text response
    comments='Q7',  # open text response, optional
)

# Answer codes are the strings '1', '2', ... in the order the labels are
# listed below, so each map is built by numbering its labels from 1.
gender_map = {
    str(code): label
    for code, label in enumerate(
        ["Man", "Woman", "Non-binary", "Prefer not to say"], start=1
    )
}
drone_map = {
    str(code): label
    for code, label in enumerate(
        ["None", "Some", "Regularly", "Professional"], start=1
    )
}
game_map = {
    str(code): label
    for code, label in enumerate(
        ["None", "Monthly", "Weekly", "Daily"], start=1
    )
}
likert_map = {
    str(code): label
    for code, label in enumerate(
        ["Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree"],
        start=1,
    )
}

# Check participant data

## Match data sources

Do participants match between data from webpage and data from exit survey?

In [15]:
web_participants = conditions['user_id']
exit_survey_participants = exit_survey[qualtrics_columns['prolific_id']]

# NOTE: `value in series` tests membership in the Series *index* (row labels),
# not in its values, so it cannot be used to compare participant IDs.
# `Series.isin` compares against the values of the other Series instead.
web_not_in_exit_survey = web_participants[
    ~web_participants.isin(exit_survey_participants)
]
exit_survey_not_in_web = exit_survey_participants[
    ~exit_survey_participants.isin(web_participants)
]

# check if all web participants are in exit survey
print("Web participants not in exit survey:")
for user_id in web_not_in_exit_survey:
    # f-string so a missing or non-string ID is printed instead of raising TypeError
    print(f"\t{user_id}")

# check if all exit survey participants are in web
print("Exit survey participants not in web:")
for user_id in exit_survey_not_in_web:
    print(f"\t{user_id}")

Web participants not in exit survey:
	breanne
	emily
	emily
	Shivendra Agrawal
	emily
	emily
	emily
	emily
	emily
	emily
Exit survey participants not in web:
	b
	Shivendra
	8aksf09q
	apel09210h
	9sk3h59s
	9wbnns76
	s92hfks3


Notes about manually fixing mismatches between web and exit survey data:
- Example here

## Distributions of demographic data

In [25]:
# Tally responses per gender code, then relabel the codes for display.
gender_column = qualtrics_columns['gender']
gender_counts = exit_survey[gender_column].value_counts()
gender_counts_renamed = gender_counts.rename(gender_map)
gender_counts_renamed

Q1
Man                  3
Woman                2
Non-binary           1
Prefer not to say    1
Name: count, dtype: int64

In [27]:
# Summary statistics of the age responses, cast to int first.
age_responses = exit_survey[qualtrics_columns['age']]
age_responses.astype(int).describe()

count     7.000000
mean     36.000000
std      19.373521
min      19.000000
25%      24.500000
50%      31.000000
75%      38.500000
max      76.000000
Name: Q8, dtype: float64

In [30]:
# Tally responses per drone-experience code, then relabel the codes for display.
drone_experience_column = qualtrics_columns['drone_experience']
drone_experience_counts = exit_survey[drone_experience_column].value_counts()
drone_experience_counts_renamed = drone_experience_counts.rename(drone_map)
drone_experience_counts_renamed

Q3
Some            4
None            1
Regularly       1
Professional    1
Name: count, dtype: int64

In [33]:
# Tally responses per video-game-experience code, then relabel the codes for display.
game_experience_column = qualtrics_columns['video_game_experience']
game_experience_counts = exit_survey[game_experience_column].value_counts()
game_experience_counts_renamed = game_experience_counts.rename(game_map)
game_experience_counts_renamed

Q4
Weekly     3
None       2
Monthly    1
Daily      1
Name: count, dtype: int64

In [34]:
# Tally responses per Likert code for "feedback helped", then relabel the codes for display.
feedback_helped_column = qualtrics_columns['feedback_helped']
feedback_helped_counts = exit_survey[feedback_helped_column].value_counts()
feedback_helped_counts_renamed = feedback_helped_counts.rename(likert_map)
feedback_helped_counts_renamed

Q5
Agree                3
Strongly Disagree    1
Disagree             1
Neutral              1
Strongly Agree       1
Name: count, dtype: int64