# Run this on startup

In [1]:
import pandas as pd

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


Open all of the relevant data files/directories

In [34]:
data_dir = '../static/data/'

conditions_file = data_dir + 'conditions.csv'
conditions = pd.read_csv(conditions_file)

exit_survey_file = data_dir + 'exit_survey.csv'
exit_survey = pd.read_csv(exit_survey_file)

Process qualtrics data as needed.

In [35]:
qualtrics_columns = {
    'StartDate': 'start_date',
    'EndDate': 'end_date',
    'Duration (in seconds)': 'duration',
    'Finished': 'is_finished', # 1 is finished
    'Q9': 'prolific_id', # user inputted prolific id
    'Q1': 'gender', # mapping below
    'Q8': 'age', # number input
    'Q3': 'drone_experience', # mapping below
    'Q4': 'video_game_experience', # mapping below
    'Q5': 'feedback_helped', # likert mapping below
    'Q6': 'change_from_feedback', # open text response
    'Q7': 'comments' # open text response, optional
}

# rename columns
exit_survey = exit_survey.rename(columns=qualtrics_columns)

# remove extra columns and rows
exit_survey = exit_survey.drop(columns=['Status', 'Progress', 'RecordedDate', 'ResponseId', 'DistributionChannel', 'UserLanguage'])
exit_survey = exit_survey.drop([0,1])

# set data types
exit_survey['start_date'] = pd.to_datetime(exit_survey['start_date'])
exit_survey['end_date'] = pd.to_datetime(exit_survey['end_date'])
exit_survey['duration'] = pd.to_numeric(exit_survey['duration'])
exit_survey['is_finished'] = exit_survey['is_finished'].astype(bool)
exit_survey['age'] = pd.to_numeric(exit_survey['age'])

# process categorical data
gender_map = {'1': "Man", '2': "Woman", '3': "Non-binary", '4': "Prefer not to say"}
exit_survey['gender'] = exit_survey['gender'].replace(gender_map)
exit_survey['gender'] = pd.Categorical(exit_survey['gender'])

drone_map = {'1': "None", '2': "Some", '3': "Regularly", '4': "Professional"}
exit_survey['drone_experience'] = exit_survey['drone_experience'].replace(drone_map)
exit_survey['drone_experience'] = pd.Categorical(exit_survey['drone_experience'], categories=['None', 'Some', 'Regularly', 'Professional'], ordered=True)

game_map = {'1': "None", '2': "Monthly", '3': "Weekly", '4': "Daily"}
exit_survey['video_game_experience'] = exit_survey['video_game_experience'].replace(game_map)
exit_survey['video_game_experience'] = pd.Categorical(exit_survey['video_game_experience'], categories=['None', 'Monthly', 'Weekly', 'Daily'], ordered=True)

likert_map = {'1': "Strongly Disagree", '2': "Disagree", '3': "Neutral", '4': "Agree", '5': "Strongly Agree"}
exit_survey['feedback_helped'] = exit_survey['feedback_helped'].replace(likert_map)
exit_survey['feedback_helped'] = pd.Categorical(exit_survey['feedback_helped'], categories=['Strongly Disagree', 'Disagree', 'Neutral', 'Agree', 'Strongly Agree'], ordered=True)

exit_survey

Unnamed: 0,start_date,end_date,duration,is_finished,prolific_id,gender,age,drone_experience,video_game_experience,feedback_helped,change_from_feedback,comments
2,2024-02-16 13:19:00,2024-02-16 13:44:00,1484,True,b,Woman,24,Some,Weekly,Agree,asd,asd
3,2024-02-20 11:39:00,2024-02-20 12:19:00,2394,True,Shivendra,Man,31,Some,,Agree,Yes,
4,2024-02-21 12:23:00,2024-02-21 12:24:00,53,True,8aksf09q,Man,19,,,Strongly Disagree,I hated this,
5,2024-02-21 12:24:00,2024-02-21 12:25:00,24,True,apel09210h,Woman,76,Some,Monthly,Disagree,it was awesome,
6,2024-02-21 12:25:00,2024-02-21 12:25:00,23,True,9sk3h59s,Non-binary,33,Regularly,Weekly,Neutral,meh,
7,2024-02-21 12:25:00,2024-02-21 12:26:00,24,True,9wbnns76,Prefer not to say,25,Professional,Daily,Agree,a fake answer,
8,2024-02-21 12:26:00,2024-02-21 12:26:00,22,True,s92hfks3,Man,44,Some,Weekly,Strongly Agree,what,


# Check participant data

## Match data sources

Do participants match between data from webpage and data from exit survey?

In [37]:
web_participants = conditions['user_id']
exit_survey_participants = exit_survey['prolific_id']

# check if all web participants are in exit survey
print("Web participants not in exit survey:")
for user_id in web_participants:
    if user_id not in exit_survey_participants:
        print("\t" + user_id)

# check if all exit survey participants are in web
print("Exit survey participants not in web:")
for user_id in exit_survey_participants:
    if user_id not in web_participants:
        print("\t" + user_id)

Web participants not in exit survey:
	breanne
	emily
	emily
	Shivendra Agrawal
	emily
	emily
	emily
	emily
	emily
	emily
Exit survey participants not in web:
	b
	Shivendra
	8aksf09q
	apel09210h
	9sk3h59s
	9wbnns76
	s92hfks3


Notes about manually fixing mismatches between web and exit survey data:
- Example here

## Distributions of demographic data

In [39]:
exit_survey['gender'].value_counts()


gender
Man                  3
Woman                2
Non-binary           1
Prefer not to say    1
Name: count, dtype: int64

In [41]:
exit_survey['age'].describe().round(1)

count     7.0
mean     36.0
std      19.4
min      19.0
25%      24.5
50%      31.0
75%      38.5
max      76.0
Name: age, dtype: float64

In [50]:
exit_survey['drone_experience'].value_counts().sort_index()

drone_experience
None            1
Some            4
Regularly       1
Professional    1
Name: count, dtype: int64

In [49]:
exit_survey['video_game_experience'].value_counts().sort_index()

video_game_experience
None       2
Monthly    1
Weekly     3
Daily      1
Name: count, dtype: int64

In [48]:
exit_survey['feedback_helped'].value_counts().sort_index()

feedback_helped
Strongly Disagree    1
Disagree             1
Neutral              1
Agree                3
Strongly Agree       1
Name: count, dtype: int64

# Research Questions

## How do learners perceive the feedback along each dimension?

## Which feedback modality leads to higher performance improvements?