# Code for creating a clean file ready for data analysis

In [None]:
# load packages here
import pandas as pd

In [None]:
# Read the two raw files that are combined into the clean analysis file.
#
# READ ME!
#
# Once the data collection is completed, change the two PATH constants below to
# the correct files for both `qualtrics` and `logbook`, run all cells in this
# Jupyter notebook, and then a clean CSV file will be ready for the data analysis.
QUALTRICS_PATH = "Robots and social influence_November 10, 2025_20.43.xlsx"  # download from Qualtrics
LOGBOOK_PATH = "Logbook.xlsx"  # download from "Research assistant" folder in Teams

qualtrics = pd.read_excel(QUALTRICS_PATH)
logbook = pd.read_excel(LOGBOOK_PATH)

### Data Wrangling
### *Qualtrics* 

In [None]:
# Preview the first rows of the Qualtrics export as it was read from the file
qualtrics.head()

In [None]:
# List the column names of the Qualtrics export (some of them are dropped in the next cell)
qualtrics.columns

In [None]:
# Remove the columns that are not needed for the further analyses
qualtrics = qualtrics.drop(
    columns=[
        'StartDate',
        'EndDate',
        'Informed_consent',
        'Progress',
        'Status',
        'Duration (in seconds)',
        'Finished',
        'RecordedDate',
        'ResponseId',
        'DistributionChannel',
        'UserLanguage',
        'Timer_First Click',
        'Timer_Last Click',
        'Timer_Page Submit',
        'Timer_Click Count',
        'Data_submission',
    ]
)

In [None]:
# The row labelled 0 only holds the description of each column, not a response,
# so keep everything from label 1 onwards (all columns)
qualtrics = qualtrics.loc[1:, :]

In [None]:
# Reset index for merging: renumber the rows starting at 0 and discard the old
# index (drop=True), because the merge with the logbook is done by index
qualtrics = qualtrics.reset_index(drop=True)

In [None]:
# Check the first rows of the Qualtrics table after the cleaning steps above
qualtrics.head()

### *Logbook* 

In [None]:
# Preview the first rows of the logbook as it was read from the file
logbook.head()

In [None]:
# Skip the first 8 rows: they contain the instructions for the research
# assistants on how to fill in the logbook, not participant records
logbook = logbook.iloc[8:]

In [None]:
# Use the values of the first remaining row as the column names of the table
header_row = logbook.iloc[0]
logbook = logbook.set_axis(header_row, axis="columns")

In [None]:
# Remove the first row, which has just been used to name the columns
logbook = logbook.iloc[1:]

In [None]:
# List the logbook column names now that the header row has been applied
logbook.columns

In [None]:
# Keep only the column that complements the information in the qualtrics file
# (note: the column name ends with a space)
logbook = logbook.loc[:, ['Error code ']]

**Note**: 
- error code 0 = no error
- error code 1 = minor error
- error code 2 = major error
- error code 3 = participant did not show up (either simply absent, or sent away due to major technical issues before the experiment began)

Participants who did not show up have no responses in Qualtrics, so we need to exclude them before merging the files.

In [None]:
# Keep every row whose error code is not 3, i.e. remove the participants who did not show up
logbook = logbook.loc[logbook['Error code '].ne(3)]

In [None]:
# Reset index for merging: renumber the remaining rows starting at 0 and discard
# the old index (drop=True), because the merge with qualtrics is done by index
logbook = logbook.reset_index(drop=True)

In [None]:
# Display the cleaned logbook (only the error code column, without error code 3 rows)
logbook

### Data Merging
Responses in the `qualtrics` and `logbook` files are recorded chronologically (top to bottom), with each row corresponding to the same participant across both datasets. So we merge them by index.

In [None]:
import warnings

# The two tables are paired purely by row position (index), and the merge below
# is an inner join, so rows without a partner in the other table are silently
# dropped. A difference in row counts therefore means that participants may be
# paired with the wrong error code; flag it instead of letting it pass unnoticed.
if len(logbook) != len(qualtrics):
    warnings.warn(
        f"Row count mismatch before merging: logbook has {len(logbook)} rows, "
        f"qualtrics has {len(qualtrics)} rows. Unmatched rows are dropped and "
        "the remaining rows may be misaligned; check both files."
    )

# Combine the error codes with the questionnaire responses, row by row
df_all = pd.merge(logbook, qualtrics, left_index=True, right_index=True)

In [None]:
# Report how many participants have error code 2 (major error) by counting the matching rows
print(f"{int(df_all['Error code '].eq(2).sum())} participants experienced major technical issues during their participation.")

In [None]:
# Keep every row whose error code is not 2, i.e. remove the participants who
# experienced major technical issues
df = df_all.loc[df_all['Error code '].ne(2)]

In [None]:
# Display the final dataset (rows with error code 2 removed)
df

In [None]:
# Save the final dataset as a CSV file for the data analysis; index=False leaves
# the row index out of the file
df.to_csv("robots_socialInfluence.csv", index=False)