I'll begin by importing the data from the CSV file and then preprocessing some of it.

In [1]:
import pandas as pd

# Load the raw data from the CSV file and discard the timestamp column in one step
df = pd.read_csv('Tuning In to Tempo.csv').drop('Timestamp', axis=1)

# Preview the three base-time trials; some of the recorded values are
# invalid or unusual for reaction times
df.loc[:, ['Time #1 (in ms)', 'Time #2 (in ms)', 'Time #3 (in ms)']]

Unnamed: 0,Time #1 (in ms),Time #2 (in ms),Time #3 (in ms)
0,306.0,291.0,342.0
1,357.0,382.0,316.0
2,386.0,346.0,344.0
3,342.0,258.0,289.0
4,244.0,285.0,282.0
5,270.0,211.0,106.0
6,248.0,306.0,225.0
7,301.0,322.0,255.0
8,381.0,319.0,292.0
9,276.0,283.0,253.0


It's evident that the row with index 21 contains falsified data, so I'll remove the entire row. The row with index 14 also has an extremely large first reaction time, so I'll replace that value with the average of the other two (truncated to a whole number).

In [6]:
# Discard the record with index label 21 in its entirety
df = df.drop(index=21)

# Overwrite the first base-time trial of record 14 with the mean of its second
# and third trials; int() truncates any fractional part
df.at[14, 'Time #1 (in ms)'] = int(
    sum(df.at[14, trial] for trial in ('Time #2 (in ms)', 'Time #3 (in ms)')) / 2
)

df

Unnamed: 0,Age,Mood,Energy,Gender,Time #1 (in ms),Time #2 (in ms),Time #3 (in ms),Time #1 (in ms).1,Time #2 (in ms).1,Time #3 (in ms).1,...,Time #3 (in ms).2,Have you heard of this song before?.1,Time #1 (in ms).3,Time #2 (in ms).3,Time #3 (in ms).3,Have you heard of this song before?.2,Time #1 (in ms).4,Time #2 (in ms).4,Time #3 (in ms).4,Have you heard of this song before?.3
0,20,4,4,Man,306.0,291.0,342.0,347,353,339,...,304,No,314,556,310,No,293,316,325,No
1,20,3,2,Woman,357.0,382.0,316.0,341,336,384,...,425,No,376,355,412,No,355,344,369,No
2,19,3,2,Man,386.0,346.0,344.0,280,253,306,...,296,No,236,263,286,No,347,344,286,No
3,22,3,4,Woman,342.0,258.0,289.0,267,289,242,...,217,No,277,238,311,No,296,273,290,No
4,25,2,2,Woman,244.0,285.0,282.0,276,283,263,...,282,No,254,250,262,No,329,316,294,No
5,23,2,3,Woman,270.0,211.0,106.0,273,276,247,...,253,No,239,233,231,Yes,260,257,276,No
6,23,4,3,Man,248.0,306.0,225.0,233,258,313,...,273,No,232,280,215,No,209,321,218,Yes
7,20,4,3,Woman,301.0,322.0,255.0,338,259,255,...,286,No,306,312,298,No,301,298,205,No
8,19,2,1,Woman,381.0,319.0,292.0,284,307,307,...,269,No,248,289,266,No,313,363,303,Yes
9,18,3,2,Woman,276.0,283.0,253.0,268,266,272,...,267,No,273,255,255,No,272,283,266,No


I can now compute the average reaction times, and organize the data.

In [7]:
# Snapshot the raw per-trial / questionnaire columns (everything after Age,
# Mood, Energy and Gender) BEFORE any derived column is appended, so the
# cleanup at the end does not depend on how many summary columns are added.
raw_columns = df.columns[4:]

# Convert discrete labels to numerical values:
# "Man" -> 0, "Woman" -> 1, any other value (including missing) -> 2
gender_codes = {"Man": 0, "Woman": 1}
df['Gender'] = df['Gender'].apply(lambda label: gender_codes.get(label, 2))

# Compute the average for the base time, rounded to the nearest millisecond
df['Avg Base Time (ms)'] = df[['Time #1 (in ms)', 'Time #2 (in ms)', 'Time #3 (in ms)']].mean(axis=1).round(0)

for i in range(1, 5):
    # Compute the average of the three reaction-time trials for song i
    trial_columns = [f'Time #{trial} (in ms).{i}' for trial in range(1, 4)]
    df[f'Avg Reaction Time #{i} (ms)'] = df[trial_columns].mean(axis=1).round(0)

    # The "heard before" question repeats once per song; the loaded column
    # names carry no suffix for song 1 and ".{i-1}" for song i > 1 (pandas
    # appends these suffixes to duplicated CSV headers).
    heard_column = 'Have you heard of this song before?' + ('' if i == 1 else f'.{i - 1}')

    # Binary encoding: "No" -> 0, any other answer (including missing) -> 1
    df[f'Heard Before Song #{i}'] = (df[heard_column] != "No").astype('int64')

# Remove the old columns, keeping the demographics and the derived summaries
df.drop(columns=raw_columns, inplace=True)

df

Unnamed: 0,Age,Mood,Energy,Gender,Avg Base Time (ms),Avg Reaction Time #1 (ms),Heard Before Song #1,Avg Reaction Time #2 (ms),Heard Before Song #2,Avg Reaction Time #3 (ms),Heard Before Song #3,Avg Reaction Time #4 (ms),Heard Before Song #4
0,20,4,4,0,313.0,346.0,1,333.0,0,393.0,0,311.0,0
1,20,3,2,1,352.0,354.0,1,391.0,0,381.0,0,356.0,0
2,19,3,2,0,359.0,280.0,1,303.0,0,262.0,0,326.0,0
3,22,3,4,1,296.0,266.0,1,227.0,0,275.0,0,286.0,0
4,25,2,2,1,270.0,274.0,1,283.0,0,255.0,0,313.0,0
5,23,2,3,1,196.0,265.0,1,212.0,0,234.0,1,264.0,0
6,23,4,3,0,260.0,268.0,1,247.0,0,242.0,0,249.0,1
7,20,4,3,1,293.0,284.0,1,294.0,0,305.0,0,268.0,0
8,19,2,1,1,331.0,299.0,1,278.0,0,268.0,0,326.0,1
9,18,3,2,1,271.0,269.0,1,272.0,0,261.0,0,274.0,0


In [8]:
# Work on an independent copy so the deviation columns do not alter df
alternate_df = df.copy(deep=True)

# Add columns for the deviation of the reaction times from the base time
# (positive = slower than the base time, negative = faster). Plain column
# subtraction replaces the row-by-row apply: same values, no Python-level
# loop over rows, and well-defined on an empty frame.
for i in range(1, 5):
    alternate_df[f'Reaction Time Deviation #{i}'] = (
        alternate_df[f'Avg Reaction Time #{i} (ms)'] - alternate_df['Avg Base Time (ms)']
    )

alternate_df

Unnamed: 0,Age,Mood,Energy,Gender,Avg Base Time (ms),Avg Reaction Time #1 (ms),Heard Before Song #1,Avg Reaction Time #2 (ms),Heard Before Song #2,Avg Reaction Time #3 (ms),Heard Before Song #3,Avg Reaction Time #4 (ms),Heard Before Song #4,Reaction Time Deviation #1,Reaction Time Deviation #2,Reaction Time Deviation #3,Reaction Time Deviation #4
0,20,4,4,0,313.0,346.0,1,333.0,0,393.0,0,311.0,0,33.0,20.0,80.0,-2.0
1,20,3,2,1,352.0,354.0,1,391.0,0,381.0,0,356.0,0,2.0,39.0,29.0,4.0
2,19,3,2,0,359.0,280.0,1,303.0,0,262.0,0,326.0,0,-79.0,-56.0,-97.0,-33.0
3,22,3,4,1,296.0,266.0,1,227.0,0,275.0,0,286.0,0,-30.0,-69.0,-21.0,-10.0
4,25,2,2,1,270.0,274.0,1,283.0,0,255.0,0,313.0,0,4.0,13.0,-15.0,43.0
5,23,2,3,1,196.0,265.0,1,212.0,0,234.0,1,264.0,0,69.0,16.0,38.0,68.0
6,23,4,3,0,260.0,268.0,1,247.0,0,242.0,0,249.0,1,8.0,-13.0,-18.0,-11.0
7,20,4,3,1,293.0,284.0,1,294.0,0,305.0,0,268.0,0,-9.0,1.0,12.0,-25.0
8,19,2,1,1,331.0,299.0,1,278.0,0,268.0,0,326.0,1,-32.0,-53.0,-63.0,-5.0
9,18,3,2,1,271.0,269.0,1,272.0,0,261.0,0,274.0,0,-2.0,1.0,-10.0,3.0
