In [7]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy import stats


In [16]:
## Basic statistics of the data ## 
# Read the cleaned eye-tracking export into memory
df = pd.read_csv('../data/Eye-tracking Output/cleaned_data.csv')

# ---- Derive the summary values ----

# One row per participant (drop_duplicates keeps the first row seen for each),
# so per-person attributes such as age are counted once
unique_participants_df = df.drop_duplicates(subset='Participant')

# Average age across the one-row-per-participant table
mean_age = unique_participants_df['Age'].mean()

# Participant head-count split by gender and by class (ASD or TD)
unique_gender_counts = unique_participants_df.groupby('Gender')['Participant'].nunique()
unique_class_counts = unique_participants_df.groupby('Class')['Participant'].nunique()

# Average CARS score, restricted to participants whose class is ASD
as_participants = unique_participants_df.loc[unique_participants_df['Class'] == 'ASD']
mean_cars_score = as_participants['CARS Score'].mean()

# Overall head-count and the number of distinct stimuli in the full dataset
n_participants = unique_participants_df['Participant'].nunique()
n_unique_stimuli = df['Stimulus'].nunique()

# ---- Report ----

print("Mean age of unique participants:", round(mean_age, 2))
print("\nUnique participants by gender:", unique_gender_counts, sep="\n")
print("\nUnique participants by class (ASD or TD):", unique_class_counts, sep="\n")
print("\nMean CARS score for ASD participants:", round(mean_cars_score, 2))
print("\nTotal unique participants:", n_participants)
print("Number of unique stimuli:", n_unique_stimuli)

# Column names, non-null counts and dtypes of the full dataset
df.info()



Mean age of unique participants: 7.8

Unique participants by gender:
Gender
F    20
M    36
Name: Participant, dtype: int64

Unique participants by class (ASD or TD):
Class
ASD    27
TD     29
Name: Participant, dtype: int64

Mean CARS score for ASD participants: 32.65

Total unique participants: 56
Number of unique stimuli: 114
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 905519 entries, 0 to 905518
Data columns (total 20 columns):
 #   Column                        Non-Null Count   Dtype  
---  ------                        --------------   -----  
 0   Participant                   905519 non-null  int64  
 1   Trial                         905519 non-null  object 
 2   Stimulus                      905519 non-null  object 
 3   RecordingTime [ms]            905519 non-null  float64
 4   Export Start Trial Time [ms]  905519 non-null  float64
 5   Export End Trial Time [ms]    905519 non-null  float64
 6   Point of Regard Left X [px]   905519 non-null  object 
 7   Point of Rega

In [None]:
## Gaze density plots: one figure per (diagnosis, stimulus) pair ##

# Load the dataset
df = pd.read_csv('../data/Eye-tracking Output/cleaned_data.csv')

# The gaze coordinate columns are loaded as strings (object dtype); coerce them
# to numbers, turning anything unparseable into NaN.
gaze_columns = [
    'Point of Regard Left X [px]', 'Point of Regard Left Y [px]',
    'Point of Regard Right X [px]', 'Point of Regard Right Y [px]',
]
df[gaze_columns] = df[gaze_columns].apply(pd.to_numeric, errors='coerce')

# We know from the data collection that the screen size was 1280 x 1024 pixels.
screen_width_px = 1280
screen_height_px = 1024

# (x column, y column, plot colour) for each eye
eye_layers = [
    ('Point of Regard Left X [px]', 'Point of Regard Left Y [px]', 'blue'),
    ('Point of Regard Right X [px]', 'Point of Regard Right Y [px]', 'red'),
]

# Make sure the output folder exists; savefig does not create missing directories.
output_dir = Path('../data/gaze_plots')
output_dir.mkdir(parents=True, exist_ok=True)

# Set the style of the seaborn plot
sns.set_theme(style="whitegrid")

# A single pass over the data: the cleaning must happen in the same loop that
# plots, otherwise the cleaned subset is thrown away before it is drawn.
# groupby only yields (diagnosis, stimulus) pairs that actually have rows.
for (diagnosis, stimulus), df_group in df.groupby(['Class', 'Stimulus']):
    fig, ax = plt.subplots(figsize=(10, 5))

    for x_col, y_col, color in eye_layers:
        # Keep only samples that fall on the screen in BOTH axes for this eye.
        # NaN coordinates compare as False and are dropped here as well.
        on_screen = (
            df_group[x_col].between(0, screen_width_px)
            & df_group[y_col].between(0, screen_height_px)
        )

        # Normalize by the screen dimensions (not per-subset min/max) so that
        # both eyes and all stimuli share one [0, 1] coordinate system and a
        # constant column cannot cause a division by zero.
        eye_points = pd.DataFrame({
            x_col: df_group.loc[on_screen, x_col] / screen_width_px,
            y_col: df_group.loc[on_screen, y_col] / screen_height_px,
        })

        # Nothing valid recorded for this eye: leave its layer out.
        if eye_points.empty:
            continue

        # Plotting gaze density for this eye (left = blue, right = red)
        sns.kdeplot(x=x_col, y=y_col, data=eye_points, color=color, fill=True, ax=ax)

    # Add a title and eye-neutral axis labels (both eyes share the axes)
    ax.set_title(f'Gaze Plot for {diagnosis} and {stimulus}')
    ax.set_xlabel('Horizontal gaze position (fraction of screen width)')
    ax.set_ylabel('Vertical gaze position (fraction of screen height)')

    # Invert the y-axis so y increases downwards
    ax.invert_yaxis()

    # Save the plot with the diagnosis and stimulus in the filename
    fig.savefig(output_dir / f'{diagnosis} - {stimulus}.png')

    # Release the figure; otherwise every figure created in the loop stays in memory.
    plt.close(fig)
        