In [1]:
import os
import pandas as pd
import csv
import math
import numpy as np
from ipywidgets import Dropdown
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import pearsonr


In [17]:
folder_path = 'SaliencyData/Task1'

def load_data(file):
    file_path = os.path.join(folder_path, file)
    data = pd.read_csv(file_path, delimiter=';', usecols=['GazeTime', 'Region', 'Target', 'PosX', 'PosY', 'PosZ', 'PlayerPosX', 'PlayerPosY', 'PlayerPosZ', 'LightIntensity', 'Velocity'])
    data = data[~((data['PosX'] == 0) & (data['PosY'] == 0) & (data['PosZ'] == 0))]

    if len(data) <= 1:  # The data contains only a header
        return None

    # Get the unique regions
    regions = data['Region'].unique()

    # Skip the first region
    for region in regions[1:]:
        # Get the index of the first row of the current region
        first_row_index = data[data['Region'] == region].index[0]
        # Drop the first row of the current region
        data = data.drop(first_row_index)

    # Normalize position data using Min-Max normalization
    pos_columns = ['PosX', 'PosY', 'PosZ', 'PlayerPosX', 'PlayerPosY', 'PlayerPosZ', 'LightIntensity', 'Velocity']
    for column in pos_columns:
        data[column] = (data[column] - data[column].min()) / (data[column].max() - data[column].min())

    return data

# Create a list to store the dataframes
dataframes = [df for df in (load_data(file) for file in os.listdir(folder_path) if file.endswith('.csv')) if df is not None]
os.listdir(folder_path)

['.ipynb_checkpoints',
 'GazeData_1.csv',
 'GazeData_11.csv',
 'GazeData_12.csv',
 'GazeData_13.csv',
 'GazeData_14.csv',
 'GazeData_15.csv',
 'GazeData_16.csv',
 'GazeData_17.csv',
 'GazeData_18.csv',
 'GazeData_19.csv',
 'GazeData_2.csv',
 'GazeData_20.csv',
 'GazeData_21.csv',
 'GazeData_22.csv',
 'GazeData_23.csv',
 'GazeData_26.csv',
 'GazeData_27.csv',
 'GazeData_28.csv',
 'GazeData_29.csv',
 'GazeData_3.csv',
 'GazeData_30.csv',
 'GazeData_31.csv',
 'GazeData_32.csv',
 'GazeData_4.csv',
 'GazeData_5.csv',
 'GazeData_6.csv',
 'GazeData_7.csv',
 'GazeData_8.csv']

In [30]:
def correct_regions(df):
    # Define valid targets for each region
    valid_targets = {
        1: ['Size(Min)', 'Size(Low)', 'Size(Mid)', 'Size(Max)','None'],
        2: ['Transparency(Min)', 'Transparency(Low)', 'Transparency(Mid)', 'Transparency(Max)','None'],
        3: ['Red', 'White', 'Black', 'Blue','None']
    }
    
    # Define a function to find the correct region for a given target
    def find_correct_region(row):
        if row['Target'] == 'None':
            return row['Region']  # Keep the current region if the target is 'None'
        for region, targets in valid_targets.items():
            if row['Target'] in targets:
                return region
        return row['Region']  # Keep the current region if the target does not belong to any defined region

    # Apply the function to each row in the DataFrame
    df['Corrected_Region'] = df.apply(find_correct_region, axis=1)
    # Replace 'Region' column with 'Corrected_Region', then drop 'Corrected_Region'
    df['Region'] = df['Corrected_Region']
    df.drop('Corrected_Region', axis=1, inplace=True)
    
    return df

# Apply the function to each DataFrame in your list
corrected_dfs = [correct_regions(df) for df in dataframes]

corrected_dfs[0]

Unnamed: 0,GazeTime,Region,Target,PosX,PosY,PosZ,PlayerPosX,PlayerPosY,PlayerPosZ,LightIntensity,Velocity
0,0.166807,1.0,Size(Max),0.441291,0.450383,0.898136,0.0,0.56943,0.0,,
1,0.208184,1.0,Size(Max),0.501019,0.160109,1.0,0.0,0.504816,0.113474,,
2,0.250302,1.0,Size(Max),0.584421,0.085556,0.985626,0.0,0.427128,0.249906,,
3,0.306064,1.0,Size(Max),0.612888,0.0,0.969624,0.0,0.332133,0.416732,,
4,0.097383,1.0,Size(Max),0.570646,0.520018,0.787474,0.0,0.301907,0.469812,,
5,0.097494,1.0,Size(Max),0.692646,0.038718,0.964657,0.0,0.271646,0.522953,,
6,0.110868,1.0,Size(Max),0.75592,0.051888,0.960085,0.0,0.237235,0.583383,,
7,0.111373,,,0.823683,0.065725,0.954646,0.0,0.202668,0.644089,,
8,0.111236,,,0.889889,0.086104,0.948992,0.0,0.168141,0.70472,,
9,0.111098,,,0.946413,0.112806,0.943876,0.0,0.13366,0.765276,,


In [81]:
def target_counts(df):
    # Filter out rows where Target is 'None'
    df_filtered = df[df['Target'] != 'None']
    # Count the number of times each 'Target' was looked at
    target_counts = df_filtered['Target'].value_counts().reset_index()
    target_counts.columns = ['Target', 'Count']
    return target_counts

# Use the target_counts function on each DataFrame in your list
all_target_counts = pd.concat([target_counts(df) for df in dataframes])

# Sum the counts for each 'Target' across all DataFrames
total_target_counts = all_target_counts.groupby('Target').sum().reset_index()

print(total_target_counts)


               Target  Count
0               Black     20
1                Blue      1
2                 Red     16
3           Size(Low)     12
4           Size(Max)     79
5           Size(Mid)     22
6           Size(Min)      6
7   Transparency(Low)     16
8   Transparency(Max)     20
9   Transparency(Mid)     19
10  Transparency(Min)     17
11              White     15


In [22]:
dataframes[26]

Unnamed: 0,GazeTime,Region,Target,PosX,PosY,PosZ,PlayerPosX,PlayerPosY,PlayerPosZ,LightIntensity,Velocity
0,0.278668,1,Size(Low),0.261395,0.555939,0.780676,0.0,0.597213,0.0,,
1,0.069575,1,Size(Low),0.273538,0.596371,0.772711,0.0,0.575599,0.03048,,
2,0.069604,1,,0.294808,0.642907,0.765235,0.0,0.553974,0.060972,,
3,0.041907,1,,0.299243,0.69835,0.760438,0.0,0.540955,0.079331,,
4,0.083597,1,,0.324199,0.725277,0.753277,0.0,0.514983,0.115954,,
5,0.501517,1,Size(Min),0.307253,0.680182,0.702776,0.0,0.359178,0.33566,,
6,0.23698,1,Size(Min),0.322558,0.726796,0.682307,0.0,0.285556,0.439477,,
7,0.041777,1,,0.34273,0.787421,0.680488,0.0,0.272577,0.457778,,
8,0.041721,1,,0.364248,0.850637,0.679295,0.0,0.259616,0.476056,,
9,0.055749,1,Size(Max),0.388556,0.924521,0.677755,0.0,0.242297,0.500479,,


In [23]:
def ProximtyVsGazeTime(df):

    # Calculate the Euclidean distance between the object and player positions
    df['Proximity'] = np.sqrt((df['PosX'] - df['PlayerPosX'])**2 + (df['PosY'] - df['PlayerPosY'])**2 + (df['PosZ'] - df['PlayerPosZ'])**2)

    # Create a scatter plot for GazeTime vs Proximity
    plt.figure(figsize=(10, 5))
    plt.scatter(df['Proximity'], df['GazeTime'])
    plt.title('GazeTime vs Proximity')
    plt.xlabel('Proximity')
    plt.ylabel('GazeTime')
    plt.show()

    # Calculate correlation between GazeTime and Proximity
    correlation, _ = pearsonr(df['GazeTime'], df['Proximity'])
    print('Pearson correlation: %.3f' % correlation)

    



In [24]:
def sum(arr):
    ret = 0
    for i in arr:
        ret += i
    return ret

def PercFixInside(dataframe):
    def Compute(region, interest):
        ret = pd.DataFrame()
        target_names = dataframe[dataframe['Region'] == region]['Target'].unique()
        grouped_data = dataframe[dataframe['Region'] == region].groupby('Target')
        gaze = [0] * len(target_names)  # Initialize list with zeros
        perc = [0] * len(target_names)
        ret['Target'] = target_names
        
        for j, target in enumerate(target_names):
            gaze[j] = grouped_data.get_group(target)[interest].sum()
        tot = sum(gaze)

        for i, g in enumerate(gaze):
            perc[i] = (g / tot) * 100 if tot > 0 else 0

        ret['GazeTime'] = gaze
        ret['Percentage'] = perc
        return ret

    regions = dataframe['Region'].unique()

    # Initialize a list to store the DataFrames for each region
    results = []

    for region in regions:
        newdata = Compute(region, 'GazeTime')
        newdata['Region'] = region  # Add a column for the region
        results.append(newdata)

    # Concatenate all the result DataFrames into one
    final_df = pd.concat(results, ignore_index=True)

    return final_df

PercFixInside(dataframes[10])

Unnamed: 0,Target,GazeTime,Percentage,Region
0,Size(Max),2.076236,100.0,1
1,,1.907745,100.0,2
2,,0.877279,47.36575,3
3,Red,0.682289,36.83791,3
4,White,0.29257,15.79634,3


In [25]:
def NFix(dataframe, threshold, ShowPercentage = False):
    if(threshold < 0):
        threshold = abs(threshold)
    nfix = {}  # Initialize the NFix counter as a dictionary
    counter = {}  # Initialize the counter as a dictionary
    pos = {}  # Variables to track previous position for each target

    for _, row in dataframe.iterrows():
        region = row['Region']
        target = row['Target']
        if region not in counter:
            counter[region] = {}
            nfix[region] = {}
        if target not in counter[region]:
            counter[region][target] = 0
            nfix[region][target] = 0
            pos[region, target] = (None, None)

        counter[region][target] += 1
        pos_x_new, pos_y_new = float(row['PosX']), float(row['PosY'])  # Get the X and Y positions

        # Check if a fixation occurred by comparing the current position with the previous position
        if pos[region, target][0] is not None and pos[region, target][1] is not None:
            pos_diff = abs(pos[region, target][0] - pos_x_new) + abs(pos[region, target][1] - pos_y_new)
            if pos_diff > threshold:
                nfix[region][target] += 1  # Increment the NFix counter for the target

        pos[region, target] = (pos_x_new, pos_y_new)  # Update the previous position for the target

    if ShowPercentage:
        for region in nfix:
            for target in nfix[region]:
                nfix[region][target] = (nfix[region][target] / counter[region][target]) * 100 if counter[region][target] > 0 else 0

    # Convert nested dictionary to DataFrame
    nfix_df = pd.DataFrame([(reg, tar, val) for reg, tar_val in nfix.items() for tar, val in tar_val.items()], 
                           columns=['Region', 'Target', 'NFix'])

    return nfix_df

In [26]:
def ConvergTime(dataframe):
    # Group the data by region and then by target
    dataframe = dataframe[dataframe['Target'] != 'None']
    grouped_data = dataframe.groupby(['Region', 'Target'])

    # Initialize an empty DataFrame to store the results
    results = pd.DataFrame(columns=['Region', 'Target', 'ConvergenceTime'])

    # Iterate over each group (region and target)
    for (region, target), group in grouped_data:
        # Calculate the average gaze time for the current group (target within the region)
        avg_gaze_time = group['GazeTime'].mean()
        
        # Append the result to the results DataFrame
        results = pd.concat([results, pd.DataFrame({'Region': [region], 'Target': [target], 'ConvergenceTime': [avg_gaze_time]})], ignore_index=True)

    return results

In [27]:
def calculate_proximity(df):
    # Calculate the Euclidean distance
    df['Proximity'] = np.sqrt((df['PosX'] - df['PlayerPosX'])**2 + (df['PosY'] - df['PlayerPosY'])**2 + (df['PosZ'] - df['PlayerPosZ'])**2)
    # Remove rows with 'None' Target
    df = df[df['Target'] != 'None']
    # Group by 'Target' and calculate mean distance
    df = df.groupby('Target')['Proximity'].mean().reset_index()
    return df




In [28]:
calculate_proximity(dataframes[10])

Unnamed: 0,Target,Proximity
0,Red,1.185751
1,Size(Max),1.143416
2,White,0.546998


In [29]:
# Step 1: Apply PercFixInside to each DataFrame in dataframes and accumulate the results


perc_results = [PercFixInside(df) for df in dataframes]

# Step 2: Concatenate the resulting DataFrames into one
all_perc = pd.concat(perc_results)

# Step 3: Group by 'Region' and 'Target' and calculate the mean
avg_perc = all_perc.groupby(['Region', 'Target']).mean().reset_index()

# This will now be a DataFrame with average 'GazeTime' and 'Percentage' for each 'Target' in each 'Region'
for i,j in enumerate(avg_perc['Percentage']):
    #print(str(i)+" : "+ str(j))
    avg_perc.loc[2,i] = j/len(dataframes)
avg_perc

Unnamed: 0,Region,Target,GazeTime,Percentage,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
0,1,,1.71746,68.008929,,,,,,,,,,,,,,,,
1,1,Size(Low),0.259207,12.277492,,,,,,,,,,,,,,,,
2,1,Size(Max),1.019604,49.281495,2.266964,0.40925,1.642717,0.633072,0.688113,2.715108,0.447792,0.44017,0.762179,0.519274,0.610591,0.262256,2.554072,0.705606,3.300018,0.975139
3,1,Size(Mid),0.367229,18.992167,,,,,,,,,,,,,,,,
4,1,Size(Min),0.436598,20.643399,,,,,,,,,,,,,,,,
5,2,,1.162741,81.45323,,,,,,,,,,,,,,,,
6,2,Transparency(Low),0.229349,13.433763,,,,,,,,,,,,,,,,
7,2,Transparency(Max),0.231577,13.205087,,,,,,,,,,,,,,,,
8,2,Transparency(Mid),0.378059,22.865383,,,,,,,,,,,,,,,,
9,2,Transparency(Min),0.27803,15.578221,,,,,,,,,,,,,,,,


In [30]:
# Step 1: Apply PercFixInside to each DataFrame in dataframes and accumulate the results
perc_results = [PercFixInside(df) for df in dataframes]

# Step 2: Concatenate the resulting DataFrames into one
all_perc = pd.concat(perc_results)

# Step 3: Group by 'Region' and 'Target' and calculate the sum of GazeTime
total_gaze = all_perc.groupby(['Region', 'Target'])['GazeTime'].sum().reset_index()

# Step 4: Calculate the total gaze time in each region
total_gaze_region = total_gaze.groupby('Region')['GazeTime'].sum()

# Step 5: Calculate the percentage of gaze time that each target represents in its region
total_gaze['Percentage'] = total_gaze.apply(lambda row: (row['GazeTime'] / total_gaze_region[row['Region']]) * 100, axis=1)

# This will now be a DataFrame with total 'GazeTime' and 'Percentage' for each 'Target' in each 'Region'
total_gaze


Unnamed: 0,Region,Target,GazeTime,Percentage
0,1,,46.371431,67.96464
1,1,Size(Low),1.296037,1.899546
2,1,Size(Max),16.313665,23.910247
3,1,Size(Mid),2.937832,4.305855
4,1,Size(Min),1.309795,1.919712
5,2,,30.23127,78.499737
6,2,Transparency(Low),1.834795,4.764303
7,2,Transparency(Max),1.852614,4.810571
8,2,Transparency(Mid),2.646416,6.87179
9,2,Transparency(Min),1.946207,5.053599


In [31]:
def calculate_avg_perc(dataframes):
    # Apply PercFixInside to each DataFrame in dataframes, filter out 'None' target and accumulate the results
    perc_results = []
    for df in dataframes:
        if df[df['Target'] != 'None'].empty:  # Check if filtered dataframe is empty
            continue  # Skip to next dataframe if it's empty
        perc_results.append(PercFixInside(df[df['Target'] != 'None']))
    
    # Concatenate the resulting DataFrames into one
    all_perc = pd.concat(perc_results)

    # Group by 'Region' and 'Target' and calculate the mean
    avg_perc = all_perc.groupby(['Region', 'Target']).mean().reset_index()

    # This will now be a DataFrame with average 'GazeTime' and 'Percentage' for each 'Target' in each 'Region'
    return avg_perc
calculate_avg_perc(dataframes)

Unnamed: 0,Region,Target,GazeTime,Percentage
0,1,Size(Low),0.259207,53.990491
1,1,Size(Max),1.019604,76.249045
2,1,Size(Mid),0.367229,45.590803
3,1,Size(Min),0.436598,48.445465
4,2,Transparency(Low),0.229349,34.104071
5,2,Transparency(Max),0.231577,34.392089
6,2,Transparency(Mid),0.378059,37.712518
7,2,Transparency(Min),0.27803,55.434727
8,3,Black,0.336015,63.086705
9,3,Blue,0.153389,15.309233


In [32]:
# Step 1: Apply NFix to each DataFrame in dataframes and accumulate the results
nfix_results = [NFix(df, 0.05, False) for df in dataframes]

# Step 2: Concatenate the resulting DataFrames into one
all_nfix = pd.concat(nfix_results)

# Step 3: Group by 'Region' and 'Target' and calculate the mean
avg_nfix = all_nfix.groupby(['Region', 'Target']).mean().reset_index()

# This will now be a DataFrame with average 'Count' and 'Percentage' for each 'Target' in each 'Region'
avg_nfix

Unnamed: 0,Region,Target,NFix
0,1,,6.37037
1,1,Size(Low),1.0
2,1,Size(Max),3.125
3,1,Size(Mid),1.5
4,1,Size(Min),0.333333
5,2,,8.884615
6,2,Transparency(Low),0.875
7,2,Transparency(Max),1.375
8,2,Transparency(Mid),1.714286
9,2,Transparency(Min),1.142857


In [33]:
def overall_average_distance(dataframes):
    # Create a list to store the distance dataframes
    distance_dfs = []

    # Loop through each dataframe in dataframes
    for df in dataframes:
        # Calculate the distance for each Target in the dataframe
        distance_df = calculate_distance(df)
        # Append the resulting dataframe to the list
        distance_dfs.append(distance_df)

    # Concatenate all distance dataframes into a single dataframe
    all_data = pd.concat(distance_dfs)

    # Group by Target and calculate the average distance
    all_data = all_data.groupby('Target')['Distance'].mean().reset_index()

    return all_data

# usage
overall_avg_distances = overall_average_distance(dataframes)
overall_avg_distances

NameError: name 'calculate_distance' is not defined

In [34]:
def overall_average_convergence_time(dataframes):
    # Create a list to store the convergence time dataframes
    convergence_time_dfs = []

    # Loop through each dataframe in dataframes
    for df in dataframes:
        # Calculate the convergence time for each 'Region' and 'Target' in the dataframe
        convergence_time_df = ConvergTime(df)
        # Append the resulting dataframe to the list
        convergence_time_dfs.append(convergence_time_df)

    # Concatenate all convergence time dataframes into a single dataframe
    all_data = pd.concat(convergence_time_dfs)

    # Group by 'Region' and 'Target' and calculate the average convergence time
    all_data = all_data.groupby(['Region', 'Target'])['ConvergenceTime'].mean().reset_index()

    return all_data

# usage
overall_avg_convergence_times = overall_average_convergence_time(dataframes)
overall_avg_convergence_times

Unnamed: 0,Region,Target,ConvergenceTime
0,1,Size(Low),0.109187
1,1,Size(Max),0.261737
2,1,Size(Mid),0.156927
3,1,Size(Min),0.218299
4,2,Transparency(Low),0.114248
5,2,Transparency(Max),0.090235
6,2,Transparency(Mid),0.116943
7,2,Transparency(Min),0.099942
8,3,Black,0.16218
9,3,Blue,0.153389


In [35]:
import numpy as np

def calculate_eccentricity(dataframe):
    # Get vectors from the eye to the object and from the eye straight ahead
    dataframe = dataframe.copy()
    dataframe = dataframe[dataframe['Target'] != 'None']

    eye_to_object = dataframe[['PosX', 'PosY', 'PosZ']].values - dataframe[['PlayerPosX', 'PlayerPosY', 'PlayerPosZ']].values
    straight_ahead = np.array([0, 0, 1])  # This assumes that straight ahead is along the z-axis

    # Normalize the vectors (make them length 1) so that dot product gives the cosine of the angle
    eye_to_object = eye_to_object / np.linalg.norm(eye_to_object, axis=1, keepdims=True)
    straight_ahead = straight_ahead / np.linalg.norm(straight_ahead)

    # Calculate the dot product of the two vectors, which is equal to the cosine of the angle between them
    cos_angle = np.dot(eye_to_object, straight_ahead)

    # Calculate the angle itself (in radians)
    angle = np.arccos(cos_angle)

    # Convert to degrees
    angle = np.degrees(angle)

    # Normalize the angle by the Oculus Quest 2's field of view
    fov = 89
    angle = angle / fov

    # Add the angle to the dataframe
    dataframe['Eccentricity'] = angle

    return dataframe

calculate_eccentricity(dataframes[0])


Unnamed: 0,GazeTime,Region,Target,PosX,PosY,PosZ,PlayerPosX,PlayerPosY,PlayerPosZ,LightIntensity,Velocity,Eccentricity
0,0.166807,1,Size(Max),0.441291,0.450383,0.898136,0.0,0.56943,0.0,,,0.303054
1,0.208184,1,Size(Max),0.501019,0.160109,1.0,0.0,0.504816,0.113474,,,0.387075
2,0.250302,1,Size(Max),0.584421,0.085556,0.985626,0.0,0.427128,0.249906,,,0.478836
3,0.306064,1,Size(Max),0.612888,0.0,0.969624,0.0,0.332133,0.416732,,,0.579559
4,0.097383,1,Size(Max),0.570646,0.520018,0.787474,0.0,0.301907,0.469812,,,0.702543
5,0.097494,1,Size(Max),0.692646,0.038718,0.964657,0.0,0.271646,0.522953,,,0.66123
6,0.110868,1,Size(Max),0.75592,0.051888,0.960085,0.0,0.237235,0.583383,,,0.721046
19,0.180809,2,Transparency(Min),0.790352,0.504473,0.126212,0.497533,0.986739,0.462303,,,1.3571
20,0.222286,2,Transparency(Min),0.851023,0.490412,0.134069,0.49815,0.988439,0.341128,,,1.221784
21,0.19417,2,Transparency(Min),0.904499,0.480939,0.140959,0.49869,0.989924,0.23528,,,1.103871
