In [4]:
#create dictionary: ImagePath, array of FIXMAPS

import pandas as pd
import cv2
import numpy as np
import os
from scipy.ndimage import gaussian_filter
import math
from collections import Counter
import matplotlib.pyplot as plt
import re
import csv

grid_size = (20,20)
sigma = 1
def compute_ppda(distance, h_res, v_res, screen_w, screen_h):
    """
    Compute the number of pixels per degree of visual angle based on the experimental conditions.

    :param distance: int, the distance between the observer and the screen (in mm)
    :param h_res: int, the horizontal resolution of the screen
    :param v_res: int, the vertical resolution of the screen
    :param screen_w: int, the width of the screen (in mm)
    :param screen_h: int, the height of the screen (in mm)
    :return horizontal_ppda: float, the number of pixel per degree of visual angle
    """
    pxl_density_x = h_res / screen_w
    pxl_density_y = v_res / screen_h

    d = 2 * distance * math.tan(np.deg2rad(0.5))
    horizontal_ppda = d * ((pxl_density_x + pxl_density_y) / 2)

    return horizontal_ppda

def checkObserverRemembered(observer, image_path, base_dir):
    csv_file_path = os.path.join(base_dir, ".." ,"hit_status.csv")
    if not os.path.isfile(csv_file_path):
        print("Error: CSV file not found.")
        return False
    df = pd.read_csv(csv_file_path)
    filtered_rows = df[(df['Setup Folder'] == observer) & (df['Image Path'] == image_path) & (df['Hit'] == 1)]
    if not filtered_rows.empty:
        return True
    else:
        return False

distance = 610
h_res = 1920
v_res = 1080
screen_w = 527
screen_h = 296

ppda = compute_ppda(distance, h_res, v_res, screen_w, screen_h)
#print("ppda", ppda)

def bin_fixations(fixation_map):
    global grid_size
    height, width = fixation_map.shape
    binned_map = np.zeros(grid_size)

    bin_height = height // grid_size[0]
    bin_width = width // grid_size[1]

    for i in range(grid_size[0]):
        for j in range(grid_size[1]):
            bin_area = fixation_map[i*bin_height:(i+1)*bin_height, j*bin_width:(j+1)*bin_width]
            binned_map[i, j] = np.sum(bin_area)
            #ili avg?

    return binned_map

def normalize_map(binned_map):
    return binned_map / np.sum(binned_map)
    #return binned_map

def smooth_map(binned_map):
    global sigma
    return gaussian_filter(binned_map, sigma=sigma)

def process_fixation_map(fixation_map):
    binned_map = bin_fixations(fixation_map)
    normalized_map = normalize_map(binned_map)
    smoothed_map = smooth_map(normalized_map)
    return smoothed_map

def get_current_fixation_map(image_path, coordinates):
    image = cv2.imread(image_path)
    if image is None:
        print(f"Image at {image_path} not found.")
        return

    coordinates = coordinates[0:120]
  
    fixation_map = np.zeros((1080, 1920), dtype=np.float32)

    # Convert coordinates to pixel coordinates and update the saliency map
    for x_norm, y_norm in coordinates:
        # Scale normalized coordinates to pixel coordinates for the 1920x1080 screen
        if(x_norm >0 and y_norm >0):
            x = int((x_norm + 1 + 0.1) * 960)  # Scaling from (-1, 1) to (0, 1920) range
            y = int((y_norm + 0.5 + 0.05) * 1080) # Scaling from (-0.5, 0.5) to (0, 1080) range
        if(x_norm >0 and y_norm <0):
            x = int((x_norm + 1 + 0.1) * 960)  # Scaling from (-1, 1) to (0, 1920) range
            y = int((y_norm + 0.5 - 0.1) * 1080) # Scaling from (-0.5, 0.5) to (0, 1080) range
        if(x_norm <0 and y_norm >0):
            x = int((x_norm + 1 - 0.1) * 960)  # Scaling from (-1, 1) to (0, 1920) range
            y = int((y_norm + 0.5 + 0.05) * 1080) # Scaling from (-0.5, 0.5) to (0, 1080) range
        if(x_norm <0 and y_norm <0):
            x = int((x_norm + 1 - 0.1) * 960)  # Scaling from (-1, 1) to (0, 1920) range
            y = int((y_norm + 0.5 - 0.1) * 1080) # Scaling from (-0.5, 0.5) to (0, 1080) range
        # Update the saliency map if coordinates are within the screen
        if 0 <= x < 1920 and 0 <= y < 1080:
            fixation_map[y, x] += 1
    #sigma = ppda / np.sqrt(2)
    #fixation_map = gaussian_filter(fixation_map, sigma = sigma)
    fixation_map = cv2.GaussianBlur(fixation_map, (11,11), 0)
    # Crop the saliency map to the 700x700 region
    fixation_map = fixation_map[190:890, 610:1310]
    # flip the Y coordinates
    fixation_map = np.flipud(fixation_map)
    return fixation_map

def normalize_fixation_map(fixation_map):
    min_val = np.min(fixation_map)
    max_val = np.max(fixation_map)
    normalized_fixation_map = (fixation_map - min_val) / (max_val - min_val) * 255
    return normalized_fixation_map

# 90experiments folder
base_dir = os.path.abspath(os.path.join(os.getcwd(),"..", "90experiments"))

fixation_maps = {}  # Dictionary to store fixation maps for each imagePath


for folder in os.listdir(base_dir):
    folder_path = os.path.join(base_dir, folder)
    if not os.path.isdir(folder_path):
        continue
    match = re.search(r'\d{1,2}$', folder)
    if match:
        observer = int(match.group())
    #if(observer != 5):
    #    continue
    if(observer == 1 or observer == 2 or observer == 49 or observer == 50 or observer == 5):
        continue

    #if(observer not in [70,71,73,74,76,77,79,80,82,83,85,87,88,89,86,90,18,57,6,45,48,60,63,69,3,9,12,21,15,27,30,33,36,42,24,66,51,54,72,75]):
    #    continue
        
    csv_file_path = os.path.join(folder_path, "eye_tracker_data.csv")
    if not os.path.isfile(csv_file_path):
        continue
    data = pd.read_csv(csv_file_path)

    filtered_data = data[data['ImagePath'].str.startswith('targetImages')]
    
    uniqueImagePaths = []
    delete_rows = []

    #get only the eye-tracking data from the first viewing
    index = 0
    
    row = filtered_data.iloc[index]
    while len(uniqueImagePaths) < 10:
        row = filtered_data.iloc[index]
        if(row['ImagePath'] not in uniqueImagePaths):
            uniqueImagePaths.append(row['ImagePath'])
            lastImagePath = row['ImagePath']
            index +=1
        elif(row['ImagePath'] in uniqueImagePaths):
            index += 1    
    row = filtered_data.iloc[index]

    while(row['ImagePath'] == lastImagePath):
        index +=1
        row = filtered_data.iloc[index]

    filtered_data.reset_index(drop=True, inplace=True)
    filtered_data = filtered_data.iloc[:index].copy()
    
    grouped = filtered_data.groupby('ImagePath')

    # Generate and save fixation maps for each image in the current folder
    for image_path, group in grouped:
        # Construct full image path by going one directory back from base_dir
        full_image_path = os.path.abspath(os.path.join(base_dir, "..", image_path))
        full_image_path = full_image_path.replace('\\', '/')

        #check if current observer has remembered this image, if not, continue
        if(not checkObserverRemembered(observer, image_path, base_dir)):
            continue
        
        # Extract coordinates
        coordinates = group[['PosX', 'PosY']].values

        current_fixation_map = get_current_fixation_map (full_image_path, coordinates)
        if(np.all(current_fixation_map == 0)):
            continue

        current_fixation_map_20x20 = process_fixation_map(current_fixation_map)
        current_fixation_map_20x20 = normalize_fixation_map(current_fixation_map_20x20)
        
        #current_fixation_map_20x20 = (current_fixation_map)
        #add to dictionary or update it
        if image_path not in fixation_maps:
            fixation_maps[image_path] = [current_fixation_map_20x20]
        else:
            fixation_maps[image_path].append(current_fixation_map_20x20)

# Flatten the fixation maps and standardize them
all_fixation_maps = []
labels = []

for image_path, maps in fixation_maps.items():
    for fixation_map in maps:
        all_fixation_maps.append(fixation_map.flatten())
        labels.append(image_path)

X = np.array(all_fixation_maps)
y = np.array(labels)

print(len(all_fixation_maps))

626


In [7]:
#LEave one out cross validation

import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.image import img_to_array, array_to_img
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import LeaveOneOut
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.layers import Input, Dense, Flatten, Dropout, BatchNormalization
from tensorflow.keras.regularizers import l2
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping
from sklearn.metrics import classification_report, accuracy_score
import collections

# Assuming X and y are already defined and X contains grayscale images
X = X.reshape(-1, 20, 20, 1)  # Reshape to include channel dimension

# Encode the labels
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Convert labels to categorical format
y_categorical = to_categorical(y_encoded, num_classes=len(label_encoder.classes_))

# Define leave-one-out cross-validation
loo = LeaveOneOut()

# Initialize variables to store results
acc_per_fold = []
loss_per_fold = []
fold_no = 1

problematic_samples = []
high_loss_threshold = 3.0  # Define a threshold for high loss

for train, test in loo.split(X, y_encoded):
    model = Sequential([
        Input(shape=(grid_size[0], grid_size[1], 1)),
        Flatten(),
        Dense(512, activation='relu', kernel_regularizer=l2(0.001)),
        BatchNormalization(),
        Dropout(0.5),
        Dense(256, activation='relu', kernel_regularizer=l2(0.001)),
        BatchNormalization(),
        Dropout(0.5),
        Dense(len(np.unique(y)), activation='softmax', kernel_regularizer=l2(0.001))
    ])

    # Compile the model
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

    # Calculate class weights
    class_counts = collections.Counter(y_encoded[train])
    total_samples = sum(class_counts.values())
    class_weights = {cls: total_samples / count for cls, count in class_counts.items()}

    # Callbacks
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=30, min_lr=1e-6)
    early_stopping = EarlyStopping(monitor='val_loss', patience=30, restore_best_weights=True)

    # Train the model
    history = model.fit(X[train], to_categorical(y_encoded[train], num_classes=len(label_encoder.classes_)),
                        class_weight=class_weights,
                        epochs=300,
                        validation_data=(X[test], to_categorical(y_encoded[test], num_classes=len(label_encoder.classes_))),
                        callbacks=[reduce_lr, early_stopping], verbose = 0)

    # Evaluate the model
    scores = model.evaluate(X[test], to_categorical(y_encoded[test], num_classes=len(label_encoder.classes_)), verbose=0)
    print(f'Score for fold {fold_no}: {model.metrics_names[0]} of {scores[0]}; {model.metrics_names[1]} of {scores[1]*100}%')
    acc_per_fold.append(scores[1] * 100)
    loss_per_fold.append(scores[0])
    # Store problematic samples
    if scores[0] > high_loss_threshold:
        problematic_samples.append(test[0])

    fold_no += 1

# Print the average accuracy and loss over all folds
print('Average accuracy over all folds:', np.mean(acc_per_fold))
print('Average loss over all folds:', np.mean(loss_per_fold))

# Print and remove problematic samples
print(f'Problematic samples (indices with loss > {high_loss_threshold}):', problematic_samples)

# Remove problematic samples from the dataset
X_filtered = np.delete(X, problematic_samples, axis=0)
y_filtered = np.delete(y_encoded, problematic_samples, axis=0)

print('Number of samples after removing problematic samples:', X_filtered.shape[0])


Score for fold 1: loss of 0.9145313501358032; compile_metrics of 100.0%
Score for fold 2: loss of 0.8997101187705994; compile_metrics of 100.0%
Score for fold 3: loss of 5.18798828125; compile_metrics of 0.0%
Score for fold 4: loss of 5.27087926864624; compile_metrics of 0.0%
Score for fold 5: loss of 0.8512699007987976; compile_metrics of 100.0%
Score for fold 6: loss of 1.3072110414505005; compile_metrics of 100.0%
Score for fold 7: loss of 2.512240171432495; compile_metrics of 0.0%
Score for fold 8: loss of 1.5243921279907227; compile_metrics of 100.0%
Score for fold 9: loss of 0.8665581345558167; compile_metrics of 100.0%
Score for fold 10: loss of 2.663489818572998; compile_metrics of 0.0%
Score for fold 11: loss of 1.3502180576324463; compile_metrics of 100.0%
Score for fold 12: loss of 3.0292389392852783; compile_metrics of 0.0%
Score for fold 13: loss of 1.3630461692810059; compile_metrics of 100.0%
Score for fold 14: loss of 5.265138149261475; compile_metrics of 0.0%
Score for