In [9]:
from numpy.linalg import norm
import os
import pandas as pd
import json
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input
from tensorflow.keras.preprocessing import image
from tensorflow.keras.models import Model
import numpy as np
import pandas as pd
import cv2



In [10]:


# Load ResNet50 together with its ImageNet-trained weights. The classification
# head is still attached at this point.
base_model = ResNet50(weights='imagenet')
# Build a feature extractor that stops at the second-to-last layer, so
# predictions are that layer's activations rather than class probabilities.
# NOTE(review): presumably this is the global-average-pool layer - TODO confirm.
model = Model(inputs=base_model.input, outputs=base_model.layers[-2].output)

def extract_features(input_data, target_size=(224, 224)):
    """
    Compute a flat feature vector for one image using the module-level ``model``.

    Parameters
    ----------
    input_data : str or np.ndarray
        Either a path to an image file, or an image already held in an array.
    target_size : tuple of int
        ``(height, width)`` the image is resized to before it is fed to the
        network. This is the order ``image.load_img`` expects.

    Returns
    -------
    np.ndarray
        The model output for the image, flattened to one dimension.

    Raises
    ------
    TypeError
        If ``input_data`` is neither a ``str`` nor a ``np.ndarray``.
    """
    if isinstance(input_data, str):  # Input is a file path
        img = image.load_img(input_data, target_size=target_size)
        img_array = image.img_to_array(img)
    elif isinstance(input_data, np.ndarray):  # Input is an in-memory image
        # cv2.resize takes its size as (width, height), while target_size
        # follows the (height, width) order used by load_img above. Swap the
        # two so both branches produce the same shape for non-square targets.
        height, width = target_size
        # NOTE(review): arrays read with OpenCV are usually BGR; confirm that
        # callers pass the channel order preprocess_input expects.
        img_array = cv2.resize(input_data, (width, height), interpolation=cv2.INTER_AREA)
    else:
        raise TypeError("Input must be a file path (str) or a NumPy array")

    # The model works on batches: add a leading batch axis to a single image.
    if img_array.ndim == 3:
        img_array = np.expand_dims(img_array, axis=0)

    preprocessed_img = preprocess_input(img_array)
    features = model.predict(preprocessed_img)
    return features.flatten()


# Example usage
# feature_vector = extract_features('../data/William/eye_gaze_images/William_0ab3bc08-9243-4aa4-b145-338dab7163c3.png')

In [11]:


def calculate_quadrant(gaze_x, gaze_y, screen_width, screen_height, grid_size):
    """
    Locate the grid cell that contains a gaze point.

    The screen is split into ``grid_size`` equal columns and ``grid_size``
    equal rows. The result is the ``(column, row)`` pair obtained by
    floor-dividing each coordinate by the cell size on its axis.

    The screen dimensions are passed through ``int()`` first. No clamping is
    applied, so a point outside the screen yields an index outside
    ``0 .. grid_size - 1`` (a point exactly on the far edge yields ``grid_size``).
    """
    height_px = int(screen_height)
    width_px = int(screen_width)

    cell_width = width_px / grid_size
    cell_height = height_px / grid_size

    column = int(gaze_x // cell_width)
    row = int(gaze_y // cell_height)
    return column, row

def get_screen_size(metadata_file_path):
    """
    Read the screen dimensions from a metadata JSON file.

    The dimensions are looked up under the ``'screenData'`` key when the file
    has one, and at the top level of the document otherwise.

    Returns the ``(screenWidth, screenHeight)`` values as stored in the file.
    Raises ``ValueError`` when either value is absent or null.
    """
    with open(metadata_file_path, 'r') as handle:
        document = json.load(handle)

    # Some files nest the screen description one level down.
    screen_info = document['screenData'] if 'screenData' in document else document

    width = screen_info.get('screenWidth')
    height = screen_info.get('screenHeight')
    if width is None or height is None:
        raise ValueError("Screen size not found in metadata")

    return width, height

def assign_gaze_to_quadrants(base_dir, grid_size):
    """
    Map every recorded gaze point under ``base_dir`` to a grid cell.

    Each immediate sub-directory of ``base_dir`` is treated as one user. For a
    user that has a ``metadata.json``, every ``.csv`` file in the directory is
    read without a header row. Columns 1 and 2 of each row are taken as the
    gaze x / y coordinates and passed to ``calculate_quadrant``.

    Parameters
    ----------
    base_dir : str
        Directory containing one sub-directory per user.
    grid_size : int
        Number of cells along each screen axis.

    Returns
    -------
    dict
        Maps each user directory name to a list of ``(quadrant_x, quadrant_y)``
        tuples in the order the rows were read. Users without a
        ``metadata.json`` map to an empty list.
    """
    quadrant_assignments = {}

    # next(os.walk(...))[1] lists only the immediate sub-directories.
    for user_dir in next(os.walk(base_dir))[1]:
        full_user_dir = os.path.join(base_dir, user_dir)
        metadata_file_path = os.path.join(full_user_dir, 'metadata.json')
        user_quadrants = []

        # The screen size is needed to bin gaze points, so users without
        # metadata contribute no points.
        if os.path.exists(metadata_file_path):
            screen_width, screen_height = get_screen_size(metadata_file_path)

            for file in os.listdir(full_user_dir):
                if not file.endswith('.csv'):
                    continue
                gaze_data = pd.read_csv(os.path.join(full_user_dir, file), header=None)

                for _, row in gaze_data.iterrows():
                    # Same guard as compare_gaze_images: rows lacking a gaze
                    # coordinate are skipped instead of aborting the whole
                    # walk when calculate_quadrant hits a missing value.
                    if row.size < 3 or pd.isnull(row[1]) or pd.isnull(row[2]):
                        continue
                    user_quadrants.append(
                        calculate_quadrant(row[1], row[2], screen_width, screen_height, grid_size)
                    )

        quadrant_assignments[user_dir] = user_quadrants

    return quadrant_assignments

In [18]:
# Root directory holding one sub-directory per user.
base_dir = '../data'
# Number of cells along each screen axis. This module-level name is also read
# directly by compare_gaze_images when it recomputes features, so it has to be
# defined before that function is called without cached features.
grid_size = 10
# Maps each user directory name to a list of (quadrant_x, quadrant_y) tuples.
quadrant_assignments = assign_gaze_to_quadrants(base_dir, grid_size)

In [19]:
def cosine_similarity(vec_a, vec_b):
    """
    Return the cosine of the angle between two vectors.

    Both arguments may be NumPy arrays or plain sequences of numbers of the
    same length. When either vector has zero length the angle is undefined;
    ``0.0`` is returned in that case instead of dividing by zero. The
    division would produce NaN, and NaN compares False against any threshold,
    which would silently hide the pair from callers.
    """
    denominator = norm(vec_a) * norm(vec_b)
    if denominator == 0:
        return 0.0
    return np.dot(vec_a, vec_b) / denominator

import numpy as np

def convert_for_json(data):
    """
    Return a JSON-serializable copy of a quadrant -> items dictionary.

    Keys may be ``(x, y)`` tuples, which become the string ``"x,y"``, or
    strings, which are kept unchanged. Keeping strings matters when the data
    was loaded from JSON: indexing such a key with ``[0]`` / ``[1]`` would
    pick single characters and turn ``"3,1"`` into ``"3,,"``.

    Each item in a value list may be a bare feature vector or a
    ``(user_dir, file, features)`` record. NumPy arrays are converted to
    lists wherever they appear. Tuples stay tuples and lists stay lists.

    Note: this notebook defines ``convert_for_json`` a second time in a later
    cell; once that cell has run, the later definition is the one in effect.
    """
    def to_serializable(item):
        # Recursively replace NumPy arrays while keeping the container type.
        if isinstance(item, np.ndarray):
            return item.tolist()
        if isinstance(item, tuple):
            return tuple(to_serializable(part) for part in item)
        if isinstance(item, list):
            return [to_serializable(part) for part in item]
        return item

    converted_data = {}
    for key, value in data.items():
        str_key = key if isinstance(key, str) else f"{key[0]},{key[1]}"
        converted_data[str_key] = [to_serializable(item) for item in value]
    return converted_data


In [20]:
def convert_for_json(data):
    """
    Return a JSON-serializable copy of a quadrant -> items dictionary.

    Keys may be ``(x, y)`` tuples, which become the string ``"x,y"``, or
    strings, which are kept unchanged. Keeping strings matters when the data
    was loaded from JSON: indexing such a key with ``[0]`` / ``[1]`` would
    pick single characters and turn ``"3,1"`` into ``"3,,"``.

    Each item in a value list may be a bare feature vector, as stored for the
    reference user, or a ``(user_dir, file, features)`` record, as stored for
    every other user. NumPy arrays are converted to lists wherever they
    appear, so the result can be passed straight to ``json.dump``. Tuples stay
    tuples and lists stay lists.
    """
    def to_serializable(item):
        # Recursively replace NumPy arrays while keeping the container type.
        if isinstance(item, np.ndarray):
            return item.tolist()
        if isinstance(item, tuple):
            return tuple(to_serializable(part) for part in item)
        if isinstance(item, list):
            return [to_serializable(part) for part in item]
        return item

    converted_data = {}
    for key, value in data.items():
        str_key = key if isinstance(key, str) else f"{key[0]},{key[1]}"
        converted_data[str_key] = [to_serializable(item) for item in value]
    return converted_data


In [21]:
def _collect_gaze_features(base_dir, reference_user, grid_size):
    """Extract image features for every usable gaze row under ``base_dir``, grouped by quadrant."""
    quadrant_features = {}
    reference_features = {}
    # Features are cached per image path so an image that is referenced by
    # several rows goes through the network only once.
    features_by_path = {}

    # next(os.walk(...))[1] lists only the immediate sub-directories (one per user).
    for user_dir in next(os.walk(base_dir))[1]:
        full_user_dir = os.path.join(base_dir, user_dir)
        metadata_file_path = os.path.join(full_user_dir, 'metadata.json')

        # The screen size is needed to bin gaze points; skip users without it.
        if not os.path.exists(metadata_file_path):
            continue
        screen_width, screen_height = get_screen_size(metadata_file_path)

        for file in os.listdir(full_user_dir):
            if not file.endswith('.csv'):
                continue
            gaze_data = pd.read_csv(os.path.join(full_user_dir, file), header=None)

            for _, row in gaze_data.iterrows():
                # Skip rows that do not carry both gaze coordinates.
                if row.size < 3 or pd.isnull(row[1]) or pd.isnull(row[2]):
                    continue

                quadrant = calculate_quadrant(row[1], row[2], screen_width, screen_height, grid_size)

                # NOTE(review): the path is always read from the FIRST row of
                # the CSV, so every row of a file is paired with the same
                # image. If each row names its own image this should probably
                # be row[0] - confirm against the CSV layout before changing.
                image_path = gaze_data.iloc[0, 0]
                if image_path not in features_by_path:
                    features_by_path[image_path] = extract_features(f'../{image_path}')
                features = features_by_path[image_path]

                if user_dir == reference_user:
                    reference_features.setdefault(quadrant, []).append(features)
                else:
                    quadrant_features.setdefault(quadrant, []).append((user_dir, file, features))

    return quadrant_features, reference_features


def compare_gaze_images(base_dir, quadrant_features=None, reference_features=None, reference_user="Will", threshold=0.2, grid_size=None):
    """
    Report images whose features differ from the reference user's within the same quadrant.

    Parameters
    ----------
    base_dir : str
        Directory containing one sub-directory per user. Only read when the
        features have to be computed.
    quadrant_features : dict, optional
        Maps a quadrant key to a list of ``(user_dir, file, features)``
        records for every user other than the reference user.
    reference_features : dict, optional
        Maps a quadrant key to a list of feature vectors for the reference
        user. It must use the same kind of key as ``quadrant_features``,
        because quadrants are matched by dictionary lookup.
    reference_user : str
        Name of the user directory whose images form the reference set. Only
        used when the features have to be computed; it has to match the
        directory name exactly.
    threshold : float
        A pair is reported when its cosine similarity is strictly below this.
    grid_size : int, optional
        Number of cells along each screen axis, used when features are
        computed. When omitted, the module-level ``grid_size`` variable is
        used, which is what earlier versions of this function always read.

    If either feature dictionary is ``None``, both are recomputed from
    ``base_dir``; freshly computed dictionaries are keyed by
    ``(quadrant_x, quadrant_y)`` tuples.

    Returns
    -------
    tuple
        ``(quadrant_features, reference_features)`` - the dictionaries that
        were passed in, or the newly computed ones.

    Notes
    -----
    Results are printed, one line per reference/user pair below the
    threshold. ``file`` in each record is the CSV file name, so that is the
    name shown in the printed message.
    """
    if quadrant_features is None or reference_features is None:
        if grid_size is None:
            # Backward compatibility with callers that rely on the notebook
            # having defined a global ``grid_size``.
            try:
                grid_size = globals()['grid_size']
            except KeyError:
                raise NameError("name 'grid_size' is not defined") from None
        quadrant_features, reference_features = _collect_gaze_features(base_dir, reference_user, grid_size)

    # Compare every reference vector against every other user's vector that
    # fell into the same quadrant.
    for quadrant, features_list in quadrant_features.items():
        print(f"Comparing quadrant {quadrant}...")
        reference_vecs = reference_features.get(quadrant, [])
        for ref_vec in reference_vecs:
            for user_dir, file, user_vec in features_list:
                similarity = cosine_similarity(ref_vec, user_vec)
                if similarity < threshold:
                    print(f"Image {file} from {user_dir} is significantly different in quadrant {quadrant}, consider removing.")

    return quadrant_features, reference_features


In [22]:
import json

def load_from_json(filename):
    """Open ``filename`` for reading and return its parsed JSON content."""
    with open(filename, 'r') as handle:
        parsed = json.load(handle)
    return parsed

# Load the cached feature dictionaries from disk.
# NOTE: the cells that write these two files sit further down in this
# notebook, so on a fresh checkout this cell only works if the files are
# already on disk from an earlier session.
quadrant_features_loaded = load_from_json('quadrant_features.json')
reference_features_loaded = load_from_json('reference_features.json')


In [26]:
# Both feature dictionaries are supplied, so compare_gaze_images skips feature
# extraction and only runs the per-quadrant comparison. On that path base_dir
# and reference_user are not used; only threshold affects what is printed.
quad_fet, ref_fet = compare_gaze_images('../data', quadrant_features_loaded, reference_features_loaded, reference_user="William", threshold=0.6)

Comparing quadrant 3,1...
Image calibration_data.csv from eloise is significantly different in quadrant 3,1, consider removing.
Image calibration_data.csv from eloise is significantly different in quadrant 3,1, consider removing.
Image calibration_data.csv from eloise is significantly different in quadrant 3,1, consider removing.
Image calibration_data.csv from eloise is significantly different in quadrant 3,1, consider removing.
Comparing quadrant 2,3...
Image calibration_data.csv from eloise is significantly different in quadrant 2,3, consider removing.
Image calibration_data.csv from eloise is significantly different in quadrant 2,3, consider removing.
Image calibration_data.csv from eloise is significantly different in quadrant 2,3, consider removing.
Image calibration_data.csv from eloise is significantly different in quadrant 2,3, consider removing.
Comparing quadrant 1,7...
Comparing quadrant 1,6...
Image calibration_data.csv from eloise is significantly different in quadrant 1,

In [None]:
# Save the per-user features so later sessions can load them instead of
# recomputing.
# Serialize BEFORE opening the file: opening with 'w' truncates it at once, so
# a failure while converting or encoding would otherwise leave an empty or
# half-written cache in place of the previous one.
quadrant_features_json = json.dumps(convert_for_json(quad_fet))
with open('quadrant_features.json', 'w') as f:
    f.write(quadrant_features_json)


In [None]:
# Save the reference user's features.
# Serialize BEFORE opening the file: opening with 'w' truncates it at once, so
# a failure while converting or encoding would otherwise leave an empty or
# half-written cache in place of the previous one.
reference_features_json = json.dumps(convert_for_json(ref_fet))
with open('reference_features.json', 'w') as f:
    f.write(reference_features_json)