In [11]:
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input
from tensorflow.keras.preprocessing import image
from tensorflow.keras.models import Model
import numpy as np
import pandas as pd
# NOTE(review): leftover comment ("Function to process the CSV files and count gaze points per quadrant"); no such function is defined in this cell.
import os
import json
import cv2

# Build the feature extractor: start from the ImageNet-trained ResNet50 and
# expose the output of its second-to-last layer instead of the class scores.
base_model = ResNet50(weights='imagenet')
penultimate_layer = base_model.layers[-2]
model = Model(inputs=base_model.input, outputs=penultimate_layer.output)

def extract_features(input_data, target_size=(224, 224)):
    """
    Run a single image through the module-level `model` and return its features.

    Parameters
    ----------
    input_data : str or np.ndarray
        Either a path to an image file, or an image array that `cv2.resize`
        accepts (this notebook passes H x W x 3 heatmap arrays).
        NOTE(review): array input is presumably expected in RGB channel order,
        like the output of `load_img` -- confirm for arrays read with OpenCV.
    target_size : tuple of int
        (height, width) the image is resized to, following the Keras
        `load_img` convention.

    Returns
    -------
    np.ndarray
        1-D array holding the flattened output of `model.predict`.

    Raises
    ------
    TypeError
        If `input_data` is neither a `str` nor a NumPy array.
    """
    if isinstance(input_data, str):  # File path: let Keras load and resize it
        img = image.load_img(input_data, target_size=target_size)
        img_array = image.img_to_array(img)
    elif isinstance(input_data, np.ndarray):  # In-memory image
        height, width = target_size
        # cv2.resize expects dsize as (width, height), the reverse of the
        # Keras (height, width) convention used by `target_size`.
        # Aspect ratio is not preserved by this plain resize.
        img_array = cv2.resize(input_data, (width, height), interpolation=cv2.INTER_AREA)
    else:
        raise TypeError("Input must be a file path (str) or a NumPy array")

    # Both branches yield one image without a batch axis; the network needs one.
    # (Previously only the array branch added it, so file paths failed in predict.)
    img_batch = np.expand_dims(img_array, axis=0)

    preprocessed_img = preprocess_input(img_batch)
    features = model.predict(preprocessed_img)
    return features.flatten()


# Example usage
# feature_vector = extract_features('./data/William/eye_gaze_images/William_0ab3bc08-9243-4aa4-b145-338dab7163c3.png')

In [12]:
import pickle

def load_processed_data(file_path):
    """
    Read a pickled two-item payload from `file_path` and return it as (X, Y).

    The file is opened in binary mode and unpickled; the payload must unpack
    into exactly two items. Unpickling can execute arbitrary code, so only
    point this at files you created yourself.
    """
    with open(file_path, 'rb') as handle:
        payload = pickle.load(handle)
    first, second = payload
    return first, second

In [13]:
# Load the pickled (X, Y) pair and turn both halves into NumPy arrays.
X, Y = map(np.array, load_processed_data('./pickel_files/all_data_shape_2.pkl'))

In [14]:
# One list entry per element along the first axis of X.
image_paths = list(X)

In [16]:
# Walk over image_paths in slices of `batch_size`, extracting features one
# item at a time, then cache the stacked result on disk.
batch_size = 32  # Adjust based on your system's capabilities
num_batches = -(-len(image_paths) // batch_size)  # ceiling division
all_features = []

for batch_start in range(0, len(image_paths), batch_size):
    batch_paths = image_paths[batch_start:batch_start + batch_size]
    # extract_features handles a single input, so each entry is processed on its own
    all_features += [extract_features(path) for path in batch_paths]

# Stack into one array and pickle it for later cells
image_features = np.array(all_features)
with open('./pickel_files/image_features.pkl', 'wb') as out_file:
    pickle.dump(image_features, out_file)



In [None]:
# Restore the cached image features written by the extraction cell.
with open('./pickel_files/image_features.pkl', 'rb') as cache_file:
    image_features = pickle.load(cache_file)

In [None]:
# Split Y into two float arrays: position 0 of every entry goes to Y_x,
# position 1 to Y_y.
Y_x, Y_y = (
    np.array([float(entry[axis]) for entry in Y])
    for axis in (0, 1)
)

In [None]:
from scipy.stats import multivariate_normal
from sklearn.metrics.pairwise import cosine_similarity

# Assuming 'extract_features' is your feature extraction function

import numpy as np
from scipy.stats import multivariate_normal

def create_gaze_heatmap(gaze_point, image_size=(224, 224), std_dev=10):
    """
    Create a heatmap for a gaze point using an isotropic Gaussian.

    Parameters
    ----------
    gaze_point : sequence of two floats
        (x, y) centre of the Gaussian, in grid (pixel) units.
    image_size : tuple of int
        (width, height) of the grid; the result has shape (height, width).
    std_dev : float
        Standard deviation of the Gaussian along both axes, in grid units.

    Returns
    -------
    np.ndarray
        uint8 array scaled so that the largest value on the grid is 255.
        If the density is zero (or not a number) everywhere on the grid --
        e.g. the gaze point lies far outside the image -- an all-zero map is
        returned instead of dividing by zero.
    """
    width, height = image_size
    x, y = np.meshgrid(np.arange(width, dtype=float),
                       np.arange(height, dtype=float))
    d = np.dstack([x, y])  # d[row, col] == (x, y) of that pixel
    cov_matrix = np.eye(2) * std_dev**2  # uncorrelated, same spread on both axes
    gaussian = multivariate_normal(mean=gaze_point, cov=cov_matrix)
    heatmap = gaussian.pdf(d)

    peak = np.max(heatmap)
    if not peak > 0:
        # Covers both an underflowed (all-zero) density and a NaN peak; the
        # original normalisation produced NaNs here and cast them to uint8.
        return np.zeros(np.shape(heatmap), dtype=np.uint8)

    return (heatmap / peak * 255).astype(np.uint8)


def extract_gaze_features(gaze_points):
    """
    Turn each gaze point into a heatmap and run it through `extract_features`.

    For every point, a single-channel heatmap is built with
    `create_gaze_heatmap`, copied into three identical channels, and passed to
    `extract_features`. The per-point feature vectors are returned stacked in
    one NumPy array, in the same order as `gaze_points`.
    """
    def _features_for(point):
        single_channel = create_gaze_heatmap(point)
        # Three identical channels so the array looks like a colour image
        three_channel = np.stack([single_channel] * 3, axis=2)
        return extract_features(three_channel)

    return np.array([_features_for(point) for point in gaze_points])

# Convert gaze points from Y to coordinates on the 224x224 heatmap grid.
# NOTE(review): assumes Y holds fractions of the image size -- TODO confirm.
gaze_points_coords = [(x * 224, y * 224) for x, y in zip(Y_x, Y_y)]

# Extract gaze point features
gaze_features = extract_gaze_features(gaze_points_coords)

if len(image_features) != len(gaze_features):
    raise ValueError(
        "image_features and gaze_features must describe the same samples "
        f"(got {len(image_features)} and {len(gaze_features)} rows)"
    )

# Full pairwise matrix: similarities[i, j] compares image i with gaze heatmap j.
# Kept in this shape because a later cell pickles `similarities`.
similarities = cosine_similarity(image_features, gaze_features)

# Only the diagonal compares a sample's image with its OWN gaze heatmap.
# Thresholding the whole matrix (as before) returned one row index per
# below-threshold (i, j) pair: duplicated indices, driven by unrelated samples.
paired_similarities = np.diag(similarities)

# Identify mismatches based on similarity threshold
threshold = 0.5  # Define based on your dataset and requirements
mismatches = np.where(paired_similarities < threshold)[0]  # One index per mismatching sample

# Now filter out the mismatching targets for further inspection.
anomalies = [Y[i] for i in mismatches]


In [None]:
# Persist every artefact of the similarity analysis; each entry pairs the
# destination file with the object written to it.
outputs_to_save = (
    ('./pickel_files/anomalies.pkl', anomalies),
    ('./pickel_files/similarities.pkl', similarities),
    ('./pickel_files/mismatches.pkl', mismatches),
    ('./pickel_files/gaze_features.pkl', gaze_features),
)
for destination, payload in outputs_to_save:
    with open(destination, 'wb') as file:
        pickle.dump(payload, file)


In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler

# Requires X, Y and image_paths from the earlier cells.

# One row per sample: X flattened to 2-D, the two targets as floats, and the
# matching entry of image_paths.
n_samples = X.shape[0]
X_flattened = X.reshape(n_samples, -1)
df = pd.DataFrame(X_flattened)

for axis, target_column in enumerate(('target_x', 'target_y')):
    df[target_column] = Y[:, axis].astype(float)
df['image_path'] = image_paths

# Use string column labels throughout to avoid future warnings
df.columns = df.columns.astype(str)

# Everything except the trailing three columns ('target_x', 'target_y',
# 'image_path') is a feature to standardize.
features = df.columns[:-3]

scaler = StandardScaler().fit(df[features])
df[features] = scaler.transform(df[features])


In [None]:
# Requires `df` from the previous cell, including its 'image_path',
# 'target_x' and 'target_y' columns.

# Extract image features in slices of `batch_size` rows
# (adjust according to your system's memory capacity).
batch_size = 100
n_batches = (len(df) + batch_size - 1) // batch_size  # ceiling division

# One feature vector per row of `df`, in row order
image_features = []

for i in range(n_batches):
    batch = df['image_path'].iloc[i*batch_size:(i+1)*batch_size]  # explicit positional slice
    batch_features = batch.apply(extract_features)
    image_features.extend(batch_features)

# Build the feature frame directly on df's index so the rows line up, and give
# the columns string names. `df` already uses string labels ('0', '1', ...);
# plain integer labels here would leave `final_df` with mixed label types
# (rejected by recent scikit-learn when fitting) and with names that are easy
# to confuse with df's own '0', '1', ... columns.
image_features_df = pd.DataFrame(image_features, index=df.index).add_prefix('img_feat_')

# Concatenate the numerical features with the image features.
# 'anomaly' is only present when the Isolation Forest cell below has already
# been run; it is dropped so a re-run never feeds old predictions back in.
final_df = pd.concat(
    [df.drop(columns=['image_path', 'anomaly'], errors='ignore'), image_features_df],
    axis=1,
)


In [None]:
# Assuming all previous steps are correct, up to the creation of `final_df`
from sklearn.ensemble import IsolationForest

# Isolation Forest with a fixed seed so repeated runs give the same result
iso_forest = IsolationForest(n_estimators=100, contamination='auto', random_state=42)

# `final_df` is used as-is: whatever columns it contains are what the model sees.
# Fit on `final_df`, then label the same rows (-1 for anomalies, 1 for normal).
predictions = iso_forest.fit(final_df).predict(final_df)

# `final_df` has no 'image_path', so the labels are stored on `df`, which
# does, keeping every prediction traceable to its image.
df['anomaly'] = predictions

# Rows flagged as anomalous, kept for review
anomalies = df.loc[df['anomaly'] == -1]

print("Number of anomalies detected:", len(anomalies))


In [None]:
# List the 'image_path' value of every flagged row, one per line.
flagged_paths = anomalies['image_path']
for flagged in flagged_paths:
    print(flagged)


In [2]:
# Load the saved Keras model stored at ./models/eye_gaze_v19.h5.
# NOTE(review): this rebinds the global name `model`, which the first cell
# assigns to the ResNet50-based feature extractor. `extract_features` looks
# `model` up at call time, so any call made after this cell runs would use
# the model loaded here instead. The cell's execution count ([2]) also shows
# it was run out of order relative to its position -- confirm the intended
# order, or consider a distinct variable name.
from keras.models import load_model
model = load_model('./models/eye_gaze_v19.h5')