In [1]:
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input
from tensorflow.keras.preprocessing import image
from tensorflow.keras.models import Model
import numpy as np
import pandas as pd
# Function to process the CSV files and count gaze points per quadrant#
import os
import json

# Load the pre-trained ResNet50 model
base_model = ResNet50(weights='imagenet')
# Remove the top layer to get the features instead of the classification
model = Model(inputs=base_model.input, outputs=base_model.layers[-2].output)

def extract_features(img_path):
    img = image.load_img(img_path, target_size=(224, 224))
    img_array = image.img_to_array(img)
    expanded_img_array = np.expand_dims(img_array, axis=0)
    preprocessed_img = preprocess_input(expanded_img_array)
    features = model.predict(preprocessed_img)
    flattened_features = features.flatten()
    return flattened_features

# Example usage
# feature_vector = extract_features('./data/William/eye_gaze_images/William_0ab3bc08-9243-4aa4-b145-338dab7163c3.png')

In [None]:
from keras.utils import plot_model

plot_model(model, to_file='model.png', show_shapes=True, show_layer_names=True)


In [2]:
import pickle

def load_processed_data(file_path):
    with open(file_path, 'rb') as file:
        X, Y = pickle.load(file)
    return X, Y

In [3]:
X, Y = load_processed_data('./pickel_files/all_data_shape_2.pkl')
print(len(Y))
Y = np.array(Y)
print(Y.shape)

3979
(3979, 3)


In [4]:
image_paths = [y[-1] for y in Y]  # Extract the image paths from Y

In [5]:
print(len(image_paths))

3979


In [7]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler

# Assuming X, Y, and image_paths are already defined and loaded correctly
X = np.array(X)
# Flatten X and create DataFrame
X_flattened = X.reshape(X.shape[0], -1)
df = pd.DataFrame(X_flattened)

# Add target variables and convert to float
df['target_x'] = Y[:, 0].astype(float)
df['target_y'] = Y[:, 1].astype(float)

# Add image paths
df['image_path'] = image_paths

# Convert column names to strings to avoid future warnings
df.columns = df.columns.astype(str)

# Select features for standardization (exclude 'target_x', 'target_y', and 'image_path')
features = df.columns[:-3]  # Assuming the last three columns are 'target_x', 'target_y', and 'image_path'

# Standardize the features
scaler = StandardScaler()
df[features] = scaler.fit_transform(df[features])


In [8]:
# Before extracting image features, ensure 'df' is in the expected state with correct columns.
# Specifically, ensure 'df' includes 'target_x', 'target_y', and any other columns you've previously added or modified.

# Extract image features. Consider batching if the dataset is large.
# Example of a simple batch processing (adjust 'batch_size' according to your system's memory capacity):
batch_size = 100  # Adjust based on your system's capabilities
n_batches = (len(df) + batch_size - 1) // batch_size  # Calculate number of batches needed

# Initialize an empty list to store image features
image_features = []

for i in range(n_batches):
    batch = df['image_path'][i*batch_size:(i+1)*batch_size]
    batch_features = batch.apply(extract_features)
    image_features.extend(batch_features)

# Convert the list of image features into a DataFrame
image_features_df = pd.DataFrame(image_features)

# Ensure the index of 'image_features_df' aligns with 'df'
image_features_df.index = df.index

# Now, concatenate the numerical features with the image features
# Drop 'image_path' as it's no longer needed for modeling, and 'image_features' which was a temporary column
# Note: 'target_x' and 'target_y' are not in 'df' at this point if you followed the previous advice correctly
final_df = pd.concat([df.drop(['image_path'], axis=1), image_features_df], axis=1)




In [11]:
df.columns = df.columns.astype(str)


In [13]:
# Verify that all column names are now of type string
all(isinstance(name, str) for name in df.columns)


True

In [12]:
# Assuming all previous steps are correct, up to the creation of `final_df`
from sklearn.ensemble import IsolationForest

# Initialize the Isolation Forest model
iso_forest = IsolationForest(n_estimators=100, contamination='auto', random_state=42)

# Ensure `final_df` contains only the features for the model
# This might require dropping or excluding target and other non-feature columns if they're included in `final_df`

# Fit the model on `final_df`
iso_forest.fit(final_df)

# Predict anomalies (-1 for anomalies, 1 for normal) using `final_df`
predictions = iso_forest.predict(final_df)

# Add predictions back to `final_df` or a similar DataFrame that includes identifiable information (like `image_path`)
# Since `final_df` doesn't have `image_path`, consider adding predictions to `df` which includes `image_path` for traceability
df['anomaly'] = predictions

# Filter anomalies for review, using `df` to trace back to `image_path`
anomalies = df[df['anomaly'] == -1]

# Depending on your use case, you might want to examine these entries further
print("Number of anomalies detected:", len(anomalies))




Number of anomalies detected: 218


In [20]:
# Iterate over the 'image_path' column and print each path
for path in anomalies['image_path']:
    print(path)


data/eloise/calibration_images/eloise_b840a661-f6c1-48a6-8412-dea04c7aa244.png
data/eloise/calibration_images/eloise_b327960d-8d10-4875-9c54-54c616cbdd86.png
data/muzzy/eye_gaze_images/muzzy_c8c0fe94-4f2a-45b1-8d59-a1193c7d1d8f.png
data/Naia/eye_gaze_images/Naia_b6037f5a-3517-4911-8337-a417a3d320cc.png
data/Shaq/calibration_images/Shaq_2400d572-9b59-4471-9d48-002184ffcc9b.png
data/Shaq/images/Shaq_1a3243ba-911a-4662-b5c7-896b11c0e091.png
data/Shaq/images/Shaq_9bca1a68-6a14-4723-93e1-cc5f4626681a.png
data/Shaq/images/Shaq_bc282dca-7524-4dce-b2ef-ee0ffa143266.png
data/Shaq/images/Shaq_ddc764b2-116d-4840-82a6-6cf8acd7d7b7.png
data/Will/calibration_images/Will_3de50cf2-a055-4690-b67d-e5e69f1c3fd0.png
data/Will/calibration_images/Will_65c362a2-4c9b-471a-b148-093e46eb99a4.png
data/Will/calibration_images/Will_4489c104-7128-4cdb-9f0c-2d5a4fb62c60.png
data/William/calibration_images/William_5d43d76d-3f0c-4c8b-b2e9-b6d5f845a222.png
data/William/calibration_images/William_d65d9cbb-5be1-409d-a425