In [42]:
from tensorflow.keras.applications import VGG16
import tensorflow as tf
import numpy as np
import os
from PIL import Image
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
import matplotlib.pyplot as plt
from tensorflow.keras import layers, models
from sklearn.mixture import GaussianMixture

In [102]:
def preprocess_img(img_path, size=224):
    img = Image.open(img_path).convert("RGB")
    img = img.resize((size, size))
    img_array = np.array(img) / 255.0  
    return img_array 

In [88]:
def extract_gmm_features(img_path, n_components=4, size=224):
    img = preprocess_img(img_path, size)
    gmm = GaussianMixture(n_components=n_components, covariance_type='full', random_state=42)
    gmm.fit(img.reshape(-1, 3))

    mean_vals = gmm.means_.flatten()
    covariance_vals = gmm.covariances_.flatten()
    features = np.concatenate([mean_vals, covariance_vals])
    
    return features

In [101]:
def load_feature_dataset(root_dir, n_components=4, size=224):
    X = []
    y = []
    trial_metadata = []

    for rule_type in os.listdir(root_dir):
        rule_path = os.path.join(root_dir, rule_type)
        if not os.path.isdir(rule_path): continue

        for rule_folder in os.listdir(rule_path):
            img_folder = os.path.join(rule_path, rule_folder)
            if not os.path.isdir(img_folder): continue

            img_paths = [
                os.path.join(img_folder, "inlier_0.png"),
                os.path.join(img_folder, "inlier_1.png"),
                os.path.join(img_folder, "inlier_2.png"),
                os.path.join(img_folder, "outlier.png")
            ]

            features = [extract_gmm_features(p, n_components, size) for p in img_paths]
            paired = list(zip(img_paths, features))
            np.random.shuffle(paired)

            shuffled_paths, shuffled_features = zip(*paired)
            shuffled_features = list(shuffled_features)

            outlier_idx = [i for i, path in enumerate(shuffled_paths) if "outlier" in path][0]

            X.append(shuffled_features)
            y.append(outlier_idx)
            trial_metadata.append({
                "rule": rule_type,
                "img_paths": shuffled_paths,
                "true_outlier_idx": outlier_idx,
            })

    return np.array(X), np.array(y), trial_metadata

In [89]:
X, y, trial_metadata = load_feature_dataset("data", n_components=4, size=224)
X_flat = X.reshape((X.shape[0], -1))
indices = np.arange(len(X))

In [100]:
clf = MLPClassifier(hidden_layer_sizes=(128,), activation='relu', max_iter=1000)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
acc = accuracy_score(y_test, y_pred)
print("Odd-one-out accuracy (GMM feature extractor + MLP):", acc)

Odd-one-out accuracy (GMM feature extractor + MLP): 0.3333333333333333
