In [1]:
import numpy as np
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

# Simulated data to illustrate LDA dimensionality reduction followed by
# per-class centre computation in the reduced space.
# The setup draws 2268 samples with 1536 features, labelled '001'..'108'.
np.random.seed(0)
num_samples = 2268
num_features = 1536
num_classes = 108

# Randomly generate train_features with shape (num_samples, num_features)
train_features = np.random.rand(num_samples, num_features)

# Randomly generate train_labels as zero-padded strings from '001' to '108'
train_labels = np.array([f"{i:03}" for i in np.random.randint(1, num_classes + 1, num_samples)])

# The labels are drawn at random, so a class could be absent from the sample.
# Size the projection from the classes actually present: LDA rejects an
# n_components larger than min(n_features, n_classes - 1).
unique_labels = np.unique(train_labels)
n_components = min(len(unique_labels) - 1, num_features)

# Instantiate the LDA model
lda = LinearDiscriminantAnalysis(n_components=n_components)

# Fit LDA and project the training features into the reduced space
reduced_features = lda.fit_transform(train_features, train_labels)
print("Reduced features shape:", reduced_features.shape)  # (2268, 107) when all 108 classes are present
print("Unique labels in train_labels:", unique_labels)

# Compute class centers in the reduced space
class_centers = {}
for label in unique_labels:
    # Select the projected vectors belonging to the current class
    class_reduced_features = reduced_features[train_labels == label]

    # The class center is the mean of those vectors, one value per component
    class_centers[label] = np.mean(class_reduced_features, axis=0)

# Display the shape of the mean vector for the first few classes
{label: center.shape for label, center in list(class_centers.items())[:5]}


Reduced features shape: (2268, 107)
Unique labels in train_labels: ['001' '002' '003' '004' '005' '006' '007' '008' '009' '010' '011' '012'
 '013' '014' '015' '016' '017' '018' '019' '020' '021' '022' '023' '024'
 '025' '026' '027' '028' '029' '030' '031' '032' '033' '034' '035' '036'
 '037' '038' '039' '040' '041' '042' '043' '044' '045' '046' '047' '048'
 '049' '050' '051' '052' '053' '054' '055' '056' '057' '058' '059' '060'
 '061' '062' '063' '064' '065' '066' '067' '068' '069' '070' '071' '072'
 '073' '074' '075' '076' '077' '078' '079' '080' '081' '082' '083' '084'
 '085' '086' '087' '088' '089' '090' '091' '092' '093' '094' '095' '096'
 '097' '098' '099' '100' '101' '102' '103' '104' '105' '106' '107' '108']


{'001': (107,), '002': (107,), '003': (107,), '004': (107,), '005': (107,)}

In [1]:
import numpy as np
import random

# Toy dataset: each entry pairs a class label with one 2-D feature vector,
# so the row/label correspondence is explicit (two rows per class).
labelled_rows = [
    (0, [1.0, 1.2]),
    (0, [1.1, 1.3]),
    (1, [5.0, 5.2]),
    (1, [5.1, 5.3]),
    (2, [9.0, 9.2]),
    (2, [9.1, 9.3]),
]

# 2-D array with one feature vector per row
features = np.array([row for _, row in labelled_rows])

# Class label of the matching row in `features`
labels = np.array([cls for cls, _ in labelled_rows])


In [3]:
def create_pairs(features, labels, num_genuine_pairs=2, num_impostor_pairs=2):
    """Randomly sample genuine (same-class) and impostor (different-class) pairs.

    All randomness comes from NumPy's global random state, so calling
    ``np.random.seed(...)`` beforehand makes the result reproducible.

    Parameters
    ----------
    features : indexable
        Container of feature vectors; ``features[i]`` is the vector of sample i.
    labels : array-like
        Class label of each sample, aligned with ``features``.
    num_genuine_pairs : int, default 2
        Number of same-class pairs to draw. Each pair uses two distinct samples.
    num_impostor_pairs : int, default 2
        Number of pairs to draw whose members come from two different classes.

    Returns
    -------
    pairs : list of tuple
        ``(features[i], features[j])`` tuples, genuine pairs first.
    pair_labels : list of int
        1 for each genuine pair, 0 for each impostor pair, aligned with ``pairs``.

    Raises
    ------
    ValueError
        If genuine pairs are requested but no class has at least two samples,
        or impostor pairs are requested but fewer than two classes exist.
    """
    # Accept plain lists too: comparing a list with `==` would not be elementwise.
    labels = np.asarray(labels)

    # One array of sample indices per distinct class.
    class_indices = [np.where(labels == label)[0] for label in np.unique(labels)]

    pairs = []
    pair_labels = []

    # Generate genuine pairs
    if num_genuine_pairs > 0:
        # A class with a single sample cannot supply two distinct members.
        eligible = [members for members in class_indices if len(members) >= 2]
        if not eligible:
            raise ValueError(
                "Cannot create genuine pairs: no class has at least two samples"
            )
        for _ in range(num_genuine_pairs):
            members = eligible[np.random.randint(len(eligible))]
            i, j = np.random.choice(members, 2, replace=False)
            pairs.append((features[i], features[j]))
            pair_labels.append(1)  # Label 1 for genuine pairs

    # Generate impostor pairs
    if num_impostor_pairs > 0:
        if len(class_indices) < 2:
            raise ValueError(
                "Cannot create impostor pairs: at least two classes are required"
            )
        for _ in range(num_impostor_pairs):
            # Draw class positions rather than label values so the labels
            # themselves are never coerced into a NumPy array.
            first, second = np.random.choice(len(class_indices), 2, replace=False)
            i = np.random.choice(class_indices[first])
            j = np.random.choice(class_indices[second])
            pairs.append((features[i], features[j]))
            pair_labels.append(0)  # Label 0 for impostor pairs

    return pairs, pair_labels

# Sample two genuine and two impostor pairs from the toy dataset
genuine_impostor_pairs, pair_labels = create_pairs(
    features, labels, num_genuine_pairs=2, num_impostor_pairs=2
)

# Show every pair next to its verdict, numbered from 1
print("Generated Pairs (Feature Vector Pairs):")
for number, (pair, label) in enumerate(zip(genuine_impostor_pairs, pair_labels), start=1):
    verdict = "| Genuine" if label == 1 else "| Impostor"
    print(f"Pair {number}:", pair, verdict)

# Raw containers as returned by create_pairs
print(genuine_impostor_pairs)
print(pair_labels)


Generated Pairs (Feature Vector Pairs):
Pair 1: (array([5. , 5.2]), array([5.1, 5.3])) | Genuine
Pair 2: (array([1.1, 1.3]), array([1. , 1.2])) | Genuine
Pair 3: (array([9.1, 9.3]), array([5.1, 5.3])) | Impostor
Pair 4: (array([5. , 5.2]), array([1.1, 1.3])) | Impostor
[(array([5. , 5.2]), array([5.1, 5.3])), (array([1.1, 1.3]), array([1. , 1.2])), (array([9.1, 9.3]), array([5.1, 5.3])), (array([5. , 5.2]), array([1.1, 1.3]))]
[1, 1, 0, 0]
