In [1]:
from typing import Sequence, Any
import numpy as np

class Index:
    """
    Represents a mapping from a vocabulary (e.g., strings) to integers.

    Each unique vocabulary item gets a consecutive integer, beginning at
    ``start``. The value ``start - 1`` is reserved as ``unknown_index``: it is
    returned for objects that are not in the vocabulary and is also used to
    pad short rows in index matrices.
    """
    def __init__(self, vocab: Sequence[Any], start=0):
        """
        Assigns an index to each unique item in the vocab iterable,
        with indexes starting from start.

        Duplicate items keep the position of their first occurrence.
        """
        self.start = start
        # dict.fromkeys drops duplicates while preserving first-seen order.
        self.vocab = list(dict.fromkeys(vocab))
        self.index_map = {word: i for i, word in enumerate(self.vocab, start=start)}
        self.reverse_map = {i: word for word, i in self.index_map.items()}
        # Index reported for out-of-vocabulary objects (and used as padding).
        self.unknown_index = start - 1

    def objects_to_indexes(self, object_seq: Sequence[Any]) -> np.ndarray:
        """
        Returns a vector of the indexes associated with the input objects.

        Objects that are not in the vocabulary map to ``unknown_index``.
        The result always has an integer dtype, including for empty input.
        """
        lookup = self.index_map.get
        unknown = self.unknown_index
        # An explicit dtype is required: np.array([]) would otherwise be float64.
        return np.array([lookup(obj, unknown) for obj in object_seq], dtype=int)

    def objects_to_index_matrix(self, object_seq_seq: Sequence[Sequence[Any]]) -> np.ndarray:
        """
        Returns a matrix of the indexes associated with the input objects.

        Row ``i`` holds the indexes of ``object_seq_seq[i]``. Rows shorter than
        the longest sequence are right-padded with ``unknown_index``. An empty
        input yields an integer matrix of shape ``(0, 0)``.
        """
        # default=0 keeps max() from raising when there are no sequences.
        max_length = max((len(seq) for seq in object_seq_seq), default=0)
        matrix = np.full((len(object_seq_seq), max_length), self.unknown_index, dtype=int)
        for i, seq in enumerate(object_seq_seq):
            matrix[i, :len(seq)] = self.objects_to_indexes(seq)
        return matrix

    def objects_to_binary_vector(self, object_seq: Sequence[Any]) -> np.ndarray:
        """
        Returns a binary vector, with a 1 at each index corresponding to one of the input objects.

        The vector has length ``len(vocab) + start``, so the index of every
        vocabulary item is a valid position. Objects that are not in the
        vocabulary are ignored.
        """
        binary_vector = np.zeros(len(self.vocab) + self.start, dtype=int)
        for obj in object_seq:
            if obj in self.index_map:
                binary_vector[self.index_map[obj]] = 1
        return binary_vector

    def objects_to_binary_matrix(self, object_seq_seq: Sequence[Sequence[Any]]) -> np.ndarray:
        """
        Returns a binary matrix, with a 1 at each index corresponding to one of the input objects.

        Row ``i`` is the binary vector for ``object_seq_seq[i]``.
        """
        matrix = np.zeros((len(object_seq_seq), len(self.vocab) + self.start), dtype=int)
        for i, seq in enumerate(object_seq_seq):
            matrix[i] = self.objects_to_binary_vector(seq)
        return matrix

    def indexes_to_objects(self, index_vector: np.ndarray) -> Sequence[Any]:
        """
        Returns a sequence of objects associated with the indexes in the input vector.

        Indexes with no associated object (such as ``unknown_index``) are
        skipped, so the result may be shorter than the input.
        """
        return [self.reverse_map[i] for i in index_vector if i in self.reverse_map]

    def index_matrix_to_objects(self, index_matrix: np.ndarray) -> Sequence[Sequence[Any]]:
        """
        Returns a sequence of sequences of objects associated with the indexes in the input matrix.

        Each row is converted independently with ``indexes_to_objects``.
        """
        return [self.indexes_to_objects(row) for row in index_matrix]

    def binary_vector_to_objects(self, vector: np.ndarray) -> Sequence[Any]:
        """
        Returns a sequence of the objects identified by the nonzero indexes in the input vector.

        Nonzero positions with no associated object are skipped.
        """
        return [self.reverse_map[i] for i in np.nonzero(vector)[0] if i in self.reverse_map]

    def binary_matrix_to_objects(self, binary_matrix: np.ndarray) -> Sequence[Sequence[Any]]:
        """
        Returns a sequence of sequences of objects identified by the nonzero indices in the input matrix.

        Each row is converted independently with ``binary_vector_to_objects``.
        """
        return [self.binary_vector_to_objects(row) for row in binary_matrix]