<a href="https://colab.research.google.com/github/avikumart/LLM-GenAI-Transformers-Notebooks/blob/main/DeepLearningFiles/stv_nn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
"""
The main code for the Strings-to-Vectors assignment. See README.md and Instructions for details.
"""
from typing import Sequence, Any

import numpy as np

class Index:
    """
    Represents a mapping from a vocabulary (e.g., strings) to integers.

    Each unique vocabulary item gets a consecutive integer index, beginning
    at `start`. The value `start - 1` is reserved for out-of-vocabulary
    objects and for padding.

    Attributes:
        vocab: The unique vocabulary items, in first-seen order.
        start: The index assigned to the first vocabulary item.
        mapping: Maps each item to its zero-based position in `vocab`
            (the public index is this position plus `start`).
        index_to_token: Inverse of `mapping` (position -> item).
    """

    def __init__(self, vocab: Sequence[Any], start: int = 0):
        """
        Assigns an index to each unique item in the `vocab` iterable,
        with indexes starting from `start`.

        Indexes are assigned in order, so that the first unique item in
        `vocab` has the index `start`, the second unique item has the index
        `start + 1`, etc.

        :param vocab: The vocabulary items; duplicates are ignored and every
                      item must be hashable.
        :param start: The index given to the first unique item.
        """
        # dict.fromkeys removes duplicates while keeping first-seen order.
        self.vocab = list(dict.fromkeys(vocab))
        self.start = start
        self.mapping = {item: pos for pos, item in enumerate(self.vocab)}
        self.index_to_token = dict(enumerate(self.vocab))

    def objects_to_indexes(self, object_seq: Sequence[Any]) -> np.ndarray:
        """
        Returns a vector of the indexes associated with the input objects.

        For objects not in the vocabulary, `start-1` is used as the index.

        :param object_seq: A sequence of objects.
        :return: A 1-dimensional integer array of the object indexes.
        """
        # A missing object gets position -1, which becomes `start - 1` once
        # the offset is applied, so no special-casing is needed.
        positions = [self.mapping.get(obj, -1) for obj in object_seq]
        # The explicit dtype keeps an empty result integer-typed (NumPy would
        # otherwise default an empty array to float64).
        return np.array(positions, dtype=int) + self.start

    def objects_to_index_matrix(
            self, object_seq_seq: Sequence[Sequence[Any]]) -> np.ndarray:
        """
        Returns a matrix of the indexes associated with the input objects.

        For objects not in the vocabulary, `start-1` is used as the index.

        If the sequences are not all of the same length, shorter sequences will
        have padding added at the end, with `start-1` used as the pad value.

        The input sequences are never modified.

        :param object_seq_seq: A sequence of sequences of objects.
        :return: A 2-dimensional integer array of the object indexes, with one
                 row per input sequence.
        """
        rows = [self.objects_to_indexes(seq) for seq in object_seq_seq]
        width = max((len(row) for row in rows), default=0)
        # Pre-fill with the pad index and copy each row in. Padding is applied
        # to the indexes, not the objects, so a vocabulary item that happens
        # to equal `start - 1` cannot be confused with padding.
        matrix = np.full((len(rows), width), self.start - 1, dtype=int)
        for row_number, row in enumerate(rows):
            matrix[row_number, :len(row)] = row
        return matrix

    def objects_to_binary_vector(self, object_seq: Sequence[Any]) -> np.ndarray:
        """
        Returns a binary vector, with a 1 at each index corresponding to one of
        the input objects.

        The vector has `max(start, 0) + len(vocab)` entries; objects that are
        not in the vocabulary are ignored.

        :param object_seq: A sequence of objects.
        :return: A 1-dimensional array, with 1s at the indexes of each object,
                 and 0s at all other indexes.
        """
        # Leading slots below `start` never correspond to an object; a
        # negative `start` contributes no leading slots.
        offset = max(self.start, 0)
        vector = np.zeros(offset + len(self.vocab), dtype=int)
        for obj in object_seq:
            position = self.mapping.get(obj)
            if position is not None:
                vector[offset + position] = 1
        return vector

    def objects_to_binary_matrix(
            self, object_seq_seq: Sequence[Sequence[Any]]) -> np.ndarray:
        """
        Returns a binary matrix, with a 1 at each index corresponding to one of
        the input objects.

        :param object_seq_seq: A sequence of sequences of objects.
        :return: A 2-dimensional array, where each row in the array corresponds
                 to a row in the input, with 1s at the indexes of each object,
                 and 0s at all other indexes.
        """
        return np.array(
            [self.objects_to_binary_vector(seq) for seq in object_seq_seq])

    def indexes_to_objects(self, index_vector: np.ndarray) -> Sequence[Any]:
        """
        Returns a sequence of objects associated with the indexes in the input
        vector.

        If, for any of the indexes, there is not an associated object, that
        index is skipped in the output.

        :param index_vector: A 1-dimensional array (or list) of indexes
        :return: A list of objects, one for each index that has an object.
        """
        objects = []
        # asarray accepts plain lists as well as arrays; tolist yields native
        # Python scalars for the dictionary lookup.
        for index in np.asarray(index_vector).ravel().tolist():
            position = index - self.start
            # Membership test (rather than a None sentinel) so that a
            # vocabulary item equal to None is still returned.
            if position in self.index_to_token:
                objects.append(self.index_to_token[position])
        return objects

    def index_matrix_to_objects(
            self, index_matrix: np.ndarray) -> Sequence[Sequence[Any]]:
        """
        Returns a sequence of sequences of objects associated with the indexes
        in the input matrix.

        If, for any of the indexes, there is not an associated object, that
        index is skipped in the output.

        :param index_matrix: A 2-dimensional array of indexes
        :return: A list of lists of objects, one inner list per matrix row.
        """
        return [self.indexes_to_objects(row) for row in index_matrix]

    def binary_vector_to_objects(self, vector: np.ndarray) -> Sequence[Any]:
        """
        Returns a sequence of the objects identified by the nonzero indexes in
        the input vector.

        If, for any of the indexes, there is not an associated object, that
        index is skipped in the output.

        :param vector: A 1-dimensional binary array
        :return: A list of objects, one for each nonzero index that has an
                 object, in vocabulary order.
        """
        # Use the same leading offset that objects_to_binary_vector emits, and
        # ignore any trailing entries beyond the vocabulary.
        offset = max(self.start, 0)
        flags = np.asarray(vector)[offset:offset + len(self.vocab)]
        return [self.vocab[position] for position in np.flatnonzero(flags)]

    def binary_matrix_to_objects(
            self, binary_matrix: np.ndarray) -> Sequence[Sequence[Any]]:
        """
        Returns a sequence of sequences of objects identified by the nonzero
        indices in the input matrix.

        If, for any of the indexes, there is not an associated object, that
        index is skipped in the output.

        :param binary_matrix: A 2-dimensional binary array
        :return: A list of lists of objects, one inner list per matrix row.
        """
        return [self.binary_vector_to_objects(row) for row in binary_matrix]