<a href="https://colab.research.google.com/github/gow504/GOWTHAM/blob/master/_Implement_one_hot_encoding_of_words_or_characters_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
from collections import Counter

def one_hot_encoding(data: str, mode: str = 'word') -> dict:
    """
    Build a one-hot vector for every distinct word or character in a text.

    In 'word' mode the text is tokenized with ``data.split()`` (split on
    whitespace); in 'char' mode every character of ``data`` is a token,
    whitespace included. The vocabulary is the sorted set of distinct tokens,
    and each token's vector holds a single 1 at the token's position in that
    sorted vocabulary and 0 everywhere else.

    :param data: Input text (string) to be encoded
    :param mode: 'word' for word-level encoding, 'char' for character-level encoding
    :return: Dictionary mapping each distinct token to its one-hot NumPy
             array of length ``len(vocab)``. Keys appear in order of first
             occurrence in the text; the dictionary is empty when the text
             yields no tokens.
    :raises ValueError: If ``mode`` is neither 'word' nor 'char'
    """
    if mode == 'word':
        tokens = data.split()
    elif mode == 'char':
        tokens = list(data)
    else:
        raise ValueError("Mode must be 'word' or 'char'")

    vocab = sorted(set(tokens))  # Sorted so index assignment is deterministic
    token_to_index = {token: i for i, token in enumerate(vocab)}
    vocab_size = len(vocab)

    one_hot_vectors = {}
    # Visit each distinct token once. dict.fromkeys() drops duplicates while
    # keeping first-appearance order, so the result's key order is the same as
    # iterating over every occurrence, without rebuilding identical vectors
    # for repeated tokens.
    for token in dict.fromkeys(tokens):
        one_hot = np.zeros(vocab_size)
        one_hot[token_to_index[token]] = 1
        one_hot_vectors[token] = one_hot

    return one_hot_vectors

# Demo: encode one sample sentence at both granularities and print each
# token next to its one-hot vector.
text = "hello world hello"
word_encoded = one_hot_encoding(text, mode='word')
char_encoded = one_hot_encoding(text, mode='char')

# Each entry pairs a heading with the encoding table printed beneath it; the
# second heading carries a leading newline to separate the two sections.
report_sections = (
    ("Word-level encoding:", word_encoded),
    ("\nCharacter-level encoding:", char_encoded),
)
for heading, table in report_sections:
    print(heading)
    for token, vector in table.items():
        print(f"{token}: {vector}")


Word-level encoding:
hello: [1. 0.]
world: [0. 1.]

Character-level encoding:
h: [0. 0. 0. 1. 0. 0. 0. 0.]
e: [0. 0. 1. 0. 0. 0. 0. 0.]
l: [0. 0. 0. 0. 1. 0. 0. 0.]
o: [0. 0. 0. 0. 0. 1. 0. 0.]
 : [1. 0. 0. 0. 0. 0. 0. 0.]
w: [0. 0. 0. 0. 0. 0. 0. 1.]
r: [0. 0. 0. 0. 0. 0. 1. 0.]
d: [0. 1. 0. 0. 0. 0. 0. 0.]
