<a href="https://colab.research.google.com/github/dhamu2908/EnglishToTeluguTranslation/blob/main/English_Telugu_Using_RNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
import pickle
import re
from collections import Counter, defaultdict
from typing import List, Tuple, Dict, Optional
import matplotlib.pyplot as plt

**Simple RNN Cell**

In [2]:
class SimpleRNN:
  """A single vanilla recurrent cell implemented with NumPy.

  One step computes ``h_next = tanh(x . Wxh + h_prev . Whh + bh)``.

  Attributes:
    input_size: number of features per input (embedding dimension).
    hidden_size: width of the hidden state (the cell's "memory").
    Wxh: input-to-hidden weights, shape ``(input_size, hidden_size)``.
    Whh: hidden-to-hidden weights, shape ``(hidden_size, hidden_size)``.
    bh: hidden bias, shape ``(1, hidden_size)``, initialised to zeros.
  """

  def __init__(self, input_size, hidden_size):
    """Create the cell with small random weights and a zero bias.

    Args:
      input_size: number of features each input vector carries.
      hidden_size: number of units in the hidden state.
    """
    self.input_size, self.hidden_size = input_size, hidden_size

    # Weights are scaled down to 0.1 * N(0, 1). Wxh is drawn before Whh, so
    # the values are reproducible for a given NumPy seed.
    init_scale = 0.1
    self.Wxh = np.random.randn(input_size, hidden_size) * init_scale
    self.Whh = np.random.randn(hidden_size, hidden_size) * init_scale

    # The bias is a single row; it is added to the projected input in forward().
    self.bh = np.zeros((1, hidden_size))

  def forward(self, x, h_prev):
    """Advance the cell by one time step.

    Args:
      x: current input; must be compatible with ``np.dot(x, Wxh)``
        (presumably ``(batch, input_size)`` -- confirm against callers).
      h_prev: previous hidden state; must be compatible with
        ``np.dot(h_prev, Whh)``.

    Returns:
      The new hidden state, squashed into (-1, 1) by ``tanh``.
    """
    pre_activation = (
        np.dot(x, self.Wxh)         # contribution of the current input
        + np.dot(h_prev, self.Whh)  # contribution of the previous state
        + self.bh
    )
    return np.tanh(pre_activation)



**Word Embedding**

In [7]:
class Word2Vec:
  """Build a word vocabulary and an embedding matrix backed by gensim.

  Typical use: ``create_vocab`` -> ``train_word2vec`` ->
  ``create_embedding_matrix``.

  NOTE: this class has the same name as ``gensim.models.Word2Vec``. To avoid
  calling itself by accident, the gensim class is imported under an alias
  inside ``train_word2vec``.
  """

  def __init__(self):
    self.model = None       # gensim model, set by train_word2vec()
    self.vocab = {}         # word -> integer index, set by create_vocab()
    self.embeddings = None  # matrix set by create_embedding_matrix()

  def create_vocab(self, sentences: List[str]) -> Dict[str, int]:
    """Assign an integer index to every distinct lower-cased word.

    Indices 0 and 1 are reserved for ``<PAD>`` and ``<UNK>``; remaining words
    are numbered in order of first appearance. Any previous vocabulary is
    discarded.

    Args:
      sentences: iterable of whitespace-separated sentences.

    Returns:
      The word -> index mapping (also stored in ``self.vocab``).
    """
    self.vocab = {"<PAD>": 0, "<UNK>": 1}

    for sentence in sentences:
      for word in sentence.lower().split():
        if word not in self.vocab:
          self.vocab[word] = len(self.vocab)

    print(f"Vocab size : {len(self.vocab)}")
    return self.vocab

  def train_word2vec(self, sentences, embedding_dim=50):
    """Train a gensim Word2Vec model on the given sentences.

    Sentences are lower-cased and split on whitespace, matching the
    tokenisation used by ``create_vocab``.

    Args:
      sentences: iterable of whitespace-separated sentences.
      embedding_dim: size of each learned word vector.

    Returns:
      The trained gensim model (also stored in ``self.model``).

    Raises:
      ImportError: if gensim is not installed.
    """
    # Local, aliased import: a bare ``Word2Vec`` here would resolve to this
    # class, whose __init__ takes no arguments.
    from gensim.models import Word2Vec as GensimWord2Vec

    print("Training Word2Vec ....")

    # gensim expects a list of token lists, one per sentence.
    word_lists = [sentence.lower().split() for sentence in sentences]

    # min_count=1 keeps every word; workers=1 trains on a single thread.
    self.model = GensimWord2Vec(sentences=word_lists, vector_size=embedding_dim,
                                window=3, min_count=1, workers=1)

    print(f"Word2Vec trained with {embedding_dim} dimensions")

    return self.model

  def create_embedding_matrix(self, vocab: Dict[str, int], embedding_dim: int):
    """Build a ``(len(vocab), embedding_dim)`` matrix of word vectors.

    Rows start as small random values (0.1 * N(0, 1)). Every vocabulary word
    known to the trained model has its row replaced by the model's vector, so
    words the model does not know (e.g. ``<PAD>``/``<UNK>``) keep the random
    initialisation.

    Args:
      vocab: word -> row index mapping.
      embedding_dim: width of each row; must match the trained vector size.

    Returns:
      The embedding matrix (also stored in ``self.embeddings``).

    Raises:
      RuntimeError: if no model has been trained or assigned yet.
    """
    if self.model is None:
      raise RuntimeError(
          "No trained model available: call train_word2vec() before "
          "create_embedding_matrix().")

    print("Creating embedding matrix ....")

    embeddings = np.random.randn(len(vocab), embedding_dim) * 0.1
    word_vectors = self.model.wv

    found = 0
    for word, index in vocab.items():
      if word in word_vectors:
        embeddings[index] = word_vectors[word]
        found += 1

    print(f"{found} words found in embedding")

    self.embeddings = embeddings

    return embeddings


