In [None]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, SimpleRNN, Dense
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np

# Sample English and Tamil sentences. The two lists are parallel: entry i of
# tamil_sentences is used as the translation of entry i of english_sentences.
# NOTE(review): the Tamil strings look machine-generated — have a Tamil speaker verify them.
english_sentences = ["I like to eat pizza.", "He is a good boy.", "She speaks English."]
tamil_sentences = ["நான் பீச்சா சாப்பிடுவது பிட்சாவின் போது.", "அவன் ஒரு நல்ல பை.", "அவள் ஆங்கிலம் பேசுகின்றாள்."]

# Markers wrapped around every Tamil sentence so the decoder can learn where a
# translation starts and stops. They are plain alphanumeric words because the
# Tokenizer's default filters strip '<' and '>' from the text it is fitted on.
START_TOKEN = "startseq"
END_TOKEN = "endseq"
tamil_marked_sentences = [START_TOKEN + " " + sentence + " " + END_TOKEN for sentence in tamil_sentences]

# Initialize tokenizers for English and Tamil
english_tokenizer = Tokenizer(oov_token="<OOV>")
tamil_tokenizer = Tokenizer(oov_token="<OOV>")

# Fit the tokenizers on the sentences (Tamil side includes the markers so
# they receive vocabulary ids of their own)
english_tokenizer.fit_on_texts(english_sentences)
tamil_tokenizer.fit_on_texts(tamil_marked_sentences)

# Convert text sequences to numerical sequences
english_sequences = english_tokenizer.texts_to_sequences(english_sentences)
tamil_sequences = tamil_tokenizer.texts_to_sequences(tamil_marked_sentences)

# Pad sequences to make them of equal length
max_seq_length = 5  # Set your desired sequence length (maximum length of sentences)
english_sequences = pad_sequences(english_sequences, maxlen=max_seq_length, padding='post', truncating='post')
# The Tamil side gets one extra slot so that the two shifted views below are
# each exactly max_seq_length long. Sentences that do not fit lose their tail
# (including the end marker), matching the existing 'post' truncation policy.
tamil_sequences = pad_sequences(tamil_sequences, maxlen=max_seq_length + 1, padding='post', truncating='post')

# Teacher forcing: at step t the decoder reads token t and must predict
# token t + 1. Feeding the target itself as input (no shift) would only teach
# the network to copy its input.
decoder_input_data = tamil_sequences[:, :-1]
decoder_target_data = tamil_sequences[:, 1:]

# Define model parameters
hidden_units = 256
embedding_dim = 100
num_encoder_tokens = len(english_tokenizer.word_index) + 1  # +1 for padding id 0
num_decoder_tokens = len(tamil_tokenizer.word_index) + 1  # +1 for padding id 0

# Define the encoder. mask_zero=True makes the RNN skip padding (id 0), so the
# returned state summarizes the real tokens rather than the trailing padding.
encoder_inputs = Input(shape=(max_seq_length,))
encoder_embedding = Embedding(input_dim=num_encoder_tokens, output_dim=embedding_dim, mask_zero=True)(encoder_inputs)
encoder_rnn, encoder_state = SimpleRNN(hidden_units, return_state=True)(encoder_embedding)

# Define the decoder, initialized with the encoder's final state. It emits a
# softmax distribution over the Tamil vocabulary at every time step.
decoder_inputs = Input(shape=(max_seq_length,))
decoder_embedding = Embedding(input_dim=num_decoder_tokens, output_dim=embedding_dim, mask_zero=True)(decoder_inputs)
decoder_rnn = SimpleRNN(hidden_units, return_sequences=True)(decoder_embedding, initial_state=encoder_state)
decoder_dense = Dense(num_decoder_tokens, activation='softmax')
decoder_outputs = decoder_dense(decoder_rnn)

# Define the model
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)

# Compile the model with 'sparse_categorical_crossentropy' loss
# (targets are integer token ids, not one-hot vectors)
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train the model. The target gets a trailing axis so its shape is
# (samples, time steps, 1). With only three sentences, validation_split=0.2
# holds out the last one for validation.
model.fit([english_sequences, decoder_input_data], decoder_target_data[:, :, np.newaxis], epochs=10, batch_size=64, validation_split=0.2)


In [None]:
def translate_sentence(input_sentence):
    """Translate one English sentence into Tamil with greedy decoding.

    The decoder input is built one token at a time from the model's own
    predictions. English token ids are never fed to the decoder: they index
    a different vocabulary and may fall outside the Tamil embedding table.

    Args:
        input_sentence: English text to translate.

    Returns:
        The predicted Tamil words joined by spaces; an empty string if the
        model predicts padding or the end marker at the first step.
    """
    # Tokenize and pad the input sentence
    input_sequence = english_tokenizer.texts_to_sequences([input_sentence])
    input_sequence = pad_sequences(input_sequence, maxlen=max_seq_length, padding='post', truncating='post')

    # Use the sequence markers when the Tamil vocabulary contains them;
    # otherwise start from padding id 0 and rely on the length limit.
    start_id = tamil_tokenizer.word_index.get("startseq", 0)
    end_id = tamil_tokenizer.word_index.get("endseq")

    # Decoder input starts as [start, 0, 0, ...] and is filled in left to right
    decoder_sequence = np.zeros((1, max_seq_length), dtype="int32")
    decoder_sequence[0, 0] = start_id

    predicted_ids = []
    for step in range(max_seq_length):
        probabilities = model.predict([input_sequence, decoder_sequence], verbose=0)
        # Only position `step` is meaningful: later slots are still padding
        next_id = int(np.argmax(probabilities[0, step]))
        if next_id == 0 or next_id == end_id:
            break  # padding or end marker: the translation is complete
        predicted_ids.append(next_id)
        if step + 1 < max_seq_length:
            decoder_sequence[0, step + 1] = next_id

    # Convert the predicted sequence back to text
    predicted_tamil_sentence = tamil_tokenizer.sequences_to_texts([predicted_ids])[0]

    return predicted_tamil_sentence


In [None]:
# Try the translator on a short English phrase and print the Tamil output.
english_input = "i like pizza"
translated_tamil = translate_sentence(input_sentence=english_input)
print(translated_tamil)