# a. BASIC MACHINE TRANSLATION USING RULE-BASED METHODS

In [2]:
dictionary = { 
    'hello': 'bonjour', 
    'world': 'monde', 
    'my': 'mon', 
    'name': 'nom', 
    'is': 'est' 
}
grammar_rules = { 
    'SVO': ['subject', 'verb', 'object'] 
}  
def translate(sentence): 
    words = sentence.lower().split() 
    translated_words = [dictionary.get(word, word) for word in words] 
    return ' '.join(translated_words) 
sentence = "Hello world" 
print(translate(sentence))  


bonjour monde


# b. ENGLISH TO FRENCH TRANSLATION USING SEQ2SEQ WITH ATTENTION

In [2]:
import tensorflow as tf
from tensorflow.keras.layers import Embedding, LSTM, Dense, Attention
import numpy as np

# --- Sample Dataset ---
eng = ["i like apples", "they do not like grapefruit"]
fr  = ["j aime les pommes", "ils n aiment pas le pamplemousse"]

eng_vocab = {w:i+1 for i,w in enumerate(set(" ".join(eng).split()))}
fr_vocab  = {w:i+1 for i,w in enumerate(set(" ".join(fr).split()))}
inv_fr = {i:w for w,i in fr_vocab.items()}

def encode(s, vocab, max_len=6): 
    return [vocab[w] for w in s.split()]+[0]*(max_len-len(s.split()))

X = np.array([encode(s,eng_vocab) for s in eng])
Y = np.array([encode(s,fr_vocab) for s in fr])

# --- Seq2Seq with Attention ---
class Seq2Seq(tf.keras.Model):
    def __init__(self, in_vocab, out_vocab, emb=16, units=32):
        super().__init__()
        self.enc_emb = Embedding(in_vocab+1, emb)
        self.enc_lstm = LSTM(units, return_sequences=True, return_state=True)
        self.dec_emb = Embedding(out_vocab+1, emb)
        self.dec_lstm = LSTM(units, return_sequences=True, return_state=True)
        self.att, self.fc = Attention(), Dense(out_vocab+1, activation="softmax")

    def call(self, inputs):
        x, y = inputs  # unpack
        enc_out,h,c = self.enc_lstm(self.enc_emb(x))
        dec_out,_,_ = self.dec_lstm(self.dec_emb(y), initial_state=[h,c])
        ctx = self.att([dec_out, enc_out])
        return self.fc(tf.concat([dec_out, ctx], -1))

model = Seq2Seq(len(eng_vocab), len(fr_vocab))
model.compile(optimizer="adam", loss="sparse_categorical_crossentropy")
model.fit([X,Y], np.expand_dims(Y,-1), epochs=100, verbose=0)

# --- Translation ---
test = np.array([encode("they do not like grapefruit", eng_vocab)])
start = np.array([encode("ils", fr_vocab)])  # seed word
pred = np.argmax(model.predict([test,start])[0],-1)
print("Translation:", " ".join(inv_fr.get(i,"") for i in pred))


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 457ms/step
Translation: ils n aiment pas le pamplemousse


# c. NEURAL MACHINE TRANSLATION WITH TRANSFORMERS (ENGLISH TO GERMAN) (google Collab)

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-medium")
model = AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-medium")
chat_history_ids = None
print(" Conversational AI Chatbot (type 'quit' to exit)")
while True:
    user_input = input("You: ")
    if user_input.lower() in ["quit", "exit"]:
        print("Bot: Goodbye! 👋")
        break
    new_input_ids = tokenizer.encode(user_input + tokenizer.eos_token, return_tensors='pt')
    if chat_history_ids is not None:
        bot_input_ids = torch.cat([chat_history_ids, new_input_ids], dim=-1)
    else:
        bot_input_ids = new_input_ids
    chat_history_ids = model.generate(
        bot_input_ids,
        max_length=1000,
        pad_token_id=tokenizer.eos_token_id,
        do_sample=True,
        temperature=0.7,
        top_k=50,
        top_p=0.95
    )
    bot_output = tokenizer.decode(
        chat_history_ids[:, bot_input_ids.shape[-1]:][0],
        skip_special_tokens=True
    )
    print("Bot:", bot_output)