In [None]:
import numpy as np
from collections import defaultdict
import matplotlib.pyplot as plt

class IBMModel1:
    """IBM Model 1 lexical translation table trained with EM.

    ``translation_probs[ew][hw]`` stores the probability of the English word
    ``ew`` given the Hindi word ``hw`` (the M-step normalises per Hindi word).
    ``log_likelihoods`` receives one value per training iteration.
    """

    def __init__(self):
        # translation_probs[english_word][hindi_word] -> t(english_word | hindi_word)
        self.translation_probs = defaultdict(lambda: defaultdict(float))
        # One entry is appended per EM iteration by train()
        self.log_likelihoods = []

    def train(self, english_sentences, hindi_sentences, num_iterations=5):
        """Estimate the translation table from a sentence-aligned corpus.

        Args:
            english_sentences: iterable of tokenised English sentences
                (each a sequence of word strings).
            hindi_sentences: iterable of tokenised Hindi sentences, aligned
                position-by-position with ``english_sentences``.
            num_iterations: number of EM iterations to run.

        Sentence pairs in which either side is empty are skipped. Entries
        already in the table for word pairs that do not co-occur in this
        corpus are left untouched.
        """
        # Materialise the pairs once: the corpus is traversed on every
        # iteration, so one-shot iterators would otherwise be exhausted after
        # initialisation. Pairs with an empty side carry no alignment
        # information and would only contribute log(0) to the likelihood.
        pairs = [
            (e_sent, h_sent)
            for e_sent, h_sent in zip(english_sentences, hindi_sentences)
            if e_sent and h_sent
        ]

        # Uniform start: every co-occurring pair gets 1 / |English vocabulary|
        english_vocab = {ew for e_sent, _ in pairs for ew in e_sent}
        uniform = 1.0 / max(len(english_vocab), 1)
        for e_sent, h_sent in pairs:
            for ew in e_sent:
                row = self.translation_probs[ew]
                for hw in h_sent:
                    row[hw] = uniform

        for _ in range(num_iterations):
            count = defaultdict(float)   # expected count of (ew, hw) alignments
            total = defaultdict(float)   # expected count of hw being aligned
            log_likelihood = 0.0

            # E-step: distribute each English word over the Hindi words of
            # its sentence in proportion to the current probabilities.
            for e_sent, h_sent in pairs:
                for ew in e_sent:
                    row = self.translation_probs[ew]
                    total_e = sum(row[hw] for hw in h_sent)
                    for hw in h_sent:
                        posterior = row[hw] / total_e
                        count[(ew, hw)] += posterior
                        total[hw] += posterior
                    log_likelihood += np.log(total_e)

            self.log_likelihoods.append(log_likelihood)

            # M-step: only pairs that actually co-occurred have a count, so
            # iterate over those instead of the full vocabulary cross product
            # (which would insert a zero entry for every unseen pair).
            for (ew, hw), expected in count.items():
                self.translation_probs[ew][hw] = expected / total[hw]

    def translate(self, english_sentence):
        """Translate word by word, choosing the highest-scoring Hindi word.

        The sentence is lower-cased and split on whitespace. Words with no
        entry in the translation table are rendered as ``"<UNK>"``.
        """
        translation = []
        for ew in english_sentence.lower().split():
            # .get() avoids creating an empty row for unknown words
            candidates = self.translation_probs.get(ew)
            if candidates:
                translation.append(max(candidates, key=candidates.get))
            else:
                # No candidates for this word: emit a placeholder token
                translation.append("<UNK>")
        return ' '.join(translation)

    def plot_log_likelihood(self):
        """Plot the recorded log-likelihood values against iteration number."""
        plt.plot(range(1, len(self.log_likelihoods) + 1), self.log_likelihoods)
        plt.xlabel('Iteration')
        plt.ylabel('Log-Likelihood')
        plt.title('Log-Likelihood During Training')
        plt.show()

# Load the parallel corpus: one whitespace-tokenised sentence per line.
# English tokens are lower-cased; Hindi tokens are kept as written.
with open('English.txt', 'r', encoding='utf-8') as f:
    english_sentences = list(map(str.split, map(str.lower, f)))

with open('Hindi.txt', 'r', encoding='utf-8') as f:
    hindi_sentences = list(map(str.split, f))

# Build the model and fit it on the corpus with the default iteration count
ibm_model = IBMModel1()
ibm_model.train(english_sentences, hindi_sentences)

# Plot log-likelihood during training
#ibm_model.plot_log_likelihood()

# Translate function
def translate_english_to_hindi(sentence):
    """Return the translation of ``sentence`` produced by the module-level ``ibm_model``."""
    translated = ibm_model.translate(sentence)
    return translated

# Interactive translation: keep prompting until the user types 'quit'
# or standard input is closed.
while True:
    try:
        english_sentence = input("Enter an English sentence for translation (or 'quit' to exit): ")
    except EOFError:
        # input() raises EOFError once stdin is exhausted (piped input, Ctrl-D)
        break
    # Ignore surrounding whitespace so that e.g. "quit " still exits
    if english_sentence.strip().lower() == 'quit':
        break
    translated_sentence = translate_english_to_hindi(english_sentence)
    print("Translated Sentence:", translated_sentence)

Translated Sentence: मुझे हमें सेब
Translated Sentence: मुझे हमें <UNK> सेब
Translated Sentence: चीन है, बर्फीला सन्यासी, अगस्त
Translated Sentence: के है, <UNK> ठंडा <UNK>
