In [2]:
class SimpleLangModel:
    """Count-based unigram/bigram/trigram model used to rank two sentences.

    The model tokenizes on whitespace, counts 1-, 2- and 3-grams of the
    training corpus and scores a sentence as a sum of log probabilities
    (see ``calculateMetrics``).  ``predict`` returns whichever of two
    candidate sentences scores higher.

    Attributes:
        gram1: word -> occurrence count.
        gram2: (word, next_word) -> occurrence count.
        gram3: (word, next_word, next_next_word) -> occurrence count.
        totalWords: total number of tokens seen by ``train`` so far.
    """

    K = 1  # smoothing parameter

    def __init__(self, corpus):
        """Create the count tables and train them on ``corpus`` (a string)."""
        self.gram1 = defaultdict(int)
        self.gram2 = defaultdict(int)
        self.gram3 = defaultdict(int)
        self.totalWords = 0

        self.train(corpus)

    def train(self, corpus):
        """Add the n-gram counts of ``corpus`` to the model.

        The corpus is split on whitespace; n-grams are taken over the whole
        token stream, so they may span sentence boundaries.  Calling
        ``train`` again adds to the existing counts.
        """
        words = corpus.split()
        # Accumulate instead of overwriting: the count tables below are
        # cumulative, so the token total has to be cumulative as well or a
        # second train() call would leave the two out of sync.
        self.totalWords += len(words)

        for unigram in words:
            self.gram1[unigram] += 1
        for bigram in zip(words, words[1:]):
            self.gram2[bigram] += 1
        for trigram in zip(words, words[1:], words[2:]):
            self.gram3[trigram] += 1

    def predict(self, sentence1, sentence2):
        """Return the higher-scoring sentence; ties go to ``sentence2``."""
        if self.calculateMetrics(sentence1) > self.calculateMetrics(sentence2):
            return sentence1
        return sentence2

    def calculateMetrics(self, sentence):
        """Return the log-probability score of ``sentence``.

        For every window of three consecutive words (w1, w2, w3) the score
        adds log P3(w1, w2, w3) + log P2(w1, w2) + log P1(w1).  A sentence
        with fewer than three words has no window and scores 0.
        """
        result = 0
        words = sentence.split()

        for first, second, third in zip(words, words[1:], words[2:]):
            result += (
                math.log(self.getGram3Prob(first, second, third))
                + math.log(self.getGram2Prob(first, second))
                + math.log(self.getGram1Prob(first))
            )

        return result

    def getGram1Prob(self, wordID):
        """Return the add-K smoothed unigram probability of ``wordID``.

        Computed as (count + K) / (totalWords + K * vocabulary size).
        """
        vocabSize = len(self.gram1)
        # .get() (not indexing) so that a lookup never inserts a key into the
        # defaultdict and silently grows the vocabulary.
        smoothedCount = self.gram1.get(wordID, 0) + self.K
        # The denominator must grow by K per vocabulary entry, matching the
        # K added to every numerator; identical to the old value for K == 1.
        return smoothedCount / (self.totalWords + self.K * vocabSize)

    def getGram2Prob(self, wordID1, wordID2):
        """Return the smoothed score of the bigram (``wordID1``, ``wordID2``).

        Computed as (bigram count + K) / (count of ``wordID1`` + totalWords).
        """
        # NOTE(review): the denominator adds totalWords rather than
        # K * vocabulary size as textbook add-K smoothing would; kept as is
        # so existing rankings do not change.
        denominator = self.gram1.get(wordID1, 0) + self.totalWords
        numerator = self.gram2.get((wordID1, wordID2), 0) + self.K
        return numerator / denominator

    def getGram3Prob(self, wordID1, wordID2, wordID3):
        """Return the smoothed score of the trigram of the three words.

        Computed as (trigram count + K) /
        (count of bigram (``wordID1``, ``wordID2``) + totalWords).
        """
        # NOTE(review): same totalWords-based denominator as getGram2Prob.
        denominator = self.gram2.get((wordID1, wordID2), 0) + self.totalWords
        numerator = self.gram3.get((wordID1, wordID2, wordID3), 0) + self.K
        return numerator / denominator

# Example usage
# Training text: twenty sentences joined by single spaces.  Adjacent string
# literals are concatenated at compile time, so this is one string.
corpus = (
    "Mr Patrick is our new principal. "
    "The company accepted all the terms. "
    "Please don’t keep your dog on the loose. "
    "The latter is my best friend. "
    "I need some stationary products for my craftwork. "
    "The actor accepted the Oscar. "
    "I will call you later in the evening. "
    "Covid affects the lungs. "
    "The council of the ministers were sworn in yesterday. "
    "Robert too wants to accompany us to the park. "
    "Mia will counsel me about choosing fashion as my career. "
    "The bear at the zoo was very playful. "
    "The sheep have a lot of fur that keeps them warm. "
    "The hot spring is at the furthest corner of the street. "
    "Can you advise me on how to study for exams? "
    "The team will lose the match if they don’t play well. "
    "Can you go to the market for me? "
    "The teachers asked the students to keep quiet. "
    "The heap of garbage should be cleaned immediately. "
    "This is their house."
)
model = SimpleLangModel(corpus)

# Candidate pairs to rank; the first element of each pair is the sentence
# that also appears verbatim in the training corpus above.
sentences = [
    (
        "Mr Patrick is our new principal.",
        "Mr Patrick is our new principle.",
    ),
    (
        "The company accepted all the terms.",
        "The company excepted all the terms.",
    ),
    (
        "Please don’t keep your dog on the loose.",
        "Please don’t keep your dog on the lose.",
    ),
    # NOTE(review): both candidates in this pair are identical, so the
    # comparison cannot distinguish anything -- confirm whether a variant
    # such as "later" was intended for the second one.
    (
        "The latter is my best friend.",
        "The latter is my best friend.",
    ),
    (
        "I need some stationary products for my craftwork.",
        "I need some stationery products for my craftwork.",
    ),
    (
        "The actor accepted the Oscar.",
        "The actor excepted the Oscar.",
    ),
    (
        "I will call you later in the evening.",
        "I will call you latter in the evening.",
    ),
    (
        "Covid affects the lungs.",
        "Covid effects the lungs.",
    ),
    (
        "The council of the ministers were sworn in yesterday.",
        "The council of the ministers was sworn in yesterday.",
    ),
    (
        "Robert too wants to accompany us to the park.",
        "Robert to wants to accompany us to the park.",
    ),
    (
        "Mia will counsel me about choosing fashion as my career.",
        "Mia will council me about choosing fashion as my career.",
    ),
    (
        "The bear at the zoo was very playful.",
        "The bare at the zoo was very playful.",
    ),
    (
        "The sheep have a lot of fur that keeps them warm.",
        "The sheep have a lot of far that keeps them warm.",
    ),
    (
        "The hot spring is at the furthest corner of the street.",
        "The hot spring is at the further corner of the street.",
    ),
    (
        "Can you advise me on how to study for exams?",
        "Can you advice me on how to study for exams?",
    ),
    (
        "The team will lose the match if they don’t play well.",
        "The team will loose the match if they don’t play well.",
    ),
    (
        "Can you go to the market for me?",
        "Can you too to the market for me?",
    ),
    (
        "The teachers asked the students to keep quiet.",
        "The teachers asked the students to keep quite.",
    ),
    (
        "The heap of garbage should be cleaned immediately.",
        "The heap of garbage should be clean immediately.",
    ),
    (
        "This is their house.",
        "This is there house.",
    ),
]

# Report both candidates and the model's pick, one blank line between pairs.
for sentence1, sentence2 in sentences:
    best_option = model.predict(sentence1, sentence2)
    print(
        f"Original Sentence 1: {sentence1}",
        f"Original Sentence 2: {sentence2}",
        f"Best Option: {best_option}\n",
        sep="\n",
    )


Original Sentence 1: Mr Patrick is our new principal.
Original Sentence 2: Mr Patrick is our new principle.
Best Option: Mr Patrick is our new principal.

Original Sentence 1: The company accepted all the terms.
Original Sentence 2: The company excepted all the terms.
Best Option: The company accepted all the terms.

Original Sentence 1: Please don’t keep your dog on the loose.
Original Sentence 2: Please don’t keep your dog on the lose.
Best Option: Please don’t keep your dog on the loose.

Original Sentence 1: The latter is my best friend.
Original Sentence 2: The latter is my best friend.
Best Option: The latter is my best friend.

Original Sentence 1: I need some stationary products for my craftwork.
Original Sentence 2: I need some stationery products for my craftwork.
Best Option: I need some stationary products for my craftwork.

Original Sentence 1: The actor accepted the Oscar.
Original Sentence 2: The actor excepted the Oscar.
Best Option: The actor accepted the Oscar.

Origi

In [1]:
import math
from collections import defaultdict