From 1ccb9f0e62b0f5b19d9fb790d68d6ac945c9e6c6 Mon Sep 17 00:00:00 2001 From: Lucas Moura Date: Fri, 24 Mar 2017 14:08:01 -0300 Subject: [PATCH 1/3] Add NgramCharModel to text.py --- text.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/text.py b/text.py index 855e89aaf..64ba0a63c 100644 --- a/text.py +++ b/text.py @@ -26,6 +26,7 @@ def samples(self, n): return ' '.join(self.sample() for i in range(n)) + class NgramTextModel(CountingProbDist): """This is a discrete probability distribution over n-tuples of words. @@ -50,11 +51,15 @@ def add(self, ngram): self.cond_prob[ngram[:-1]] = CountingProbDist() self.cond_prob[ngram[:-1]].add(ngram[-1]) + def add_empty(self, words, n): + return ['', ] * (n - 1) + words + def add_sequence(self, words): """Add each of the tuple words[i:i+n], using a sliding window. Prefix some copies of the empty word, '', to make the start work.""" n = self.n - words = ['', ] * (n - 1) + words + words = self.add_empty(words, n) + for i in range(len(words) - n): self.add(tuple(words[i:i + n])) @@ -72,6 +77,15 @@ def samples(self, nwords): nminus1gram = nminus1gram[1:] + (wn,) return ' '.join(output) + +class NgramCharModel(NgramTextModel): + def add_empty(self, words, n): + return ' ' * (n - 1) + words + + def add_sequence(self, words): + for word in words: + super().add_sequence(word) + # ______________________________________________________________________________ From fd5475cffb1f69e925372fe7c51d9afb54b7ecde Mon Sep 17 00:00:00 2001 From: Peter Norvig Date: Fri, 24 Mar 2017 22:02:17 -0700 Subject: [PATCH 2/3] Update text.py --- text.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/text.py b/text.py index 64ba0a63c..65eef28f6 100644 --- a/text.py +++ b/text.py @@ -52,7 +52,7 @@ def add(self, ngram): self.cond_prob[ngram[:-1]].add(ngram[-1]) def add_empty(self, words, n): - return ['', ] * (n - 1) + words + return [''] * (n - 1) + words def add_sequence(self, words): """Add each of the tuple words[i:i+n], using a sliding window. From 903701e63e088afdba3b7d9259ab8d96226f45fa Mon Sep 17 00:00:00 2001 From: Peter Norvig Date: Fri, 24 Mar 2017 22:02:52 -0700 Subject: [PATCH 3/3] Update