-
Notifications
You must be signed in to change notification settings - Fork 2
/
spell_correct.py
54 lines (40 loc) · 1.87 KB
/
spell_correct.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
#!/usr/bin/python
# -*- coding: utf-8 -*-
import re
import unicodedata
from collections import Counter
def words(text):
    """Split *text* into a list of word tokens.

    A token is a maximal run of word characters (alphanumerics as defined
    by ``str.isalnum()`` plus the underscore, i.e. what ``\\w`` matches)
    together with any Unicode combining marks that follow them.

    The plain ``\\w+`` pattern is not enough here: combining marks such as
    the Malayalam vowel signs, virama and anusvara are not alphanumeric,
    so ``\\w+`` cuts a word in two at every such sign.  A mark is only
    accepted as the continuation of a token that has already started, so a
    stray mark on its own still yields no token.

    Returns an empty list for empty input.
    """
    tokens = []
    current = []
    for ch in text:
        if ch == "_" or ch.isalnum():
            current.append(ch)
        elif current and unicodedata.category(ch).startswith("M"):
            # Combining mark (Mn/Mc/Me) attached to the token in progress.
            current.append(ch)
        elif current:
            tokens.append("".join(current))
            current = []
    if current:
        tokens.append("".join(current))
    return tokens
# Load the vocabulary: one entry per line of words.txt, in file order.
# The context manager closes the file even if reading fails, and the
# explicit encoding keeps decoding independent of the platform default.
with open("words.txt", encoding="utf-8") as f:
    vocab = f.read().splitlines()
# Units that single_edit() may insert or substitute: signs, independent
# vowels, consonants, chillus and common conjuncts.  Built once at import
# time; duplicates in the literal are dropped while keeping first-seen order.
# NOTE(review): the literal lists two tokens twice (one vowel sign, and the
# consonant that follows "ത"); the latter may have been meant to be "ഥ" --
# confirm with the author before changing the alphabet.
_EDIT_LETTERS = tuple(dict.fromkeys((
    "് ാ ി ീ ു ൂ െ ൃ െ ൌ ം "
    "അ ആ ഇ ഉ ഋ എ ഏ ഒ "
    "ക ഖ ഗ ഘ ങ ച ഛ ജ ഝ ഞ ട ഠ ഢ ഡ ണ ത ഫ ദ ധ ന പ ഫ ബ ഭ മ "
    "യ ര റ ല ള ഴ വ ശ ഷ സ ഹ ൺ ൻ ർ ൽ ൾ "
    "ക്ക ക്ഷ ങ്ക ങ്ങ ച്ച ഞ്ച ഞ്ഞ ട്ട ണ്ട ണ്ണ ത്ത ദ്ധ ന്ത ന്ദ ന്ന പ്പ മ്പ മ്മ യ്യ ല്ല ള്ള "
    "്യ ്ര ്വ"
).split()))


def single_edit(word):
    """Return every string that is one edit away from *word*.

    Four kinds of edit are generated at each position of *word*:

    * delete one code point,
    * transpose two adjacent code points,
    * replace one code point with an alphabet unit,
    * insert an alphabet unit.

    Positions are code-point positions (plain string slicing), while the
    alphabet units may be several code points long (conjuncts).

    The result is a list without duplicates, ordered deletes, transposes,
    replaces, inserts.  For an empty *word* only the inserts are produced.
    """
    # The alphabet is split on whitespace so that no empty "letter" or
    # quote-bearing token can leak into the generated candidates.
    letters = _EDIT_LETTERS
    splits = [(word[:i], word[i:]) for i in range(len(word) + 1)]
    deletes = [L + R[1:] for L, R in splits if R]
    transposes = [L + R[1] + R[0] + R[2:] for L, R in splits if len(R) > 1]
    replaces = [L + c + R[1:] for L, R in splits if R for c in letters]
    inserts = [L + c + R for L, R in splits for c in letters]
    # dict.fromkeys de-duplicates while keeping a reproducible order
    # (a plain set() would order the strings differently on every run).
    return list(dict.fromkeys(deletes + transposes + replaces + inserts))
def two_letter_edit(words):
    """Return the second-level edits of a word, grouped by first edit.

    Despite its plural name, *words* is passed straight to single_edit()
    as one word.  For every string produced by ``single_edit(words)`` the
    result holds one inner list: the single edits of that string.  The
    outer list is therefore a list of lists and is not flattened.
    """
    return [single_edit(first_edit) for first_edit in single_edit(words)]
def candidates(word):
    """Collect vocabulary words that could be the intended spelling of *word*.

    Returns a ``(suggestions, alone)`` tuple:

    * If *word* is in ``vocab`` the result is ``([word], True)``.
    * Otherwise ``suggestions`` starts with *word* itself, followed by
      every vocabulary entry found among ``single_edit(word)`` and then
      among the groups returned by ``two_letter_edit(word)``, each listed
      once, in the order first encountered.  Empty strings are dropped
      from the returned list.  ``alone`` is True only when no vocabulary
      entry was collected.
    """
    # One set build replaces a linear scan of the vocabulary list for
    # every generated edit.
    known = set(vocab)
    if word in known:
        return [word], True

    found = [word]
    seen = {word}

    def _collect(options):
        # Keep vocabulary hits, skipping anything already recorded.
        for option in options:
            if option in known and option not in seen:
                seen.add(option)
                found.append(option)

    _collect(single_edit(word))
    # two_letter_edit() returns one list of edits per first-level edit, so
    # each inner list has to be walked; testing the inner lists themselves
    # against the vocabulary can never match.
    for group in two_letter_edit(word):
        _collect(group)

    # The flag is taken before empty strings are filtered out, as before.
    return [entry for entry in found if entry], len(found) == 1
def correction(word):
    """Return the result of ``candidates(word)`` unchanged.

    No ranking or selection is applied here; the caller receives exactly
    what candidates() produced for *word*.
    """
    return candidates(word)