15 changes: 11 additions & 4 deletions libindic/stemmer/__init__.py
Expand Up @@ -33,7 +33,8 @@ class Malayalam:

def __init__(self, verbose=False):
self.verbose = verbose
self.rules_file = os.path.join(os.path.dirname(__file__), 'data/ml.rules')
self.rules_file = os.path.join(
os.path.dirname(__file__), 'data/ml.rules')
self.rulesDict = None
self.normalizer = normalizer.getInstance()
self.dictionary_file = open(os.path.join(
Expand Down Expand Up @@ -106,7 +107,11 @@ def stem(self, text):
suffix = result[counter:] # Right to left suffix stripping
if suffix in self.rulesDict:
if self.verbose:
print("\t Satisfying rule found : ", suffix, " = ", self.rulesDict[suffix])
print(
"\t Satisfying rule found : ",
suffix,
" = ",
self.rulesDict[suffix])
result = result[:counter] + self.rulesDict[suffix]
# A satisfying rule found, continue stemming.
found = True
Expand All @@ -127,7 +132,7 @@ def LoadRules(self):
rule_number = 0
rules_file = codecs.open(self.rules_file, encoding='utf-8',
errors='ignore')
while 1:
while True:
line_number = line_number + 1
try:
text = unicode(rules_file.readline())
Expand All @@ -143,7 +148,9 @@ def LoadRules(self):
if(line == ""):
continue
if(len(line.split("=")) != 2):
print("[Error] Syntax Error in the Rules. Line number: ", line_number)
print(
"[Error] Syntax Error in the Rules. Line number: ",
line_number)
print("Line: " + text)
continue
lhs = line.split("=")[0].strip()
Expand Down
18 changes: 9 additions & 9 deletions libindic/stemmer/tests/tests_stemmer.py
Expand Up @@ -43,7 +43,7 @@ def test_accusative(self):
word = self.stemmer.singleencode(word)
obtained = self.stemmer.stem(word)[word]
if self.verbosity:
print expected, obtained
print(expected, obtained)
assert obtained == expected

def test_conjuctive(self):
Expand Down Expand Up @@ -76,7 +76,7 @@ def test_conjuctive(self):
word = self.stemmer.singleencode(word)
obtained = self.stemmer.stem(word)[word]
if self.verbosity:
print expected, obtained
print(expected, obtained)
assert obtained == expected

def test_dative(self):
Expand Down Expand Up @@ -109,7 +109,7 @@ def test_dative(self):
word = self.stemmer.singleencode(word)
obtained = self.stemmer.stem(word)[word]
if self.verbosity:
print expected, obtained
print(expected, obtained)
assert obtained == expected

def test_instrumental(self):
Expand Down Expand Up @@ -141,7 +141,7 @@ def test_instrumental(self):
word = self.stemmer.singleencode(word)
obtained = self.stemmer.stem(word)[word]
if self.verbosity:
print expected, obtained
print(expected, obtained)
assert obtained == expected

def test_possessive(self):
Expand Down Expand Up @@ -175,7 +175,7 @@ def test_possessive(self):
word = self.stemmer.singleencode(word)
obtained = self.stemmer.stem(word)[word]
if self.verbosity:
print expected, obtained
print(expected, obtained)
assert obtained == expected

def test_locative(self):
Expand Down Expand Up @@ -206,7 +206,7 @@ def test_locative(self):
word = self.stemmer.singleencode(word)
obtained = self.stemmer.stem(word)[word]
if self.verbosity:
print expected, obtained
print(expected, obtained)
assert obtained == expected

def test_and(self):
Expand Down Expand Up @@ -241,7 +241,7 @@ def test_and(self):
word = self.stemmer.singleencode(word)
obtained = self.stemmer.stem(word)[word]
if self.verbosity:
print expected, obtained
print(expected, obtained)
assert obtained == expected

def test_plurals(self):
Expand Down Expand Up @@ -273,7 +273,7 @@ def test_plurals(self):
word = self.stemmer.singleencode(word)
obtained = self.stemmer.stem(word)[word]
if self.verbosity:
print expected, obtained
print(expected, obtained)
assert obtained == expected

def test_failures(self):
Expand All @@ -286,5 +286,5 @@ def test_failures(self):
word = self.stemmer.singleencode(word)
obtained = self.stemmer.stem(word)[word]
if self.verbosity:
print expected, obtained
print(expected, obtained)
assert obtained == expected
283 changes: 0 additions & 283 deletions libindic/stemmer/tests/tests_stemmer.py.bak

This file was deleted.

27 changes: 27 additions & 0 deletions setup.cfg
@@ -0,0 +1,27 @@
# Package metadata read by pbr (setup.py passes pbr=True to setup()).
# NOTE(review): the setup.py this replaces declared license "LGPL-3.0", while
# the classifier below advertises AGPLv3+ -- confirm which license is intended.
# NOTE(review): home-page is left empty -- fill in the project URL or drop it.
[metadata]
name = stemmer
summary = Obtain stem from inflected word
description-file =
README.rst
author = Balasankar C
author-email = balasankarc@autistici.org
home-page =
classifier =
#Environment :: Web Environment
#Framework :: Flask
Intended Audience :: Developers
Intended Audience :: Information Technology
License :: OSI Approved
License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)
Programming Language :: Python
Programming Language :: Python :: 3

[files]
# The importable code lives under libindic/stemmer/, so the top-level package
# pbr has to collect is "libindic"; there is no top-level "stemmer" directory.
# namespace_packages carries over the value the previous setup.py passed to
# setup(), so libindic stays a namespace package shared with sibling projects.
# NOTE(review): the previous setup.py also declared install_requires
# ['setuptools', 'normalizer']; pbr reads runtime dependencies from
# requirements.txt -- confirm that file exists and lists them.
packages = libindic
namespace_packages = libindic

# Options for the Sphinx documentation build; sources are read from docs/.
[build-sphinx]
all_files = 1
source-dir = docs

# Request a "universal" wheel, i.e. one wheel tagged for both Python 2 and 3.
# NOTE(review): libindic/stemmer/__init__.py still calls unicode() in
# LoadRules, which is undefined on Python 3 -- confirm Python 3 support before
# shipping a universal wheel.
[wheel]
universal = 1
24 changes: 3 additions & 21 deletions setup.py
@@ -1,26 +1,8 @@
#!/usr/bin/env python

from setuptools import setup, find_packages

name = "stemmer"
from setuptools import setup

setup(
name=name,
version="0.1",
license="LGPL-3.0",
description="Malayalam word stemmer",
author="Santhosh Thottingal",
author_email="santhosh.thottingal@gmail.com",
long_description="""This application helps you to stem the words
in the given text. Currently supports only Malayalam.
Note that this is very experimental and uses a rule based approach.
""",
namespace_packages=['libindic'],
packages=find_packages(),
include_package_data=True,
setup_requires=['setuptools-git'],
install_requires=['setuptools', 'normalizer'],
test_suite="libindic.stemmer.tests",
zip_safe=False,
setup_requires=['pbr'],
pbr=True,
)
6 changes: 6 additions & 0 deletions test-requirements.txt
@@ -0,0 +1,6 @@
testrepository
python-subunit
flake8
mccabe
coverage
coveralls