Permalink
Browse files

added some comments, added setup.py

  • Loading branch information...
1 parent 5429a12 commit 69a9895a51304531e83731d9b7935fc9d69a8074 Jen Fong-Adwent committed Jan 25, 2012
Showing with 70 additions and 2 deletions.
  1. +9 −2 auto_tagify.py
  2. +61 −0 setup.py
View
@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
import re
import urllib
@@ -13,16 +14,17 @@
SMART_QUOTES_S = re.compile('(\xe2\x80\x98)|(\xe2\x80\x99)|(\‘)|(\’)')
STOP_WORDS = ['DT', 'IN', 'TO', 'VBD', 'VBD', 'VBG', 'VBN', 'VBZ', 'MD', 'RB', 'CC', 'WDT']
+lemma = WordNetLemmatizer()
+
class AutoTagify():
- lemma = WordNetLemmatizer()
-
def __init__(self):
self.css = ''
self.link = ''
self.text = ''
def generate(self, strict=True):
+ """Return the HTML version of tags for the string."""
tag_words = ''
for (word, word_type) in self._tokenize():
tag_word = self._cleaned(word,strict)
@@ -36,6 +38,10 @@ def generate(self, strict=True):
return tag_words
def tag_list(self, strict=True):
+ """Return the tags from string as a list. If strict is set
+ to True, then only return the stemmed version. Otherwise, return the
+ full string - therefore, `cat` will be considered different from `cats`.
+ """
tag_words = []
for (word, word_type) in self._tokenize():
tag_word = self._cleaned(word,strict)
@@ -44,6 +50,7 @@ def tag_list(self, strict=True):
return tag_words
def _tokenize(self):
+ """Tag words from the string."""
return nltk.pos_tag(nltk.word_tokenize(self._clean_text()))
def _cleaned(self, word, strict):
View
@@ -0,0 +1,61 @@
+from distutils.core import setup
+VERSION = '1.2'
+setup(name='auto_tagify',
+ version=VERSION,
+ author='Edna Piranha',
+ author_email='jen@ednapiranha.com',
+ url='https://github.com/ednapiranha/auto-tagify',
+ download_url='http://bitworking.org/projects/httplib2/dist/httplib2-%s.tar.gz' % VERSION,
+ description='Auto-tags a selection of text and generates links to the tagified versions of the words',
+ license='MIT',
+ long_description="""
+
+Auto Tagify is a simple auto tagging module that uses NLTK to generate tags out of a selection of text. Any text that is less than 3 characters long or matches a particular POS (part-of-speech) will be ignored.
+
+There are two operations Auto Tagify performs - one returns the selection of text with links embedded in the string and the other returns a list of all the taggable words as the stem word (using lemmatization).
+
+For the first operation, everything is optional, but it is most effective to enter some text. Optional parameters you can set are the paths for tag links and the css classes for link. For instance, if you set your tag routing to a relative path such as /tags/<tagged_word> and want to use the css class named "tagged":
+
+from auto_tagify import AutoTagify
+
+t = AutoTagify()
+
+t.text = "This is the text to display!"
+
+t.link = "/tags"
+
+t.css = "tagged"
+
+t.generate()
+
+The result will be: This is the <a href="/tags/text" class="tagged">text</a> to <a href="/tags/display" class="tagged">display!</a>
+
+If no link is set, the default path is "/<tagged word>", such as "/text".
+
+For the second operation, you will only receive a list of all your taggable words from the text. You can call it like so:
+
+t.text = "This text is tagged kittens"
+
+t.tag_list()
+
+The result will be a list: ['text', 'tag', 'kitten']
+
+By default, generate() and tag_list() will be in strict mode, which means all special characters will be stripped. If generate(False) or tag_list(False) is set, then special characters will be url encoded.
+
+These two operations are sufficient for you to maintain tag counts and tag references to text in your application.
+ """,
+ packages=['auto_tagify'],
+ keywords='tagging tags html nltk english',
+ classifiers=[
+ 'Development Status :: 4 - Beta',
+ 'Environment :: Web Environment',
+ 'Intended Audience :: Developers',
+ 'Intended Audience :: Science/Research',
+ 'License :: OSI Approved :: MIT License',
+ 'Natural Language :: English',
+ 'Operating System :: OS Independent',
+ 'Programming Language :: Python',
+ 'Topic :: Internet :: WWW/HTTP',
+ 'Topic :: Software Development :: Libraries',
+ ],
+ )

0 comments on commit 69a9895

Please sign in to comment.