
cosmetics: many typofixes, suggested by "codespell.py"

The typofixes in this patch were suggested by the
"codespell.py" script.  Reference:
<http://git.profusion.mobi/cgit.cgi/lucas/codespell/>
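
For reference, a plausible way to reproduce such suggestions is to run the script over the source tree. This is a sketch assuming codespell.py's usual interface of taking files or directories as arguments (the nltk/ path is illustrative):

    python codespell.py nltk/

The script prints each suspected misspelling along with its proposed correction.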
1 parent 65f2cc4, commit f759b9bc87f67e9099ed482aa02586ea4ed376a6, slattarini committed Feb 6, 2012
@@ -1247,7 +1247,7 @@ def _color_edge(self, edge, linecolor=None, textcolor=None):
"""
Color in an edge with the given colors.
If no colors are specified, use intelligent defaults
- (dependant on selection, etc.)
+ (dependent on selection, etc.)
"""
if edge not in self._edgetags: return
c = self._chart_canvas
@@ -263,7 +263,7 @@ def wnb(port=8000, runBrowser=True, logfilename=None):
# may have to shutdown both programs.
#
# Since webbrowser may block, and the webserver will block, we must run
- # them in seperate threads.
+ # them in separate threads.
#
global server_mode, logfile
server_mode = not runBrowser
@@ -608,7 +608,7 @@ def format_lemma(w):
def _collect_all_synsets(word, pos, synset_relations=dict()):
"""
Return a HTML unordered list of synsets for the given word and
- part of speach.
+ part of speech.
"""
return '<ul>%s\n</ul>\n' % \
''.join((_collect_one_synset(word, synset, synset_relations)
@@ -677,7 +677,7 @@ def __init__(self, word, synset_relations=dict()):
"""
Build a reference to a new page.
- word is the word or words (seperated by commas) for which to
+ word is the word or words (separated by commas) for which to
search for synsets of
synset_relations is a dictionary of synset keys to sets of
@@ -47,7 +47,7 @@
"I'm not even going to dignify that with an answer.")),
(r'What (.*)',
- ("Do I look like an encylopedia?",
+ ("Do I look like an encyclopedia?",
"Figure it out yourself.")),
(r'Why (.*)',
@@ -168,13 +168,13 @@
# desire to do an action
# e.g. "I want to go shopping"
(r'i want to (.*)',
- ( "You may %1 if your heart truely desires to.",
+ ( "You may %1 if your heart truly desires to.",
"You may have to %1.")),
# desire for an object
# e.g. "I want a pony"
(r'i want (.*)',
- ( "Does your heart truely desire %1?",
+ ( "Does your heart truly desire %1?",
"Is this a desire of the heart, or of the mind?")),
# e.g. "I can't wait" or "I can't do this"
@@ -18,7 +18,7 @@ class ChunkParserI(ParserI):
"""
A processing interface for identifying non-overlapping groups in
unrestricted text. Typically, chunk parsers are used to find base
- syntactic constituants, such as base noun phrases. Unlike
+ syntactic constituents, such as base noun phrases. Unlike
``ParserI``, ``ChunkParserI`` guarantees that the ``parse()`` method
will always generate a parse.
"""
@@ -53,7 +53,7 @@ class ChunkScore(object):
evaluate a chunk parser's output, based on a number of statistics
(precision, recall, f-measure, misssed chunks, incorrect chunks).
It can also combine the scores from the parsing of multiple texts;
- this makes it signifigantly easier to evaluate a chunk parser that
+ this makes it significantly easier to evaluate a chunk parser that
operates one sentence at a time.
Texts are evaluated with the ``score`` method. The results of
@@ -24,7 +24,7 @@
"features" are typically chosen by hand, and indicate which aspects
of the token are relevant to the classification decision. For
example, a document classifier might use a separate feature for each
-word, recording how often that word occured in the document.
+word, recording how often that word occurred in the document.
Featuresets
===========
@@ -9,9 +9,9 @@
"""
A classifier model based on maximum entropy modeling framework. This
framework considers all of the probability distributions that are
-empirically consistant with the training data; and chooses the
+empirically consistent with the training data; and chooses the
distribution with the highest entropy. A probability distribution is
-"empirically consistant" with a set of training data if its estimated
+"empirically consistent" with a set of training data if its estimated
frequency with which a class and a feature vector value co-occur is
equal to the actual frequency in the data.
@@ -20,7 +20,7 @@
The term *feature* is usually used to refer to some property of an
unlabeled token. For example, when performing word sense
disambiguation, we might define a ``'prevword'`` feature whose value is
-the word preceeding the target word. However, in the context of
+the word preceding the target word. However, in the context of
maxent modeling, the term *feature* is typically used to refer to a
property of a "labeled" token. In order to prevent confusion, we
will introduce two distinct terms to disambiguate these two different
@@ -1265,7 +1265,7 @@ def calculate_deltas(train_toks, classifier, unattested, ffreq_empirical,
The variables ``nfmap``, ``nfarray``, and ``nftranspose`` are
used to generate a dense encoding for *nf(ltext)*. This
allows ``_deltas`` to calculate *sum1* and *sum2* using
- matrices, which yields a signifigant performance improvement.
+ matrices, which yields a significant performance improvement.
:param train_toks: The set of training tokens.
:type train_toks: list(tuple(dict, str))
@@ -188,7 +188,7 @@ def train(labeled_featuresets, estimator=ELEProbDist):
feature_values = defaultdict(set)
fnames = set()
- # Count up how many times each feature value occured, given
+ # Count up how many times each feature value occurred, given
# the label and featurename.
for featureset, label in labeled_featuresets:
label_freqdist.inc(label)
@@ -107,7 +107,7 @@ def _batch_classify(self, featuresets, options):
# Check if something went wrong:
if stderr and not stdout:
if 'Illegal options: -distribution' in stderr:
- raise ValueError('The installed verison of weka does '
+ raise ValueError('The installed version of weka does '
'not support probability distribution '
'output.')
else:
@@ -19,7 +19,7 @@ class KMeansClusterer(VectorSpaceClusterer):
process repeats until the cluster memberships stabilise. This is a
hill-climbing algorithm which may converge to a local maximum. Hence the
clustering is often repeated with random initial means and the most
- commonly occuring output means are chosen.
+ commonly occurring output means are chosen.
"""
def __init__(self, num_means, distance, repeats=1,
@@ -647,7 +647,7 @@ def read_sexpr_block(stream, block_size=16384, comment_char=None):
block will be read.
:param comment_char: A character that marks comments. Any lines
that begin with this character will be stripped out.
- (If spaces or tabs preceed the comment character, then the
+ (If spaces or tabs precede the comment character, then the
line will not be stripped.)
"""
start = stream.tell()
@@ -345,7 +345,7 @@ def root_hypernyms(self):
return result
# Simpler implementation which makes incorrect assumption that
-# hypernym hierarcy is acyclic:
+# hypernym hierarchy is acyclic:
#
# if not self.hypernyms():
# return [self]
@@ -127,7 +127,7 @@ def join(self, fileid):
Return a new path pointer formed by starting at the path
identified by this pointer, and then following the relative
path given by ``fileid``. The path components of ``fileid``
- should be seperated by forward slashes, regardless of
+ should be separated by forward slashes, regardless of
the underlying file system's path seperator character.
"""
raise NotImplementedError('abstract base class')
@@ -1008,7 +1008,7 @@ def _char_seek_forward(self, offset, est_bytes=None):
ignoring all buffers.
:param est_bytes: A hint, giving an estimate of the number of
- bytes that will be neded to move foward by ``offset`` chars.
+ bytes that will be neded to move forward by ``offset`` chars.
Defaults to ``offset``.
"""
if est_bytes is None: est_bytes = offset
@@ -1809,7 +1809,7 @@ def _download_threaded(self, *e):
# download (e.g., clicking 'refresh' or editing the index url).
ds = Downloader(self._ds.url, self._ds.download_dir)
- # Start downloading in a seperate thread.
+ # Start downloading in a separate thread.
assert self._download_msg_queue == []
assert self._download_abort_queue == []
self._DownloadThread(ds, marked, self._download_lock,
@@ -103,7 +103,7 @@ def _item_repr(self, item):
Nonterminals must be a single word, such as S or NP or NP_subj.
Currently, nonterminals must consists of alphanumeric characters and
underscores (_). Nonterminals are colored blue. If you place the
-mouse over any nonterminal, then all occurances of that nonterminal
+mouse over any nonterminal, then all occurrences of that nonterminal
will be highlighted.
Termianals must be surrounded by single quotes (') or double
@@ -118,7 +118,7 @@ def _item_repr(self, item):
different line, your production will automatically be colorized. If
there are any errors, they will be highlighted in red.
-Note that the order of the productions is signifigant for some
+Note that the order of the productions is significant for some
algorithms. To re-order the productions, use cut and paste to move
them.
@@ -480,7 +480,7 @@ class TreeWidget(CanvasWidget):
``'vertical'``. The default value is ``'vertical'`` (i.e.,
branch downwards).
- - ``shapeable``: whether the subtrees can be independantly
+ - ``shapeable``: whether the subtrees can be independently
dragged by the user. THIS property simply sets the
``DRAGGABLE`` property on all of the ``TreeWidget``'s tree
segments.
@@ -1133,7 +1133,7 @@ def rename_variables(fstruct, vars=None, used_vars=(), new_vars=None,
``new_vars``, mapping *v* to the new variable that is used
to replace it.
- To consistantly rename the variables in a set of feature
+ To consistently rename the variables in a set of feature
structures, simply apply rename_variables to each one, using
the same dictionary:
@@ -382,7 +382,7 @@ class WeightedProduction(Production, ImmutableProbabilisticMixIn):
has an associated probability, which represents how likely it is that
this production will be used. In particular, the probability of a
``WeightedProduction`` records the likelihood that its right-hand side is
- the correct instantiation for any given occurance of its left-hand side.
+ the correct instantiation for any given occurrence of its left-hand side.
:see: ``Production``
"""
@@ -192,8 +192,8 @@ def java(cmd, classpath=None, stdin=None, stdout=None, stderr=None,
class ParseError(ValueError):
"""
Exception raised by parse_* functions when they fail.
- :param position: The index in the input string where an error occured.
- :param expected: What was expected when an error occured.
+ :param position: The index in the input string where an error occurred.
+ :param expected: What was expected when an error occurred.
"""
def __init__(self, expected, position):
ValueError.__init__(self, expected, position)
@@ -79,7 +79,7 @@ class EdgeI(object):
- A ``TreeEdge`` records which trees have been found to
be (partially) consistent with the text.
- - A ``LeafEdge`` records the tokens occuring in the text.
+ - A ``LeafEdge`` records the tokens occurring in the text.
The ``EdgeI`` interface provides a common interface to both types
of edge, allowing chart parsers to treat them in a uniform manner.
@@ -1549,7 +1549,7 @@ def parses(self, tree_class=Tree):
def set_strategy(self, strategy):
"""
- Change the startegy that the parser uses to decide which edges
+ Change the strategy that the parser uses to decide which edges
to add to the chart.
:type strategy: list(ChartRuleI)
@@ -1140,7 +1140,7 @@ class WittenBellProbDist(ProbDistI):
reserved for unseen events is equal to *T / (N + T)*
where *T* is the number of observed event types and *N* is the total
number of observed events. This equates to the maximum likelihood estimate
- of a new type event occuring. The remaining probability mass is discounted
+ of a new type event occurring. The remaining probability mass is discounted
such that all probability estimates sum to one, yielding:
- *p = T / Z (N + T)*, if count = 0
@@ -1155,7 +1155,7 @@ def __init__(self, freqdist, bins=None):
probability mass reserved for unseen events is equal to *T / (N + T)*
where *T* is the number of observed event types and *N* is the total
number of observed events. This equates to the maximum likelihood
- estimate of a new type event occuring. The remaining probability mass
+ estimate of a new type event occurring. The remaining probability mass
is discounted such that all probability estimates sum to one,
yielding:
@@ -400,7 +400,7 @@ def cities2table(filename, rel_name, dbname, verbose=False, setup=False):
print "inserting values into %s: " % table_name, t
connection.commit()
if verbose:
- print "Commiting update to %s" % dbname
+ print "Committing update to %s" % dbname
cur.close()
except ImportError:
import warnings
@@ -566,7 +566,7 @@ def replace(self, old, new, count=0):
# Check for unicode/bytestring mismatches:
if self._mixed_string_types(old, new, count):
return self._decode_and_call('replace', old, new, count)
- # Use a regexp to find all occurences of old, and replace them w/ new.
+ # Use a regexp to find all occurrences of old, and replace them w/ new.
result = ''
pos = 0
for match in re.finditer(re.escape(old), self):
@@ -929,7 +929,7 @@ def __init__(self, contents, source):
self.source = source
"""A ``StringLocation`` specifying the location where this string
- occured in the source document."""
+ occurred in the source document."""
@property
def begin(self):
@@ -48,7 +48,7 @@
Porter, M. "An algorithm for suffix stripping." Program 14.3 (1980): 130-137.
-only differing from it at the points maked --DEPARTURE-- and --NEW--
+only differing from it at the points marked --DEPARTURE-- and --NEW--
below.
For a more faithful version of the Porter algorithm, see
@@ -62,7 +62,7 @@
The 'l' of the 'logi' -> 'log' rule is put with the stem, so that
short stems like 'geo' 'theo' etc work like 'archaeo' 'philo' etc.
- This follows a suggestion of Barry Wilkins, reasearch student at
+ This follows a suggestion of Barry Wilkins, research student at
Birmingham.
@@ -13,7 +13,7 @@
# Python version, with some minor modifications of mine, to the description
# presented at http://www.webcitation.org/5NnvdIzOb and to the C source code
# available at http://www.inf.ufrgs.br/~arcoelho/rslp/integrando_rslp.html.
-# Please note that this stemmer is intended for demostration and educational
+# Please note that this stemmer is intended for demonstration and educational
# purposes only. Feel free to write me for any comments, including the
# development of a different and/or better stemmer for Portuguese. I also
# suggest using NLTK's mailing list for Portuguese for any discussion.
@@ -1016,7 +1016,7 @@ def train_supervised(self, labelled_sequences, **kwargs):
if estimator is None:
estimator = lambda fdist, bins: MLEProbDist(fdist)
- # count occurences of starting states, transitions out of each state
+ # count occurrences of starting states, transitions out of each state
# and output symbols observed in each state
starting = FreqDist()
transitions = ConditionalFreqDist()
@@ -241,7 +241,7 @@ def __repr__(self):
class NgramTagger(ContextTagger, yaml.YAMLObject):
"""
A tagger that chooses a token's tag based on its word string and
- on the preceeding n word's tags. In particular, a tuple
+ on the preceding n word's tags. In particular, a tuple
(tags[i-n:i-1], words[i]) is looked up in a table, and the
corresponding tag is returned. N-gram taggers are typically
trained on a tagged corpus.
@@ -323,7 +323,7 @@ def context(self, tokens, index, history):
class BigramTagger(NgramTagger):
"""
A tagger that chooses a token's tag based its word string and on
- the preceeding words' tag. In particular, a tuple consisting
+ the preceding words' tag. In particular, a tuple consisting
of the previous tag and the word is looked up in a table, and
the corresponding tag is returned.
@@ -349,7 +349,7 @@ def __init__(self, train, model=None,
class TrigramTagger(NgramTagger):
"""
A tagger that chooses a token's tag based its word string and on
- the preceeding two words' tags. In particular, a tuple consisting
+ the preceding two words' tags. In particular, a tuple consisting
of the previous two tags and the word is looked up in a table, and
the corresponding tag is returned.
@@ -202,7 +202,7 @@ def _compute_lambda(self):
# However no effect within this function
for tag in self._tri[history].samples():
- # if there has only been 1 occurance of this tag in the data
+ # if there has only been 1 occurrence of this tag in the data
# then ignore this trigram.
if self._uni[tag] == 1:
continue
@@ -8,7 +8,7 @@
# $Id$
"""
-Unit tests for the NLTK modules. These tests are intented to ensure
+Unit tests for the NLTK modules. These tests are intended to ensure
that changes that we make to NLTK's code don't accidentally introduce
bugs.