Skip to content

Commit

Permalink
7040 Detect common spelling errors in manual pages
Browse files Browse the repository at this point in the history
Reviewed by: Marcel Telka <marcel@telka.sk>
Approved by: Dan McDonald <danmcd@omniti.com>
  • Loading branch information
melloc authored and rmustacc committed Jun 21, 2016
1 parent b13c838 commit 71af3be
Show file tree
Hide file tree
Showing 5 changed files with 293 additions and 3 deletions.
2 changes: 2 additions & 0 deletions usr/src/pkg/manifests/developer-build-onbld.mf
Expand Up @@ -168,6 +168,8 @@ file path=opt/onbld/lib/python2.6/onbld/Checks/Mapfile.py mode=0444
file path=opt/onbld/lib/python2.6/onbld/Checks/Mapfile.pyc mode=0444
file path=opt/onbld/lib/python2.6/onbld/Checks/ProcessCheck.py mode=0444
file path=opt/onbld/lib/python2.6/onbld/Checks/ProcessCheck.pyc mode=0444
file path=opt/onbld/lib/python2.6/onbld/Checks/SpellCheck.py mode=0444
file path=opt/onbld/lib/python2.6/onbld/Checks/SpellCheck.pyc mode=0444
file path=opt/onbld/lib/python2.6/onbld/Checks/__init__.py mode=0444
file path=opt/onbld/lib/python2.6/onbld/Checks/__init__.pyc mode=0444
file path=opt/onbld/lib/python2.6/onbld/Scm/Backup.py mode=0444
Expand Down
2 changes: 2 additions & 0 deletions usr/src/tools/onbld/Checks/Makefile
Expand Up @@ -25,6 +25,7 @@

# Copyright 2010, Richard Lowe
# Copyright 2014 Garrett D'Amore <garrett@damore.org>
# Copyright 2016, Joyent, Inc.

include $(SRC)/Makefile.master
include ../../Makefile.tools
Expand All @@ -42,6 +43,7 @@ PYSRCS = \
ManLint.py \
Mapfile.py \
ProcessCheck.py \
SpellCheck.py \
__init__.py

PYOBJS = $(PYSRCS:%.py=%.pyc)
Expand Down
284 changes: 284 additions & 0 deletions usr/src/tools/onbld/Checks/SpellCheck.py
@@ -0,0 +1,284 @@
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#

#
# Copyright 2016 Joyent, Inc.
#

import re, sys

# Diagnostic templates.  Each one is filled in with, in order: the file
# name, the line number, the offending text and the suggested replacement.
spellMsg = '%s: Line %d contains "%s", a common misspelling of "%s"\n'
# Fixed: this template previously read "Lined %d", which made the
# alternate-term diagnostic inconsistent with the misspelling one.
altMsg = '%s: Line %d contains "%s"; please use "%s" instead for consistency with other documentation\n'

# Table of common misspellings.  Each key is the misspelled text and each
# value is the text shown to the user as the suggested correction.
#
# - Keys are written in lower case; the patterns built from them further
#   down in this file are compiled with re.IGNORECASE and anchored with \b
#   on both sides, so only whole-word, case-insensitive hits are reported.
# - A key may contain a space (e.g. 'and and', 'the the') to catch an
#   accidentally doubled or mangled pair of words on one line.
# - A value may list several comma-separated candidates (e.g.
#   'a lot, allot') when the intended word is ambiguous; the value is only
#   ever printed, never parsed.
misspellings = {
'absense': 'absence',
'accessable': 'accessible',
'accomodate': 'accommodate',
'accomodation': 'accommodation',
'accross': 'across',
'acheive': 'achieve',
'addional': 'additional',
'addres': 'address',
'admininistrative': 'administrative',
'adminstered': 'administered',
'adminstrate': 'administrate',
'adminstration': 'administration',
'adminstrative': 'administrative',
'adminstrator': 'administrator',
'admissability': 'admissibility',
'adress': 'address',
'adressable': 'addressable',
'adressed': 'addressed',
'adressing': 'addressing, dressing',
'aginst': 'against',
'agression': 'aggression',
'agressive': 'aggressive',
'alot': 'a lot, allot',
'and and': 'and',
'apparantly': 'apparently',
'appearence': 'appearance',
'arguement': 'argument',
'assasination': 'assassination',
'auxilliary': 'auxiliary',
'basicly': 'basically',
'begining': 'beginning',
'belive': 'believe',
'beteen': 'between',
'betwen': 'between',
'beween': 'between',
'bewteen': 'between',
'bizzare': 'bizarre',
'buisness': 'business',
'calender': 'calendar',
'cemetary': 'cemetery',
'chauffer': 'chauffeur',
'collegue': 'colleague',
'comming': 'coming',
'commited': 'committed',
'commitee': 'committee',
'commiting': 'committing',
'comparision': 'comparison',
'comparisions': 'comparisons',
'compatability': 'compatibility',
'compatable': 'compatible',
'compatablity': 'compatibility',
'compatiable': 'compatible',
'compatiblity': 'compatibility',
'completly': 'completely',
'concious': 'conscious',
'condidtion': 'condition',
'conected': 'connected',
'conjuction': 'conjunction',
'continous': 'continuous',
'curiousity': 'curiosity',
'deamon': 'daemon',
'definately': 'definitely',
'desireable': 'desirable',
'diffrent': 'different',
'dilemna': 'dilemma',
'dissapear': 'disappear',
'dissapoint': 'disappoint',
'ecstacy': 'ecstasy',
'embarass': 'embarrass',
'enviroment': 'environment',
'exept': 'except',
'existance': 'existence',
'familar': 'familiar',
'finaly': 'finally',
'folowing': 'following',
'foriegn': 'foreign',
'forseeable': 'foreseeable',
'fourty': 'forty',
'foward': 'forward',
'freind': 'friend',
'futher': 'further',
'gaurd': 'guard',
'glamourous': 'glamorous',
'goverment': 'government',
'happend': 'happened',
'harrassment': 'harassment',
'hierachical': 'hierarchical',
'hierachies': 'hierarchies',
'hierachy': 'hierarchy',
'hierarcical': 'hierarchical',
'hierarcy': 'hierarchy',
'honourary': 'honorary',
'humourous': 'humorous',
'idiosyncracy': 'idiosyncrasy',
'immediatly': 'immediately',
'inaccessable': 'inaccessible',
'inbetween': 'between',
'incidently': 'incidentally',
'independant': 'independent',
'infomation': 'information',
'interupt': 'interrupt',
'intial': 'initial',
'intially': 'initially',
'irresistable': 'irresistible',
'jist': 'gist',
'knowlege': 'knowledge',
'lenght': 'length',
'liase': 'liaise',
'liason': 'liaison',
'libary': 'library',
'maching': 'machine, marching, matching',
'millenia': 'millennia',
'millenium': 'millennium',
'neccessary': 'necessary',
'negotation': 'negotiation',
'nontheless': 'nonetheless',
'noticable': 'noticeable',
'occassion': 'occasion',
'occassional': 'occasional',
'occassionally': 'occasionally',
'occurance': 'occurrence',
'occured': 'occurred',
'occurence': 'occurrence',
'occuring': 'occurring',
'ommision': 'omission',
'orginal': 'original',
'orginally': 'originally',
'pavillion': 'pavilion',
'peice': 'piece',
'persistant': 'persistent',
'politican': 'politician',
'posession': 'possession',
'possiblity': 'possibility',
'preceed': 'precede',
'preceeded': 'preceded',
'preceeding': 'preceding',
'preceeds': 'precedes',
'prefered': 'preferred',
'prefering': 'preferring',
'presense': 'presence',
'proces': 'process',
'propoganda': 'propaganda',
'psuedo': 'pseudo',
'publically': 'publicly',
'realy': 'really',
'reciept': 'receipt',
'recieve': 'receive',
'recieved': 'received',
'reciever': 'receiver',
'recievers': 'receivers',
'recieves': 'receives',
'recieving': 'receiving',
'recomend': 'recommend',
'recomended': 'recommended',
'recomending': 'recommending',
'recomends': 'recommends',
'recurse': 'recur',
'recurses': 'recurs',
'recursing': 'recurring',
'refered': 'referred',
'refering': 'referring',
'religous': 'religious',
'rember': 'remember',
'remeber': 'remember',
'repetion': 'repetition',
'reponsible': 'responsible',
'resistence': 'resistance',
'retreive': 'retrieve',
'seige': 'siege',
'sence': 'since',
'seperate': 'separate',
'seperated': 'separated',
'seperately': 'separately',
'seperates': 'separates',
'similiar': 'similar',
'somwhere': 'somewhere',
'sould': 'could, should, sold, soul',
'sturcture': 'structure',
'succesful': 'successful',
'succesfully': 'successfully',
'successfull': 'successful',
'sucessful': 'successful',
'supercede': 'supersede',
'supress': 'suppress',
'supressed': 'suppressed',
'suprise': 'surprise',
'suprisingly': 'surprisingly',
'sytem': 'system',
'tendancy': 'tendency',
'the the': 'the',
'the these': 'these',
'therefor': 'therefore',
'threshhold': 'threshold',
'tolerence': 'tolerance',
'tommorow': 'tomorrow',
'tommorrow': 'tomorrow',
'tounge': 'tongue',
'tranformed': 'transformed',
'transfered': 'transferred',
'truely': 'truly',
'trustworthyness': 'trustworthiness',
'unforseen': 'unforeseen',
'unfortunatly': 'unfortunately',
'unsuccessfull': 'unsuccessful',
'untill': 'until',
'upto': 'up to',
'whereever': 'wherever',
'wich': 'which',
'wierd': 'weird',
'wtih': 'with',
}

# Terms that are reported with altMsg rather than spellMsg: the key is the
# discouraged form and the value is the form to use instead.  The table is
# currently empty.
alternates = {
}


def _compileEntries(table):
    """Turn a word table into a list of matcher entries.

    table maps the text to look for to the suggested replacement.

    Returns a list of (regex, word, replacement) tuples, one per table
    item, where regex matches word case-insensitively and only between
    word boundaries.
    """
    entries = []
    # items() instead of iteritems(): the latter exists only on Python 2,
    # while items() works on both Python 2 and Python 3.
    for word, replacement in table.items():
        # Escape the word so that it is always matched literally, even if
        # an entry containing a regex metacharacter is added later.
        regex = re.compile(r'\b%s\b' % re.escape(word), re.IGNORECASE)
        entries.append((regex, word, replacement))
    return entries


# Pre-compiled once at import time so that checking a line only has to run
# the searches.
misspellingREs = _compileEntries(misspellings)
alternateREs = _compileEntries(alternates)

def check(errmsg, output, filename, line, lineno, entry):
    """Test one line of text against one matcher entry.

    entry is indexed as (compiled regex, word, replacement).  When the
    regex is found anywhere in line, errmsg is formatted with
    (filename, lineno, word, replacement) and written to output.

    Returns 1 if a diagnostic was written, 0 otherwise.
    """
    # Guard clause: nothing to report when the pattern is absent.
    if not entry[0].search(line):
        return 0

    word, replacement = entry[1], entry[2]
    output.write(errmsg % (filename, lineno, word, replacement))
    return 1

def spellcheck(fh, filename=None, output=sys.stderr, **opts):
    """Scan every line of fh for known misspellings and alternate terms.

    fh       -- seekable, iterable file object to scan
    filename -- name used in diagnostics; falls back to fh.name when empty
    output   -- object whose write() receives the diagnostics
    opts     -- accepted and ignored

    Returns 1 if at least one diagnostic was written, 0 otherwise.
    """
    if not filename:
        filename = fh.name

    # Always start from the top of the file, wherever the handle currently
    # points (presumably because an earlier check may already have read it).
    fh.seek(0)

    found = 0
    for lineno, line in enumerate(fh, 1):
        # Per line: all misspellings first, then all alternates.
        for msg, table in ((spellMsg, misspellingREs),
                           (altMsg, alternateREs)):
            for entry in table:
                found |= check(msg, output, filename, line, lineno, entry)

    return found
3 changes: 2 additions & 1 deletion usr/src/tools/onbld/Checks/__init__.py
Expand Up @@ -41,4 +41,5 @@
'JStyle',
'Keywords',
'ManLint',
'Mapfile']
'Mapfile',
'SpellCheck']
5 changes: 3 additions & 2 deletions usr/src/tools/scripts/git-pbchk.py
Expand Up @@ -48,7 +48,7 @@

from onbld.Scm import Ignore
from onbld.Checks import Comments, Copyright, CStyle, HdrChk
from onbld.Checks import JStyle, Keywords, ManLint, Mapfile
from onbld.Checks import JStyle, Keywords, ManLint, Mapfile, SpellCheck


class GitError(Exception):
Expand Down Expand Up @@ -291,11 +291,12 @@ def jstyle(root, parent, flist, output):

def manlint(root, parent, flist, output):
ret = 0
output.write("Man page format:\n")
output.write("Man page format/spelling:\n")
ManfileRE = re.compile(r'.*\.[0-9][a-z]*$', re.IGNORECASE)
for f in flist(lambda x: ManfileRE.match(x)):
fh = open(f, 'r')
ret |= ManLint.manlint(fh, output=output, picky=True)
ret |= SpellCheck.spellcheck(fh, output=output)
fh.close()
return ret

Expand Down

1 comment on commit 71af3be

@andy-js
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hey! Some of those are British spellings, not misspellings.

Please sign in to comment.