From 71af3be340c57171837478555e2eb0d496318cfc Mon Sep 17 00:00:00 2001
From: Cody Peter Mello
Date: Sat, 9 Apr 2016 23:16:01 +0000
Subject: [PATCH] 7040 Detect common spelling errors in manual pages

Reviewed by: Marcel Telka
Approved by: Dan McDonald
---
 .../pkg/manifests/developer-build-onbld.mf |   2 +
 usr/src/tools/onbld/Checks/Makefile        |   2 +
 usr/src/tools/onbld/Checks/SpellCheck.py   | 311 ++++++++++++++++++
 usr/src/tools/onbld/Checks/__init__.py     |   3 +-
 usr/src/tools/scripts/git-pbchk.py         |   5 +-
 5 files changed, 320 insertions(+), 3 deletions(-)
 create mode 100644 usr/src/tools/onbld/Checks/SpellCheck.py

diff --git a/usr/src/pkg/manifests/developer-build-onbld.mf b/usr/src/pkg/manifests/developer-build-onbld.mf
index a02522c4edcc..85d11480c241 100644
--- a/usr/src/pkg/manifests/developer-build-onbld.mf
+++ b/usr/src/pkg/manifests/developer-build-onbld.mf
@@ -168,6 +168,8 @@ file path=opt/onbld/lib/python2.6/onbld/Checks/Mapfile.py mode=0444
 file path=opt/onbld/lib/python2.6/onbld/Checks/Mapfile.pyc mode=0444
 file path=opt/onbld/lib/python2.6/onbld/Checks/ProcessCheck.py mode=0444
 file path=opt/onbld/lib/python2.6/onbld/Checks/ProcessCheck.pyc mode=0444
+file path=opt/onbld/lib/python2.6/onbld/Checks/SpellCheck.py mode=0444
+file path=opt/onbld/lib/python2.6/onbld/Checks/SpellCheck.pyc mode=0444
 file path=opt/onbld/lib/python2.6/onbld/Checks/__init__.py mode=0444
 file path=opt/onbld/lib/python2.6/onbld/Checks/__init__.pyc mode=0444
 file path=opt/onbld/lib/python2.6/onbld/Scm/Backup.py mode=0444
diff --git a/usr/src/tools/onbld/Checks/Makefile b/usr/src/tools/onbld/Checks/Makefile
index 413a58098133..02f13b8351e0 100644
--- a/usr/src/tools/onbld/Checks/Makefile
+++ b/usr/src/tools/onbld/Checks/Makefile
@@ -25,6 +25,7 @@
 
 # Copyright 2010, Richard Lowe
 # Copyright 2014 Garrett D'Amore
+# Copyright 2016, Joyent, Inc.
 
 include $(SRC)/Makefile.master
 include ../../Makefile.tools
@@ -42,6 +43,7 @@ PYSRCS = \
 	ManLint.py \
 	Mapfile.py \
 	ProcessCheck.py \
+	SpellCheck.py \
 	__init__.py
 
 PYOBJS = $(PYSRCS:%.py=%.pyc)
diff --git a/usr/src/tools/onbld/Checks/SpellCheck.py b/usr/src/tools/onbld/Checks/SpellCheck.py
new file mode 100644
index 000000000000..e8320243f1bd
--- /dev/null
+++ b/usr/src/tools/onbld/Checks/SpellCheck.py
@@ -0,0 +1,311 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2016 Joyent, Inc.
+#
+
+import re, sys
+
+#
+# Message templates; the arguments are the file name, the line number,
+# the offending text, and the suggested replacement.
+#
+spellMsg = '%s: Line %d contains "%s", a common misspelling of "%s"\n'
+altMsg = '%s: Line %d contains "%s"; please use "%s" instead for consistency with other documentation\n'
+
+#
+# Common misspellings, mapped to the correction(s) to suggest.
+#
+misspellings = {
+	'absense': 'absence',
+	'accessable': 'accessible',
+	'accomodate': 'accommodate',
+	'accomodation': 'accommodation',
+	'accross': 'across',
+	'acheive': 'achieve',
+	'addional': 'additional',
+	'addres': 'address',
+	'admininistrative': 'administrative',
+	'adminstered': 'administered',
+	'adminstrate': 'administrate',
+	'adminstration': 'administration',
+	'adminstrative': 'administrative',
+	'adminstrator': 'administrator',
+	'admissability': 'admissibility',
+	'adress': 'address',
+	'adressable': 'addressable',
+	'adressed': 'addressed',
+	'adressing': 'addressing, dressing',
+	'aginst': 'against',
+	'agression': 'aggression',
+	'agressive': 'aggressive',
+	'alot': 'a lot, allot',
+	'and and': 'and',
+	'apparantly': 'apparently',
+	'appearence': 'appearance',
+	'arguement': 'argument',
+	'assasination': 'assassination',
+	'auxilliary': 'auxiliary',
+	'basicly': 'basically',
+	'begining': 'beginning',
+	'belive': 'believe',
+	'beteen': 'between',
+	'betwen': 'between',
+	'beween': 'between',
+	'bewteen': 'between',
+	'bizzare': 'bizarre',
+	'buisness': 'business',
+	'calender': 'calendar',
+	'cemetary': 'cemetery',
+	'chauffer': 'chauffeur',
+	'collegue': 'colleague',
+	'comming': 'coming',
+	'commited': 'committed',
+	'commitee': 'committee',
+	'commiting': 'committing',
+	'comparision': 'comparison',
+	'comparisions': 'comparisons',
+	'compatability': 'compatibility',
+	'compatable': 'compatible',
+	'compatablity': 'compatibility',
+	'compatiable': 'compatible',
+	'compatiblity': 'compatibility',
+	'completly': 'completely',
+	'concious': 'conscious',
+	'condidtion': 'condition',
+	'conected': 'connected',
+	'conjuction': 'conjunction',
+	'continous': 'continuous',
+	'curiousity': 'curiosity',
+	'deamon': 'daemon',
+	'definately': 'definitely',
+	'desireable': 'desirable',
+	'diffrent': 'different',
+	'dilemna': 'dilemma',
+	'dissapear': 'disappear',
+	'dissapoint': 'disappoint',
+	'ecstacy': 'ecstasy',
+	'embarass': 'embarrass',
+	'enviroment': 'environment',
+	'exept': 'except',
+	'existance': 'existence',
+	'familar': 'familiar',
+	'finaly': 'finally',
+	'folowing': 'following',
+	'foriegn': 'foreign',
+	'forseeable': 'foreseeable',
+	'fourty': 'forty',
+	'foward': 'forward',
+	'freind': 'friend',
+	'futher': 'further',
+	'gaurd': 'guard',
+	'glamourous': 'glamorous',
+	'goverment': 'government',
+	'happend': 'happened',
+	'harrassment': 'harassment',
+	'hierachical': 'hierarchical',
+	'hierachies': 'hierarchies',
+	'hierachy': 'hierarchy',
+	'hierarcical': 'hierarchical',
+	'hierarcy': 'hierarchy',
+	'honourary': 'honorary',
+	'humourous': 'humorous',
+	'idiosyncracy': 'idiosyncrasy',
+	'immediatly': 'immediately',
+	'inaccessable': 'inaccessible',
+	'inbetween': 'between',
+	'incidently': 'incidentally',
+	'independant': 'independent',
+	'infomation': 'information',
+	'interupt': 'interrupt',
+	'intial': 'initial',
+	'intially': 'initially',
+	'irresistable': 'irresistible',
+	'jist': 'gist',
+	'knowlege': 'knowledge',
+	'lenght': 'length',
+	'liase': 'liaise',
+	'liason': 'liaison',
+	'libary': 'library',
+	'maching': 'machine, marching, matching',
+	'millenia': 'millennia',
+	'millenium': 'millennium',
+	'neccessary': 'necessary',
+	'negotation': 'negotiation',
+	'nontheless': 'nonetheless',
+	'noticable': 'noticeable',
+	'occassion': 'occasion',
+	'occassional': 'occasional',
+	'occassionally': 'occasionally',
+	'occurance': 'occurrence',
+	'occured': 'occurred',
+	'occurence': 'occurrence',
+	'occuring': 'occurring',
+	'ommision': 'omission',
+	'orginal': 'original',
+	'orginally': 'originally',
+	'pavillion': 'pavilion',
+	'peice': 'piece',
+	'persistant': 'persistent',
+	'politican': 'politician',
+	'posession': 'possession',
+	'possiblity': 'possibility',
+	'preceed': 'precede',
+	'preceeded': 'preceded',
+	'preceeding': 'preceding',
+	'preceeds': 'precedes',
+	'prefered': 'preferred',
+	'prefering': 'preferring',
+	'presense': 'presence',
+	'proces': 'process',
+	'propoganda': 'propaganda',
+	'psuedo': 'pseudo',
+	'publically': 'publicly',
+	'realy': 'really',
+	'reciept': 'receipt',
+	'recieve': 'receive',
+	'recieved': 'received',
+	'reciever': 'receiver',
+	'recievers': 'receivers',
+	'recieves': 'receives',
+	'recieving': 'receiving',
+	'recomend': 'recommend',
+	'recomended': 'recommended',
+	'recomending': 'recommending',
+	'recomends': 'recommends',
+	'recurse': 'recur',
+	'recurses': 'recurs',
+	'recursing': 'recurring',
+	'refered': 'referred',
+	'refering': 'referring',
+	'religous': 'religious',
+	'rember': 'remember',
+	'remeber': 'remember',
+	'repetion': 'repetition',
+	'reponsible': 'responsible',
+	'resistence': 'resistance',
+	'retreive': 'retrieve',
+	'seige': 'siege',
+	'sence': 'since',
+	'seperate': 'separate',
+	'seperated': 'separated',
+	'seperately': 'separately',
+	'seperates': 'separates',
+	'similiar': 'similar',
+	'somwhere': 'somewhere',
+	'sould': 'could, should, sold, soul',
+	'sturcture': 'structure',
+	'succesful': 'successful',
+	'succesfully': 'successfully',
+	'successfull': 'successful',
+	'sucessful': 'successful',
+	'supercede': 'supersede',
+	'supress': 'suppress',
+	'supressed': 'suppressed',
+	'suprise': 'surprise',
+	'suprisingly': 'surprisingly',
+	'sytem': 'system',
+	'tendancy': 'tendency',
+	'the the': 'the',
+	'the these': 'these',
+	'therefor': 'therefore',
+	'threshhold': 'threshold',
+	'tolerence': 'tolerance',
+	'tommorow': 'tomorrow',
+	'tommorrow': 'tomorrow',
+	'tounge': 'tongue',
+	'tranformed': 'transformed',
+	'transfered': 'transferred',
+	'truely': 'truly',
+	'trustworthyness': 'trustworthiness',
+	'unforseen': 'unforeseen',
+	'unfortunatly': 'unfortunately',
+	'unsuccessfull': 'unsuccessful',
+	'untill': 'until',
+	'upto': 'up to',
+	'whereever': 'wherever',
+	'wich': 'which',
+	'wierd': 'weird',
+	'wtih': 'with',
+}
+
+#
+# Words that are spelled correctly, but for which another form is
+# preferred for consistency with other documentation.
+#
+alternates = {
+}
+
+#
+# Compile each entry once, at import time, into a case-insensitive,
+# whole-word pattern.
+#
+misspellingREs = []
+alternateREs = []
+
+for misspelling, correct in misspellings.iteritems():
+	regex = re.compile(r'\b%s\b' % (misspelling), re.IGNORECASE)
+	entry = (regex, misspelling, correct)
+	misspellingREs.append(entry)
+
+for alternate, correct in alternates.iteritems():
+	regex = re.compile(r'\b%s\b' % (alternate), re.IGNORECASE)
+	entry = (regex, alternate, correct)
+	alternateREs.append(entry)
+
+def check(errmsg, output, filename, line, lineno, entry):
+	"""Write errmsg to output if entry's pattern is found in line.
+
+	entry is a (regex, word, replacement) tuple.  Returns 1 if the
+	line matched and 0 otherwise.
+	"""
+	if entry[0].search(line):
+		output.write(errmsg % (filename, lineno, entry[1], entry[2]))
+		return 1
+	else:
+		return 0
+
+def spellcheck(fh, filename=None, output=sys.stderr, **opts):
+	"""Report common misspellings found in the open file fh.
+
+	The file is rewound first, so fh may already have been read.  One
+	message is written to output for each pattern matched on a line.
+	filename defaults to fh.name.  Returns 1 if anything was reported
+	and 0 otherwise.
+	"""
+	lineno = 1
+	ret = 0
+
+	if not filename:
+		filename = fh.name
+
+	fh.seek(0)
+	for line in fh:
+		for entry in misspellingREs:
+			ret |= check(spellMsg, output, filename, line,
+			    lineno, entry)
+		for entry in alternateREs:
+			ret |= check(altMsg, output, filename, line,
+			    lineno, entry)
+		lineno += 1
+
+	return ret
diff --git a/usr/src/tools/onbld/Checks/__init__.py b/usr/src/tools/onbld/Checks/__init__.py
index 7051b0c56577..9fdd25d43c13 100644
--- a/usr/src/tools/onbld/Checks/__init__.py
+++ b/usr/src/tools/onbld/Checks/__init__.py
@@ -41,4 +41,5 @@
 	'JStyle',
 	'Keywords',
 	'ManLint',
-	'Mapfile']
+	'Mapfile',
+	'SpellCheck']
diff --git a/usr/src/tools/scripts/git-pbchk.py b/usr/src/tools/scripts/git-pbchk.py
index 92303f967f7a..8dea5a8785c8 100644
--- a/usr/src/tools/scripts/git-pbchk.py
+++ b/usr/src/tools/scripts/git-pbchk.py
@@ -48,7 +48,7 @@
 
 from onbld.Scm import Ignore
 from onbld.Checks import Comments, Copyright, CStyle, HdrChk
-from onbld.Checks import JStyle, Keywords, ManLint, Mapfile
+from onbld.Checks import JStyle, Keywords, ManLint, Mapfile, SpellCheck
 
 
 class GitError(Exception):
@@ -291,11 +291,12 @@ def jstyle(root, parent, flist, output):
 
 def manlint(root, parent, flist, output):
     ret = 0
-    output.write("Man page format:\n")
+    output.write("Man page format/spelling:\n")
     ManfileRE = re.compile(r'.*\.[0-9][a-z]*$', re.IGNORECASE)
     for f in flist(lambda x: ManfileRE.match(x)):
         fh = open(f, 'r')
         ret |= ManLint.manlint(fh, output=output, picky=True)
+        ret |= SpellCheck.spellcheck(fh, output=output)
         fh.close()
     return ret
 