Skip to content
Find file
Fetching contributors…
Cannot retrieve contributors at this time
582 lines (570 sloc) 16.2 KB
package wordlist;

# Enable strictures so typos in variable names and accidental use of
# undefined values are reported instead of silently ignored.
use strict;
use warnings;
#stolen from http://cpansearch.perl.org/src/APOCAL/Pod-Spell-CommonMistakes-1.000/lib/Pod/Spell/CommonMistakes/WordList.pm
# Main dictionary: lower-case misspelling => suggested correction.
# Each key appears exactly once; keys are looked up after lower-casing the
# candidate word, so they must stay lower-case.
my %common = (
    "abandonning" => "abandoning",
    "abigious" => "ambiguous",
    "abitrate" => "arbitrate",
    "abov" => "above",
    "absense" => "absence",
    "absolut" => "absolute",
    "absoulte" => "absolute",
    "acceleratoin" => "acceleration",
    "accelleration" => "acceleration",
    "accesing" => "accessing",
    "accesnt" => "accent",
    "accessable" => "accessible",
    "accidentaly" => "accidentally",
    "accidentually" => "accidentally",
    "accomodate" => "accommodate",
    "accomodates" => "accommodates",
    "accout" => "account",
    "acess" => "access",
    "acessable" => "accessible",
    "ackowledge" => "acknowledge",
    "ackowledged" => "acknowledged",
    "acknowldegement" => "acknowledgement",
    "acording" => "according",
    "activete" => "activate",
    "acumulating" => "accumulating",
    "addional" => "additional",
    "additionaly" => "additionally",
    "addreses" => "addresses",
    "aditional" => "additional",
    "aditionally" => "additionally",
    "aditionaly" => "additionally",
    "adress" => "address",
    "adresses" => "addresses",
    "adviced" => "advised",
    "afecting" => "affecting",
    "albumns" => "albums",
    "alegorical" => "allegorical",
    "algorith" => "algorithm",
    "algorithmical" => "algorithmically",
    "algoritm" => "algorithm",
    "algoritms" => "algorithms",
    "algorrithm" => "algorithm",
    "algorritm" => "algorithm",
    "allpication" => "application",
    "alot" => "a lot",
    "alow" => "allow",
    "alows" => "allows",
    "altough" => "although",
    "ambigious" => "ambiguous",
    "amoung" => "among",
    "amout" => "amount",
    "analysator" => "analyzer",
    "ang" => "and",
    "anniversery" => "anniversary",
    "annoucement" => "announcement",
    "anomolies" => "anomalies",
    "anomoly" => "anomaly",
    "aplication" => "application",
    "appearence" => "appearance",
    "appropiate" => "appropriate",
    "appropriatly" => "appropriately",
    "aquired" => "acquired",
    "arbitary" => "arbitrary",
    "architechture" => "architecture",
    "arguement" => "argument",
    "arguements" => "arguments",
    "aritmetic" => "arithmetic",
    "arne't" => "aren't",
    "arraival" => "arrival",
    "artifical" => "artificial",
    "artillary" => "artillery",
    "assigment" => "assignment",
    "assigments" => "assignments",
    "assistent" => "assistant",
    "asuming" => "assuming",
    "atomatically" => "automatically",
    "attemps" => "attempts",
    "attruibutes" => "attributes",
    "authentification" => "authentication",
    "automaticaly" => "automatically",
    "automaticly" => "automatically",
    "automatize" => "automate",
    "automatized" => "automated",
    "automatizes" => "automates",
    "autonymous" => "autonomous",
    "auxilliary" => "auxiliary",
    "avaiable" => "available",
    "availabled" => "available",
    "availablity" => "availability",
    "availale" => "available",
    "availavility" => "availability",
    "availble" => "available",
    "availiable" => "available",
    "avaliable" => "available",
    "backgroud" => "background",
    "bahavior" => "behavior",
    "baloon" => "balloon",
    "baloons" => "balloons",
    "batery" => "battery",
    "becomming" => "becoming",
    "becuase" => "because",
    "begining" => "beginning",
    "calender" => "calendar",
    "cancelation" => "cancellation",
    "capabilites" => "capabilities",
    "capatibilities" => "capabilities",
    "cariage" => "carriage",
    "challange" => "challenge",
    "challanges" => "challenges",
    "changable" => "changeable",
    "charachter" => "character",
    "charachters" => "characters",
    "charcter" => "character",
    "childs" => "children",
    "chnage" => "change",
    "chnages" => "changes",
    "choosen" => "chosen",
    "collapsable" => "collapsible",
    "colorfull" => "colorful",
    "comand" => "command",
    "comit" => "commit",
    "commerical" => "commercial",
    "comminucation" => "communication",
    "commited" => "committed",
    "commiting" => "committing",
    "committ" => "commit",
    "commoditiy" => "commodity",
    "compability" => "compatibility",
    "compatability" => "compatibility",
    "compatable" => "compatible",
    "compatibiliy" => "compatibility",
    "compatibilty" => "compatibility",
    "compleatly" => "completely",
    "completly" => "completely",
    "complient" => "compliant",
    "compres" => "compress",
    "compresion" => "compression",
    "configuratoin" => "configuration",
    "conjuction" => "conjunction",
    "connectinos" => "connections",
    "connnection" => "connection",
    "connnections" => "connections",
    "consistancy" => "consistency",
    "containes" => "contains",
    "containts" => "contains",
    "contence" => "contents",
    "continous" => "continuous",
    "continueing" => "continuing",
    "contraints" => "constraints",
    "convertor" => "converter",
    "convinient" => "convenient",
    "corected" => "corrected",
    "correponding" => "corresponding",
    "correponds" => "corresponds",
    "correspoding" => "corresponding",
    "cryptocraphic" => "cryptographic",
    "curently" => "currently",
    "dafault" => "default",
    "deafult" => "default",
    "deamon" => "daemon",
    "debain" => "Debian",
    "debians" => "Debian's",
    "decompres" => "decompress",
    "definate" => "definite",
    "definately" => "definitely",
    "delemiter" => "delimiter",
    "dependancies" => "dependencies",
    "dependancy" => "dependency",
    "dependant" => "dependent",
    "desactivate" => "deactivate",
    "detabase" => "database",
    "developement" => "development",
    "developped" => "developed",
    "developpement" => "development",
    "developper" => "developer",
    "deveolpment" => "development",
    "devided" => "divided",
    "dictionnary" => "dictionary",
    "diplay" => "display",
    "disapeared" => "disappeared",
    "discontiguous" => "noncontiguous",
    "dispertion" => "dispersion",
    "dissapears" => "disappears",
    "docuentation" => "documentation",
    "documantation" => "documentation",
    "documentaion" => "documentation",
    "dont" => "don't",
    "downlad" => "download",
    "downlads" => "downloads",
    "easilly" => "easily",
    "ecspecially" => "especially",
    "edditable" => "editable",
    "editting" => "editing",
    "eletronic" => "electronic",
    "enchanced" => "enhanced",
    "encorporating" => "incorporating",
    "endianess" => "endianness",
    "enhaced" => "enhanced",
    "enlightnment" => "enlightenment",
    "enocded" => "encoded",
    "enterily" => "entirely",
    "enviroiment" => "environment",
    "enviroment" => "environment",
    "environement" => "environment",
    "environent" => "environment",
    "equivelant" => "equivalent",
    "equivilant" => "equivalent",
    "excecutable" => "executable",
    "exceded" => "exceeded",
    "excellant" => "excellent",
    "exlcude" => "exclude",
    "exlcusive" => "exclusive",
    "expecially" => "especially",
    "explicitely" => "explicitly",
    "expresion" => "expression",
    "exprimental" => "experimental",
    "extention" => "extension",
    "failuer" => "failure",
    "familar" => "familiar",
    "fatser" => "faster",
    "feauture" => "feature",
    "feautures" => "features",
    "fetaure" => "feature",
    "fetaures" => "features",
    "forse" => "force",
    "fortan" => "fortran",
    "forwardig" => "forwarding",
    "framwork" => "framework",
    "fuction" => "function",
    "fuctions" => "functions",
    "functionaly" => "functionally",
    "functionnality" => "functionality",
    "functonality" => "functionality",
    "futhermore" => "furthermore",
    "generiously" => "generously",
    "grahical" => "graphical",
    "grahpical" => "graphical",
    "grapic" => "graphic",
    "guage" => "gauge",
    "halfs" => "halves",
    "heirarchically" => "hierarchically",
    "helpfull" => "helpful",
    "hierachy" => "hierarchy",
    "hierarchie" => "hierarchy",
    "howver" => "however",
    "immeadiately" => "immediately",
    "implemantation" => "implementation",
    "implemention" => "implementation",
    "incomming" => "incoming",
    "incompatabilities" => "incompatibilities",
    "incompatable" => "incompatible",
    "inconsistant" => "inconsistent",
    "indendation" => "indentation",
    "indended" => "intended",
    "independant" => "independent",
    "informatiom" => "information",
    "informations" => "information",
    "infromation" => "information",
    "initalize" => "initialize",
    "initators" => "initiators",
    "initializiation" => "initialization",
    "inofficial" => "unofficial",
    "integreated" => "integrated",
    "integrety" => "integrity",
    "integrey" => "integrity",
    "intendet" => "intended",
    "interchangable" => "interchangeable",
    "intermittant" => "intermittent",
    "interupted" => "interrupted",
    "jave" => "java",
    "langage" => "language",
    "langauage" => "language",
    "langugage" => "language",
    "lauch" => "launch",
    "lenght" => "length",
    "lesstiff" => "lesstif",
    "libaries" => "libraries",
    "libary" => "library",
    "libraris" => "libraries",
    "licenceing" => "licencing",
    "loggging" => "logging",
    "loggin" => "login",
    "logile" => "logfile",
    "machinary" => "machinery",
    "maintainance" => "maintenance",
    "maintainence" => "maintenance",
    "makeing" => "making",
    "malplace" => "misplace",
    "malplaced" => "misplaced",
    "managable" => "manageable",
    "manoeuvering" => "maneuvering",
    "mathimatical" => "mathematical",
    "mathimatic" => "mathematic",
    "mathimatics" => "mathematics",
    "ment" => "meant",
    "messsage" => "message",
    "messsages" => "messages",
    "microprocesspr" => "microprocessor",
    "milliseonds" => "milliseconds",
    "miscelleneous" => "miscellaneous",
    "misformed" => "malformed",
    "mispelled" => "misspelled",
    "mmnemonic" => "mnemonic",
    "modulues" => "modules",
    "monochorome" => "monochrome",
    "monochromo" => "monochrome",
    "monocrome" => "monochrome",
    "mroe" => "more",
    "multidimensionnal" => "multidimensional",
    "mulitplied" => "multiplied",
    "mutiple" => "multiple",
    "nam" => "name",
    "nams" => "names",
    "navagating" => "navigating",
    "nead" => "need",
    "neccesary" => "necessary",
    "neccessary" => "necessary",
    "necesary" => "necessary",
    "negotation" => "negotiation",
    "nescessary" => "necessary",
    "nessessary" => "necessary",
    "noticable" => "noticeable",
    "notications" => "notifications",
    "o'caml" => "OCaml",
    "omitt" => "omit",
    "ommitted" => "omitted",
    "onself" => "oneself",
    "optionnal" => "optional",
    "optmizations" => "optimizations",
    "orientatied" => "orientated",
    "orientied" => "oriented",
    "ouput" => "output",
    "overaall" => "overall",
    "overriden" => "overridden",
    "pacakge" => "package",
    "pachage" => "package",
    "packacge" => "package",
    "packege" => "package",
    "packge" => "package",
    "pakage" => "package",
    "pallette" => "palette",
    "paramameters" => "parameters",
    "paramater" => "parameter",
    "parametes" => "parameters",
    "paramter" => "parameter",
    "paramters" => "parameters",
    "particularily" => "particularly",
    "pased" => "passed",
    "peprocessor" => "preprocessor",
    "perfoming" => "performing",
    "permissons" => "permissions",
    "persistant" => "persistent",
    "plattform" => "platform",
    "pleaes" => "please",
    "ploting" => "plotting",
    "posible" => "possible",
    "postgressql" => "PostgreSQL",
    "powerfull" => "powerful",
    "preceeded" => "preceded",
    "preceeding" => "preceding",
    "precendence" => "precedence",
    "precission" => "precision",
    "prefered" => "preferred",
    "prefferably" => "preferably",
    "prepaired" => "prepared",
    "primative" => "primitive",
    "princliple" => "principle",
    "priorty" => "priority",
    "procceed" => "proceed",
    "proccesors" => "processors",
    "proces" => "process",
    "processessing" => "processing",
    "processpr" => "processor",
    "processsing" => "processing",
    "progams" => "programs",
    "programers" => "programmers",
    "programm" => "program",
    "programms" => "programs",
    "promps" => "prompts",
    "pronnounced" => "pronounced",
    "prononciation" => "pronunciation",
    "pronouce" => "pronounce",
    "pronunce" => "pronounce",
    "propery" => "property",
    "prosess" => "process",
    "protable" => "portable",
    "protcol" => "protocol",
    "protecion" => "protection",
    "protocoll" => "protocol",
    "psychadelic" => "psychedelic",
    "quering" => "querying",
    "reasearch" => "research",
    "reasearcher" => "researcher",
    "reasearchers" => "researchers",
    "recieved" => "received",
    "recieve" => "receive",
    "reciever" => "receiver",
    "recogniced" => "recognised",
    "recognizeable" => "recognizable",
    "recommanded" => "recommended",
    "redircet" => "redirect",
    "redirectrion" => "redirection",
    "reenabled" => "re-enabled",
    "reenable" => "re-enable",
    "reencode" => "re-encode",
    "refence" => "reference",
    "registerd" => "registered",
    "registraration" => "registration",
    "regulamentations" => "regulations",
    "remoote" => "remote",
    "removeable" => "removable",
    "repectively" => "respectively",
    "replacments" => "replacements",
    "replys" => "replies",
    "requiere" => "require",
    "requred" => "required",
    "resizeable" => "resizable",
    "ressize" => "resize",
    "ressource" => "resource",
    "retransmited" => "retransmitted",
    "runned" => "ran",
    "runnning" => "running",
    "safly" => "safely",
    "savable" => "saveable",
    "searchs" => "searches",
    "secund" => "second",
    "separatly" => "separately",
    "sepcify" => "specify",
    "seperated" => "separated",
    "seperately" => "separately",
    "seperate" => "separate",
    "seperatly" => "separately",
    "seperator" => "separator",
    "sequencial" => "sequential",
    "serveral" => "several",
    "setts" => "sets",
    "similiar" => "similar",
    "simliar" => "similar",
    "speach" => "speech",
    "speciefied" => "specified",
    "specifed" => "specified",
    "specificaton" => "specification",
    "specifing" => "specifying",
    "speficied" => "specified",
    "speling" => "spelling",
    "splitted" => "split",
    "staically" => "statically",
    "standardss" => "standards",
    "standart" => "standard",
    "staticly" => "statically",
    "subdirectoires" => "subdirectories",
    "suble" => "subtle",
    "succesfully" => "successfully",
    "succesful" => "successful",
    "sucessfully" => "successfully",
    "superflous" => "superfluous",
    "superseeded" => "superseded",
    "suplied" => "supplied",
    "suport" => "support",
    "suppored" => "supported",
    "supportin" => "supporting",
    "suppoted" => "supported",
    "suppported" => "supported",
    "suppport" => "support",
    "surpresses" => "suppresses",
    "suspicously" => "suspiciously",
    "synax" => "syntax",
    "synchonized" => "synchronized",
    "syncronize" => "synchronize",
    "syncronizing" => "synchronizing",
    "syncronus" => "synchronous",
    "syste" => "system",
    "sythesis" => "synthesis",
    "taht" => "that",
    "throught" => "through",
    "transfering" => "transferring",
    "trasmission" => "transmission",
    "treshold" => "threshold",
    "trigerring" => "triggering",
    "unecessary" => "unnecessary",
    "unexecpted" => "unexpected",
    "unfortunatelly" => "unfortunately",
    "unknonw" => "unknown",
    "unkown" => "unknown",
    "unuseful" => "useless",
    "usefull" => "useful",
    "usera" => "users",
    "usetnet" => "Usenet",
    "usualy" => "usually",
    "utilites" => "utilities",
    "utillities" => "utilities",
    "utilties" => "utilities",
    "utiltity" => "utility",
    "utitlty" => "utility",
    "variantions" => "variations",
    "varient" => "variant",
    "verbse" => "verbose",
    "verisons" => "versions",
    "verison" => "version",
    "verson" => "version",
    "vicefersa" => "vice-versa",
    "visiters" => "visitors",
    "vitual" => "virtual",
    "whataver" => "whatever",
    "wheter" => "whether",
    "wierd" => "weird",
    "yur" => "your",
);
# Extra words contributed by CPAN users, thanks!
# Kept separate from the main list for easier maintenance of the Lintian data.
my %common_cpan = (
    "refering" => "referring",
    "writeable" => "writable",
    "nineth" => "ninth",
    "ommited" => "omitted",
    "omited" => "omitted",
    "requrie" => "require",
    "existant" => "existent",
    "explict" => "explicit",
    "agument" => "argument",
    "destionation" => "destination",
);
# Misspellings spotted in GitHub commits whose purpose was a spelling fix.
my %bypablo = (
    syncronous => 'synchronous',
    "arn't"    => "aren't",
    excption   => 'exception',
    remotly    => 'remotely',
    occured    => 'occurred',
    githuub    => 'github',
    majourity  => 'majority',
    systemm    => 'system',
);
# Fold the supplementary lists into the main dictionary. They are applied in
# order, so an entry in a later list overrides the same key from an earlier one.
for my $extra (\%common_cpan, \%bypablo) {
    @common{ keys %{$extra} } = values %{$extra};
}
# Replace every known misspelling in a piece of text with its correction.
#
# Parameters:
#   $text - the text to correct
#
# Returns the corrected text; undef is passed through unchanged.
# Matching is case-insensitive and limited to whole words, and each
# replacement is re-cased by preserve_case(). For every dictionary entry that
# matched at least once, a "Swapped on ..." line is printed to the currently
# selected output handle.
sub fix_text {
    my $text = shift @_;
    return $text unless defined $text;

    # Sorted keys make the order of the progress messages reproducible
    # between runs (hash order is randomised).
    foreach my $k (sort keys %common) {
        # \Q..\E keeps the key literal, so an entry containing a regex
        # metacharacter can never change what the pattern means.
        if ($text =~ s/\b(\Q$k\E)\b/preserve_case($1, $common{$k})/egi) {
            print "Swapped on $k update to $common{$k}\n";
        }
    }
    return $text;
}
# Re-case a dictionary correction so it follows the capitalisation of the
# misspelled word it replaces.
#
# Parameters:
#   $old - the misspelled word exactly as it appeared in the text
#   $new - the correction taken from the word list
#
# Returns:
#   - $new in upper case when $old contains capitals and no lower-case letter;
#   - $new with its first character capitalised when $old starts with a
#     capital;
#   - $new unchanged otherwise, so corrections that carry their own capitals
#     (e.g. "Debian", "PostgreSQL") keep them.
#
# Case is decided per word rather than per character position. Copying case
# position by position breaks whenever the correction is longer than the
# misspelling or inserts a character: the surplus characters had no case to
# copy and came out in upper case ("abov" -> "abovE", "dont" -> "don'T").
sub preserve_case {
    my ($old, $new) = @_;

    return uc $new      if $old =~ /[A-Z]/ && $old !~ /[a-z]/;
    return ucfirst $new if $old =~ /\A[A-Z]/;
    return $new;
}
# Report whether a piece of text contains at least one known misspelling.
#
# Parameters:
#   $text - the text to inspect
#
# Returns 1 as soon as a whitespace-separated word, lower-cased and stripped
# of surrounding punctuation, is found in the dictionary; returns 0 otherwise,
# including for undefined input. The offending word is printed to the
# currently selected output handle before returning 1.
sub check_common {
    my $text = shift @_;
    return 0 unless defined $text;

    # Logic taken from Lintian::Check::check_spelling(), thanks!
    # split ' ' also skips leading whitespace, so no empty first field.
    foreach my $w (split ' ', $text) {
        my $lcw = lc $w;

        # Drop punctuation stuck to the word ("recieve," or "(adress)") so the
        # check flags the same words fix_text would correct. Inner characters
        # such as the apostrophe in "arn't" are left alone.
        $lcw =~ s/\A\W+//;
        $lcw =~ s/\W+\z//;

        if (exists $common{$lcw}) {
            print "Error was $lcw\n";
            return 1;
        }
    }
    return 0;
}
# Inherit import() from Exporter and export both public entry points by
# default.
use base qw(Exporter);
our @EXPORT = ('fix_text', 'check_common');

# A module file has to finish with a true value.
1;
Jump to Line
Something went wrong with that request. Please try again.