Skip to content
This repository
Browse code

Initial collection of shaaady perl scripts to do the spelling correction magic
  • Loading branch information...
commit c5f98b5c0dce5d8258e3f8f0ac9d0ca6c45ab311 1 parent 3119bd8
Holden Karau authored August 28, 2011
19  find_errors.pl
... ...
@@ -0,0 +1,19 @@
  1
#!/usr/bin/perl
# find_errors.pl
#
# Reads paths from STDIN (or from the files named on the command line), one
# per line, fetches each of them from github and prints an "error: <url>"
# line for every document in which check_common() from wordlist.pl finds a
# listed misspelling.
use strict;
use warnings;

use LWP::UserAgent;
use Text::SpellChecker;

# Load the word list and check_common() from the current directory.  The
# explicit "./" is required because perl 5.26 and later no longer search "."
# when a bare file name is given to require.
require './wordlist.pl';

$| = 1;    # unbuffered STDOUT

my $ua = LWP::UserAgent->new;
print "Hello!\n";

while ( my $path = <> ) {

    # Drop the line terminator so it does not end up inside the URL.
    chomp $path;
    next unless length $path;

    print "Checking $path\n";

    # The path is appended after a slash as-is.
    # NOTE(review): input lines presumably start with "/" themselves, which
    # yields the "//" that fix_pandas.pl matches on - confirm.
    my $url = "https://www.github.com/" . $path;

    # Switch from the tree view to the raw file.  Only a whole path segment
    # is rewritten so that a name merely containing "tree" stays intact.
    $url =~ s{/tree/}{/raw/};

    my $res = $ua->get($url);
    unless ( $res->is_success ) {
        warn "Could not fetch $url: " . $res->status_line . "\n";
        next;
    }

    # Check the document body only, not the status line and headers.
    my $rt = $res->decoded_content;
    next unless defined $rt;

    if ( check_common($rt) ) {
        print "error: $url\n";
    }
}
105  fix_pandas.pl
... ...
@@ -0,0 +1,105 @@
  1
#!/usr/bin/perl -s
# fix_pandas.pl
#
# Reads lines of the form "error: https://www.github.com//<path>" from STDIN
# (or from the files named on the command line), fetches each document again
# from its master branch and, when check_common() from wordlist.pl still
# reports a misspelling, passes the URL to handle_url().
use warnings;

use LWP::UserAgent;
use Text::SpellChecker;
use Net::Twitter;

# Load the word list helpers from the current directory.  The explicit "./"
# is required because perl 5.26 and later no longer search "." when a bare
# file name is given to require.
require './wordlist.pl';

$| = 1;    # unbuffered STDOUT

# Number of URLs handled so far; handle_url() increments it.
my $c = 0;

# github login and API token used by handle_url().  Blank in this file.
my $token = "";
my $user = "";

# Twitter application credentials.  Undefined in this file.
my $consumer_key;
my $consumer_secret;

my $nt = Net::Twitter->new(
    traits          => [qw/OAuth API::REST/],
    consumer_key    => $consumer_key,
    consumer_secret => $consumer_secret,
);

# NOTE(review): no value is passed to either call, so nothing is configured
# here; presumably the access token and secret were stripped before the file
# was committed - supply them as arguments before running.
$nt->access_token();
$nt->access_token_secret();

my $ua = LWP::UserAgent->new;
print "Hello!\n";
print "Connecting to github!\n";
print "Reading input\n";

while ( my $line = <> ) {

    # Only "error:" lines are of interest; the captured path ends at the
    # first whitespace character.
    next unless $line =~ m{error:\s+https://www\.github\.com//(.*?)\s};
    my $path = $1;

    print "Checking $path\n";
    my $url = "https://www.github.com/" . $path;

    # Always look at the master branch, whatever branch was reported.
    $url =~ s{raw/.*?/}{raw/master/};

    my $res = $ua->get($url);
    unless ( $res->is_success ) {
        print "Could not fetch $url: " . $res->status_line . "\n";
        next;
    }

    # Check the document body only, not the status line and headers.
    my $rt = $res->decoded_content;
    $rt = "" unless defined $rt;

    if ( check_common($rt) ) {
        print "Error found: $url\n";
        handle_url($url);
    }
    else {
        print "Error not found anymore :( in $rt\n";
    }
}
  43
+
  44
# handle_url($url)
#
# Forks the repository that $url belongs to, clones the fork into ./foo,
# runs fix_text() over its README, commits and pushes the result, opens a
# pull request against the original repository and tweets the link to it.
#
# $url must look like http.../<owner>/<repo>/raw/master/README...; any other
# URL is ignored.  Relies on the file-level $user, $token, $ua, $nt and $c.
# Dies when the README cannot be read or written.  Returns nothing useful.
sub handle_url {
    my $url = shift @_;

    return unless $url =~ m{http.*/([^/]*)/([^/]*)/raw/master/(README.*)};
    my ( $ruser, $repo, $file ) = ( $1, $2, $3 );

    print "u:" . $ruser . "\n";
    print "r:" . $repo . "\n";
    print "f:" . $file . "\n";

    # These values come from a fetched URL and are interpolated into shell
    # commands and file paths below, so only plain names are accepted.
    my $safe_name = qr/\A[A-Za-z0-9_][A-Za-z0-9_.-]*\z/;
    foreach my $part ( $ruser, $repo, $file ) {
        unless ( $part =~ $safe_name ) {
            print "Refusing unsafe name '$part' in $url\n";
            return;
        }
    }

    # Does our own account already have a repository of that name?
    my $url2 = "https://github.com/" . $user . "/$repo";
    my $res2 = $ua->get($url2);
    $c = $c + 1;

    # The first 15 URLs are processed even when the fork already exists.
    # NOTE(review): the original condition was ($c < 16 && $repo !~ //).  An
    # empty pattern makes perl reuse the last successfully matched regex
    # (the URL pattern above), which a bare repository name does not match,
    # so that term was effectively always true.  Presumably a repository
    # filter was meant to go there - confirm the intent.
    my $force_fork = $c < 16;

    if ( $res2->is_success && !$force_fork ) {
        print "Allready have $repo from $url2 , skipping forward!\n";
        return;
    }

    print "Missing repo $repo.\n";
    my $fork_res = $ua->post(
        "https://github.com/api/v2/json/repos/fork/$ruser/$repo",
        { login => $user, token => $token }
    );
    print "attempting to fork resulted in " . $fork_res->as_string();
    print "Sleeping 30 seconds for fork\n";
    sleep(30);
    print "Sleeping rand()*10\n";
    sleep( 10 * rand() );

    # Clone the fork; the clone is attempted a second time if the first
    # attempt fails.
    my $clone_cmd =
"cd foo; git clone git\@github.com:$user/$repo.git || git clone git\@github.com:$user/$repo.git;cd ..";
    print "runing: $clone_cmd";
    qx{$clone_cmd};

    # Read the whole README.
    my $readme = "foo/$repo/$file";
    open( my $in, '<', $readme )
      or die "Unable to open $file in $repo: $!";
    my $t = do { local $/; <$in> };
    close($in);
    $t = "" unless defined $t;

    # Compute the corrected text before the file is opened for writing, so
    # a failure in between cannot leave a truncated README behind.
    my $fixed = fix_text($t);

    open( my $out, '>', $readme )
      or die "Unable to open $file in $repo: $!";
    print {$out} $fixed;
    close($out) or die "Unable to write $file in $repo: $!";

    # Commit and push; the push is issued twice with a short pause between.
    my $commit_cmd =
"cd foo;cd $repo; git commit -a -m \"Spelling correction in README\"; git push; sleep 1;git push; cd ..;cd ..";
    qx{$commit_cmd};

    my $make_pull_req = 1;
    if ($make_pull_req) {
        print "Sleeping 10 for github to catch up with teh push...\n";
        sleep(10);
        my $pull_request = $ua->post(
            "https://github.com/api/v2/json/pulls/$ruser/$repo",
            {
                login         => $user,
                token         => $token,
                "pull[base]"  => "master",
                "pull[title]" => "Spelling fix",
                "pull[body]"  => "Fix a typo in README",
                "pull[head]"  => "$user:master",
            }
        );
        my $pt = $pull_request->as_string();
        print "Pull request is " . $pt;

        # Announce the pull request only when the response contains its URL.
        if ( $pt =~ m{html_url":"(https://github\.com.*?)"} ) {
            my $url_to_link_to = $1;
            my $twitter_text =
"Fixing a spelling error in $ruser/$repo on github. (see $url_to_link_to for the pull req)";
            print "twteeting :" . $twitter_text;
            $nt->update($twitter_text);
        }
    }

    return;
}
38  targets.pl
... ...
@@ -0,0 +1,38 @@
  1
#!/usr/bin/perl
# targets.pl
#
# Pages through Bing web search results for github README files and prints
# "<title> -> <url>" for every result.
use strict;
use warnings;

use Bing::Search;
use Bing::Search::Source::Web;
use Net::GitHub;
use Text::SpellChecker;

# github credentials; only referenced by the disabled code at the end.
my $token = "";
my $user = "";

# Bing application id.  Blank in this file.
# (Not named $a: that name belongs to sort's comparison variables.)
my $app_id = "";

my $inc = 10;     # results per request, and therefore the offset step
my $max = 100;    # stop once the offset reaches this value

for ( my $offset = 0 ; $offset < $max ; $offset += $inc ) {
    my $search = Bing::Search->new();
    $search->AppId($app_id);
    $search->Query("blob/master/readme.md site:github.com");

    my $source = Bing::Search::Source::Web->new();
    $source->Web_Count($inc);
    $source->Web_Offset($offset);

    # NOTE(review): the offset is written into the parameter hash a second
    # time; presumably a workaround for Web_Offset() not reaching the
    # request - confirm against Bing::Search before removing.
    my $hash = $source->params;
    $hash->{'web.offset'} = $offset;
    $source->params($hash);

    $search->add_source($source);

    my $response = $search->search();
    unless ($response) {
        warn "No response for offset $offset\n";
        next;
    }

    foreach my $result ( @{ $response->results } ) {
        print $result->Title, " -> ", $result->Url, "\n";
    }
}
#
#print "Connecting to github!\n";
#my $github = Net::GitHub::V2->new(login => $user, token => $token);
  38
+
20  targets2.pl
... ...
@@ -0,0 +1,20 @@
  1
#!/usr/bin/perl
# targets2.pl
#
# Walks through github code-search result pages for Markdown READMEs and
# prints every double-quoted string that contains "README.m", one per line.
use strict;
use warnings;

use LWP::UserAgent;

my $ua  = LWP::UserAgent->new;
my $max = 400;    # stop before this start_value
my $inc = 1;      # start_value step between requests

for ( my $v = 1 ; $v < $max ; $v += $inc ) {
    my $url =
"https://github.com/search?type=Code&language=Markdown&q=readme&repo=&langOverride=&x=27&y=30&start_value=$v";
    my $res = $ua->get($url);

    if ( $res->is_success ) {
        my $rt = $res->decoded_content;
        $rt = "" unless defined $rt;

        # The capture must not cross a double quote or a line end.
        # Otherwise a match could start at an earlier quoted value and
        # swallow everything up to the README link.
        while ( $rt =~ /"([^"\n]*README\.m[^"\n]*)"/g ) {
            print "$1\n";
        }
    }
    else {
        warn "Could not fetch $url: " . $res->status_line . "\n";
    }

    # Pause between requests.
    sleep 10;
}
577  wordlist.pl
... ...
@@ -0,0 +1,577 @@
  1
+#stolen from http://cpansearch.perl.org/src/APOCAL/Pod-Spell-CommonMistakes-1.000/lib/Pod/Spell/CommonMistakes/WordList.pm
  2
+my %common = (
  3
+ "abandonning" => "abandoning",
  4
+ "abigious" => "ambiguous",
  5
+ "abitrate" => "arbitrate",
  6
+ "abov" => "above",
  7
+ "absense" => "absence",
  8
+ "absolut" => "absolute",
  9
+ "absoulte" => "absolute",
  10
+ "acceleratoin" => "acceleration",
  11
+ "accelleration" => "acceleration",
  12
+ "accesing" => "accessing",
  13
+ "accesnt" => "accent",
  14
+ "accessable" => "accessible",
  15
+ "accidentaly" => "accidentally",
  16
+ "accidentually" => "accidentally",
  17
+ "accomodate" => "accommodate",
  18
+ "accomodates" => "accommodates",
  19
+ "accout" => "account",
  20
+ "acess" => "access",
  21
+ "acessable" => "accessible",
  22
+ "ackowledge" => "acknowledge",
  23
+ "ackowledged" => "acknowledged",
  24
+ "acknowldegement" => "acknowledgement",
  25
+ "acording" => "according",
  26
+ "activete" => "activate",
  27
+ "acumulating" => "accumulating",
  28
+ "addional" => "additional",
  29
+ "additionaly" => "additionally",
  30
+ "addreses" => "addresses",
  31
+ "aditional" => "additional",
  32
+ "aditionally" => "additionally",
  33
+ "aditionaly" => "additionally",
  34
+ "adress" => "address",
  35
+ "adresses" => "addresses",
  36
+ "adviced" => "advised",
  37
+ "afecting" => "affecting",
  38
+ "albumns" => "albums",
  39
+ "alegorical" => "allegorical",
  40
+ "algorith" => "algorithm",
  41
+ "algorithmical" => "algorithmically",
  42
+ "algoritm" => "algorithm",
  43
+ "algoritms" => "algorithms",
  44
+ "algorrithm" => "algorithm",
  45
+ "algorritm" => "algorithm",
  46
+ "allpication" => "application",
  47
+ "alot" => "a lot",
  48
+ "alow" => "allow",
  49
+ "alows" => "allows",
  50
+ "altough" => "although",
  51
+ "ambigious" => "ambiguous",
  52
+ "amoung" => "among",
  53
+ "amout" => "amount",
  54
+ "analysator" => "analyzer",
  55
+ "ang" => "and",
  56
+ "anniversery" => "anniversary",
  57
+ "annoucement" => "announcement",
  58
+ "anomolies" => "anomalies",
  59
+ "anomoly" => "anomaly",
  60
+ "aplication" => "application",
  61
+ "appearence" => "appearance",
  62
+ "appropiate" => "appropriate",
  63
+ "appropriatly" => "appropriately",
  64
+ "aquired" => "acquired",
  65
+ "arbitary" => "arbitrary",
  66
+ "architechture" => "architecture",
  67
+ "arguement" => "argument",
  68
+ "arguements" => "arguments",
  69
+ "aritmetic" => "arithmetic",
  70
+ "arne't" => "aren't",
  71
+ "arraival" => "arrival",
  72
+ "artifical" => "artificial",
  73
+ "artillary" => "artillery",
  74
+ "assigment" => "assignment",
  75
+ "assigments" => "assignments",
  76
+ "assistent" => "assistant",
  77
+ "asuming" => "assuming",
  78
+ "atomatically" => "automatically",
  79
+ "attemps" => "attempts",
  80
+ "attruibutes" => "attributes",
  81
+ "authentification" => "authentication",
  82
+ "automaticaly" => "automatically",
  83
+ "automaticly" => "automatically",
  84
+ "automatize" => "automate",
  85
+ "automatized" => "automated",
  86
+ "automatizes" => "automates",
  87
+ "autonymous" => "autonomous",
  88
+ "auxilliary" => "auxiliary",
  89
+ "avaiable" => "available",
  90
+ "availabled" => "available",
  91
+ "availablity" => "availability",
  92
+ "availale" => "available",
  93
+ "availavility" => "availability",
  94
+ "availble" => "available",
  95
+ "availble" => "available",
  96
+ "availiable" => "available",
  97
+ "avaliable" => "available",
  98
+ "avaliable" => "available",
  99
+ "backgroud" => "background",
  100
+ "bahavior" => "behavior",
  101
+ "baloon" => "balloon",
  102
+ "baloons" => "balloons",
  103
+ "batery" => "battery",
  104
+ "becomming" => "becoming",
  105
+ "becuase" => "because",
  106
+ "begining" => "beginning",
  107
+ "calender" => "calendar",
  108
+ "cancelation" => "cancellation",
  109
+ "capabilites" => "capabilities",
  110
+ "capatibilities" => "capabilities",
  111
+ "cariage" => "carriage",
  112
+ "challange" => "challenge",
  113
+ "challanges" => "challenges",
  114
+ "changable" => "changeable",
  115
+ "charachter" => "character",
  116
+ "charachters" => "characters",
  117
+ "charcter" => "character",
  118
+ "childs" => "children",
  119
+ "chnage" => "change",
  120
+ "chnages" => "changes",
  121
+ "choosen" => "chosen",
  122
+ "collapsable" => "collapsible",
  123
+ "colorfull" => "colorful",
  124
+ "comand" => "command",
  125
+ "comit" => "commit",
  126
+ "commerical" => "commercial",
  127
+ "comminucation" => "communication",
  128
+ "commited" => "committed",
  129
+ "commiting" => "committing",
  130
+ "committ" => "commit",
  131
+ "commoditiy" => "commodity",
  132
+ "compability" => "compatibility",
  133
+ "compatability" => "compatibility",
  134
+ "compatable" => "compatible",
  135
+ "compatibiliy" => "compatibility",
  136
+ "compatibilty" => "compatibility",
  137
+ "compleatly" => "completely",
  138
+ "completly" => "completely",
  139
+ "complient" => "compliant",
  140
+ "compres" => "compress",
  141
+ "compresion" => "compression",
  142
+ "configuratoin" => "configuration",
  143
+ "conjuction" => "conjunction",
  144
+ "connectinos" => "connections",
  145
+ "connnection" => "connection",
  146
+ "connnections" => "connections",
  147
+ "consistancy" => "consistency",
  148
+ "containes" => "contains",
  149
+ "containts" => "contains",
  150
+ "contence" => "contents",
  151
+ "continous" => "continuous",
  152
+ "continueing" => "continuing",
  153
+ "contraints" => "constraints",
  154
+ "convertor" => "converter",
  155
+ "convinient" => "convenient",
  156
+ "corected" => "corrected",
  157
+ "correponding" => "corresponding",
  158
+ "correponds" => "corresponds",
  159
+ "correspoding" => "corresponding",
  160
+ "cryptocraphic" => "cryptographic",
  161
+ "curently" => "currently",
  162
+ "dafault" => "default",
  163
+ "deafult" => "default",
  164
+ "deamon" => "daemon",
  165
+ "debain" => "Debian",
  166
+ "debians" => "Debian's",
  167
+ "decompres" => "decompress",
  168
+ "definate" => "definite",
  169
+ "definately" => "definitely",
  170
+ "delemiter" => "delimiter",
  171
+ "dependancies" => "dependencies",
  172
+ "dependancy" => "dependency",
  173
+ "dependant" => "dependent",
  174
+ "desactivate" => "deactivate",
  175
+ "detabase" => "database",
  176
+ "developement" => "development",
  177
+ "developped" => "developed",
  178
+ "developpement" => "development",
  179
+ "developper" => "developer",
  180
+ "deveolpment" => "development",
  181
+ "devided" => "divided",
  182
+ "dictionnary" => "dictionary",
  183
+ "diplay" => "display",
  184
+ "disapeared" => "disappeared",
  185
+ "discontiguous" => "noncontiguous",
  186
+ "dispertion" => "dispersion",
  187
+ "dissapears" => "disappears",
  188
+ "docuentation" => "documentation",
  189
+ "documantation" => "documentation",
  190
+ "documentaion" => "documentation",
  191
+ "dont" => "don't",
  192
+ "downlad" => "download",
  193
+ "downlads" => "downloads",
  194
+ "easilly" => "easily",
  195
+ "ecspecially" => "especially",
  196
+ "edditable" => "editable",
  197
+ "editting" => "editing",
  198
+ "eletronic" => "electronic",
  199
+ "enchanced" => "enhanced",
  200
+ "encorporating" => "incorporating",
  201
+ "endianess" => "endianness",
  202
+ "enhaced" => "enhanced",
  203
+ "enlightnment" => "enlightenment",
  204
+ "enocded" => "encoded",
  205
+ "enterily" => "entirely",
  206
+ "enviroiment" => "environment",
  207
+ "enviroment" => "environment",
  208
+ "environement" => "environment",
  209
+ "environent" => "environment",
  210
+ "equivelant" => "equivalent",
  211
+ "equivilant" => "equivalent",
  212
+ "excecutable" => "executable",
  213
+ "exceded" => "exceeded",
  214
+ "excellant" => "excellent",
  215
+ "exlcude" => "exclude",
  216
+ "exlcusive" => "exclusive",
  217
+ "expecially" => "especially",
  218
+ "explicitely" => "explicitly",
  219
+ "expresion" => "expression",
  220
+ "exprimental" => "experimental",
  221
+ "extention" => "extension",
  222
+ "failuer" => "failure",
  223
+ "familar" => "familiar",
  224
+ "fatser" => "faster",
  225
+ "feauture" => "feature",
  226
+ "feautures" => "features",
  227
+ "fetaure" => "feature",
  228
+ "fetaures" => "features",
  229
+ "forse" => "force",
  230
+ "fortan" => "fortran",
  231
+ "forwardig" => "forwarding",
  232
+ "framwork" => "framework",
  233
+ "fuction" => "function",
  234
+ "fuctions" => "functions",
  235
+ "functionaly" => "functionally",
  236
+ "functionnality" => "functionality",
  237
+ "functonality" => "functionality",
  238
+ "futhermore" => "furthermore",
  239
+ "generiously" => "generously",
  240
+ "grahical" => "graphical",
  241
+ "grahpical" => "graphical",
  242
+ "grapic" => "graphic",
  243
+ "guage" => "gauge",
  244
+ "halfs" => "halves",
  245
+ "heirarchically" => "hierarchically",
  246
+ "helpfull" => "helpful",
  247
+ "hierachy" => "hierarchy",
  248
+ "hierarchie" => "hierarchy",
  249
+ "howver" => "however",
  250
+ "immeadiately" => "immediately",
  251
+ "implemantation" => "implementation",
  252
+ "implemention" => "implementation",
  253
+ "incomming" => "incoming",
  254
+ "incompatabilities" => "incompatibilities",
  255
+ "incompatable" => "incompatible",
  256
+ "inconsistant" => "inconsistent",
  257
+ "indendation" => "indentation",
  258
+ "indended" => "intended",
  259
+ "independant" => "independent",
  260
+ "informatiom" => "information",
  261
+ "informations" => "information",
  262
+ "infromation" => "information",
  263
+ "initalize" => "initialize",
  264
+ "initators" => "initiators",
  265
+ "initializiation" => "initialization",
  266
+ "inofficial" => "unofficial",
  267
+ "integreated" => "integrated",
  268
+ "integrety" => "integrity",
  269
+ "integrey" => "integrity",
  270
+ "intendet" => "intended",
  271
+ "interchangable" => "interchangeable",
  272
+ "intermittant" => "intermittent",
  273
+ "interupted" => "interrupted",
  274
+ "jave" => "java",
  275
+ "langage" => "language",
  276
+ "langauage" => "language",
  277
+ "langugage" => "language",
  278
+ "lauch" => "launch",
  279
+ "lenght" => "length",
  280
+ "lesstiff" => "lesstif",
  281
+ "libaries" => "libraries",
  282
+ "libary" => "library",
  283
+ "libraris" => "libraries",
  284
+ "licenceing" => "licencing",
  285
+ "loggging" => "logging",
  286
+ "loggin" => "login",
  287
+ "logile" => "logfile",
  288
+ "machinary" => "machinery",
  289
+ "maintainance" => "maintenance",
  290
+ "maintainence" => "maintenance",
  291
+ "makeing" => "making",
  292
+ "malplace" => "misplace",
  293
+ "malplaced" => "misplaced",
  294
+ "managable" => "manageable",
  295
+ "manoeuvering" => "maneuvering",
  296
+ "mathimatical" => "mathematical",
  297
+ "mathimatic" => "mathematic",
  298
+ "mathimatics" => "mathematics",
  299
+ "ment" => "meant",
  300
+ "messsage" => "message",
  301
+ "messsages" => "messages",
  302
+ "microprocesspr" => "microprocessor",
  303
+ "milliseonds" => "milliseconds",
  304
+ "miscelleneous" => "miscellaneous",
  305
+ "misformed" => "malformed",
  306
+ "mispelled" => "misspelled",
  307
+ "mmnemonic" => "mnemonic",
  308
+ "modulues" => "modules",
  309
+ "monochorome" => "monochrome",
  310
+ "monochromo" => "monochrome",
  311
+ "monocrome" => "monochrome",
  312
+ "mroe" => "more",
  313
+ "multidimensionnal" => "multidimensional",
  314
+ "mulitplied" => "multiplied",
  315
+ "mutiple" => "multiple",
  316
+ "nam" => "name",
  317
+ "nams" => "names",
  318
+ "navagating" => "navigating",
  319
+ "nead" => "need",
  320
+ "neccesary" => "necessary",
  321
+ "neccessary" => "necessary",
  322
+ "necesary" => "necessary",
  323
+ "negotation" => "negotiation",
  324
+ "nescessary" => "necessary",
  325
+ "nessessary" => "necessary",
  326
+ "noticable" => "noticeable",
  327
+ "notications" => "notifications",
  328
+ "o'caml" => "OCaml",
  329
+ "omitt" => "omit",
  330
+ "ommitted" => "omitted",
  331
+ "onself" => "oneself",
  332
+ "optionnal" => "optional",
  333
+ "optmizations" => "optimizations",
  334
+ "orientatied" => "orientated",
  335
+ "orientied" => "oriented",
  336
+ "ouput" => "output",
  337
+ "overaall" => "overall",
  338
+ "overriden" => "overridden",
  339
+ "pacakge" => "package",
  340
+ "pachage" => "package",
  341
+ "packacge" => "package",
  342
+ "packege" => "package",
  343
+ "packge" => "package",
  344
+ "pakage" => "package",
  345
+ "pallette" => "palette",
  346
+ "paramameters" => "parameters",
  347
+ "paramater" => "parameter",
  348
+ "parametes" => "parameters",
  349
+ "paramter" => "parameter",
  350
+ "paramters" => "parameters",
  351
+ "particularily" => "particularly",
  352
+ "pased" => "passed",
  353
+ "peprocessor" => "preprocessor",
  354
+ "perfoming" => "performing",
  355
+ "permissons" => "permissions",
  356
+ "persistant" => "persistent",
  357
+ "plattform" => "platform",
  358
+ "pleaes" => "please",
  359
+ "ploting" => "plotting",
  360
+ "posible" => "possible",
  361
+ "postgressql" => "PostgreSQL",
  362
+ "powerfull" => "powerful",
  363
+ "preceeded" => "preceded",
  364
+ "preceeding" => "preceding",
  365
+ "precendence" => "precedence",
  366
+ "precission" => "precision",
  367
+ "prefered" => "preferred",
  368
+ "prefferably" => "preferably",
  369
+ "prepaired" => "prepared",
  370
+ "primative" => "primitive",
  371
+ "princliple" => "principle",
  372
+ "priorty" => "priority",
  373
+ "procceed" => "proceed",
  374
+ "proccesors" => "processors",
  375
+ "proces" => "process",
  376
+ "processessing" => "processing",
  377
+ "processpr" => "processor",
  378
+ "processsing" => "processing",
  379
+ "progams" => "programs",
  380
+ "programers" => "programmers",
  381
+ "programm" => "program",
  382
+ "programms" => "programs",
  383
+ "promps" => "prompts",
  384
+ "pronnounced" => "pronounced",
  385
+ "prononciation" => "pronunciation",
  386
+ "pronouce" => "pronounce",
  387
+ "pronunce" => "pronounce",
  388
+ "propery" => "property",
  389
+ "prosess" => "process",
  390
+ "protable" => "portable",
  391
+ "protcol" => "protocol",
  392
+ "protecion" => "protection",
  393
+ "protocoll" => "protocol",
  394
+ "psychadelic" => "psychedelic",
  395
+ "quering" => "querying",
  396
+ "reasearch" => "research",
  397
+ "reasearcher" => "researcher",
  398
+ "reasearchers" => "researchers",
  399
+ "recieved" => "received",
  400
+ "recieve" => "receive",
  401
+ "reciever" => "receiver",
  402
+ "recogniced" => "recognised",
  403
+ "recognizeable" => "recognizable",
  404
+ "recommanded" => "recommended",
  405
+ "redircet" => "redirect",
  406
+ "redirectrion" => "redirection",
  407
+ "reenabled" => "re-enabled",
  408
+ "reenable" => "re-enable",
  409
+ "reencode" => "re-encode",
  410
+ "refence" => "reference",
  411
+ "registerd" => "registered",
  412
+ "registraration" => "registration",
  413
+ "regulamentations" => "regulations",
  414
+ "remoote" => "remote",
  415
+ "removeable" => "removable",
  416
+ "repectively" => "respectively",
  417
+ "replacments" => "replacements",
  418
+ "replys" => "replies",
  419
+ "requiere" => "require",
  420
+ "requred" => "required",
  421
+ "resizeable" => "resizable",
  422
+ "ressize" => "resize",
  423
+ "ressource" => "resource",
  424
+ "retransmited" => "retransmitted",
  425
+ "runned" => "ran",
  426
+ "runnning" => "running",
  427
+ "safly" => "safely",
  428
+ "savable" => "saveable",
  429
+ "searchs" => "searches",
  430
+ "secund" => "second",
  431
+ "separatly" => "separately",
  432
+ "sepcify" => "specify",
  433
+ "seperated" => "separated",
  434
+ "seperately" => "separately",
  435
+ "seperate" => "separate",
  436
+ "seperatly" => "separately",
  437
+ "seperator" => "separator",
  438
+ "sequencial" => "sequential",
  439
+ "serveral" => "several",
  440
+ "setts" => "sets",
  441
+ "similiar" => "similar",
  442
+ "simliar" => "similar",
  443
+ "speach" => "speech",
  444
+ "speciefied" => "specified",
  445
+ "specifed" => "specified",
  446
+ "specificaton" => "specification",
  447
+ "specifing" => "specifying",
  448
+ "speficied" => "specified",
  449
+ "speling" => "spelling",
  450
+ "splitted" => "split",
  451
+ "staically" => "statically",
  452
+ "standardss" => "standards",
  453
+ "standart" => "standard",
  454
+ "staticly" => "statically",
  455
+ "subdirectoires" => "subdirectories",
  456
+ "suble" => "subtle",
  457
+ "succesfully" => "successfully",
  458
+ "succesful" => "successful",
  459
+ "sucessfully" => "successfully",
  460
+ "superflous" => "superfluous",
  461
+ "superseeded" => "superseded",
  462
+ "suplied" => "supplied",
  463
+ "suport" => "support",
  464
+ "suppored" => "supported",
  465
+ "supportin" => "supporting",
  466
+ "suppoted" => "supported",
  467
+ "suppported" => "supported",
  468
+ "suppport" => "support",
  469
+ "surpresses" => "suppresses",
  470
+ "suspicously" => "suspiciously",
  471
+ "synax" => "syntax",
  472
+ "synchonized" => "synchronized",
  473
+ "syncronize" => "synchronize",
  474
+ "syncronizing" => "synchronizing",
  475
+ "syncronus" => "synchronous",
  476
+ "syste" => "system",
  477
+ "sythesis" => "synthesis",
  478
+ "taht" => "that",
  479
+ "throught" => "through",
  480
+ "transfering" => "transferring",
  481
+ "trasmission" => "transmission",
  482
+ "treshold" => "threshold",
  483
+ "trigerring" => "triggering",
  484
+ "unecessary" => "unnecessary",
  485
+ "unexecpted" => "unexpected",
  486
+ "unfortunatelly" => "unfortunately",
  487
+ "unknonw" => "unknown",
  488
+ "unkown" => "unknown",
  489
+ "unuseful" => "useless",
  490
+ "usefull" => "useful",
  491
+ "usera" => "users",
  492
+ "usetnet" => "Usenet",
  493
+ "usualy" => "usually",
  494
+ "utilites" => "utilities",
  495
+ "utillities" => "utilities",
  496
+ "utilties" => "utilities",
  497
+ "utiltity" => "utility",
  498
+ "utitlty" => "utility",
  499
+ "variantions" => "variations",
  500
+ "varient" => "variant",
  501
+ "verbse" => "verbose",
  502
+ "verisons" => "versions",
  503
+ "verison" => "version",
  504
+ "verson" => "version",
  505
+ "vicefersa" => "vice-versa",
  506
+ "visiters" => "visitors",
  507
+ "vitual" => "virtual",
  508
+ "whataver" => "whatever",
  509
+ "wheter" => "whether",
  510
+ "wierd" => "weird",
  511
+ "yur" => "your",
  512
+);
  513
+
  514
# extra words contributed by CPAN users, thanks!
# split it up for easier maintenance of Lintian data
my %common_cpan = (
    "refering"     => "referring",
    "writeable"    => "writable",
    "nineth"       => "ninth",
    "ommited"      => "omitted",
    "omited"       => "omitted",
    "requrie"      => "require",
    "existant"     => "existent",
    "explict"      => "explicit",

    # NOTE(review): this entry used to map to "augument", which is itself
    # misspelled.  "argument" is assumed to be the intended word ("augment"
    # is the other candidate) - confirm.
    "agument"      => "argument",
    "destionation" => "destination",
);

#Seen in github commits for spelling correction
my %bypablo = (
    "syncronous" => "synchronous",
    "arn't"      => "aren't",
    "excption"   => "exception",
    "remotly"    => "remotely",
    "occured"    => "occurred",
    "githuub"    => "github",
    "majourity"  => "majority",
    "systemm"    => "system",
);

# Fold the extra tables into the main one.  Later tables win when a key is
# present more than once.
%common = ( %common, %common_cpan, %bypablo );
  540
+
  541
# fix_text($text)
#
# Returns a copy of $text in which every whole-word, case-insensitive
# occurrence of a key of %common is replaced by its correction, with the
# letter case of the replaced word carried over by preserve_case().
# Prints one "Swapped on ..." line for each key that matched at least once.
sub fix_text {
    my $text = shift @_;

    # Sorted so that the substitutions and their log lines always come out
    # in the same order.
    foreach my $k ( sort keys %common ) {

        # \Q...\E makes sure the key is matched literally, whatever
        # characters it contains.
        if ( $text =~ s/\b(\Q$k\E)\b/preserve_case($1,$common{$k})/egi ) {
            print "Swapped on $k update to $common{$k}\n";
        }
    }
    return $text;
}
  550
+
  551
# preserve_case($old, $new)
#
# Returns $new with the letter case of $old applied position by position:
# where $old has an upper-case letter the result is upper case, where it
# has a lower-case letter the result is lower case.  When $new is longer
# than $old, the case of the last character of $old is used for the extra
# characters; when $new is shorter, the surplus of $old is ignored.
#
# Declared without a prototype: it takes two arguments, so the former empty
# prototype "()" was wrong and only went unnoticed because the sub is
# called before its definition has been seen.
sub preserve_case {
    my ( $old, $new ) = @_;

    # One byte per character of $old: "\0" where the character equals its
    # upper-case form, "\x20" (the ASCII case bit) for a lower-case letter.
    my $mask = uc($old) ^ $old;

    my $extra = length($new) - length($old);
    if ( $extra > 0 ) {

        # Stretch the mask by repeating its last byte.
        $mask .= substr( $mask, -1 ) x $extra if length $mask;
    }
    else {

        # Cut the mask down to the length of $new.  A longer mask would
        # append its surplus bytes (spaces or NULs) to the result.
        $mask = substr( $mask, 0, length $new );
    }

    # OR-ing the case bit into the upper-cased word lowers exactly the
    # positions that were lower case in $old.
    return uc($new) | $mask;
}
+
  558
# check_common($text)
#
# Returns 1 as soon as a whitespace-separated word of $text, lower-cased and
# stripped of surrounding punctuation, is a key of %common; prints that word
# as "Error was ...".  Returns 0 when no word matches or $text is undefined.
sub check_common {
    my $text = shift @_;
    return 0 unless defined $text;

    # Logic taken from Lintian::Check::check_spelling(), thanks!
    foreach my $w ( split ' ', $text ) {
        my $lcw = lc($w);

        # fix_text() replaces on word boundaries, so a misspelling next to
        # a comma, full stop, quote or bracket is fixable and has to be
        # detected here as well.
        $lcw =~ s/\A[("'\[]+//;
        $lcw =~ s/[.,;:?!)"'\]]+\z//;

        if ( exists $common{$lcw} ) {
            print "Error was $lcw\n";
            return 1;
        }
    }

    return 0;
}
  575
+
  576
+
  577
+1;

0 notes on commit c5f98b5

Please sign in to comment.
Something went wrong with that request. Please try again.