-
Notifications
You must be signed in to change notification settings - Fork 25
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit d2d0ee0
Showing
11 changed files
with
633 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
# Workflow that runs the action defined at the root of this repository.
name: Spell checking
on:
  - pull_request
  - push

jobs:
  build:
    name: Spell checker
    runs-on: ubuntu-latest
    steps:
      # Check out the repository, limited to the two most recent commits.
      - uses: actions/checkout@v2.0.0
        with:
          fetch-depth: 2
      # Run the action from the checked-out working tree.
      - uses: ./
        env:
          bucket: ssh://git@github.com/check-spelling/check-spelling.git
          project: spelling-data
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          DEBUG: 1
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
FROM debian:9.5-slim

# Refresh the package index and install the tools in ONE layer: with separate
# RUN instructions a cached (stale) `apt-get update` layer can be reused while
# the install line changes, which makes the install fail or pick old packages.
# Input is taken from /dev/null and output is discarded to keep the build quiet.
RUN DEBIAN_FRONTEND=noninteractive apt-get -qq update < /dev/null > /dev/null \
 && DEBIAN_FRONTEND=noninteractive apt-get install -qq curl git jq < /dev/null > /dev/null
WORKDIR /app
# Files are renamed while being copied into /app.
COPY docker-setup setup
COPY reporter.json reporter.json
COPY reporter.pl reporter.pl
COPY w spelling-unknown-word-splitter.pl
RUN ./setup
RUN rm setup

# Metadata shown for the action.
LABEL "com.github.actions.name"="Spell Checker"
LABEL "com.github.actions.description"="Check repository for spelling errors"
LABEL "com.github.actions.icon"="edit-3"
LABEL "com.github.actions.color"="red"

LABEL "repository"="http://github.com/jsoref/spelling-action"
LABEL "homepage"="http://github.com/jsoref/spelling-action/tree/master/README.md"
LABEL "maintainer"="Josh Soref <jsoref@noreply.users.github.com>"

COPY test-spelling-unknown-words test-spelling-unknown-words.sh
COPY exclude exclude.pl

# The copied scripts are executed directly, so make sure they are executable.
RUN chmod +x test-spelling-unknown-words.sh exclude.pl spelling-unknown-word-splitter.pl
ENTRYPOINT ["/app/test-spelling-unknown-words.sh"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
# Spelling tools | ||
|
||
## Overview | ||
|
||
Everyone makes typos. This includes people writing documentation and comments, | ||
but it also includes programmers naming variables, functions, APIs, classes, | ||
and filenames. | ||
|
||
Often, programmers will use `InitialCapitalization`, `camelCase`, | ||
`ALL_CAPS`, or `IDLCase` when naming their things. When they do this, it makes | ||
it much harder for naive spelling tools to recognize misspellings. The resulting | ||
high false-positive rate means that people tend not to enable spell checking | ||
at all. | ||
|
||
This repository's tools are capable of tolerating all of those variations. | ||
Specifically, [w](https://github.com/jsoref/spelling/blob/master/w) understands | ||
enough about how programmers name things that it can split the above conventions | ||
into word-like things for checking against a dictionary. | ||
|
||
## Spell Checker GitHub Actions | ||
|
||
[![Spell checking](https://github.com/check-spelling/check-spelling/workflows/Spell%20checking/badge.svg?branch=master)](https://github.com/check-spelling/check-spelling/actions?query=workflow:"Spell+checking"+branch:master) | ||
|
||
[More information](https://github.com/jsoref/spelling#overview) | ||
|
||
### Required Configuration Variables | ||
|
||
|
||
| Variable | Description | | ||
| ------------- | ------------- | | ||
| bucket | a `gsutil` or `curl` compatible URL from which the tool can read a couple of files. | | ||
| project | a folder within `bucket`. This allows you to share common items across projects. | | ||
| GITHUB_TOKEN | Secret used to retrieve your code. | | ||
|
||
### Optional Configuration Variables | ||
|
||
| Variable | Description | | ||
| ------------- | ------------- | | ||
| VERBOSE | `1` if you want to be reminded of how many words are in your whitelist for each run. | | ||
|
||
## Behavior | ||
|
||
* This action will automatically comment on PRs / commits with its opinion. | ||
* It will try to identify a limited number of lines containing the words it | ||
doesn't recognize. | ||
|
||
## Limitations | ||
|
||
* It will not add comments to the first version of a file - if you want | ||
to ensure it assigns blame, insert a commit with an empty file before you | ||
fill the file. | ||
|
||
# License | ||
|
||
MIT |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
# Action metadata: a Docker based action built from the Dockerfile in this
# repository.
name: "Spell checking"
description: "Spell check commits"
author: "jsoref"
branding:
  icon: "edit-3"
  color: "red"
inputs:
  repo-token:
    description: "The GITHUB_TOKEN secret"
  bucket:
    description: "Container for spelling exclusions and whitelist"
    required: true
  project:
    description: "Folder/Branch containing exclusions/whitelist"
    required: true

  debug:
    description: "Debug"
runs:
  using: "docker"
  image: "Dockerfile"
  # Only `bucket` and `project` are forwarded to the container here.
  env:
    bucket: ${{ inputs.bucket }}
    project: ${{ inputs.project }}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
#!/bin/bash
# This CI acceptance test is based on:
# https://github.com/jsoref/spelling/tree/04648bdc63723e5cdf5cbeaff2225a462807abc8
# It is conceptually `f` which runs `w` (spelling-unknown-word-splitter)
# plus `fchurn` which uses `dn` mostly rolled together.
set -e

spellchecker='/app'
w_location="$spellchecker/w"
temp='/tmp/spelling'
dict="$spellchecker/words"
word_splitter="$spellchecker/spelling-unknown-word-splitter.pl"
run_output="$spellchecker/unknown.words.txt"

wordlist=https://github.com/check-spelling/check-spelling/raw/dictionary/dict.txt

mkdir -p "$temp"
# Download the dictionary only when it is not already present.
if [ ! -e "$dict" ]; then
  echo "Retrieving cached $(basename "$wordlist")"
  # english.words is taken from rpm:
  # https://rpmfind.net/linux/fedora/linux/development/rawhide/Everything/aarch64/os/Packages/w/
  # "words-.*.noarch.rpm"
  #
  # -f makes curl fail on HTTP errors instead of saving the error page as the
  # dictionary. curl's own output stays suppressed, so report the failure
  # explicitly and do not leave a partial file behind.
  if ! curl -f -L -s "$wordlist" -o "$dict" >/dev/null 2>/dev/null; then
    rm -f "$dict"
    echo "Failed to retrieve $wordlist" >&2
    exit 1
  fi
fi
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
#!/usr/bin/perl
# This script takes null delimited files as input
# it drops paths that match the listed exclusions
# output is null delimited to match input
#
# Exclusions are read from excludes.txt in the directory of this script:
# one regular expression per line, surrounding whitespace is ignored.
# If the file cannot be opened, nothing is excluded.
use strict;
use warnings;
use File::Basename;
my $dirname = dirname(__FILE__);

my @excludes;
if (open my $excludes_fh, '<', $dirname.'/excludes.txt') {
  while (my $pattern = <$excludes_fh>) {
    # Trim both ends (this also removes the line terminator).
    $pattern =~ s/\A\s+//;
    $pattern =~ s/\s+\z//;
    # A blank line would become an empty alternative in the combined
    # expression below, and an empty alternative matches every path.
    next if $pattern eq '';
    push @excludes, $pattern;
  }
  close $excludes_fh;
}
# From here on records (paths) are separated by NUL.
$/="\0";
# Without any exclusion fall back to an expression that only matches an
# empty path.
my $exclude = scalar @excludes ? join "|", @excludes : '^$';
while (<>) {
  chomp;
  next if m{$exclude};
  print "$_$/";
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
{ | ||
"problemMatcher": [ | ||
{ | ||
"owner": "jsoref-spelling", | ||
"pattern": [ | ||
{ | ||
"regexp": "^(.+):[\\s]line\\s(\\d+),[\\s]columns\\s(\\d+)-(\\d+),\\s(Error|Warning|Info)\\s-\\s(.+)\\s\\((.+)\\)$", | ||
"file": 1, | ||
"line": 2, | ||
"column": 3, | ||
"endColumn": 4, | ||
"severity": 5, | ||
"message": 6, | ||
"code": 7 | ||
} | ||
] | ||
} | ||
] | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
#!/usr/bin/env perl
# Report every occurrence of the given words in the input.
#
# Environment:
#   tokens     - whitespace separated list of words to look for (required;
#                the script exits quietly if it contains no word characters)
#   with_blame - when defined, every input line is expected to start with
#                "<hex id> <file> <line>) " (presumably `git blame` output -
#                verify against the caller); file name and line number are
#                taken from that prefix instead of from the file being read
#
# Output, one line per occurrence:
#   <file>: line <n>, columns <start>-<stop>, Warning - '<word>' is not a recognized word. (unrecognized-spelling)
use strict;
use warnings;

die 'Please set $tokens' unless defined $ENV{tokens};
my $tokens=$ENV{tokens};
exit 0 unless $tokens =~ /\w/;
# Trim first: leading/trailing whitespace (e.g. a final newline) would
# otherwise turn into an empty alternative, which matches at every word
# boundary and gets reported as an empty word.
$tokens =~ s/\A\s+|\s+\z//g;
$tokens =~ s/\s+/|/g;
my $re = qr/\b($tokens)\b/;
my $blame=defined $ENV{with_blame};

my $previous='';
my $first_line=0;
while (<>) {
  my $line;
  if ($blame) {
    next if /^ /;
    # Strip the prefix exactly once and only trust the captures if it was
    # really there. `\s+` before the line number keeps padding in front of
    # right-aligned line numbers out of the file name.
    next unless s/^[0-9a-f^]+\s+(.*?)\s+(\d+)\) //;
    ($ARGV, $line) = ($1, $2);
  } else {
    # $. keeps counting across input files; remember where the current
    # file started to get a per-file line number.
    if ($previous ne $ARGV) {
      $previous=$ARGV;
      $first_line = $. - 1;
    }
    $line = $. - $first_line;
  }
  while (/$re/g) {
    my $token = $1;
    # $-[0] is the offset of the match start; columns are 1-based.
    my $start = 1 + $-[0];
    my $stop = $start + (length $token) - 1;
    print "$ARGV: line $line, columns $start-$stop, Warning - '$token' is not a recognized word. (unrecognized-spelling)\n";
  }
}
Oops, something went wrong.