Skip to content

Commit

Permalink
draft initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
jsoref committed Feb 14, 2020
0 parents commit d2d0ee0
Show file tree
Hide file tree
Showing 11 changed files with 633 additions and 0 deletions.
17 changes: 17 additions & 0 deletions .github/workflows/spelling.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Workflow that runs this repository's own spell-checking action.
name: Spell checking
# Triggered for both pull requests and pushes.
on: [pull_request, push]

jobs:
build:
name: Spell checker
runs-on: ubuntu-latest
steps:
# Check out the repository with the last two commits of history.
# NOTE(review): presumably depth 2 is needed so the action can compare
# against the previous commit -- confirm against the entrypoint script.
- uses: actions/checkout@v2.0.0
with:
fetch-depth: 2
# Run the action defined at the root of this repository.
- uses: ./
env:
# bucket/project: location of the spelling data read by the action
# (project is a folder within bucket).
bucket: ssh://git@github.com/check-spelling/check-spelling.git
project: spelling-data
# Token provided by GitHub for this workflow run.
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
# NOTE(review): presumably enables debug output in the entrypoint script;
# that script is not visible here -- confirm.
DEBUG: 1
26 changes: 26 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Image for the spell-checking GitHub Action.
# Installs curl/git/jq, copies the checker scripts into /app, runs the one-time
# setup script at build time, and uses test-spelling-unknown-words.sh as the
# entrypoint.
FROM debian:9.5-slim

# Update the package index and install in ONE layer: with separate RUN steps
# Docker can reuse a cached (stale) index for a later, changed install line.
# The index is removed afterwards because it is not needed at runtime.
RUN DEBIAN_FRONTEND=noninteractive apt-get -qq update < /dev/null > /dev/null \
 && DEBIAN_FRONTEND=noninteractive apt-get install -qq curl git jq < /dev/null > /dev/null \
 && rm -rf /var/lib/apt/lists/*
WORKDIR /app
COPY docker-setup setup
COPY reporter.json reporter.json
COPY reporter.pl reporter.pl
COPY w spelling-unknown-word-splitter.pl
# Run the setup script and delete it in the same layer; a separate `RUN rm`
# layer would hide the file without shrinking the image.
RUN ./setup && rm setup

LABEL "com.github.actions.name"="Spell Checker"
LABEL "com.github.actions.description"="Check repository for spelling errors"
LABEL "com.github.actions.icon"="edit-3"
LABEL "com.github.actions.color"="red"

LABEL "repository"="http://github.com/jsoref/spelling-action"
LABEL "homepage"="http://github.com/jsoref/spelling-action/tree/master/README.md"
LABEL "maintainer"="Josh Soref <jsoref@noreply.users.github.com>"

COPY test-spelling-unknown-words test-spelling-unknown-words.sh
COPY exclude exclude.pl

# The scripts are copied under new names; make sure they are executable.
RUN chmod +x test-spelling-unknown-words.sh exclude.pl spelling-unknown-word-splitter.pl
ENTRYPOINT ["/app/test-spelling-unknown-words.sh"]
55 changes: 55 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# Spelling tools

## Overview

Everyone makes typos. This includes people writing documentation and comments,
but it also includes programmers naming variables, functions, APIs, classes,
and filenames.

Often, programmers will use `InitialCapitalization`, `camelCase`,
`ALL_CAPS`, or `IDLCase` when naming their things. Doing so makes it much
harder for naive spelling tools to recognize misspellings; faced with a very
high false-positive rate, people tend not to enable spell checking at all.

This repository's tools are capable of tolerating all of those variations.
Specifically, [w](https://github.com/jsoref/spelling/blob/master/w) understands
enough about how programmers name things that it can split the above conventions
into word-like things for checking against a dictionary.

## Spell Checker GitHub Actions

[![Spell checking](https://github.com/check-spelling/check-spelling/workflows/Spell%20checking/badge.svg?branch=master)](https://github.com/check-spelling/check-spelling/actions?query=workflow:"Spell+checking"+branch:master)

[More information](https://github.com/jsoref/spelling#overview)

### Required Configuration Variables


| Variable | Description |
| ------------- | ------------- |
| bucket | a `gsutil`- or `curl`-compatible URL where the tool has read access to a couple of files. |
| project | a folder within `bucket`. This allows you to share common items across projects. |
| GITHUB_TOKEN | Secret used to retrieve your code. |

### Optional Configuration Variables

| Variable | Description |
| ------------- | ------------- |
| VERBOSE | `1` if you want to be reminded of how many words are in your whitelist for each run. |

## Behavior

* This action will automatically comment on PRs / commits with its opinion.
* It will try to identify a limited number of lines containing the words it
doesn't recognize.

## Limitations

* It will not add comments to the first version of a file - if you want
to ensure it assigns blame, insert a commit with an empty file before you
fill the file.

# License

MIT
24 changes: 24 additions & 0 deletions action.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Action metadata: name, description, author and marketplace branding.
name: 'Spell checking'
description: 'Spell check commits'
author: 'jsoref'
branding:
icon: 'edit-3'
color: 'red'
# Inputs accepted by the action; bucket and project are mandatory.
inputs:
repo-token:
description: 'The GITHUB_TOKEN secret'
bucket:
description: 'Container for spelling exclusions and whitelist'
required: true
project:
description: 'Folder/Branch containing exclusions/whitelist'
required: true

debug:
description: 'Debug'
# The action is built from the Dockerfile in this repository.
runs:
using: 'docker'
image: 'Dockerfile'
# Only bucket and project are forwarded to the container as environment
# variables here.
# NOTE(review): repo-token and debug are declared above but not mapped in
# this env block -- confirm whether that is intentional.
env:
bucket: ${{ inputs.bucket }}
project: ${{ inputs.project }}
26 changes: 26 additions & 0 deletions docker-setup
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#!/bin/bash
# This CI acceptance test is based on:
# https://github.com/jsoref/spelling/tree/04648bdc63723e5cdf5cbeaff2225a462807abc8
# It is conceptually `f` which runs `w` (spelling-unknown-word-splitter)
# plus `fchurn` which uses `dn` mostly rolled together.
#
# Build-time setup: creates the scratch directory and downloads the
# dictionary into the image unless it is already present.
set -e

spellchecker='/app'
temp='/tmp/spelling'
dict="$spellchecker/words"

wordlist=https://github.com/check-spelling/check-spelling/raw/dictionary/dict.txt

mkdir -p "$temp"
if [ ! -e "$dict" ]; then
  echo "Retrieving cached $(basename "$wordlist")"
  # english.words is taken from rpm:
  # https://rpmfind.net/linux/fedora/linux/development/rawhide/Everything/aarch64/os/Packages/w/words-.*.noarch.rpm
  #
  # --fail makes curl return non-zero on HTTP errors instead of saving the
  # error page as the dictionary; -sS stays quiet but still prints errors.
  if ! curl --fail -sS -L "$wordlist" -o "$dict"; then
    echo "Failed to retrieve $wordlist" >&2
    # Do not leave a partial/invalid dictionary behind.
    rm -f "$dict"
    exit 1
  fi
fi
20 changes: 20 additions & 0 deletions exclude
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#!/usr/bin/perl
# This script takes null delimited files as input
# it drops paths that match the listed exclusions
# output is null delimited to match input
#
# Exclusions are read from excludes.txt in the same directory as this script,
# one Perl regular expression per line. Surrounding whitespace is ignored and
# blank lines are skipped. When the file is missing or lists no patterns,
# only the empty path is dropped.
use strict;
use warnings;
use File::Basename;

# Read the exclusion patterns from $path.
# Returns the trimmed, non-empty lines; returns an empty list when the file
# cannot be opened (a missing excludes file simply means "exclude nothing").
sub load_excludes {
    my ($path) = @_;
    open my $fh, '<', $path or return;
    my @patterns;
    while (my $line = <$fh>) {
        # Two anchored substitutions: a single s/^\s*(.*)\s*$/$1/ leaves
        # trailing whitespace in place because the greedy .* swallows it.
        $line =~ s/\A\s+//;
        $line =~ s/\s+\z//;
        # An empty pattern would become an empty alternative in the combined
        # regex and match (and therefore drop) every path.
        next if $line eq '';
        push @patterns, $line;
    }
    close $fh;
    return @patterns;
}

# Combine the patterns into one compiled alternation.
# With no patterns the result is ^$, which matches only the empty path.
sub build_exclude_regex {
    my (@patterns) = @_;
    my $source = @patterns ? join('|', @patterns) : '^$';
    return qr{$source};
}

# Filter the NUL-delimited paths from STDIN / the files named in @ARGV,
# printing every path that matches no exclusion, NUL-terminated.
sub main {
    my $dirname = dirname(__FILE__);
    my $exclude = build_exclude_regex(load_excludes($dirname . '/excludes.txt'));

    # Records are NUL-delimited; localized so the change stays in this sub.
    local $/ = "\0";
    while (my $path = <>) {
        chomp $path;
        next if $path =~ $exclude;
        print "$path$/";
    }
    return;
}

# Run only when executed as a script, not when loaded by another file.
main() unless caller;
20 changes: 20 additions & 0 deletions reporter.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{
"problemMatcher": [
{
"owner": "jsoref-spelling",
"pattern": [
{
"regexp": "^(.+):[\\s]line\\s(\\d+),[\\s]columns\\s(\\d+)-(\\d+),\\s(Error|Warning|Info)\\s-\\s(.+)\\s\\((.+)\\)$",
"file": 1,
"line": 2,
"column": 3,
"endColumn": 4,
"severity": 5,
"message": 6,
"code": 7
}
]
}
]
}

35 changes: 35 additions & 0 deletions reporter.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#!/usr/bin/env perl
# Reports where unrecognized words occur in the input.
#
# Environment:
#   tokens      whitespace-separated list of words to look for (required)
#   with_blame  when defined, each input line is expected to carry a
#               "<hash> <file> <line>) " prefix, from which the file name and
#               line number are taken
#               (NOTE(review): presumably `git blame -s -f` output -- confirm)
#
# Without with_blame, the file name is that of the file currently being read
# and line numbers are counted per file.
#
# Output: one line per occurrence, in the form
#   FILE: line N, columns A-B, Warning - 'WORD' is not a recognized word. (unrecognized-spelling)
use strict;
use warnings;

# Compile a regex matching any of the whitespace-separated words in $tokens
# as a whole word, captured in group 1.
# Words are split on runs of whitespace (ignoring leading/trailing whitespace,
# which would otherwise create empty alternatives that match everywhere) and
# are escaped so they match literally.
sub build_token_regex {
    my ($tokens) = @_;
    my @words = split ' ', $tokens;
    return qr/(?!)/ unless @words;    # nothing to look for: never matches
    my $alternation = join '|', map { quotemeta } @words;
    return qr/\b($alternation)\b/;
}

# Find every match of $re in $text.
# Returns a list of [start_column, end_column, token] with 1-based, inclusive
# columns.
sub find_token_spans {
    my ($re, $text) = @_;
    my @spans;
    while ($text =~ /$re/g) {
        my $token = $1;
        my $start = $-[1] + 1;    # $-[1] is the 0-based offset of group 1
        push @spans, [ $start, $start + length($token) - 1, $token ];
    }
    return @spans;
}

# Read the input and print a warning line for every token occurrence.
sub main {
    die 'Please set $tokens' unless defined $ENV{tokens};
    my $tokens = $ENV{tokens};
    return unless $tokens =~ /\w/;
    my $re    = build_token_regex($tokens);
    my $blame = defined $ENV{with_blame};

    my $previous   = '';
    my $first_line = 0;
    while (my $text = <>) {
        my ($file, $line);
        if ($blame) {
            next if $text =~ /^ /;
            # Strip the blame prefix exactly once, and only use the captures
            # when the substitution succeeded; lines that do not carry the
            # prefix cannot be attributed to a file/line and are skipped.
            next unless $text =~ s/^[0-9a-f^]+\s+(.*?)\s(\d+)\) //;
            ($file, $line) = ($1, $2);
        }
        else {
            # $. keeps counting across files read through <>, so remember
            # the offset at which each file starts.
            if ($previous ne $ARGV) {
                $previous   = $ARGV;
                $first_line = $. - 1;
            }
            ($file, $line) = ($ARGV, $. - $first_line);
        }
        for my $span (find_token_spans($re, $text)) {
            my ($start, $stop, $token) = @{$span};
            print "$file: line $line, columns $start-$stop, Warning - '$token' is not a recognized word. (unrecognized-spelling)\n";
        }
    }
    return;
}

# Run only when executed as a script, not when loaded by another file.
main() unless caller;

0 comments on commit d2d0ee0

Please sign in to comment.