Skip to content

Commit

Permalink
Support flexible years in licenses (Lucas-C#23)
Browse files Browse the repository at this point in the history
The new flag `--use-current-year` keeps the years in copyright headers up to
date automatically: newly inserted headers always use the current year, existing
headers are updated to end in it, and headers are removed regardless of the years they contain.
  • Loading branch information
aostrowski committed Jan 22, 2023
1 parent e079e43 commit 833fa5f
Show file tree
Hide file tree
Showing 7 changed files with 190 additions and 31 deletions.
15 changes: 15 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ into separate repos:
- src/license_header.txt # defaults to: LICENSE.txt
- --comment-style
- // # defaults to: #
- --use-current-year
```

### insert-license
Expand Down Expand Up @@ -84,6 +85,20 @@ In case you want to remove the comment headers introduced by
3. Remove the `--remove-header` arg and update your `LICENSE.txt` ;
4. Re-run the hook on all your files.

#### Handling years flexibly

You can add `--use-current-year` to change how the hook treats years in the
headers:

- When inserting a header, the current year will always be inserted
regardless of the year listed in the license file.
- When modifying a file that already has a header, the hook will ensure the
current year is listed in the header by using a range. For instance,
`2015` or `2015-2018` would get updated to `2015-2023` in the year 2023.
- When removing headers, the licenses will be removed regardless of the
years they contain -- as if they used the year currently present in the
license file.

#### Fuzzy license matching

In some cases your license files can contain several slightly different
Expand Down
110 changes: 102 additions & 8 deletions pre_commit_hooks/insert_license.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,16 @@
import collections
import re
import sys
from datetime import datetime
from typing import Sequence

from fuzzywuzzy import fuzz

FUZZY_MATCH_TODO_COMMENT = (" TODO: This license is not consistent with"
" license used in the project.")
FUZZY_MATCH_TODO_INSTRUCTIONS = (
" Delete the inconsistent license and above line"
" and rerun pre-commit to insert a good license." )
" and rerun pre-commit to insert a good license.")
FUZZY_MATCH_EXTRA_LINES_TO_CHECK = 3

SKIP_LICENSE_INSERTION_COMMENT = "SKIP LICENSE INSERTION"
Expand Down Expand Up @@ -56,6 +58,11 @@ def main(argv=None):
parser.add_argument('--insert-license-after-regex', default="",
help="Insert license after line matching regex (ex: '^<\\?php$')")
parser.add_argument('--remove-header', action='store_true')
parser.add_argument(
"--use-current-year",
action="store_true",
help=("Allow past years and ranges of years in headers. Use the current year in inserted and updated licenses."),
)
args = parser.parse_args(argv)

license_info = get_license_info(args)
Expand All @@ -78,6 +85,13 @@ def main(argv=None):
return 0


def _replace_year_in_license_with_current(plain_license: list[str]):
current_year = datetime.now().year
for i, line in enumerate(plain_license):
plain_license[i] = re.sub(r"\b\d{4}\b", str(current_year), line)
return plain_license


def get_license_info(args) -> LicenseInfo:
comment_start, comment_end = None, None
comment_prefix = args.comment_style.replace('\\t', '\t')
Expand All @@ -86,6 +100,10 @@ def get_license_info(args) -> LicenseInfo:
comment_start, comment_prefix, comment_end = comment_prefix.split('|')
with open(args.license_filepath, encoding='utf8') as license_file:
plain_license = license_file.readlines()

if args.use_current_year:
plain_license = _replace_year_in_license_with_current(plain_license)

prefixed_license = [f'{comment_prefix}{extra_space if line.strip() else ""}{line}'
for line in plain_license]
eol = '\r\n' if prefixed_license[0][-2:] == '\r\n' else '\n'
Expand Down Expand Up @@ -138,7 +156,8 @@ def process_files(args, changed_files, todo_files, license_info: LicenseInfo):
license_header_index = find_license_header_index(
src_file_content=src_file_content,
license_info=license_info,
top_lines_count=args.detect_license_in_X_top_lines)
top_lines_count=args.detect_license_in_X_top_lines,
match_years_strictly=not args.use_current_year)
fuzzy_match_header_index = None
if args.fuzzy_match_generates_todo and license_header_index is None:
fuzzy_match_header_index = fuzzy_find_license_header_index(
Expand All @@ -150,6 +169,7 @@ def process_files(args, changed_files, todo_files, license_info: LicenseInfo):
)
if license_header_index is not None:
if license_found(remove_header=args.remove_header,
update_year_range=args.use_current_year,
license_header_index=license_header_index,
license_info=license_info,
src_file_content=src_file_content,
Expand Down Expand Up @@ -235,7 +255,53 @@ def license_not_found( # pylint: disable=too-many-arguments
return False


def license_found(remove_header, license_header_index, license_info, src_file_content, src_filepath, encoding): # pylint: disable=too-many-arguments
# A year, then optionally a dash (with optional spaces before and after) and
# another year, surrounded by word boundaries.
_YEAR_RANGE_PATTERN = re.compile(r"\b\d{4}(?: *- *\d{2,4})?\b")


def try_update_year_range(
    src_file_content: list[str],
    license_header_index: int,
) -> tuple[Sequence[str], bool]:
    """
    Updates the years in a copyright header in src_file_content by
    ensuring it contains a range ending in the current year.
    Does nothing if the current year is already present, either as the end
    of the range or as the only year listed.
    Only the first line containing years (at or after license_header_index)
    is ever considered; later lines are never touched.
    When an update is made, src_file_content is modified in place.
    :param src_file_content: the lines in the source file
    :param license_header_index: line where the license starts
    :return: source file contents and a flag indicating update
    """
    current_year = datetime.now().year
    for i in range(license_header_index, len(src_file_content)):
        line = src_file_content[i]
        matches = list(_YEAR_RANGE_PATTERN.finditer(line))
        if not matches:
            continue

        # Only the last year / year range on the line is a candidate.
        last = matches[-1]
        text = last.group(0)
        start_year = int(text[:4])
        # The pattern allows spaces around the dash, so strip both instead of
        # assuming the end year starts at a fixed offset.
        end_year = text[4:].strip(" -")
        if int(end_year or start_year) >= current_year:
            # Already up to date. Stop here so that four-digit numbers further
            # down the file are never mistaken for stale years.
            return src_file_content, False

        # Replace exactly the matched span rather than every occurrence of the
        # same text on the line.
        updated = f"{line[:last.start()]}{start_year}-{current_year}{line[last.end():]}"
        # verify the current list of years ends in the current one
        years = list(_YEARS_PATTERN.finditer(updated))
        if not years or not years[-1].group(0).endswith(str(current_year)):
            print(f"Unable to update year range in line: {line.rstrip()}. Got: {updated.rstrip()}")
            return src_file_content, False

        src_file_content[i] = updated
        return src_file_content, True
    return src_file_content, False


def license_found(
remove_header,
update_year_range,
license_header_index,
license_info,
src_file_content,
src_filepath,
encoding,
): # pylint: disable=too-many-arguments
"""
Executed when license is found. It does nothing if remove_header is False,
removes the license if remove_header is True.
Expand All @@ -246,6 +312,7 @@ def license_found(remove_header, license_header_index, license_info, src_file_co
:param src_filepath: path of the src_file
:return: True if change was made, False otherwise
"""
updated = False
if remove_header:
last_license_line_index = license_header_index + len(license_info.prefixed_license)
if last_license_line_index < len(src_file_content) and src_file_content[last_license_line_index].strip():
Expand All @@ -255,10 +322,15 @@ def license_found(remove_header, license_header_index, license_info, src_file_co
src_file_content = src_file_content[:license_header_index] + \
src_file_content[license_header_index +
len(license_info.prefixed_license) + 1:]
updated = True
elif update_year_range:
src_file_content, updated = try_update_year_range(src_file_content, license_header_index)

if updated:
with open(src_filepath, 'w', encoding=encoding) as src_file:
src_file.write(''.join(src_file_content))
return True
return False

return updated


def fuzzy_license_found(license_info, # pylint: disable=too-many-arguments
Expand Down Expand Up @@ -289,17 +361,39 @@ def fuzzy_license_found(license_info, # pylint: disable=too-many-arguments
return True


# More flexible than _YEAR_RANGE_PATTERN. For detecting all years in a line, not just a range.
_YEARS_PATTERN = re.compile(r"\b\d{4}([ ,-]+\d{2,4})*\b")


def _strip_years(line):
return _YEARS_PATTERN.sub("", line)


def _license_line_matches(license_line, src_file_line, match_years_strictly):
    """
    Tell whether a line of the source file corresponds to a license line.

    Leading and trailing whitespace is ignored on both lines. When
    match_years_strictly is false, years are removed from both lines
    before they are compared.

    :param license_line: expected line of the (prefixed) license
    :param src_file_line: candidate line from the source file
    :param match_years_strictly: if True, the years must be identical too
    :return: True if the two lines are considered equal
    """
    expected, actual = license_line.strip(), src_file_line.strip()
    if not match_years_strictly:
        expected, actual = _strip_years(expected), _strip_years(actual)
    return expected == actual


def find_license_header_index(src_file_content,
license_info,
top_lines_count):
license_info: LicenseInfo,
top_lines_count,
match_years_strictly):
"""
Returns the line number, starting from 0 and lower than `top_lines_count`,
where the license header comment starts in this file, or else None.
"""
for i in range(top_lines_count):
license_match = True
for j, license_line in enumerate(license_info.prefixed_license):
if i + j >= len(src_file_content) or license_line.strip() != src_file_content[i + j].strip():
if (i + j >= len(src_file_content) or
not _license_line_matches(license_line,
src_file_content[i + j],
match_years_strictly)):
license_match = False
break
if license_match:
Expand Down
72 changes: 49 additions & 23 deletions tests/insert_license_test.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from contextlib import contextmanager
from datetime import datetime
from itertools import product
import os
import shutil
Expand Down Expand Up @@ -48,6 +49,12 @@
('module_without_license.py', '#', 'module_with_license_nospace.py', True, ['--no-space-in-comment-prefix']),
('module_without_license.php', '/*| *| */', 'module_with_license.php', True, ['--insert-license-after-regex', '^<\\?php$']),
('module_without_license.py', '#', 'module_with_license_noeol.py', True, ['--no-extra-eol']),
('module_without_license.groovy', '//', 'module_with_license.groovy', True, ['--use-current-year']),
('module_with_stale_year_in_license.py', '#', 'module_with_year_range_in_license.py', True, ['--use-current-year']),
('module_with_stale_year_range_in_license.py', '#', 'module_with_year_range_in_license.py', True, ['--use-current-year']),
('module_with_badly_formatted_stale_year_range_in_license.py', '#', 'module_with_badly_formatted_stale_year_range_in_license.py', False,
['--use-current-year']),
),
)),
)
Expand All @@ -69,6 +76,8 @@ def test_insert_license(license_file_path,
if new_src_file_expected:
with open(new_src_file_expected, encoding=encoding) as expected_content_file:
expected_content = expected_content_file.read()
if '--use-current-year' in args:
expected_content = expected_content.replace("2017", str(datetime.now().year))
new_file_content = path.open(encoding=encoding).read()
assert new_file_content == expected_content

Expand Down Expand Up @@ -127,23 +136,29 @@ def test_fuzzy_match_license(license_file_path,


@pytest.mark.parametrize(
('src_file_content', 'expected_index'),
('src_file_content', 'expected_index', 'match_years_strictly'),
(
(['foo\n', 'bar\n'], None),
(['# License line 1\n', '# License line 2\n', '\n', 'foo\n', 'bar\n'], 0),
(['\n', '# License line 1\n', '# License line 2\n', 'foo\n', 'bar\n'], 1),
(['foo\n', 'bar\n'], None, True),
(['# License line 1\n', '# Copyright 2017\n', '\n', 'foo\n', 'bar\n'], 0, True),
(['\n', '# License line 1\n', '# Copyright 2017\n', 'foo\n', 'bar\n'], 1, True),
(['\n', '# License line 1\n', '# Copyright 2017\n', 'foo\n', 'bar\n'], 1, False),
(['# License line 1\n', '# Copyright 1984\n', '\n', 'foo\n', 'bar\n'], None, True),
(['# License line 1\n', '# Copyright 1984\n', '\n', 'foo\n', 'bar\n'], 0, False),
(['\n', '# License line 1\n', '# Copyright 2013,2015-2016\n', 'foo\n', 'bar\n'], 1, False),
),
)
def test_is_license_present(src_file_content, expected_index):
def test_is_license_present(src_file_content, expected_index, match_years_strictly):
license_info = LicenseInfo(
plain_license="",
eol="\n",
comment_start="",
comment_prefix="#",
comment_end="",
num_extra_lines=0,
prefixed_license=['# License line 1\n', '# License line 2\n'])
assert expected_index == find_license_header_index(src_file_content, license_info, 5)
prefixed_license=['# License line 1\n', '# Copyright 2017\n'])
assert expected_index == find_license_header_index(
src_file_content, license_info, 5, match_years_strictly=match_years_strictly
)


@pytest.mark.parametrize(
Expand All @@ -152,27 +167,35 @@ def test_is_license_present(src_file_content, expected_index):
'comment_style',
'fuzzy_match',
'new_src_file_expected',
'fail_check'),
'fail_check',
'use_current_year'),
map(lambda a: a[:1] + a[1], product( # combine license files with other args
('LICENSE_with_trailing_newline.txt', 'LICENSE_without_trailing_newline.txt'),
(
('module_with_license.css', '/*| *| */', False, 'module_without_license.css', True),
('module_with_license.css', '/*| *| */', False, 'module_without_license.css', True, False),
('module_with_license_and_few_words.css', '/*| *| */', False,
'module_without_license_and_few_words.css', True),
('module_with_license_todo.css', '/*| *| */', False, None, True),
('module_with_fuzzy_matched_license.css', '/*| *| */', False, None, False),
('module_without_license.css', '/*| *| */', False, None, False),
('module_with_license.py', '#', False, 'module_without_license.py', True),
('module_with_license_and_shebang.py', '#', False, 'module_without_license_and_shebang.py', True),
('init_with_license.py', '#', False, 'init_without_license.py', True),
('init_with_license_and_newline.py', '#', False, 'init_without_license.py', True),
'module_without_license_and_few_words.css', True, False),
('module_with_license_todo.css', '/*| *| */', False, None, True, False),
('module_with_fuzzy_matched_license.css', '/*| *| */', False, None, False, False),
('module_without_license.css', '/*| *| */', False, None, False, False),
('module_with_license.py', '#', False, 'module_without_license.py', True, False),
('module_with_license_and_shebang.py', '#', False, 'module_without_license_and_shebang.py', True, False),
('init_with_license.py', '#', False, 'init_without_license.py', True, False),
('init_with_license_and_newline.py', '#', False, 'init_without_license.py', True, False),
# Fuzzy match
('module_with_license.css', '/*| *| */', True, 'module_without_license.css', True),
('module_with_license_todo.css', '/*| *| */', True, None, True),
('module_with_fuzzy_matched_license.css', '/*| *| */', True, 'module_with_license_todo.css', True),
('module_without_license.css', '/*| *| */', True, None, False),
('module_with_license_and_shebang.py', '#', True, 'module_without_license_and_shebang.py', True),
('module_with_license.css', '/*| *| */', True, 'module_without_license.css', True, False),
('module_with_license_todo.css', '/*| *| */', True, None, True, False),
('module_with_fuzzy_matched_license.css', '/*| *| */', True, 'module_with_license_todo.css', True, False),
('module_without_license.css', '/*| *| */', True, None, False, False),
('module_with_license_and_shebang.py', '#', True, 'module_without_license_and_shebang.py', True, False),
# Strict and flexible years
('module_with_stale_year_in_license.py', '#', False, None, False, False),
('module_with_stale_year_range_in_license.py', '#', False, None, False, False),
('module_with_license.py', '#', False, 'module_without_license.py', True, True),
('module_with_stale_year_in_license.py', '#', False, 'module_without_license.py', True, True),
('module_with_stale_year_range_in_license.py', '#', False, 'module_without_license.py', True, True),
('module_with_badly_formatted_stale_year_range_in_license.py', '#', False, 'module_without_license.py', True, True),
),
)),
)
Expand All @@ -182,6 +205,7 @@ def test_remove_license(license_file_path,
fuzzy_match,
new_src_file_expected,
fail_check,
use_current_year,
tmpdir):
with chdir_to_test_resources():
path = tmpdir.join('src_file_path')
Expand All @@ -191,6 +215,8 @@ def test_remove_license(license_file_path,
'--comment-style', comment_style]
if fuzzy_match:
argv = ['--fuzzy-match-generates-todo'] + argv
if use_current_year:
argv = ['--use-current-year'] + argv
assert insert_license(argv) == (1 if fail_check else 0)
if new_src_file_expected:
with open(new_src_file_expected, encoding='utf-8') as expected_content_file:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Copyright (C) 2015 -- 16 Teela O'Malley
#
# Licensed under the Apache License, Version 2.0 (the "License");

import sys
sys.stdout.write("FOO")
6 changes: 6 additions & 0 deletions tests/resources/module_with_stale_year_in_license.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Copyright (C) 2015 Teela O'Malley
#
# Licensed under the Apache License, Version 2.0 (the "License");

import sys
sys.stdout.write("FOO")
6 changes: 6 additions & 0 deletions tests/resources/module_with_stale_year_range_in_license.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Copyright (C) 2015-2016 Teela O'Malley
#
# Licensed under the Apache License, Version 2.0 (the "License");

import sys
sys.stdout.write("FOO")
6 changes: 6 additions & 0 deletions tests/resources/module_with_year_range_in_license.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Copyright (C) 2015-2017 Teela O'Malley
#
# Licensed under the Apache License, Version 2.0 (the "License");

import sys
sys.stdout.write("FOO")

0 comments on commit 833fa5f

Please sign in to comment.