Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

contrib/utilities/update-copyright.sh: several improvements #16689

Merged
merged 1 commit into from
Feb 27, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
202 changes: 157 additions & 45 deletions contrib/utilities/update-copyright.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
##
## ---------------------------------------------------------------------

set -u

# Purpose: Update the copyright year of every file based on the last
# modification recorded in the git logs
Expand All @@ -24,51 +25,162 @@ if test ! -d source -o ! -d include -o ! -d examples ; then
exit
fi

#
# Parse the command line. Recognized options:
#
#   --pedantic   sets accurate_first_year=true
#   -j N         sets processes=N; N must be a positive integer (default: 1)
#
# Any other argument, or a missing/invalid value for -j, prints an error
# and the usage line to stderr and exits with status 1.
#

processes=1
accurate_first_year=false

# Print the error message $1 and the usage line to stderr, then exit 1.
usage_error()
{
  echo "Error: ${1}" >&2
  echo "Usage: update-copyright.sh [--pedantic] [-j N]" >&2
  exit 1
}

# Consume all arguments and set the globals "processes" and
# "accurate_first_year" accordingly.
parse_arguments()
{
  # Loop on the argument count rather than on the joined argument string:
  # otherwise an empty-string argument would silently end the parsing.
  while (( $# > 0 )); do
    case "${1}" in
      --pedantic)
        accurate_first_year=true
        shift;;
      -j)
        shift
        # The value is later used as a divisor in an arithmetic
        # expression, so it must be a positive decimal integer.
        if (( $# == 0 )) || ! [[ "${1}" =~ ^[1-9][0-9]*$ ]]; then
          usage_error "»-j« must be followed by a number"
        fi
        processes="${1}"
        shift;;
      *)
        usage_error "invalid option »${1}«"
        ;;
    esac
  done
}

parse_arguments "$@"

#
# A shell function that updates the copyright string for a given file $1:
#

update_copyright()
{
# Arguments: $1 - path of the file to process.
# Status and "Skipping ..." messages are written to stdout.
local file="${1}"

# The path is quoted everywhere: unquoted, a name containing whitespace or
# glob characters would be split or expanded by the shell.
if ! [ -f "${file}" ]; then
  echo "Skipping ${file}: not a file"
  return
fi

# Only handle files that carry the deal.II banner within their first 13
# lines. The leading ".." matches any two characters (presumably the
# comment prefix, e.g. "//" or "##").
if ! head -n 13 "${file}" | grep -q "^.. This file is part of the deal.II library.$" ; then
  echo "Skipping ${file}: no deal.II copyright header"
  return
fi

files="
$(echo contrib/*/*.{py,sh} \
contrib/python-bindings/CMakeLists.txt \
contrib/python-bindings/*/*.{h,cc,py} \
contrib/utilities/{update-copyright,indent} \
doc/doxygen/*/*.{h,h.in} \
doc/doxygen/scripts/*.pl \
doc/screen.css)
$(find include/ source/ examples/ | egrep '\.(h|in|cc|cu)$')
$(find cmake/ | egrep '\.(cmake|in|cc)$')
$(find . -name CMakeLists.txt)
$(find tests/ | egrep '\.(h|cc)$')
$(find doc/ | egrep '\.html$')
"


for i in $files ; do
# get the last year this file was modified from the git log.
# we don't want to see patches that just updated the copyright
# year, so output the dates and log messages of the last 3
# commits, throw away all that mention both the words
# "update" and "copyright", and take the year of the first
# message that remains
#
# (it should be enough to look at the last 2 messages since
# ideally no two successive commits should have updated the
# copyright year. let's err on the safe side and take the last
# 3 commits.)
last_year=`git log -n 3 --date=short --format="format:%cd %s" $i | \
egrep -i -v "update.*copyright|copyright.*update" | \
head -n 1 | \
perl -p -e 's/^(\d\d\d\d)-.*/\1/g;'`

# get the first year this file was modified from the actual
# file. this may predate the git log if the file was copied
# from elsewhere
first_year=`cat $i | egrep 'Copyright \(C\) [0-9]{4}' | \
perl -p -e "s/.*Copyright \(C\) (\d{4}).*/\1/g;"`

# print a status message. we really only have to update
# the copyright year if the first and last year are
# different
echo "Processing $i: ${first_year} - ${last_year}"
if test ! "${first_year}" = "${last_year}" ; then
perl -pi -e "s/(Copyright \(C\) \d{4})( - \d{4})?(, \d{4}( - \d{4})?)*/\1 - ${last_year}/g;" $i
# Get the last year this file was modified from the git log. We don't
# want to see patches that just updated the copyright year. We thus pick
# the most recent commit whose message
# - does not mention both the words "update" and "copyright", and
# - does not contain "Update license headers".
#

last_year=`git log -n 3 --date=short --format="format:%cd %s" ${file} | \
egrep -i -v "update.*copyright|copyright.*update|Update license headers" | \
head -n 1 | \
perl -p -e 's/^(\d\d\d\d)-.*/\1/g;'`

#
# It should not happen that the grep removes all three of the most recent
# commits, but if it does, run the git log command again with the full
# history:
#

[ -z "$last_year" ] && last_year=`git log --date=short --format="format:%cd %s" ${file} | \
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Out of curiosity, why didn't you just write this as if test -z "$last_year" ; then ...?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is really purely a stylistic choice:

  • test and [ are the same shell built-in. And [ -z ... ] looks better than test -z .... I should have probably used the extended test pattern [[ ... ]] consistently...
  • I tend to use the [[ ... ]] && VARIABLE="..." a lot for simple conditional variable assignments. That's somewhat a bash coding style coming from Gentoo ebuilds. But yes, we could have used an if statement as well.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see. It's perhaps a bit harder to read, but I'm ok with the stylistic choice.

egrep -i -v "update.*copyright|copyright.*update|Update license headers" | \
head -n 1 | \
perl -p -e 's/^(\d\d\d\d)-.*/\1/g;'`

if [ -z "$last_year" ]; then
echo "Skipping ${file}: internal error: could not determine last copyright year"
return
fi
done

#
# Get the first year this file was modified from the actual file. This is
# fast but might be inaccurate. Only the first matching line within the
# first 13 lines is used, so the result is always a single year even if
# the header contains several copyright lines.
#

first_year="$(head -n 13 "${file}" \
  | grep -E 'Copyright \(C\) [0-9]{4}' \
  | head -n 1 \
  | perl -p -e 's/.*Copyright \(C\) (\d{4}).*/$1/;')"

if [ -z "${first_year}" ]; then
  echo "Skipping ${file}: internal error: could not determine first copyright year"
  return
fi

# ${accurate_first_year} holds the command name "true" or "false".
if $accurate_first_year; then
  #
  # Get the first (plausible) year this file was modified. While each file
  # (ideally) already contains a start year, experience suggests that this
  # information is typically wildly incorrect because files (and copyright
  # headers) get copied all the time. We thus grab this information from
  # git history.
  #

  #
  # First grab the oldest commit from the file history
  #

  git_first_year="$(git log --reverse --date=short --format="format:%cd %s" -- "${file}" \
    | head -n 1 \
    | perl -p -e 's/^(\d\d\d\d)-.*/$1/;')"

  #
  # Take the minimum of what is stated in the header and the first year
  # the file was created in git. The value is only used if it really is a
  # four-digit year: an empty string would evaluate to 0 in the arithmetic
  # comparison and would then be taken as the first year.
  #
  if [[ "${git_first_year}" =~ ^[0-9]{4}$ ]] && (( git_first_year < first_year )); then
    first_year="${git_first_year}"
  fi

  #
  # Then, perform a more thorough search with `--diff-filter=A` to skip
  # all but the first commit in which the file was created. We try to
  # find and follow file renames with the `--follow` toggle.
  #
  # In corner cases, however, "git log --follow" can be way too
  # aggressive. As a sanity check let's restrict the admissible date
  # range to the year determined so far, ${first_year}:
  #
  git_first_year="$(git log --since="${first_year}" --follow --diff-filter=A --date=short --format="format:%cd %s" -- "${file}" \
    | tail -n 1 \
    | perl -p -e 's/^(\d\d\d\d)-.*/$1/;')"

  #
  # If the above git command produced an output, use it. Otherwise fall
  # back to ${first_year}:
  #
  first_year="${git_first_year:-${first_year}}"
fi

#
# Print a status message and update the copyright line. A single year is
# written if first and last year coincide, otherwise a range. The perl
# substitution is restricted to the first 13 lines of the file.
#

if [ "${first_year}" = "${last_year}" ]; then
  years="${last_year}"
else
  years="${first_year} - ${last_year}"
fi

echo "Processing ${file}: ${years}"
perl -pi -e "s/(^.. Copyright \(C\)) \d{4}( - \d{4})?(, \d{4}( - \d{4})?)*/\1 ${years}/g if 1..13;" "${file}"
}

#
# Run copyright update in parallel:
#

#
# Call update_copyright, in the background, for every regular file found
# below the given directories whose full path matches a regular expression.
#
# Globals:   processes (read) - number of jobs started per batch
# Arguments: $1 - whitespace-separated list of directories to search
#            $2 - extended regular expression (find -regextype egrep) that
#                 has to match the complete path as printed by find
#            $3 - accepted for backward compatibility, not used
#
process()
{
  local i=0
  local path

  # ${1} is deliberately left unquoted: it is a whitespace-separated list
  # of directories, each of which has to reach find as a separate word.
  # shellcheck disable=SC2086
  find ${1} -type f -regextype egrep -regex "${2}" -print0 | {
    while IFS= read -r -d '' path; do
      # Before starting a new batch of ${processes} jobs, wait for the
      # previous batch to finish.
      if (( i % processes == 0 )); then
        wait
      fi
      i=$(( i + 1 ))
      update_copyright "${path}" &
    done

    # Wait for the final batch as well. Without this the last jobs would
    # still be running when this function returns.
    wait
  }
}

# find's -regex has to match the entire path, and every path found below
# "." starts with "./". The pattern is anchored accordingly so that the
# top-level CMakeLists.txt and CTestConfig.cmake are actually picked up.
process "." '\./(CMakeLists\.txt|CTestConfig\.cmake)' update_copyright
process "cmake contrib doc examples include source tests" ".*" update_copyright