Skip to content

Commit

Permalink
Contrib: update-copyright.sh general improvements
Browse files Browse the repository at this point in the history
 - make lookup of last modification year more robust

 - add a --pedantic mode that greps the first year a file was modified
   from git log

 - restructure processing into a separate bash function and add parallel
   processing structure

 - run script on all files of the repository
  • Loading branch information
tamiko committed Feb 22, 2024
1 parent acb749f commit d034789
Showing 1 changed file with 126 additions and 45 deletions.
171 changes: 126 additions & 45 deletions contrib/utilities/update-copyright.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
##
## ---------------------------------------------------------------------

set -u

# Purpose: Update the copyright year of every file based on the last
# modification recorded in the git logs
Expand All @@ -24,51 +25,131 @@ if test ! -d source -o ! -d include -o ! -d examples ; then
exit
fi

# Number of copyright updates that may run concurrently (set with -j N).
processes=1
# When true (set with --pedantic) the first year is taken from the git
# history instead of the existing copyright line.
accurate_first_year=false

#
# Print an error message ($1) followed by the usage string to stderr and
# terminate the script with exit status 1.
#
die_with_usage()
{
  echo "Error: ${1}" >&2
  echo "Usage: update-copyright.sh [--pedantic] [-j N]" >&2
  exit 1
}

#
# Parse the command line options of the script.
#
# Globals:   processes (written), accurate_first_year (written)
# Arguments: the positional parameters of the script
# Returns:   0 on success; exits with status 1 on an invalid option
#
parse_arguments()
{
  # Test the argument count rather than comparing "$@" against the empty
  # string: the latter concatenates all arguments into a single word.
  while (( $# > 0 )); do
    case "${1}" in
      --pedantic)
        accurate_first_year=true
        shift;;
      -j)
        shift
        # The value is later used as a divisor, so insist on a positive
        # integer instead of accepting an arbitrary word.
        if (( $# == 0 )) || ! [[ "${1}" =~ ^[1-9][0-9]*$ ]]; then
          die_with_usage "»-j« must be followed by a number"
        fi
        processes="${1}"
        shift;;
      *)
        die_with_usage "invalid option »${1}«";;
    esac
  done
}

parse_arguments "$@"

#
# A shell function that updates the copyright string for a given file $1:
#
# The copyright line within the first 13 lines of the file is rewritten to
# "<first year> - <last year>", or to a single year if both coincide.
#
# Globals:   accurate_first_year (read; "true" selects the git based lookup
#            of the first year)
# Arguments: $1 - path of the file to update
# Outputs:   a single status line on stdout
#

update_copyright()
{
  local file="${1}"
  local first_year=""
  local last_year=""
  # Commits that merely touched the copyright header are ignored when
  # looking for the last real modification:
  local -r header_only_commits="update.*copyright|copyright.*update|Update license headers"
  # Reduces a "YYYY-MM-DD subject" line to the year:
  local -r extract_year='s/^(\d\d\d\d)-.*/\1/g;'

  if ! [ -f "${file}" ]; then
    echo "Skipping ${file}: not a file"
    return
  fi

  if ! head -n 13 -- "${file}" | grep -q "^.. This file is part of the deal.II library.$" ; then
    echo "Skipping ${file}: no deal.II copyright header"
    return
  fi

  #
  # Get the last year this file was modified from the git log. We don't
  # want to see patches that just updated the copyright year, thus find the
  # first commit that
  #  - does not mention both the words "update" and "copyright", as well as
  #  - "Update license headers".
  #

  last_year=$(git log -n 3 --date=short --format="format:%cd %s" -- "${file}" | \
    grep -E -i -v "${header_only_commits}" | \
    head -n 1 | \
    perl -p -e "${extract_year}")

  #
  # It should not happen that the grep removes all 3 most recent commits
  # simultaneously, but if it does then run the git log command again with
  # full history:
  #

  if [ -z "${last_year}" ]; then
    last_year=$(git log --date=short --format="format:%cd %s" -- "${file}" | \
      grep -E -i -v "${header_only_commits}" | \
      head -n 1 | \
      perl -p -e "${extract_year}")
  fi

  if [[ "${accurate_first_year:-false}" == true ]]; then
    #
    # Get the first (plausible) year this file was modified. While each file
    # (ideally) already contains a start year, experience suggests that this
    # information is typically wildly incorrect because files (and copyright
    # headers) get copied all the time. We thus grab this information from
    # git history (which is a rather expensive operation - but so is running
    # this script in the first place).
    #
    #  * In order to make the git log lookup as fast as possible we use
    #    `--diff-filter=A` to skip all but the first commit in which the file
    #    was created.
    #
    # We try to find simple renames with the `--follow` toggle. Note that
    #  - we enforce a 90% similarity when trying to find a potential rename
    #    candidate with `-M90%`
    #  - we specifically do not use `--find-copies` here because we want to
    #    track the beginning of every individual file (and not the start date
    #    of the original one).
    #  - `--reverse` cannot be used in combination with `--follow`. So we
    #    simply `tail` the output.
    #
    first_year=$(git log --follow -M90% --diff-filter=A --date=short --format="format:%cd %s" -- "${file}" | \
      tail -n 1 | \
      perl -p -e "${extract_year}")
  else
    #
    # Get the first year this file was modified from the actual file. This is
    # fast but might be inaccurate. Only the first matching line is used so
    # that additional copyright lines cannot produce a multi-line value.
    #
    first_year=$(grep -E 'Copyright \(C\) [0-9]{4}' -- "${file}" | \
      head -n 1 | \
      perl -p -e 's/.*Copyright \(C\) (\d{4}).*/\1/g;')
  fi

  #
  # Both years are interpolated into a perl substitution below. Refuse to
  # touch the file unless each one is a plain four digit year; this happens,
  # for example, for files that are not (yet) tracked by git.
  #

  if ! [[ "${first_year}" =~ ^[0-9]{4}$ && "${last_year}" =~ ^[0-9]{4}$ ]]; then
    echo "Skipping ${file}: unable to determine copyright years"
    return
  fi

  #
  # Print a status message and update copyright line:
  #

  if [ "${first_year}" = "${last_year}" ]; then
    echo "Processing ${file}: ${last_year}"
    perl -pi -e "s/(^.. Copyright \(C\)) \d{4}( - \d{4})?(, \d{4}( - \d{4})?)*/\1 ${last_year}/g if 1..13;" -- "${file}"
  else
    echo "Processing ${file}: ${first_year} - ${last_year}"
    perl -pi -e "s/(^.. Copyright \(C\)) \d{4}( - \d{4})?(, \d{4}( - \d{4})?)*/\1 ${first_year} - ${last_year}/g if 1..13;" -- "${file}"
  fi
}

#
# Run copyright update in parallel:
#
# Calls a callback for every regular file found below the given directories
# and keeps up to ${processes} invocations running at the same time. The
# function only returns once every invocation has finished.
#
# Globals:   processes (read; values that are not a positive integer are
#            treated as 1)
# Arguments: $1 - whitespace separated list of directories handed to find
#            $2 - extended regular expression; find matches it against the
#                 whole path of each file
#            $3 - name of the command to call with the file name as its only
#                 argument (defaults to update_copyright)
#

process()
{
  local callback="${3:-update_copyright}"
  local batch_size="${processes:-1}"
  local file
  local started=0

  # The batch size is used as a divisor below, so fall back to serial
  # processing for anything that is not a positive integer.
  [[ "${batch_size}" =~ ^[1-9][0-9]*$ ]] || batch_size=1

  # ${1} is intentionally unquoted: it carries a list of directories that
  # has to be split into separate arguments for find. The loop reads from a
  # process substitution instead of a pipe so that the background jobs are
  # children of this shell and can be waited for after the loop.
  while IFS= read -r file; do
    # Let the current batch drain before starting the next one.
    if (( started % batch_size == 0 )); then
      wait
    fi
    started=$(( started + 1 ))
    "${callback}" "${file}" &
  done < <(find ${1} -type f -regextype egrep -regex "${2}")

  # Do not return while the final batch is still running.
  wait
}

# find matches -regex against the whole path, which starts with "./" when
# searching ".", so the two top-level file names are anchored accordingly.
# Files of the same name inside the subdirectories are covered by the second
# call.
process "." '\./(CMakeLists\.txt|CTestConfig\.cmake)' update_copyright
process "cmake contrib doc examples include source tests" ".*" update_copyright

0 comments on commit d034789

Please sign in to comment.