Permalink
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
executable file 13298 lines (12462 sloc) 531 KB
#!/usr/bin/env perl
# cloc -- Count Lines of Code {{{1
# Copyright (C) 2006-2018 Al Danial <al.danial@gmail.com>
# First release August 2006
#
# Includes code from:
# - SLOCCount v2.26
# http://www.dwheeler.com/sloccount/
# by David Wheeler.
# - Regexp::Common v2013031301
# http://search.cpan.org/~abigail/Regexp-Common-2013031301/lib/Regexp/Common.pm
# by Damian Conway and Abigail.
# - Win32::Autoglob
# http://search.cpan.org/~sburke/Win32-Autoglob-1.01/Autoglob.pm
# by Sean M. Burke.
# - Algorithm::Diff
# http://search.cpan.org/~tyemq/Algorithm-Diff-1.1902/lib/Algorithm/Diff.pm
# by Tye McQueen.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details:
# <http://www.gnu.org/licenses/gpl.txt>.
#
# 1}}}
my $VERSION = "1.77"; # odd number == beta; even number == stable
my $URL = "github.com/AlDanial/cloc"; # 'https://' pushes header too wide
require 5.006;
# use modules {{{1
use warnings;
use strict;
use Getopt::Long;
use File::Basename;
use File::Temp qw { tempfile tempdir };
use File::Find;
use File::Path;
use File::Spec;
use IO::File;
use List::Util qw( min max );
use Cwd;
use POSIX qw { strftime ceil};
# Parallel::ForkManager isn't in the standard distribution.
# Use it only if installed, and only if --processes=N is given.
# The module load happens in get_max_processes().
my $HAVE_Parallel_ForkManager = 0;
# Digest::MD5 isn't in the standard distribution. Use it only if installed.
my $HAVE_Digest_MD5 = 0;
eval "use Digest::MD5;";
if (defined $Digest::MD5::VERSION) {
$HAVE_Digest_MD5 = 1;
} else {
warn "Digest::MD5 not installed; will skip file uniqueness checks.\n";
}
# Time::HiRes became standard with Perl 5.8
my $HAVE_Time_HiRes = 0;
eval "use Time::HiRes;";
$HAVE_Time_HiRes = 1 if defined $Time::HiRes::VERSION;
my $HAVE_Rexexp_Common;
# Regexp::Common isn't in the standard distribution. It will
# be installed in a temp directory if necessary.
BEGIN {
if (eval "use Regexp::Common;") {
$HAVE_Rexexp_Common = 1;
} else {
$HAVE_Rexexp_Common = 0;
}
}
my $HAVE_Algorith_Diff = 0;
# Algorithm::Diff isn't in the standard distribution. It will
# be installed in a temp directory if necessary.
eval "use Algorithm::Diff qw ( sdiff ) ";
if (defined $Algorithm::Diff::VERSION) {
$HAVE_Algorith_Diff = 1;
} else {
Install_Algorithm_Diff();
}
# print "2 HAVE_Algorith_Diff = $HAVE_Algorith_Diff\n";
# test_alg_diff($ARGV[$#ARGV - 1], $ARGV[$#ARGV]); die;
# die "Hre=$HAVE_Rexexp_Common Had=$HAVE_Algorith_Diff";
# Uncomment next two lines when building Windows executable with perl2exe
# or if running on a system that already has Regexp::Common.
#use Regexp::Common;
#$HAVE_Rexexp_Common = 1;
#perl2exe_include "Regexp/Common/whitespace.pm"
#perl2exe_include "Regexp/Common/URI.pm"
#perl2exe_include "Regexp/Common/URI/fax.pm"
#perl2exe_include "Regexp/Common/URI/file.pm"
#perl2exe_include "Regexp/Common/URI/ftp.pm"
#perl2exe_include "Regexp/Common/URI/gopher.pm"
#perl2exe_include "Regexp/Common/URI/http.pm"
#perl2exe_include "Regexp/Common/URI/pop.pm"
#perl2exe_include "Regexp/Common/URI/prospero.pm"
#perl2exe_include "Regexp/Common/URI/news.pm"
#perl2exe_include "Regexp/Common/URI/tel.pm"
#perl2exe_include "Regexp/Common/URI/telnet.pm"
#perl2exe_include "Regexp/Common/URI/tv.pm"
#perl2exe_include "Regexp/Common/URI/wais.pm"
#perl2exe_include "Regexp/Common/CC.pm"
#perl2exe_include "Regexp/Common/SEN.pm"
#perl2exe_include "Regexp/Common/number.pm"
#perl2exe_include "Regexp/Common/delimited.pm"
#perl2exe_include "Regexp/Common/profanity.pm"
#perl2exe_include "Regexp/Common/net.pm"
#perl2exe_include "Regexp/Common/zip.pm"
#perl2exe_include "Regexp/Common/comment.pm"
#perl2exe_include "Regexp/Common/balanced.pm"
#perl2exe_include "Regexp/Common/lingua.pm"
#perl2exe_include "Regexp/Common/list.pm"
#perl2exe_include "File/Glob.pm"
use Text::Tabs qw { expand };
use Cwd qw { cwd };
use File::Glob;
# 1}}}
# Usage information, options processing. {{{1
my $ON_WINDOWS = 0;
$ON_WINDOWS = 1 if ($^O =~ /^MSWin/) or ($^O eq "Windows_NT");
if ($ON_WINDOWS and $ENV{'SHELL'}) {
if ($ENV{'SHELL'} =~ m{^/}) {
$ON_WINDOWS = 0; # make Cygwin look like Unix
} else {
$ON_WINDOWS = 1; # MKS defines $SHELL but still acts like Windows
}
}
my $config_file = '';
if ( $ENV{'HOME'} ) {
$config_file = File::Spec->catfile( $ENV{'HOME'}, '.config', 'cloc', 'options.txt');
} elsif ( $ENV{'APPDATA'} and $ON_WINDOWS ) {
$config_file = File::Spec->catfile( $ENV{'APPDATA'}, 'cloc');
}
my $NN = chr(27) . "[0m"; # normal
$NN = "" if $ON_WINDOWS or !(-t STDERR); # -t STDERR: is it a terminal?
my $BB = chr(27) . "[1m"; # bold
$BB = "" if $ON_WINDOWS or !(-t STDERR);
my $script = basename $0;
my $brief_usage = "
cloc -- Count Lines of Code
Usage:
$script [options] <file(s)/dir(s)/git hash(es)>
Count physical lines of source code and comments in the given files
(may be archives such as compressed tarballs or zip files) and/or
recursively below the given directories or git commit hashes.
Example: cloc src/ include/ main.c
$script [options] --diff <set1> <set2>
Compute differences of physical lines of source code and comments
between any pairwise combination of directory names, archive
files or git commit hashes.
Example: cloc --diff Python-3.5.tar.xz python-3.6/
$script --help shows full documentation on the options.
http://$URL has numerous examples and more information.
";
my $usage = "
Usage: $script [options] <file(s)/dir(s)/git hash(es)> | <set 1> <set 2> | <report files>
Count, or compute differences of, physical lines of source code in the
given files (may be archives such as compressed tarballs or zip files,
or git commit hashes or branch names) and/or recursively below the
given directories.
${BB}Input Options${NN}
--extract-with=<cmd> This option is only needed if cloc is unable
to figure out how to extract the contents of
the input file(s) by itself.
Use <cmd> to extract binary archive files (e.g.:
.tar.gz, .zip, .Z). Use the literal '>FILE<' as
a stand-in for the actual file(s) to be
extracted. For example, to count lines of code
in the input files
gcc-4.2.tar.gz perl-5.8.8.tar.gz
on Unix use
--extract-with='gzip -dc >FILE< | tar xf -'
or, if you have GNU tar,
--extract-with='tar zxf >FILE<'
and on Windows use, for example:
--extract-with=\"\\\"c:\\Program Files\\WinZip\\WinZip32.exe\\\" -e -o >FILE< .\"
(if WinZip is installed there).
--list-file=<file> Take the list of file and/or directory names to
process from <file>, which has one file/directory
name per line. Only exact matches are counted;
relative path names will be resolved starting from
the directory where cloc is invoked.
See also --exclude-list-file.
--vcs=<VCS> Invoke a system call to <VCS> to obtain a list of
files to work on. If <VCS> is 'git', then will
invoke 'git ls-files' to get a file list and
'git submodule status' to get a list of submodules
whose contents will be ignored. See also --git
which accepts git commit hashes and branch names.
If <VCS> is 'svn' then will invoke 'svn list -R'.
The primary benefit is that cloc will then skip
files explicitly excluded by the versioning tool
in question, ie, those in .gitignore or have the
svn:ignore property.
Alternatively <VCS> may be any system command
that generates a list of files.
Note: cloc must be in a directory which can read
the files as they are returned by <VCS>. cloc will
not download files from remote repositories.
'svn list -R' may refer to a remote repository
to obtain file names (and therefore may require
authentication to the remote repository), but
the files themselves must be local.
--unicode Check binary files to see if they contain Unicode
expanded ASCII text. This causes performance to
drop noticeably.
${BB}Processing Options${NN}
--autoconf Count .in files (as processed by GNU autoconf) of
recognized languages. See also --no-autogen.
--by-file Report results for every source file encountered.
--by-file-by-lang Report results for every source file encountered
in addition to reporting by language.
--config <file> Read command line switches from <file> instead of
the default location of $config_file.
The file should contain one switch, along with
arguments (if any), per line. Blank lines and lines
beginning with '#' are skipped. Options given on
the command line take priority over entries read from
the file.
--count-and-diff <set1> <set2>
First perform direct code counts of source file(s)
of <set1> and <set2> separately, then perform a diff
of these. Inputs may be pairs of files, directories,
or archives. If --out or --report-file is given,
three output files will be created, one for each
of the two counts and one for the diff. See also
--diff, --diff-alignment, --diff-timeout,
--ignore-case, --ignore-whitespace.
--diff <set1> <set2> Compute differences in code and comments between
source file(s) of <set1> and <set2>. The inputs
may be any mix of files, directories, archives,
or git commit hashes. Use --diff-alignment to
generate a list showing which file pairs where
compared. See also --count-and-diff, --diff-alignment,
--diff-timeout, --ignore-case, --ignore-whitespace.
--diff-timeout <N> Ignore files which take more than <N> seconds
to process. Default is 10 seconds. Setting <N>
to 0 allows unlimited time. (Large files with many
repeated lines can cause Algorithm::Diff::sdiff()
to take hours.)
--follow-links [Unix only] Follow symbolic links to directories
(sym links to files are always followed).
--force-lang=<lang>[,<ext>]
Process all files that have a <ext> extension
with the counter for language <lang>. For
example, to count all .f files with the
Fortran 90 counter (which expects files to
end with .f90) instead of the default Fortran 77
counter, use
--force-lang=\"Fortran 90\",f
If <ext> is omitted, every file will be counted
with the <lang> counter. This option can be
specified multiple times (but that is only
useful when <ext> is given each time).
See also --script-lang, --lang-no-ext.
--force-lang-def=<file> Load language processing filters from <file>,
then use these filters instead of the built-in
filters. Note: languages which map to the same
file extension (for example:
MATLAB/Mathematica/Objective C/MUMPS/Mercury;
Pascal/PHP; Lisp/OpenCL; Lisp/Julia; Perl/Prolog)
will be ignored as these require additional
processing that is not expressed in language
definition files. Use --read-lang-def to define
new language filters without replacing built-in
filters (see also --write-lang-def).
--git Forces the inputs to be interpreted as git targets
(commit hashes, branch names, et cetera) if these
are not first identified as file or directory
names. This option overrides the --vcs=git logic
if this is given; in other words, --git gets its
list of files to work on directly from git using
the hash or branch name rather than from
'git ls-files'. This option can be used with
--diff to perform line count diffs between git
commits, or between a git commit and a file,
directory, or archive. Use -v/--verbose to see
the git system commands cloc issues.
--ignore-whitespace Ignore horizontal white space when comparing files
with --diff. See also --ignore-case.
--ignore-case Ignore changes in case; consider upper- and lower-
case letters equivalent when comparing files with
--diff. See also --ignore-whitespace.
--lang-no-ext=<lang> Count files without extensions using the <lang>
counter. This option overrides internal logic
for files without extensions (where such files
are checked against known scripting languages
by examining the first line for #!). See also
--force-lang, --script-lang.
--max-file-size=<MB> Skip files larger than <MB> megabytes when
traversing directories. By default, <MB>=100.
cloc's memory requirement is roughly twenty times
larger than the largest file so running with
files larger than 100 MB on a computer with less
than 2 GB of memory will cause problems.
Note: this check does not apply to files
explicitly passed as command line arguments.
--no-autogen[=list] Ignore files generated by code-production systems
such as GNU autoconf. To see a list of these files
(then exit), run with --no-autogen list
See also --autoconf.
--original-dir [Only effective in combination with
--strip-comments] Write the stripped files
to the same directory as the original files.
--read-binary-files Process binary files in addition to text files.
This is usually a bad idea and should only be
attempted with text files that have embedded
binary data.
--read-lang-def=<file> Load new language processing filters from <file>
and merge them with those already known to cloc.
If <file> defines a language cloc already knows
about, cloc's definition will take precedence.
Use --force-lang-def to over-ride cloc's
definitions (see also --write-lang-def ).
--script-lang=<lang>,<s> Process all files that invoke <s> as a #!
scripting language with the counter for language
<lang>. For example, files that begin with
#!/usr/local/bin/perl5.8.8
will be counted with the Perl counter by using
--script-lang=Perl,perl5.8.8
The language name is case insensitive but the
name of the script language executable, <s>,
must have the right case. This option can be
specified multiple times. See also --force-lang,
--lang-no-ext.
--sdir=<dir> Use <dir> as the scratch directory instead of
letting File::Temp chose the location. Files
written to this location are not removed at
the end of the run (as they are with File::Temp).
--skip-uniqueness Skip the file uniqueness check. This will give
a performance boost at the expense of counting
files with identical contents multiple times
(if such duplicates exist).
--stdin-name=<file> Give a file name to use to determine the language
for standard input. (Use - as the input name to
receive source code via STDIN.)
--strip-comments=<ext> For each file processed, write to the current
directory a version of the file which has blank
and commented lines removed (in-line comments
persist). The name of each stripped file is the
original file name with .<ext> appended to it.
It is written to the current directory unless
--original-dir is on.
--strip-str-comments Replace comment markers embedded in strings with
'xx'. This attempts to work around a limitation
in Regexp::Common::Comment where comment markers
embedded in strings are seen as actual comment
markers and not strings, often resulting in a
'Complex regular subexpression recursion limit'
warning and incorrect counts. There are two
disadvantages to using this switch: 1/code count
performance drops, and 2/code generated with
--strip-comments will contain different strings
where ever embedded comments are found.
--sum-reports Input arguments are report files previously
created with the --report-file option. Makes
a cumulative set of results containing the
sum of data from the individual report files.
--processes=NUM [Available only on systems with a recent version
of the Parallel::ForkManager module. Not
available on Windows.] Sets the maximum number of
cores that cloc uses. The default value of 0
disables multiprocessing.
--unix Override the operating system autodetection
logic and run in UNIX mode. See also
--windows, --show-os.
--use-sloccount If SLOCCount is installed, use its compiled
executables c_count, java_count, pascal_count,
php_count, and xml_count instead of cloc's
counters. SLOCCount's compiled counters are
substantially faster than cloc's and may give
a performance improvement when counting projects
with large files. However, these cloc-specific
features will not be available: --diff,
--count-and-diff, --strip-comments, --unicode.
--windows Override the operating system autodetection
logic and run in Microsoft Windows mode.
See also --unix, --show-os.
${BB}Filter Options${NN}
--exclude-dir=<D1>[,D2,] Exclude the given comma separated directories
D1, D2, D3, et cetera, from being scanned. For
example --exclude-dir=.cache,test will skip
all files and subdirectories that have /.cache/
or /test/ as their parent directory.
Directories named .bzr, .cvs, .hg, .git, .svn,
and .snapshot are always excluded.
This option only works with individual directory
names so including file path separators is not
allowed. Use --fullpath and --not-match-d=<regex>
to supply a regex matching multiple subdirectories.
--exclude-ext=<ext1>[,<ext2>[...]]
Do not count files having the given file name
extensions.
--exclude-lang=<L1>[,L2[...]]
Exclude the given comma separated languages
L1, L2, L3, et cetera, from being counted.
--exclude-list-file=<file> Ignore files and/or directories whose names
appear in <file>. <file> should have one file
name per line. Only exact matches are ignored;
relative path names will be resolved starting from
the directory where cloc is invoked.
See also --list-file.
--fullpath Modifies the behavior of --match-f, --not-match-f,
and --not-match-d to include the file's path
in the regex, not just the file's basename.
(This does not expand each file to include its
absolute path, instead it uses as much of
the path as is passed in to cloc.)
Note: --match-d always looks at the full
path and therefore is unaffected by --fullpath.
--include-ext=<ext1>[,ext2[...]]
Count only languages having the given comma
separated file extensions. Use --show-ext to
see the recognized extensions.
--include-lang=<L1>[,L2[...]]
Count only the given comma separated languages
L1, L2, L3, et cetera. Use --show-lang to see
the list of recognized languages.
--match-d=<regex> Only count files in directories matching the Perl
regex. For example
--match-d='/(src|include)/'
only counts files in directories containing
/src/ or /include/. Unlike --not-match-d,
--match-f, and --not-match-f, --match-d always
compares the fully qualified path against the
regex.
--not-match-d=<regex> Count all files except those in directories
matching the Perl regex. Only the trailing
directory name is compared, for example, when
counting in /usr/local/lib, only 'lib' is
compared to the regex.
Add --fullpath to compare parent directories to
the regex.
Do not include file path separators at the
beginning or end of the regex.
--match-f=<regex> Only count files whose basenames match the Perl
regex. For example
--match-f='^[Ww]idget'
only counts files that start with Widget or widget.
Add --fullpath to include parent directories
in the regex instead of just the basename.
--not-match-f=<regex> Count all files except those whose basenames
match the Perl regex. Add --fullpath to include
parent directories in the regex instead of just
the basename.
--skip-archive=<regex> Ignore files that end with the given Perl regular
expression. For example, if given
--skip-archive='(zip|tar(\.(gz|Z|bz2|xz|7z))?)'
the code will skip files that end with .zip,
.tar, .tar.gz, .tar.Z, .tar.bz2, .tar.xz, and
.tar.7z.
--skip-win-hidden On Windows, ignore hidden files.
${BB}Debug Options${NN}
--categorized=<file> Save names of categorized files to <file>.
--counted=<file> Save names of processed source files to <file>.
--diff-alignment=<file> Write to <file> a list of files and file pairs
showing which files were added, removed, and/or
compared during a run with --diff. This switch
forces the --diff mode on.
--explain=<lang> Print the filters used to remove comments for
language <lang> and exit. In some cases the
filters refer to Perl subroutines rather than
regular expressions. An examination of the
source code may be needed for further explanation.
--help Print this usage information and exit.
--found=<file> Save names of every file found to <file>.
--ignored=<file> Save names of ignored files and the reason they
were ignored to <file>.
--print-filter-stages Print processed source code before and after
each filter is applied.
--show-ext[=<ext>] Print information about all known (or just the
given) file extensions and exit.
--show-lang[=<lang>] Print information about all known (or just the
given) languages and exit.
--show-os Print the value of the operating system mode
and exit. See also --unix, --windows.
-v[=<n>] Verbose switch (optional numeric value).
-verbose[=<n>] Long form of -v.
--version Print the version of this program and exit.
--write-lang-def=<file> Writes to <file> the language processing filters
then exits. Useful as a first step to creating
custom language definitions (see also
--force-lang-def, --read-lang-def).
${BB}Output Options${NN}
--3 Print third-generation language output.
(This option can cause report summation to fail
if some reports were produced with this option
while others were produced without it.)
--by-percent X Instead of comment and blank line counts, show
these values as percentages based on the value
of X in the denominator:
X = 'c' -> # lines of code
X = 'cm' -> # lines of code + comments
X = 'cb' -> # lines of code + blanks
X = 'cmb' -> # lines of code + comments + blanks
For example, if using method 'c' and your code
has twice as many lines of comments as lines
of code, the value in the comment column will
be 200%. The code column remains a line count.
--csv Write the results as comma separated values.
--csv-delimiter=<C> Use the character <C> as the delimiter for comma
separated files instead of ,. This switch forces
--hide-rate Don't show rate of processing files/lines. This
makes output deterministic.
--json Write the results as JavaScript Object Notation
(JSON) formatted output.
--md Write the results as Markdown-formatted text.
--out=<file> Synonym for --report-file=<file>.
--progress-rate=<n> Show progress update after every <n> files are
processed (default <n>=100). Set <n> to 0 to
suppress progress output (useful when redirecting
output to STDOUT).
--quiet Suppress all information messages except for
the final report.
--report-file=<file> Write the results to <file> instead of STDOUT.
--sql=<file> Write results as SQL create and insert statements
which can be read by a database program such as
SQLite. If <file> is -, output is sent to STDOUT.
--sql-append Append SQL insert statements to the file specified
by --sql and do not generate table creation
statements. Only valid with the --sql option.
--sql-project=<name> Use <name> as the project identifier for the
current run. Only valid with the --sql option.
--sql-style=<style> Write SQL statements in the given style instead
of the default SQLite format. Currently, the
only style option is Oracle.
--sum-one For plain text reports, show the SUM: output line
even if only one input file is processed.
--xml Write the results in XML.
--xsl=<file> Reference <file> as an XSL stylesheet within
the XML output. If <file> is 1 (numeric one),
writes a default stylesheet, cloc.xsl (or
cloc-diff.xsl if --diff is also given).
This switch forces --xml on.
--yaml Write the results in YAML.
";
# Help information for options not yet implemented:
# --inline Process comments that appear at the end
# of lines containing code.
# --html Create HTML files of each input file showing
# comment and code lines in different colors.
$| = 1; # flush STDOUT
my $start_time = get_time();
my (
$opt_categorized ,
$opt_found ,
@opt_force_lang ,
$opt_lang_no_ext ,
@opt_script_lang ,
$opt_count_diff ,
$opt_diff ,
$opt_diff_alignment ,
$opt_diff_timeout ,
$opt_html ,
$opt_ignored ,
$opt_counted ,
$opt_show_ext ,
$opt_show_lang ,
$opt_progress_rate ,
$opt_print_filter_stages ,
$opt_v ,
$opt_vcs ,
$opt_version ,
$opt_exclude_lang ,
$opt_exclude_list_file ,
$opt_exclude_dir ,
$opt_explain ,
$opt_include_ext ,
$opt_include_lang ,
$opt_force_lang_def ,
$opt_read_lang_def ,
$opt_write_lang_def ,
$opt_strip_comments ,
$opt_original_dir ,
$opt_quiet ,
$opt_report_file ,
$opt_sdir ,
$opt_sum_reports ,
$opt_hide_rate ,
$opt_processes ,
$opt_unicode ,
$opt_no3 , # accept it but don't use it
$opt_3 ,
$opt_extract_with ,
$opt_by_file ,
$opt_by_file_by_lang ,
$opt_by_percent ,
$opt_xml ,
$opt_xsl ,
$opt_yaml ,
$opt_csv ,
$opt_csv_delimiter ,
$opt_fullpath ,
$opt_json ,
$opt_md ,
$opt_match_f ,
$opt_not_match_f ,
$opt_match_d ,
$opt_not_match_d ,
$opt_skip_uniqueness ,
$opt_list_file ,
$opt_help ,
$opt_skip_win_hidden ,
$opt_read_binary_files ,
$opt_sql ,
$opt_sql_append ,
$opt_sql_project ,
$opt_sql_style ,
$opt_inline ,
$opt_exclude_ext ,
$opt_ignore_whitespace ,
$opt_ignore_case ,
$opt_follow_links ,
$opt_autoconf ,
$opt_sum_one ,
$opt_stdin_name ,
$opt_force_on_windows ,
$opt_force_on_unix , # actually forces !$ON_WINDOWS
$opt_show_os ,
$opt_skip_archive ,
$opt_max_file_size , # in MB
$opt_use_sloccount ,
$opt_no_autogen ,
$opt_force_git ,
$opt_config_file ,
$opt_strip_str_comments ,
);
my $getopt_success = GetOptions( # {{{1
"by_file|by-file" => \$opt_by_file ,
"by_file_by_lang|by-file-by-lang" => \$opt_by_file_by_lang ,
"categorized=s" => \$opt_categorized ,
"counted=s" => \$opt_counted ,
"include_ext|include-ext=s" => \$opt_include_ext ,
"include_lang|include-lang=s" => \$opt_include_lang ,
"exclude_lang|exclude-lang=s" => \$opt_exclude_lang ,
"exclude_dir|exclude-dir=s" => \$opt_exclude_dir ,
"exclude_list_file|exclude-list-file=s" => \$opt_exclude_list_file ,
"explain=s" => \$opt_explain ,
"extract_with|extract-with=s" => \$opt_extract_with ,
"found=s" => \$opt_found ,
"count_and_diff|count-and-diff" => \$opt_count_diff ,
"diff" => \$opt_diff ,
"diff-alignment|diff_alignment=s" => \$opt_diff_alignment ,
"diff-timeout|diff_timeout=i" => \$opt_diff_timeout ,
"html" => \$opt_html ,
"ignored=s" => \$opt_ignored ,
"quiet" => \$opt_quiet ,
"force_lang_def|force-lang-def=s" => \$opt_force_lang_def ,
"read_lang_def|read-lang-def=s" => \$opt_read_lang_def ,
"show_ext|show-ext:s" => \$opt_show_ext ,
"show_lang|show-lang:s" => \$opt_show_lang ,
"progress_rate|progress-rate=i" => \$opt_progress_rate ,
"print_filter_stages|print-filter-stages" => \$opt_print_filter_stages ,
"report_file|report-file=s" => \$opt_report_file ,
"out=s" => \$opt_report_file ,
"script_lang|script-lang=s" => \@opt_script_lang ,
"sdir=s" => \$opt_sdir ,
"skip_uniqueness|skip-uniqueness" => \$opt_skip_uniqueness ,
"strip_comments|strip-comments=s" => \$opt_strip_comments ,
"original_dir|original-dir" => \$opt_original_dir ,
"sum_reports|sum-reports" => \$opt_sum_reports ,
"hide_rate|hide-rate" => \$opt_hide_rate ,
"processes=n" => \$opt_processes ,
"unicode" => \$opt_unicode ,
"no3" => \$opt_no3 , # ignored
"3" => \$opt_3 ,
"v|verbose:i" => \$opt_v ,
"vcs=s" => \$opt_vcs ,
"version" => \$opt_version ,
"write_lang_def|write-lang-def=s" => \$opt_write_lang_def ,
"xml" => \$opt_xml ,
"xsl=s" => \$opt_xsl ,
"force_lang|force-lang=s" => \@opt_force_lang ,
"lang_no_ext|lang-no-ext=s" => \$opt_lang_no_ext ,
"yaml" => \$opt_yaml ,
"csv" => \$opt_csv ,
"csv_delimeter|csv-delimiter=s" => \$opt_csv_delimiter ,
"json" => \$opt_json ,
"md" => \$opt_md ,
"fullpath" => \$opt_fullpath ,
"match_f|match-f=s" => \$opt_match_f ,
"not_match_f|not-match-f=s" => \$opt_not_match_f ,
"match_d|match-d=s" => \$opt_match_d ,
"not_match_d|not-match-d=s" => \$opt_not_match_d ,
"list_file|list-file=s" => \$opt_list_file ,
"help" => \$opt_help ,
"skip_win_hidden|skip-win-hidden" => \$opt_skip_win_hidden ,
"read_binary_files|read-binary-files" => \$opt_read_binary_files ,
"sql=s" => \$opt_sql ,
"sql_project|sql-project=s" => \$opt_sql_project ,
"sql_append|sql-append" => \$opt_sql_append ,
"sql_style|sql-style=s" => \$opt_sql_style ,
"inline" => \$opt_inline ,
"exclude_ext|exclude-ext=s" => \$opt_exclude_ext ,
"ignore_whitespace|ignore-whitespace" => \$opt_ignore_whitespace ,
"ignore_case|ignore-case" => \$opt_ignore_case ,
"follow_links|follow-links" => \$opt_follow_links ,
"autoconf" => \$opt_autoconf ,
"sum_one|sum-one" => \$opt_sum_one ,
"by_percent|by-percent=s" => \$opt_by_percent ,
"stdin_name|stdin-name=s" => \$opt_stdin_name ,
"windows" => \$opt_force_on_windows ,
"unix" => \$opt_force_on_unix ,
"show_os|show-os" => \$opt_show_os ,
"skip_archive|skip-archive=s" => \$opt_skip_archive ,
"max_file_size|max-file-size=i" => \$opt_max_file_size ,
"use_sloccount|use-sloccount" => \$opt_use_sloccount ,
"no_autogen|no-autogen" => \$opt_no_autogen ,
"git" => \$opt_force_git ,
"config=s" => \$opt_config_file ,
"strip_str_comments|strip-str-comments" => \$opt_strip_str_comments ,
);
# 1}}}
$config_file = $opt_config_file if defined $opt_config_file;
load_from_config_file($config_file, # {{{1
\$opt_by_file ,
\$opt_by_file_by_lang ,
\$opt_categorized ,
\$opt_counted ,
\$opt_include_ext ,
\$opt_include_lang ,
\$opt_exclude_lang ,
\$opt_exclude_dir ,
\$opt_exclude_list_file ,
\$opt_explain ,
\$opt_extract_with ,
\$opt_found ,
\$opt_count_diff ,
\$opt_diff ,
\$opt_diff_alignment ,
\$opt_diff_timeout ,
\$opt_html ,
\$opt_ignored ,
\$opt_quiet ,
\$opt_force_lang_def ,
\$opt_read_lang_def ,
\$opt_show_ext ,
\$opt_show_lang ,
\$opt_progress_rate ,
\$opt_print_filter_stages ,
\$opt_report_file ,
\@opt_script_lang ,
\$opt_sdir ,
\$opt_skip_uniqueness ,
\$opt_strip_comments ,
\$opt_original_dir ,
\$opt_sum_reports ,
\$opt_hide_rate ,
\$opt_processes ,
\$opt_unicode ,
\$opt_3 ,
\$opt_v ,
\$opt_vcs ,
\$opt_version ,
\$opt_write_lang_def ,
\$opt_xml ,
\$opt_xsl ,
\@opt_force_lang ,
\$opt_lang_no_ext ,
\$opt_yaml ,
\$opt_csv ,
\$opt_csv_delimiter ,
\$opt_json ,
\$opt_md ,
\$opt_fullpath ,
\$opt_match_f ,
\$opt_not_match_f ,
\$opt_match_d ,
\$opt_not_match_d ,
\$opt_list_file ,
\$opt_help ,
\$opt_skip_win_hidden ,
\$opt_read_binary_files ,
\$opt_sql ,
\$opt_sql_project ,
\$opt_sql_append ,
\$opt_sql_style ,
\$opt_inline ,
\$opt_exclude_ext ,
\$opt_ignore_whitespace ,
\$opt_ignore_case ,
\$opt_follow_links ,
\$opt_autoconf ,
\$opt_sum_one ,
\$opt_by_percent ,
\$opt_stdin_name ,
\$opt_force_on_windows ,
\$opt_force_on_unix ,
\$opt_show_os ,
\$opt_skip_archive ,
\$opt_max_file_size ,
\$opt_use_sloccount ,
\$opt_no_autogen ,
\$opt_force_git ,
\$opt_strip_str_comments ,
); # 1}}} Not pretty. Not at all.
$opt_by_file = 1 if defined $opt_by_file_by_lang;
my $CLOC_XSL = "cloc.xsl"; # created with --xsl
$CLOC_XSL = "cloc-diff.xsl" if $opt_diff;
die "\n" unless $getopt_success;
print $usage and exit if $opt_help;
my %Exclude_Language = ();
%Exclude_Language = map { $_ => 1 } split(/,/, $opt_exclude_lang)
if $opt_exclude_lang;
my %Exclude_Dir = ();
%Exclude_Dir = map { $_ => 1 } split(/,/, $opt_exclude_dir )
if $opt_exclude_dir ;
die unless exclude_dir_validates(\%Exclude_Dir);
my %Include_Ext = ();
%Include_Ext = map { $_ => 1 } split(/,/, $opt_include_ext)
if $opt_include_ext;
my %Include_Language = ();
%Include_Language = map { $_ => 1 } split(/,/, $opt_include_lang)
if $opt_include_lang;
# Forcibly exclude .svn, .cvs, .hg, .git, .bzr directories. The contents of these
# directories often conflict with files of interest.
$opt_exclude_dir = 1;
$Exclude_Dir{".svn"} = 1;
$Exclude_Dir{".cvs"} = 1;
$Exclude_Dir{".hg"} = 1;
$Exclude_Dir{".git"} = 1;
$Exclude_Dir{".bzr"} = 1;
$Exclude_Dir{".snapshot"} = 1; # NetApp backups
$opt_count_diff = defined $opt_count_diff ? 1 : 0;
$opt_diff = 1 if $opt_diff_alignment;
$opt_exclude_ext = "" unless $opt_exclude_ext;
$opt_ignore_whitespace = 0 unless $opt_ignore_whitespace;
$opt_ignore_case = 0 unless $opt_ignore_case;
$opt_lang_no_ext = 0 unless $opt_lang_no_ext;
$opt_follow_links = 0 unless $opt_follow_links;
if (defined $opt_diff_timeout) {
# if defined but with a value of <= 0, set to 2^31 seconds = 68 years
$opt_diff_timeout = 2**31 unless $opt_diff_timeout > 0;
} else {
$opt_diff_timeout =10; # seconds
}
$opt_csv = 1 if $opt_csv_delimiter;
$ON_WINDOWS = 1 if $opt_force_on_windows;
$ON_WINDOWS = 0 if $opt_force_on_unix;
$opt_max_file_size = 100 unless $opt_max_file_size;
my $HAVE_SLOCCOUNT_c_count = 0;
if (!$ON_WINDOWS and $opt_use_sloccount) {
# Only bother doing this kludgey test is user explicitly wants
# to use SLOCCount. Debian based systems will hang if just doing
# external_utility_exists("c_count")
# if c_count is in $PATH; c_count expects to have input.
$HAVE_SLOCCOUNT_c_count = external_utility_exists("c_count /bin/sh");
}
if ($opt_use_sloccount) {
if (!$HAVE_SLOCCOUNT_c_count) {
warn "c_count could not be found; ignoring --use-sloccount\n";
$opt_use_sloccount = 0;
} else {
warn "Using c_count, php_count, xml_count, pascal_count from SLOCCount\n";
warn "--diff is disabled with --use-sloccount\n" if $opt_diff;
warn "--count-and-diff is disabled with --use-sloccount\n" if $opt_count_diff;
warn "--unicode is disabled with --use-sloccount\n" if $opt_unicode;
warn "--strip-comments is disabled with --use-sloccount\n" if $opt_strip_comments;
$opt_diff = 0;
$opt_count_diff = undef;
$opt_unicode = 0;
$opt_strip_comments = 0;
}
}
$opt_vcs = 0 if $opt_force_git;
my @COUNT_DIFF_ARGV = undef;
my $COUNT_DIFF_report_file = undef;
if ($opt_count_diff) {
die "--count-and-diff requires two arguments; got ", scalar @ARGV, "\n"
if scalar @ARGV != 2;
# prefix with a dummy term so that $opt_count_diff is the
# index into @COUNT_DIFF_ARGV to work on at each pass
@COUNT_DIFF_ARGV = (undef, $ARGV[0],
$ARGV[1],
[$ARGV[0], $ARGV[1]]); # 3rd pass: diff them
$COUNT_DIFF_report_file = $opt_report_file if $opt_report_file;
}
# Options defaults:
$opt_quiet = 1 if ($opt_md or $opt_json) and !defined $opt_report_file;
$opt_progress_rate = 100 unless defined $opt_progress_rate;
$opt_progress_rate = 0 if defined $opt_quiet;
if (!defined $opt_v) {
$opt_v = 0;
} elsif (!$opt_v) {
$opt_v = 1;
}
if (defined $opt_xsl) {
$opt_xsl = $CLOC_XSL if $opt_xsl eq "1";
$opt_xml = 1;
}
my $skip_generate_report = 0;
$opt_sql_style = 0 unless defined $opt_sql_style;
$opt_sql = 0 unless $opt_sql_style or defined $opt_sql;
if ($opt_sql eq "-" || $opt_sql eq "1") { # stream SQL output to STDOUT
$opt_quiet = 1;
$skip_generate_report = 1;
$opt_by_file = 1;
$opt_sum_reports = 0;
$opt_progress_rate = 0;
} elsif ($opt_sql) { # write SQL output to a file
$opt_by_file = 1;
$skip_generate_report = 1;
$opt_sum_reports = 0;
}
if ($opt_sql_style) {
$opt_sql_style = lc $opt_sql_style;
if (!grep { lc $_ eq $opt_sql_style } qw ( Oracle )) {
die "'$opt_sql_style' is not a recognized SQL style.\n";
}
}
$opt_by_percent = '' unless defined $opt_by_percent;
if ($opt_by_percent and $opt_by_percent !~ m/^(c|cm|cb|cmb)$/i) {
die "--by-percent must be either 'c', 'cm', 'cb', or 'cmb'\n";
}
$opt_by_percent = lc $opt_by_percent;
if (defined $opt_vcs) {
if ($opt_vcs eq "git") {
$opt_vcs = "git ls-files";
my @submodules = invoke_generator('git submodule status');
foreach my $SM (@submodules) {
$SM =~ s/^\s+//; # may have leading space
$SM =~ s/\(\S+\)\s*$//; # may end with something like (heads/master)
my ($checksum, $dir) = split(' ', $SM, 2);
$dir =~ s/\s+$//;
$Exclude_Dir{$dir} = 1;
}
} elsif ($opt_vcs eq "svn") {
$opt_vcs = "svn list -R";
}
}
my $list_no_autogen = 0;
if (defined $opt_no_autogen and scalar @ARGV == 1 and $ARGV[0] eq "list") {
$list_no_autogen = 1;
}
die $brief_usage unless defined $opt_version or
defined $opt_show_lang or
defined $opt_show_ext or
defined $opt_show_os or
defined $opt_write_lang_def or
defined $opt_list_file or
defined $opt_vcs or
defined $opt_xsl or
defined $opt_explain or
$list_no_autogen or
scalar @ARGV >= 1;
die "--diff requires two arguments; got ", scalar @ARGV, "\n"
if $opt_diff and !$opt_sum_reports and scalar @ARGV != 2;
die "--diff arguments are identical; nothing done", "\n"
if $opt_diff and !$opt_sum_reports and scalar @ARGV == 2
and $ARGV[0] eq $ARGV[1];
if ($opt_version) {
printf "$VERSION\n";
exit;
}
replace_git_hash_with_tarfile(\@ARGV);
# 1}}}
# Step 1: Initialize global constants. {{{1
#
my $nFiles_Found = 0; # updated in make_file_list
my (%Language_by_Extension, %Language_by_Script,
%Filters_by_Language, %Not_Code_Extension, %Not_Code_Filename,
%Language_by_File, %Scale_Factor, %Known_Binary_Archives,
%EOL_Continuation_re,
);
my $ALREADY_SHOWED_HEADER = 0;
my $ALREADY_SHOWED_XML_SECTION = 0;
my %Error_Codes = ( 'Unable to read' => -1,
'Neither file nor directory' => -2,
'Diff error (quoted comments?)' => -3,
'Diff error, exceeded timeout' => -4,
'Line count, exceeded timeout' => -5,
);
my @Autogen_to_ignore = no_autogen_files($list_no_autogen);
if ($opt_force_lang_def) {
# replace cloc's definitions
read_lang_def(
$opt_force_lang_def , # Sample values:
\%Language_by_Extension, # Language_by_Extension{f} = 'Fortran 77'
\%Language_by_Script , # Language_by_Script{sh} = 'Bourne Shell'
\%Language_by_File , # Language_by_File{makefile} = 'make'
\%Filters_by_Language , # Filters_by_Language{Bourne Shell}[0] =
# [ 'remove_matches' , '^\s*#' ]
\%Not_Code_Extension , # Not_Code_Extension{jpg} = 1
\%Not_Code_Filename , # Not_Code_Filename{README} = 1
\%Scale_Factor , # Scale_Factor{Perl} = 4.0
\%EOL_Continuation_re , # EOL_Continuation_re{C++} = '\\$'
);
} else {
set_constants( #
\%Language_by_Extension, # Language_by_Extension{f} = 'Fortran 77'
\%Language_by_Script , # Language_by_Script{sh} = 'Bourne Shell'
\%Language_by_File , # Language_by_File{makefile} = 'make'
\%Filters_by_Language , # Filters_by_Language{Bourne Shell}[0] =
# [ 'remove_matches' , '^\s*#' ]
\%Not_Code_Extension , # Not_Code_Extension{jpg} = 1
\%Not_Code_Filename , # Not_Code_Filename{README} = 1
\%Scale_Factor , # Scale_Factor{Perl} = 4.0
\%Known_Binary_Archives, # Known_Binary_Archives{.tar} = 1
\%EOL_Continuation_re , # EOL_Continuation_re{C++} = '\\$'
);
if ($opt_no_autogen) {
foreach my $F (@Autogen_to_ignore) { $Not_Code_Filename{ $F } = 1; }
}
}
if ($opt_read_lang_def) {
# augment cloc's definitions (keep cloc's where there are overlaps)
merge_lang_def(
$opt_read_lang_def , # Sample values:
\%Language_by_Extension, # Language_by_Extension{f} = 'Fortran 77'
\%Language_by_Script , # Language_by_Script{sh} = 'Bourne Shell'
\%Language_by_File , # Language_by_File{makefile} = 'make'
\%Filters_by_Language , # Filters_by_Language{Bourne Shell}[0] =
# [ 'remove_matches' , '^\s*#' ]
\%Not_Code_Extension , # Not_Code_Extension{jpg} = 1
\%Not_Code_Filename , # Not_Code_Filename{README} = 1
\%Scale_Factor , # Scale_Factor{Perl} = 4.0
\%EOL_Continuation_re , # EOL_Continuation_re{C++} = '\\$'
);
}
if ($opt_lang_no_ext and !defined $Filters_by_Language{$opt_lang_no_ext}) {
die_unknown_lang($opt_lang_no_ext, "--lang-no-ext")
}
check_scale_existence(\%Filters_by_Language, \%Language_by_Extension,
\%Scale_Factor);
my $nCounted = 0;
# Process command line provided extension-to-language mapping overrides.
# Make a hash of known languages in lower case for easier matching.
my %Recognized_Language_lc = (); # key = language name in lc, value = true name
foreach my $language (keys %Filters_by_Language) {
my $lang_lc = lc $language;
$Recognized_Language_lc{$lang_lc} = $language;
}
my %Forced_Extension = (); # file name extensions which user wants to count
my $All_One_Language = 0; # set to !0 if --force-lang's <ext> is missing
foreach my $pair (@opt_force_lang) {
my ($lang, $extension) = split(',', $pair);
my $lang_lc = lc $lang;
if (defined $extension) {
$Forced_Extension{$extension} = $lang;
die_unknown_lang($lang, "--force-lang")
unless $Recognized_Language_lc{$lang_lc};
$Language_by_Extension{$extension} = $Recognized_Language_lc{$lang_lc};
} else {
# the scary case--count everything as this language
$All_One_Language = $Recognized_Language_lc{$lang_lc};
}
}
foreach my $pair (@opt_script_lang) {
my ($lang, $script_name) = split(',', $pair);
my $lang_lc = lc $lang;
if (!defined $script_name) {
die "The --script-lang option requires a comma separated pair of ".
"strings.\n";
}
die_unknown_lang($lang, "--script-lang")
unless $Recognized_Language_lc{$lang_lc};
$Language_by_Script{$script_name} = $Recognized_Language_lc{$lang_lc};
}
# If user provided file extensions to ignore, add these to
# the exclusion list.
foreach my $ext (map { $_ => 1 } split(/,/, $opt_exclude_ext ) ) {
$ext = lc $ext if $ON_WINDOWS;
$Not_Code_Extension{$ext} = 1;
}
# If SQL or --by-file output is requested, keep track of directory names
# generated by File::Temp::tempdir and used to temporarily hold the results
# of compressed archives. Contents of the SQL table 't' will be much
# cleaner if these meaningless directory names are stripped from the front
# of files pulled from the archives.
my %TEMP_DIR = ();
my $TEMP_OFF = 0; # Needed for --sdir; keep track of the number of
# scratch directories made in this run to avoid
# file overwrites by multiple extractions to same
# sdir.
# Also track locations where temporary installations, if necessary, of
# Algorithm::Diff and/or Regexp::Common are done. Make sure these
# directories are not counted as inputs (ref bug #80 2012-11-23).
my %TEMP_INST = ();
# invert %Language_by_Script hash to get an easy-to-look-up list of known
# scripting languages
my %Script_Language = map { $_ => 1 } values %Language_by_Script ;
# 1}}}
# Step 2: Early exits for display, summation. {{{1
#
print_extension_info( $opt_show_ext ) if defined $opt_show_ext ;
print_language_info( $opt_show_lang, '') if defined $opt_show_lang;
print_language_filters( $opt_explain ) if defined $opt_explain ;
exit if (defined $opt_show_ext) or
(defined $opt_show_lang) or
(defined $opt_explain) or
$list_no_autogen;
Top_of_Processing_Loop:
# Sorry, coding purists. Using a goto to implement --count-and-diff
# which has to do three passes over the main code, starting with
# a clean slate each time.
if ($opt_count_diff) {
@ARGV = ( $COUNT_DIFF_ARGV[ $opt_count_diff ] );
if ($opt_count_diff == 3) {
$opt_diff = 1;
@ARGV = @{$COUNT_DIFF_ARGV[ $opt_count_diff ]}; # last arg is list of list
}
if ($opt_report_file) {
# Instead of just one output file, will have three.
# Keep their names unique otherwise results are clobbered.
# Replace file path separators with underscores otherwise
# may end up with illegal file names.
my ($fn_0, $fn_1) = (undef, undef);
if ($ON_WINDOWS) {
($fn_0 = $ARGV[0]) =~ s{\\}{_}g;
$fn_0 =~ s{::}{_}g;
($fn_1 = $ARGV[1]) =~ s{\\}{_}g if defined $ARGV[1];
$fn_1 =~ s{::}{_}g if defined $ARGV[1];
} else {
($fn_0 = $ARGV[0]) =~ s{/}{_}g;
($fn_1 = $ARGV[1]) =~ s{/}{_}g if defined $ARGV[1];
}
if ($opt_count_diff == 3) {
$opt_report_file = $COUNT_DIFF_report_file . ".diff.$fn_0.$fn_1";
} else {
$opt_report_file = $COUNT_DIFF_report_file . "." . $fn_0;
}
} else {
# STDOUT; print a header showing what it's working on
if ($opt_count_diff == 3) {
print "\ndiff $ARGV[0] $ARGV[1]::\n";
} else {
print "\n" if $opt_count_diff > 1;
print "$ARGV[0]::\n";
}
}
$ALREADY_SHOWED_HEADER = 0;
$ALREADY_SHOWED_XML_SECTION = 0;
}
#print "Before glob have [", join(",", @ARGV), "]\n";
@ARGV = windows_glob(@ARGV) if $ON_WINDOWS;
#print "after glob have [", join(",", @ARGV), "]\n";
# filter out archive files if requested to do so
if (defined $opt_skip_archive) {
my @non_archive = ();
foreach my $candidate (@ARGV) {
if ($candidate !~ m/${opt_skip_archive}$/) {
push @non_archive, $candidate;
}
}
@ARGV = @non_archive;
}
if ($opt_sum_reports and $opt_diff) {
my @results = ();
if ($opt_list_file) { # read inputs from the list file
my @list = read_list_file($opt_list_file);
@results = combine_diffs(\@list);
} elsif ($opt_vcs) { # read inputs from the VCS generator
my @list = invoke_generator($opt_vcs, \@ARGV);
@results = combine_diffs(\@list);
} else { # get inputs from the command line
@results = combine_diffs(\@ARGV);
}
if ($opt_report_file) {
write_file($opt_report_file, @results);
} else {
print "\n", join("\n", @results), "\n";
}
exit;
}
if ($opt_sum_reports) {
my %Results = ();
foreach my $type( "by language", "by report file" ) {
my $found_lang = undef;
if ($opt_list_file or $opt_vcs) {
# read inputs from the list file
my @list;
if ($opt_vcs) {
@list = invoke_generator($opt_vcs, \@ARGV);
} else {
@list = read_list_file($opt_list_file);
}
$found_lang = combine_results(\@list,
$type,
\%{$Results{ $type }},
\%Filters_by_Language );
} else { # get inputs from the command line
$found_lang = combine_results(\@ARGV,
$type,
\%{$Results{ $type }},
\%Filters_by_Language );
}
next unless %Results;
my $end_time = get_time();
my @results = generate_report($VERSION, $end_time - $start_time,
$type,
\%{$Results{ $type }}, \%Scale_Factor);
if ($opt_report_file) {
my $ext = ".lang";
$ext = ".file" unless $type eq "by language";
next if !$found_lang and $ext eq ".lang";
write_file($opt_report_file . $ext, @results);
} else {
print "\n", join("\n", @results), "\n";
}
}
exit;
}
if ($opt_write_lang_def) {
write_lang_def($opt_write_lang_def ,
\%Language_by_Extension,
\%Language_by_Script ,
\%Language_by_File ,
\%Filters_by_Language ,
\%Not_Code_Extension ,
\%Not_Code_Filename ,
\%Scale_Factor ,
\%EOL_Continuation_re ,
);
exit;
}
if ($opt_show_os) {
if ($ON_WINDOWS) {
print "Windows\n";
} else {
print "UNIX\n";
}
exit;
}
my $max_processes = get_max_processes();
# 1}}}
# Step 3: Create a list of files to consider. {{{1
# a) If inputs are binary archives, first cd to a temp
# directory, expand the archive with the user-given
# extraction tool, then add the temp directory to
# the list of dirs to process.
# b) Create a list of every file that might contain source
# code. Ignore binary files, zero-sized files, and
# any file in a directory the user says to exclude.
# c) Determine the language for each file in the list.
#
my @binary_archive = ();
my $cwd = cwd();
if ($opt_extract_with) {
#print "cwd main = [$cwd]\n";
my @extract_location = ();
foreach my $bin_file (@ARGV) {
my $extract_dir = undef;
if ($opt_sdir) {
++$TEMP_OFF;
$extract_dir = "$opt_sdir/$TEMP_OFF";
File::Path::rmtree($extract_dir) if is_dir($extract_dir);
File::Path::mkpath($extract_dir) unless is_dir($extract_dir);
} else {
$extract_dir = tempdir( CLEANUP => 1 ); # 1 = delete on exit
}
$TEMP_DIR{ $extract_dir } = 1 if $opt_sql or $opt_by_file;
print "mkdir $extract_dir\n" if $opt_v;
print "cd $extract_dir\n" if $opt_v;
chdir $extract_dir;
my $bin_file_full_path = "";
if (File::Spec->file_name_is_absolute( $bin_file )) {
$bin_file_full_path = $bin_file;
#print "bin_file_full_path (was ful) = [$bin_file_full_path]\n";
} else {
$bin_file_full_path = File::Spec->catfile( $cwd, $bin_file );
#print "bin_file_full_path (was rel) = [$bin_file_full_path]\n";
}
my $extract_cmd = uncompress_archive_cmd($bin_file_full_path);
print $extract_cmd, "\n" if $opt_v;
system $extract_cmd;
push @extract_location, $extract_dir;
chdir $cwd;
}
# It is possible that the binary archive itself contains additional
# files compressed the same way (true for Java .ear files). Go
# through all the files that were extracted, see if they are binary
# archives and try to extract them. Lather, rinse, repeat.
my $binary_archives_exist = 1;
my $count_binary_archives = 0;
my $previous_count = 0;
my $n_pass = 0;
while ($binary_archives_exist) {
@binary_archive = ();
foreach my $dir (@extract_location) {
find(\&archive_files, $dir); # populates global @binary_archive
}
foreach my $archive (@binary_archive) {
my $extract_dir = undef;
if ($opt_sdir) {
++$TEMP_OFF;
$extract_dir = "$opt_sdir/$TEMP_OFF";
File::Path::rmtree($extract_dir) if is_dir($extract_dir);
File::Path::mkpath($extract_dir) unless is_dir($extract_dir);
} else {
$extract_dir = tempdir( CLEANUP => 1 ); # 1 = delete on exit
}
$TEMP_DIR{ $extract_dir } = 1 if $opt_sql or $opt_by_file;
print "mkdir $extract_dir\n" if $opt_v;
print "cd $extract_dir\n" if $opt_v;
chdir $extract_dir;
my $extract_cmd = uncompress_archive_cmd($archive);
print $extract_cmd, "\n" if $opt_v;
system $extract_cmd;
push @extract_location, $extract_dir;
unlink $archive; # otherwise will be extracting it forever
}
$count_binary_archives = scalar @binary_archive;
if ($count_binary_archives == $previous_count) {
$binary_archives_exist = 0;
}
$previous_count = $count_binary_archives;
}
chdir $cwd;
@ARGV = @extract_location;
} else {
# see if any of the inputs need to be auto-uncompressed &/or expanded
my @updated_ARGS = ();
foreach my $Arg (@ARGV) {
if (is_dir($Arg)) {
push @updated_ARGS, $Arg;
next;
}
my $full_path = "";
if (File::Spec->file_name_is_absolute( $Arg )) {
$full_path = $Arg;
} else {
$full_path = File::Spec->catfile( $cwd, $Arg );
}
#print "full_path = [$full_path]\n";
my $extract_cmd = uncompress_archive_cmd($full_path);
if ($extract_cmd) {
my $extract_dir = undef;
if ($opt_sdir) {
++$TEMP_OFF;
$extract_dir = "$opt_sdir/$TEMP_OFF";
File::Path::rmtree($extract_dir) if is_dir($extract_dir);
File::Path::mkpath($extract_dir) unless is_dir($extract_dir);
} else {
$extract_dir = tempdir( CLEANUP => 1 ); # 1 = delete on exit
}
$TEMP_DIR{ $extract_dir } = 1 if $opt_sql or $opt_by_file;
print "mkdir $extract_dir\n" if $opt_v;
print "cd $extract_dir\n" if $opt_v;
chdir $extract_dir;
print $extract_cmd, "\n" if $opt_v;
system $extract_cmd;
push @updated_ARGS, $extract_dir;
chdir $cwd;
} else {
# this is a conventional, uncompressed, unarchived file
# or a directory; keep as-is
push @updated_ARGS, $Arg;
}
}
@ARGV = @updated_ARGS;
# make sure we're not counting any directory containing
# temporary installations of Regexp::Common, Algorithm::Diff
foreach my $d (sort keys %TEMP_INST) {
foreach my $a (@ARGV) {
next unless is_dir($a);
if ($opt_v > 2) {
printf "Comparing %s (location of %s) to input [%s]\n",
$d, $TEMP_INST{$d}, $a;
}
if ($a eq $d) {
die "File::Temp::tempdir chose directory ",
$d, " to install ", $TEMP_INST{$d}, " but this ",
"matches one of your input directories. Rerun ",
"with --sdir and supply a different temporary ",
"directory for ", $TEMP_INST{$d}, "\n";
}
}
}
}
# 1}}}
my @Errors = ();
my @file_list = (); # global variable updated in files()
my %Ignored = (); # files that are not counted (language not recognized or
# problems reading the file)
my @Lines_Out = ();
if ($opt_diff) {
# Step 4: Separate code from non-code files. {{{1
my @fh = ();
my @files_for_set = ();
# make file lists for each separate argument
for (my $i = 0; $i < scalar @ARGV; $i++) {
push @fh,
make_file_list([ $ARGV[$i] ], \%Error_Codes, \@Errors, \%Ignored);
@{$files_for_set[$i]} = @file_list;
if ($opt_exclude_list_file) {
# note: process_exclude_list_file() references global @file_list
process_exclude_list_file($opt_exclude_list_file,
\%Exclude_Dir,
\%Ignored);
}
if ($opt_no_autogen) {
exclude_autogenerated_files(\@{$files_for_set[$i]}, # in/out
\%Error_Codes, \@Errors, \%Ignored);
}
@file_list = ();
}
# 1}}}
# Step 5: Remove duplicate files. {{{1
#
my %Language = ();
my %unique_source_file = ();
my $n_set = 0;
foreach my $FH (@fh) { # loop over each pair of file sets
++$n_set;
remove_duplicate_files($FH,
\%{$Language{$FH}} ,
\%{$unique_source_file{$FH}} ,
\%Error_Codes ,
\@Errors ,
\%Ignored );
printf "%2d: %8d unique file%s. \r",
$n_set,
plural_form(scalar keys %unique_source_file)
unless $opt_quiet;
}
# 1}}}
# Step 6: Count code, comments, blank lines. {{{1
#
my %Results_by_Language = ();
my %Results_by_File = ();
my %Delta_by_Language = ();
my %Delta_by_File = ();
my @files_added_tot = ();
my @files_removed_tot = ();
my @file_pairs_tot = ();
my %alignment = ();
my $fset_a = $fh[0];
my $fset_b = $fh[1];
my $n_filepairs_compared = 0;
my $tot_counted = 0;
if ( scalar @fh != 2 ) {
print "Error: in correct length fh array when preparing diff at step 6.\n";
exit 1;
}
align_by_pairs (\%{$unique_source_file{$fset_a}} , # in
\%{$unique_source_file{$fset_b}} , # in
\@files_added_tot , # out
\@files_removed_tot , # out
\@file_pairs_tot , # out
);
if ( $max_processes == 0) {
# Multiprocessing is disabled
my $part = count_filesets ( $fset_a, $fset_b, \@files_added_tot, \@files_removed_tot, \@file_pairs_tot , 0, \%Language);
%Results_by_File = %{$part->{'results_by_file'}};
%Results_by_Language= %{$part->{'results_by_language'}};
%Delta_by_File = %{$part->{'delta_by_file'}};
%Delta_by_Language= %{$part->{'delta_by_language'}};
%Ignored = ( %Ignored, %{$part->{'ignored'}});
%alignment = %{$part->{'alignment'}};
$n_filepairs_compared = $part->{'n_filepairs_compared'};
push ( @Errors, @{$part->{'errors'}});
}
else {
# Multiprocessing is enabled
# Do not create more processes than the amount of data to be processed
my $num_processes = min(max(scalar @files_added_tot,scalar @files_removed_tot,scalar @file_pairs_tot),$max_processes);
# ... but use at least one process.
$num_processes = 1
if $num_processes == 0;
# Start processes for counting
my $pm = Parallel::ForkManager->new($num_processes);
# When processes finish, they will use the embedded subroutine for
# merging the data into global variables.
$pm->run_on_finish ( sub {
my ($pid, $exit_code, $ident, $exit_signal, $core_dump, $part) = @_;
my $part_ignored = $part->{'ignored'};
my $part_result_by_file = $part->{'results_by_file'};
my $part_result_by_language = $part->{'results_by_language'};
my $part_delta_by_file = $part->{'delta_by_file'};
my $part_delta_by_language = $part->{'delta_by_language'};
my $part_alignment = $part->{'alignment'};
my $part_errors = $part->{'errors'};
$tot_counted += scalar keys %$part_result_by_file;
$n_filepairs_compared += $part->{'n_filepairs_compared'};
# Since files are processed by multiple processes, we can't measure
# the number of processed files exactly. We approximate this by showing
# the number of files counted by finished processes.
printf "Counting: %d\r", $tot_counted
if $opt_progress_rate;
foreach my $this_language ( keys %$part_result_by_language ) {
my $counts = $part_result_by_language->{$this_language};
foreach my $inner_key ( keys %$counts ) {
$Results_by_Language{$this_language}{$inner_key} +=
$counts->{$inner_key};
}
}
foreach my $this_language ( keys %$part_delta_by_language ) {
my $counts = $part_delta_by_language->{$this_language};
foreach my $inner_key ( keys %$counts ) {
my $statuses = $counts->{$inner_key};
foreach my $inner_status ( keys %$statuses ) {
$Delta_by_Language{$this_language}{$inner_key}{$inner_status} +=
$counts->{$inner_key}->{$inner_status};
}
}
}
foreach my $label ( keys %$part_alignment ) {
my $inner = $part_alignment->{$label};
foreach my $key ( keys %$inner ) {
$alignment{$label}{$key} = 1;
}
}
%Results_by_File = ( %Results_by_File, %$part_result_by_file );
%Delta_by_File = ( %Delta_by_File, %$part_delta_by_file );
%Ignored = (%Ignored, %$part_ignored );
push ( @Errors, @$part_errors );
} );
my $num_filepairs_per_part = ceil ( ( scalar @file_pairs_tot ) / $num_processes );
my $num_filesremoved_per_part = ceil ( ( scalar @files_removed_tot ) / $num_processes );
my $num_filesadded_per_part = ceil ( ( scalar @files_added_tot ) / $num_processes );
while ( 1 ) {
my @files_added_part = splice @files_added_tot, 0, $num_filesadded_per_part;
my @files_removed_part = splice @files_removed_tot, 0, $num_filesremoved_per_part;
my @filepairs_part = splice @file_pairs_tot, 0, $num_filepairs_per_part;
if ( scalar @files_added_part == 0 and scalar @files_removed_part == 0 and
scalar @filepairs_part == 0 ) {
last;
}
$pm->start() and next;
my $count_result = count_filesets ( $fset_a, $fset_b,
\@files_added_part, \@files_removed_part, \@filepairs_part, 1 , \%Language );
$pm->finish(0 , $count_result);
}
# Wait for processes to finish
$pm->wait_all_children();
}
# Write alignment data, if needed
if ($opt_diff_alignment) {
write_alignment_data ( $opt_diff_alignment, $n_filepairs_compared, \%alignment ) ;
}
#use Data::Dumper;
#print Dumper("Delta_by_Language:" , \%Delta_by_Language);
#print Dumper("Results_by_Language:", \%Results_by_Language);
#print Dumper("Delta_by_File:" , \%Delta_by_File);
#print Dumper("Results_by_File:" , \%Results_by_File);
#die;
my @ignored_reasons = map { "$_: $Ignored{$_}" } sort keys %Ignored;
write_file($opt_ignored, @ignored_reasons ) if $opt_ignored;
write_file($opt_counted, sort keys %Results_by_File) if $opt_counted;
# 1}}}
# Step 7: Assemble results. {{{1
#
my $end_time = get_time();
printf "%8d file%s ignored. \n",
plural_form(scalar keys %Ignored) unless $opt_quiet;
print_errors(\%Error_Codes, \@Errors) if @Errors;
if (!%Delta_by_Language) {
print "Nothing to count.\n";
exit;
}
if ($opt_by_file) {
@Lines_Out = diff_report($VERSION, get_time() - $start_time,
"by file",
\%Delta_by_File, \%Scale_Factor);
} else {
@Lines_Out = diff_report($VERSION, get_time() - $start_time,
"by language",
\%Delta_by_Language, \%Scale_Factor);
}
# 1}}}
} else {
# Step 4: Separate code from non-code files. {{{1
my $fh = 0;
if ($opt_list_file or $opt_vcs) {
my @list;
if ($opt_vcs) {
@list = invoke_generator($opt_vcs, \@ARGV);
} else {
@list = read_list_file($opt_list_file);
}
$fh = make_file_list(\@list, \%Error_Codes, \@Errors, \%Ignored);
} else {
$fh = make_file_list(\@ARGV, \%Error_Codes, \@Errors, \%Ignored);
# make_file_list populates global variable @file_list via call to
# File::Find's find() which in turn calls files()
}
if ($opt_exclude_list_file) {
# note: process_exclude_list_file() references global @file_list
process_exclude_list_file($opt_exclude_list_file,
\%Exclude_Dir,
\%Ignored);
}
if ($opt_skip_win_hidden and $ON_WINDOWS) {
my @file_list_minus_hidded = ();
# eval code to run on Unix without 'missing Win32::File module' error.
my $win32_file_invocation = '
use Win32::File;
foreach my $F (@file_list) {
my $attr = undef;
Win32::File::GetAttributes($F, $attr);
if ($attr & HIDDEN) {
$Ignored{$F} = "Windows hidden file";
print "Ignoring $F since it is a Windows hidden file\n"
if $opt_v > 1;
} else {
push @file_list_minus_hidded, $F;
}
}';
eval $win32_file_invocation;
@file_list = @file_list_minus_hidded;
}
if ($opt_no_autogen) {
exclude_autogenerated_files(\@file_list, # in/out
\%Error_Codes, \@Errors, \%Ignored);
}
#printf "%8d file%s excluded. \n",
# plural_form(scalar keys %Ignored)
# unless $opt_quiet;
# die print ": ", join("\n: ", @file_list), "\n";
# 1}}}
# Step 5: Remove duplicate files. {{{1
#
my %Language = ();
my %unique_source_file = ();
remove_duplicate_files($fh , # in
\%Language , # out
\%unique_source_file , # out
\%Error_Codes , # in
\@Errors , # out
\%Ignored ); # out
printf "%8d unique file%s. \n",
plural_form(scalar keys %unique_source_file)
unless $opt_quiet;
# 1}}}
# Step 6: Count code, comments, blank lines. {{{1
#
my %Results_by_Language = ();
my %Results_by_File = ();
my @results_parts = ();
my @sorted_files = sort keys %unique_source_file;
if ( $max_processes == 0) {
# Multiprocessing is disabled
my $part = count_files ( \@sorted_files , 0, \%Language);
%Results_by_File = %{$part->{'results_by_file'}};
%Results_by_Language= %{$part->{'results_by_language'}};
%Ignored = ( %Ignored, %{$part->{'ignored'}});
push ( @Errors, @{$part->{'errors'}});
}
else {
# Do not create more processes than the number of files to be processed
my $num_files = scalar @sorted_files;
my $num_processes = $num_files >= $max_processes ? $max_processes : $num_files;
# Use at least one process.
$num_processes = 1
if $num_processes == 0;
# Start processes for counting
my $pm = Parallel::ForkManager->new($num_processes);
# When processes finish, they will use the embedded subroutine for
# merging the data into global variables.
$pm->run_on_finish ( sub {
my ($pid, $exit_code, $ident, $exit_signal, $core_dump, $part) = @_;
my $part_ignored = $part->{'ignored'};
my $part_result_by_file = $part->{'results_by_file'};
my $part_result_by_language = $part->{'results_by_language'};
my $part_errors = $part->{'errors'};
my $nCounted+= scalar keys %$part_result_by_file;
# Since files are processed by multiple processes, we can't measure
# the number of processed files exactly. We approximate this by showing
# the number of files counted by finished processes.
printf "Counting: %d\r", $nCounted
if $opt_progress_rate;
foreach my $this_language ( keys %$part_result_by_language ) {
my $counts = $part_result_by_language->{$this_language};
foreach my $inner_key ( keys %$counts ) {
$Results_by_Language{$this_language}{$inner_key} +=
$counts->{$inner_key};
}
}
%Results_by_File = ( %Results_by_File, %$part_result_by_file );
%Ignored = (%Ignored, %$part_ignored);
push ( @Errors, @$part_errors);
} );
my $num_files_per_part = ceil ( ( scalar @sorted_files ) / $num_processes );
while ( my @part = splice @sorted_files, 0 , $num_files_per_part ) {
$pm->start() and next;
my $count_result = count_files ( \@part, 1, \%Language );
$pm->finish(0 , $count_result);
}
# Wait for processes to finish
$pm->wait_all_children();
}
my @ignored_reasons = map { "$_: $Ignored{$_}" } sort keys %Ignored;
write_file($opt_ignored, @ignored_reasons ) if $opt_ignored;
write_file($opt_counted, sort keys %Results_by_File) if $opt_counted;
# 1}}}
# Step 7: Assemble results. {{{1
#
my $end_time = get_time();
printf "%8d file%s ignored.\n", plural_form(scalar keys %Ignored)
unless $opt_quiet;
print_errors(\%Error_Codes, \@Errors) if @Errors;
exit unless %Results_by_Language;
generate_sql($end_time - $start_time,
\%Results_by_File, \%Scale_Factor) if $opt_sql;
exit if $skip_generate_report;
if ($opt_by_file_by_lang) {
push @Lines_Out, generate_report( $VERSION, $end_time - $start_time,
"by file",
\%Results_by_File, \%Scale_Factor);
push @Lines_Out, generate_report( $VERSION, $end_time - $start_time,
"by language",
\%Results_by_Language, \%Scale_Factor);
} elsif ($opt_by_file) {
push @Lines_Out, generate_report( $VERSION, $end_time - $start_time,
"by file",
\%Results_by_File, \%Scale_Factor);
} else {
push @Lines_Out, generate_report( $VERSION, $end_time - $start_time,
"by language",
\%Results_by_Language, \%Scale_Factor);
}
# 1}}}
}
if ($opt_report_file) { write_file($opt_report_file, @Lines_Out); }
else { print "\n", join("\n", @Lines_Out), "\n"; }
if ($opt_count_diff) {
++$opt_count_diff;
exit if $opt_count_diff > 3;
goto Top_of_Processing_Loop;
}
sub get_max_processes { # {{{1
# If user has specified valid number of processes, use that.
if (defined $opt_processes) {
eval "use Parallel::ForkManager 0.7.6;";
if ( defined $Parallel::ForkManager::VERSION ) {
$HAVE_Parallel_ForkManager = 1;
}
if ( $opt_processes !~ /^\d+$/ ) {
print "Error: processes option argument must be numeric.\n";
exit 1;
}
elsif ( $opt_processes >0 and ! $HAVE_Parallel_ForkManager ) {
print "Error: cannot use multiple processes, because " .
"Parallel::ForkManager is not installed, or the version is too old.\n";
exit 1;
}
elsif ( $opt_processes >0 and $ON_WINDOWS ) {
print "Error: cannot use multiple processes on Windows systems.\n";
exit 1;
}
else {
return $opt_processes;
}
}
# Disable multiprocessing on Windows - does not work reliably
if ($ON_WINDOWS) {
return 0;
}
# Disable multiprocessing if Parallel::ForkManager is not available
if ( ! $HAVE_Parallel_ForkManager ) {
return 0;
}
# Set to number of cores on Linux
if ( $^O =~ /linux/i and -x '/usr/bin/nproc' ) {
my $numavcores_linux = `/usr/bin/nproc`;
chomp $numavcores_linux;
if ( $numavcores_linux =~ /^\d+$/ ) {
return $numavcores_linux;
}
}
# Set to number of cores on MacOS
if ( $^O =~ /darwin/i and -x '/usr/sbin/sysctl') {
my $numavcores_macos = `/usr/sbin/sysctl -n hw.physicalcpu`;
chomp $numavcores_macos;
if ($numavcores_macos =~ /^\d+$/ ) {
return $numavcores_macos;
}
}
# Disable multiprocessing in other cases
return 0;
} # 1}}}
sub exclude_autogenerated_files { # {{{1
my ($ra_file_list, # in
$rh_Err , # in hash of error codes
$raa_errors , # out
$rh_Ignored , # out
) = @_;
print "-> exclude_autogenerated_files()\n" if $opt_v > 2;
my @file_list_minus_autogen = ();
foreach my $file (@{$ra_file_list}) {
if ($file !~ /\.go$/) {
# at the moment, only know Go autogenerated files
push @file_list_minus_autogen, $file;
next;
}
my $first_line = first_line($file, $rh_Err, $raa_errors);
if ($first_line =~ m{^//\s+Code\s+generated\s+.*?\s+DO\s+NOT\s+EDIT\.$}) {
$rh_Ignored->{$file} = 'Go autogenerated file';
} else {
# Go, but not autogenerated
push @file_list_minus_autogen, $file;
}
}
@{$ra_file_list} = @file_list_minus_autogen;
print "<- exclude_autogenerated_files()\n" if $opt_v > 2;
} # 1}}}
sub file_extension { # {{{1
my ($fname, ) = @_;
$fname =~ m/\.(\w+)$/;
if ($1) {
return $1;
} else {
return "";
}
} # 1}}}
sub count_files { # {{{1
my ($filelist, $counter_type, $language_hash) = @_;
print "-> count_files()\n" if $opt_v > 2;
my @p_errors = ();
my %p_ignored = ();
my %p_rbl = ();
my %p_rbf = ();
my %Language = %{$language_hash};
foreach my $file (@$filelist) {
if ( ! $counter_type ) {
# Multithreading disabled
$nCounted++;
printf "Counting: %d\r", $nCounted
unless (!$opt_progress_rate or ($nCounted % $opt_progress_rate));
}
next if $Ignored{$file};
if ($opt_include_ext and not $Include_Ext{ file_extension($file) }) {
$p_ignored{$file} = "not in --include-ext=$opt_include_ext";
next;
}
if ($opt_include_lang and not $Include_Language{$Language{$file}}) {
$p_ignored{$file} = "not in --include-lang=$opt_include_lang";
next;
}
if ($Exclude_Language{$Language{$file}}) {
$p_ignored{$file} = "--exclude-lang=$Language{$file}";
next;
}
my $Filters_by_Language_Language_file = ! @{$Filters_by_Language{$Language{$file}} };
if ($Filters_by_Language_Language_file) {
if ($Language{$file} eq "(unknown)") {
$p_ignored{$file} = "language unknown (#1)";
} else {
$p_ignored{$file} = "missing Filters_by_Language{$Language{$file}}";
}
next;
}
my ($all_line_count, $blank_count, $comment_count, $code_count);
if ($opt_use_sloccount and $Language{$file} =~ /^(C|C\+\+|XML|PHP|Pascal|Java)$/) {
chomp ($blank_count = `grep -cv \"[^[:space:]]\" '$file'`);
chomp ($all_line_count = `cat '$file' | wc -l`);
if ($Language{$file} =~ /^(C|C\+\+)$/) {
$code_count = `cat '$file' | c_count | head -n 1`;
} elsif ($Language{$file} eq "XML") {
$code_count = `cat '$file' | xml_count | head -n 1`;
} elsif ($Language{$file} eq "PHP") {
$code_count = `cat '$file' | php_count | head -n 1`;
} elsif ($Language{$file} eq "Pascal") {
$code_count = `cat '$file' | pascal_count | head -n 1`;
} elsif ($Language{$file} eq "Java") {
$code_count = `cat '$file' | java_count | head -n 1`;
} else {
die "SLOCCount match failure: file=[$file] lang=[$Language{$file}]";
}
$code_count = substr($code_count, 0, -2);
$comment_count = $all_line_count - $code_count - $blank_count;
} else {
($all_line_count,
$blank_count ,
$comment_count ,) = call_counter($file, $Language{$file}, \@Errors);
$code_count = $all_line_count - $blank_count - $comment_count;
}
if ($opt_by_file) {
$p_rbf{$file}{'code' } = $code_count ;
$p_rbf{$file}{'blank' } = $blank_count ;
$p_rbf{$file}{'comment'} = $comment_count ;
$p_rbf{$file}{'lang' } = $Language{$file};
$p_rbf{$file}{'nFiles' } = 1;
} else {
$p_rbf{$file} = 1; # just keep track of counted files
}
$p_rbl{$Language{$file}}{'nFiles'}++;
$p_rbl{$Language{$file}}{'code'} += $code_count ;
$p_rbl{$Language{$file}}{'blank'} += $blank_count ;
$p_rbl{$Language{$file}}{'comment'} += $comment_count;
}
print "<- count_files()\n" if $opt_v > 2;
return {
"ignored" => \%p_ignored,
"errors" => \@p_errors,
"results_by_file" => \%p_rbf,
"results_by_language" => \%p_rbl,
}
} # 1}}}
sub count_filesets { # {{{1
my ($fset_a,
$fset_b,
$files_added,
$files_removed,
$file_pairs,
$counter_type,
$language_hash) = @_;
print "-> count_filesets()\n" if $opt_v > 2;
my @p_errors = ();
my %p_alignment = ();
my %p_ignored = ();
my %p_rbl = ();
my %p_rbf = ();
my %p_dbl = ();
my %p_dbf = ();
my %Language = %$language_hash;
my $nCounted = 0;
my %already_counted = (); # already_counted{ filename } = 1
if (!@$file_pairs) {
# Special case where all files were either added or deleted.
# In this case, one of these arrays will be empty:
# @files_added, @files_removed
# so loop over both to cover both cases.
my $status = @$files_added ? 'added' : 'removed';
my $fset = @$files_added ? $fset_b : $fset_a;
foreach my $file (@$files_added, @$files_removed) {
next unless defined $Language{$fset}{$file};
my $Lang = $Language{$fset}{$file};
next if $Lang eq '(unknown)';
my ($all_line_count,
$blank_count ,
$comment_count ,
) = call_counter($file, $Lang, \@p_errors);
$already_counted{$file} = 1;
my $code_count = $all_line_count-$blank_count-$comment_count;
if ($opt_by_file) {
$p_dbf{$file}{'code' }{$status} += $code_count ;
$p_dbf{$file}{'blank' }{$status} += $blank_count ;
$p_dbf{$file}{'comment'}{$status} += $comment_count;
$p_dbf{$file}{'lang' }{$status} = $Lang ;
$p_dbf{$file}{'nFiles' }{$status} += 1 ;
}
$p_dbl{$Lang}{'code' }{$status} += $code_count ;
$p_dbl{$Lang}{'blank' }{$status} += $blank_count ;
$p_dbl{$Lang}{'comment'}{$status} += $comment_count;
$p_dbl{$Lang}{'nFiles' }{$status} += 1 ;
}
}
#use Data::Dumper::Simple;
#use Data::Dumper;
#print Dumper(\@files_added, \@files_removed, \@file_pairs);
#print "after align_by_pairs:\n";
#print "added:\n";
foreach my $f (@$files_added) {
next if $already_counted{$f};
#printf "%10s -> %s\n", $f, $Language{$fh[$F+1]}{$f};
# Don't proceed unless the file (both L and R versions)
# is in a known language.
next if $opt_include_ext
and not $Include_Ext{ file_extension($f) };
next if $opt_include_lang
and not $Include_Language{$Language{$fset_b}{$f}};
next if $Language{$fset_b}{$f} eq "(unknown)";
next if $Exclude_Language{$fset_b}{$f};
$p_alignment{"added"}{sprintf " + %s ; %s\n", $f, $Language{$fset_b}{$f}} = 1;
++$p_dbl{ $Language{$fset_b}{$f} }{'nFiles'}{'added'};
# Additionally, add contents of file $f to
# Delta_by_File{$f}{comment/blank/code}{'added'}
# Delta_by_Language{$lang}{comment/blank/code}{'added'}
# via the $p_dbl and $p_dbf variables.
my ($all_line_count,
$blank_count ,
$comment_count ,
) = call_counter($f, $Language{$fset_b}{$f}, \@p_errors);
$p_dbl{ $Language{$fset_b}{$f} }{'comment'}{'added'} +=
$comment_count;
$p_dbl{ $Language{$fset_b}{$f} }{'blank'}{'added'} +=
$blank_count;
$p_dbl{ $Language{$fset_b}{$f} }{'code'}{'added'} +=
$all_line_count - $blank_count - $comment_count;
$p_dbf{ $f }{'comment'}{'added'} = $comment_count;
$p_dbf{ $f }{'blank'}{'added'} = $blank_count;
$p_dbf{ $f }{'code'}{'added'} =
$all_line_count - $blank_count - $comment_count;
}
#print "removed:\n";
foreach my $f (@$files_removed) {
next if $already_counted{$f};
# Don't proceed unless the file (both L and R versions)
# is in a known language.
next if $opt_include_ext
and not $Include_Ext{ file_extension($f) };
next if $opt_include_lang
and not $Include_Language{$Language{$fset_a}{$f}};
next if $Language{$fset_a}{$f} eq "(unknown)";
next if $Exclude_Language{$fset_a}{$f};
++$p_dbl{ $Language{$fset_a}{$f} }{'nFiles'}{'removed'};
$p_alignment{"removed"}{sprintf " - %s ; %s\n", $f, $Language{$fset_a}{$f}} = 1;
#printf "%10s -> %s\n", $f, $Language{$fh[$F ]}{$f};
# Additionally, add contents of file $f to
# Delta_by_File{$f}{comment/blank/code}{'removed'}
# Delta_by_Language{$lang}{comment/blank/code}{'removed'}
# via the $p_dbl and $p_dbf variables.
my ($all_line_count,
$blank_count ,
$comment_count ,
) = call_counter($f, $Language{$fset_a}{$f}, \@p_errors);
$p_dbl{ $Language{$fset_a}{$f}}{'comment'}{'removed'} +=
$comment_count;
$p_dbl{ $Language{$fset_a}{$f}}{'blank'}{'removed'} +=
$blank_count;
$p_dbl{ $Language{$fset_a}{$f}}{'code'}{'removed'} +=
$all_line_count - $blank_count - $comment_count;
$p_dbf{ $f }{'comment'}{'removed'} = $comment_count;
$p_dbf{ $f }{'blank'}{'removed'} = $blank_count;
$p_dbf{ $f }{'code'}{'removed'} =
$all_line_count - $blank_count - $comment_count;
}
my $n_file_pairs_compared = 0;
# Don't know ahead of time how many file pairs will be compared
# since duplicates are weeded out below. The answer is
# scalar @file_pairs only if there are no duplicates.
foreach my $pair (@$file_pairs) {
my $file_L = $pair->[0];
my $file_R = $pair->[1];
my $Lang_L = $Language{$fset_a}{$file_L};
my $Lang_R = $Language{$fset_b}{$file_R};
#print "main step 6 file_L=$file_L file_R=$file_R\n";
++$nCounted;
printf "Counting: %d\r", $nCounted
unless ($counter_type or !$opt_progress_rate or ($nCounted % $opt_progress_rate));
next if $p_ignored{$file_L};
# filter out non-included extensions
if ($opt_include_ext and not $Include_Ext{ file_extension($Lang_L) }
and not $Include_Ext{ file_extension($Lang_R) }) {
$p_ignored{$file_L} = "not in --include-lang=$opt_include_ext";
$p_ignored{$file_R} = "not in --include-lang=$opt_include_ext";
next;
}
# filter out non-included languages
if ($opt_include_lang and not $Include_Language{$Lang_L}
and not $Include_Language{$Lang_R}) {
$p_ignored{$file_L} = "not in --include-lang=$opt_include_lang";
$p_ignored{$file_R} = "not in --include-lang=$opt_include_lang";
next;
}
# filter out excluded or unrecognized languages
if ($Exclude_Language{$Lang_L} or $Exclude_Language{$Lang_R}) {
$p_ignored{$file_L} = "--exclude-lang=$Lang_L";
$p_ignored{$file_R} = "--exclude-lang=$Lang_R";
next;
}
my $not_Filters_by_Language_Lang_LR = 0;
#print "file_LR = [$file_L] [$file_R]\n";
#print "Lang_LR = [$Lang_L] [$Lang_R]\n";
if (($Lang_L eq "(unknown)") or
($Lang_R eq "(unknown)") or
!(@{$Filters_by_Language{$Lang_L} }) or
!(@{$Filters_by_Language{$Lang_R} })) {
$not_Filters_by_Language_Lang_LR = 1;
}
if ($not_Filters_by_Language_Lang_LR) {
if (($Lang_L eq "(unknown)") or ($Lang_R eq "(unknown)")) {
$p_ignored{$fset_a}{$file_L} = "language unknown (#1)";
$p_ignored{$fset_b}{$file_R} = "language unknown (#1)";
} else {
$p_ignored{$fset_a}{$file_L} = "missing Filters_by_Language{$Lang_L}";
$p_ignored{$fset_b}{$file_R} = "missing Filters_by_Language{$Lang_R}";
}
next;
}
#print "DIFF($file_L, $file_R)\n";
# step 0: compare the two files' contents
chomp ( my @lines_L = read_file($file_L) );
chomp ( my @lines_R = read_file($file_R) );
my $language_file_L = "";
if (defined $Language{$fset_a}{$file_L}) {
$language_file_L = $Language{$fset_a}{$file_L};
} else {
# files $file_L and $file_R do not contain known language
next;
}
my $contents_are_same = 1;
if (scalar @lines_L == scalar @lines_R) {
# same size, must compare line-by-line
for (my $i = 0; $i < scalar @lines_L; $i++) {
if ($lines_L[$i] ne $lines_R[$i]) {
$contents_are_same = 0;
last;
}
}
if ($contents_are_same) {
++$p_dbl{$language_file_L}{'nFiles'}{'same'};
} else {
++$p_dbl{$language_file_L}{'nFiles'}{'modified'};
}
} else {
$contents_are_same = 0;
# different sizes, contents have changed
++$p_dbl{$language_file_L}{'nFiles'}{'modified'};
}
if ($opt_diff_alignment) {
my $str = "$file_L | $file_R ; $language_file_L";
if ($contents_are_same) {
$p_alignment{"pairs"}{" == $str"} = 1;
} else {
$p_alignment{"pairs"}{" != $str"} = 1;
}
++$n_file_pairs_compared;
}
my ($all_line_count_L, $blank_count_L , $comment_count_L ,
$all_line_count_R, $blank_count_R , $comment_count_R , ) = (0,0,0,0,0,0,);
if (!$contents_are_same) {
# step 1: identify comments in both files
#print "Diff blank removal L language= $Lang_L";
#print " scalar(lines_L)=", scalar @lines_L, "\n";
my @original_minus_blanks_L
= rm_blanks( \@lines_L, $Lang_L, \%EOL_Continuation_re);
#print "1: scalar(original_minus_blanks_L)=", scalar @original_minus_blanks_L, "\n";
@lines_L = @original_minus_blanks_L;
#print "2: scalar(lines_L)=", scalar @lines_L, "\n";
@lines_L = add_newlines(\@lines_L); # compensate for rm_comments()
@lines_L = rm_comments( \@lines_L, $Lang_L, $file_L,
\%EOL_Continuation_re);
#print "3: scalar(lines_L)=", scalar @lines_L, "\n";
#print "Diff blank removal R language= $Lang_R\n";
my @original_minus_blanks_R
= rm_blanks( \@lines_R, $Lang_R, \%EOL_Continuation_re);
@lines_R = @original_minus_blanks_R;
@lines_R = add_newlines(\@lines_R); # taken away by rm_comments()
@lines_R = rm_comments( \@lines_R, $Lang_R, $file_R,
\%EOL_Continuation_re);
my (@diff_LL, @diff_LR, );
array_diff( $file_L , # in
\@original_minus_blanks_L , # in
\@lines_L , # in
"comment" , # in
\@diff_LL, \@diff_LR , # out
\@p_errors); # in/out
my (@diff_RL, @diff_RR, );
array_diff( $file_R , # in
\@original_minus_blanks_R , # in
\@lines_R , # in
"comment" , # in
\@diff_RL, \@diff_RR , # out
\@p_errors); # in/out
# each line of each file is now classified as
# code or comment
#use Data::Dumper;
#print Dumper("diff_LL", \@diff_LL, "diff_LR", \@diff_LR, );
#print Dumper("diff_RL", \@diff_RL, "diff_RR", \@diff_RR, );
#die;
# step 2: separate code from comments for L and R files
my @code_L = ();
my @code_R = ();
my @comm_L = ();
my @comm_R = ();
foreach my $line_info (@diff_LL) {
if ($line_info->{'type'} eq "code" ) {
push @code_L, $line_info->{char};
} elsif ($line_info->{'type'} eq "comment") {
push @comm_L, $line_info->{char};
} else {
die "Diff unexpected line type ",
$line_info->{'type'}, "for $file_L line ",
$line_info->{'lnum'};
}
}
foreach my $line_info (@diff_RL) {
if ($line_info->{type} eq "code" ) {
push @code_R, $line_info->{'char'};
} elsif ($line_info->{type} eq "comment") {
push @comm_R, $line_info->{'char'};
} else {
die "Diff unexpected line type ",
$line_info->{'type'}, "for $file_R line ",
$line_info->{'lnum'};
}
}
if ($opt_ignore_whitespace) {
# strip all whitespace from each line of source code
# and comments then use these stripped arrays in diffs
foreach (@code_L) { s/\s+//g }
foreach (@code_R) { s/\s+//g }
foreach (@comm_L) { s/\s+//g }
foreach (@comm_R) { s/\s+//g }
}
if ($opt_ignore_case) {
# change all text to lowercase in diffs
foreach (@code_L) { $_ = lc }
foreach (@code_R) { $_ = lc }
foreach (@comm_L) { $_ = lc }
foreach (@comm_R) { $_ = lc }
}
# step 3: compute code diffs
array_diff("$file_L v. $file_R" , # in
\@code_L , # in
\@code_R , # in
"revision" , # in
\@diff_LL, \@diff_LR , # out
\@p_errors); # in/out
#print Dumper("diff_LL", \@diff_LL, "diff_LR", \@diff_LR, );
#print Dumper("diff_LR", \@diff_LR);
foreach my $line_info (@diff_LR) {
my $status = $line_info->{'desc'}; # same|added|removed|modified
++$p_dbl{$Lang_L}{'code'}{$status};
if ($opt_by_file) {
++$p_dbf{$file_L}{'code'}{$status};
}
}
#use Data::Dumper;
#print Dumper("code diffs:", \@diff_LL, \@diff_LR);
# step 4: compute comment diffs
array_diff("$file_L v. $file_R" , # in
\@comm_L , # in
\@comm_R , # in
"revision" , # in
\@diff_LL, \@diff_LR , # out
\@Errors); # in/out
#print Dumper("comment diff_LR", \@diff_LR);
foreach my $line_info (@diff_LR) {
my $status = $line_info->{'desc'}; # same|added|removed|modified
++$p_dbl{$Lang_L}{'comment'}{$status};
if ($opt_by_file) {
++$p_dbf{$file_L}{'comment'}{$status};
}
}
#print Dumper("comment diffs:", \@diff_LL, \@diff_LR);
# step 5: compute difference in blank lines (kind of pointless)
next if $Lang_L eq '(unknown)' or
$Lang_R eq '(unknown)';
($all_line_count_L,
$blank_count_L ,
$comment_count_L ,
) = call_counter($file_L, $Lang_L, \@Errors);
($all_line_count_R,
$blank_count_R ,
$comment_count_R ,
) = call_counter($file_R, $Lang_R, \@Errors);
} else {
# L and R file contents are identical, no need to diff
($all_line_count_L,
$blank_count_L ,
$comment_count_L ,
) = call_counter($file_L, $Lang_L, \@Errors);
$all_line_count_R = $all_line_count_L;
$blank_count_R = $blank_count_L ;
$comment_count_R = $comment_count_L ;
my $code_lines_R = $all_line_count_R - ($blank_count_R + $comment_count_R);
$p_dbl{$Lang_L}{'blank'}{'same'} += $blank_count_R;
$p_dbl{$Lang_L}{'comment'}{'same'} += $comment_count_R;
$p_dbl{$Lang_L}{'code'}{'same'} += $code_lines_R;
if ($opt_by_file) {
$p_dbf{$file_L}{'blank'}{'same'} += $blank_count_R;
$p_dbf{$file_L}{'comment'}{'same'} += $comment_count_R;
$p_dbf{$file_L}{'code'}{'same'} += $code_lines_R;
}
}
if ($blank_count_L < $blank_count_R) {
my $D = $blank_count_R - $blank_count_L;
$p_dbl{$Lang_L}{'blank'}{'added'} += $D;
} else {
my $D = $blank_count_L - $blank_count_R;
$p_dbl{$Lang_L}{'blank'}{'removed'} += $D;
}
if ($opt_by_file) {
if ($blank_count_L < $blank_count_R) {
my $D = $blank_count_R - $blank_count_L;
$p_dbf{$file_L}{'blank'}{'added'} += $D;
} else {
my $D = $blank_count_L - $blank_count_R;
$p_dbf{$file_L}{'blank'}{'removed'} += $D;
}
}
my $code_count_L = $all_line_count_L-$blank_count_L-$comment_count_L;
if ($opt_by_file) {
$p_rbf{$file_L}{'code' } = $code_count_L ;
$p_rbf{$file_L}{'blank' } = $blank_count_L ;
$p_rbf{$file_L}{'comment'} = $comment_count_L ;
$p_rbf{$file_L}{'lang' } = $Lang_L ;
$p_rbf{$file_L}{'nFiles' } = 1 ;
} else {
$p_rbf{$file_L} = 1; # just keep track of counted files
}
$p_rbl{$Lang_L}{'nFiles'}++;
$p_rbl{$Lang_L}{'code'} += $code_count_L ;
$p_rbl{$Lang_L}{'blank'} += $blank_count_L ;
$p_rbl{$Lang_L}{'comment'} += $comment_count_L;
}
print "<- count_files()\n" if $opt_v > 2;
return {
"ignored" => \%p_ignored,
"errors" => \@p_errors,
"results_by_file" => \%p_rbf,
"results_by_language" => \%p_rbl,
"delta_by_file" => \%p_dbf,
"delta_by_language" => \%p_dbl,
"alignment" => \%p_alignment,
"n_filepairs_compared" => $n_file_pairs_compared
}
} # 1}}}
sub write_alignment_data { # {{{1
my ($filename, $n_filepairs_compared, $data ) = @_;
my @output = ();
if ( $data->{'added'} ) {
my %added_lines = %{$data->{'added'}};
push (@output, "Files added: " . (scalar keys %added_lines) . "\n");
foreach my $line ( sort keys %added_lines ) {
push (@output, $line);
}
push (@output, "\n" );
}
if ( $data->{'removed'} ) {
my %removed_lines = %{$data->{'removed'}};
push (@output, "Files removed: " . (scalar keys %removed_lines) . "\n");
foreach my $line ( sort keys %removed_lines ) {
push (@output, $line);
}
push (@output, "\n");
}
if ( $data->{'pairs'} ) {
my %pairs = %{$data->{'pairs'}};
push (@output, "File pairs compared: " . $n_filepairs_compared . "\n");
foreach my $pair ( sort keys %pairs ) {
push (@output, $pair);
}
}
write_file($filename, @output);
} # 1}}}
sub exclude_dir_validates { # {{{1
my ($rh_Exclude_Dir) = @_;
my $is_OK = 1;
foreach my $dir (keys %{$rh_Exclude_Dir}) {
if (($ON_WINDOWS and $dir =~ m{\\}) or ($dir =~ m{/})) {
$is_OK = 0;
warn "--exclude-dir '$dir' : cannot specify directory paths\n";
}
}
if (!$is_OK) {
warn "Use '--fullpath --not-match-d=REGEX' instead\n";
}
return $is_OK;
} # 1}}}
sub process_exclude_list_file { # {{{1
my ($list_file , # in
$rh_exclude_dir , # out
$rh_ignored , # out
) = @_;
# note: references global @file_list
print "-> process_exclude_list_file($list_file)\n" if $opt_v > 2;
# reject a specific set of files and/or directories
my @reject_list = read_list_file($list_file);
my @file_reject_list = ();
foreach my $F_or_D (@reject_list) {
if (is_dir($F_or_D)) {
$rh_exclude_dir->{$F_or_D} = 1;
} elsif (is_file($F_or_D)) {
push @file_reject_list, $F_or_D;
}
}
# Normalize file names for better comparison.
my %normalized_input = normalize_file_names(@file_list);
my %normalized_reject = normalize_file_names(@file_reject_list);
my %normalized_exclude = normalize_file_names(keys %{$rh_exclude_dir});
foreach my $F (keys %normalized_input) {
if ($normalized_reject{$F} or is_excluded($F, \%normalized_exclude)) {
my $orig_F = $normalized_input{$F};
$rh_ignored->{$orig_F} = "listed in exclusion file $opt_exclude_list_file";
print "Ignoring $orig_F because it appears in $opt_exclude_list_file\n"
if $opt_v > 1;
}
}
print "<- process_exclude_list_file\n" if $opt_v > 2;
} # 1}}}
sub combine_results { # {{{1
# returns 1 if the inputs are categorized by language
# 0 if no identifiable language was found
my ($ra_report_files, # in
$report_type , # in "by language" or "by report file"
$rhh_count , # out count{TYPE}{nFiles|code|blank|comment|scaled}
$rhaa_Filters_by_Language , # in
) = @_;
print "-> combine_results(report_type=$report_type)\n" if $opt_v > 2;
my $found_language = 0;
foreach my $file (@{$ra_report_files}) {
my $IN = new IO::File $file, "r";
if (!defined $IN) {
warn "Unable to read $file; ignoring.\n";
next;
}
while (<$IN>) {
next if /^(http|Language|SUM|-----)/;
if (!$opt_by_file and
m{^(.*?)\s+ # language
(\d+)\s+ # files
(\d+)\s+ # blank
(\d+)\s+ # comments
(\d+)\s+ # code
( # next four entries missing with -nno3
x\s+ # x
\d+\.\d+\s+ # scale
=\s+ # =
(\d+\.\d+)\s* # scaled code
)?
$}x) {
if ($report_type eq "by language") {
if (!defined $rhaa_Filters_by_Language->{$1}) {
warn "Unrecognized language '$1' in $file ignored\n";
next;
}
# above test necessary to avoid trying to sum reports
# of reports (which have no language breakdown).
$found_language = 1;
$rhh_count->{$1 }{'nFiles' } += $2;
$rhh_count->{$1 }{'blank' } += $3;
$rhh_count->{$1 }{'comment'} += $4;
$rhh_count->{$1 }{'code' } += $5;
$rhh_count->{$1 }{'scaled' } += $7 if $opt_3;
} else {
$rhh_count->{$file}{'nFiles' } += $2;
$rhh_count->{$file}{'blank' } += $3;
$rhh_count->{$file}{'comment'} += $4;
$rhh_count->{$file}{'code' } += $5;
$rhh_count->{$file}{'scaled' } += $7 if $opt_3;
}
} elsif ($opt_by_file and
m{^(.*?)\s+ # language
(\d+)\s+ # blank
(\d+)\s+ # comments
(\d+)\s+ # code
( # next four entries missing with -nno3
x\s+ # x
\d+\.\d+\s+ # scale
=\s+ # =
(\d+\.\d+)\s* # scaled code
)?
$}x) {
if ($report_type eq "by language") {
next unless %{$rhaa_Filters_by_Language->{$1}};
# above test necessary to avoid trying to sum reports
# of reports (which have no language breakdown).
$found_language = 1;
$rhh_count->{$1 }{'nFiles' } += 1;
$rhh_count->{$1 }{'blank' } += $2;
$rhh_count->{$1 }{'comment'} += $3;
$rhh_count->{$1 }{'code' } += $4;
$rhh_count->{$1 }{'scaled' } += $6 if $opt_3;
} else {
$rhh_count->{$file}{'nFiles' } += 1;
$rhh_count->{$file}{'blank' } += $2;
$rhh_count->{$file}{'comment'} += $3;
$rhh_count->{$file}{'code' } += $4;
$rhh_count->{$file}{'scaled' } += $6 if $opt_3;
}
}
}
}
print "<- combine_results\n" if $opt_v > 2;
return $found_language;
} # 1}}}
sub compute_denominator { # {{{1
my ($method, $nCode, $nComment, $nBlank, ) = @_;
print "-> compute_denominator\n" if $opt_v > 2;
my %den = ( "c" => $nCode );
$den{"cm"} = $den{"c"} + $nComment;
$den{"cmb"} = $den{"cm"} + $nBlank;
$den{"cb"} = $den{"c"} + $nBlank;
print "<- compute_denominator\n" if $opt_v > 2;
return $den{ $method };
} # 1}}}
sub yaml_to_json_separators { # {{{1
# YAML and JSON are closely related. Their differences can be captured
# by trailing commas ($C), braces ($open_B, $close_B), and
# quotes around text ($Q).
print "-> yaml_to_json_separators()\n" if $opt_v > 2;
my ($Q, $open_B, $close_B, $start, $C);
if ($opt_json) {
$C = ',';
$Q = '"';
$open_B = '{';
$close_B = '}';
$start = '{';
} else {
$C = '';
$Q = '' ;
$open_B = '' ;
$close_B = '';
$start = "---\n# $URL\n";
}
print "<- yaml_to_json_separators()\n" if $opt_v > 2;
return ($Q, $open_B, $close_B, $start, $C);
} # 1}}}
sub diff_report { # {{{1
# returns an array of lines containing the results
print "-> diff_report\n" if $opt_v > 2;
if ($opt_xml) {
print "<- diff_report\n" if $opt_v > 2;
return diff_xml_report(@_)
### return diff_xml_yaml_json_report(@_)
} elsif ($opt_yaml) {
print "<- diff_report\n" if $opt_v > 2;
return diff_yaml_report(@_)
} elsif ($opt_json) {
print "<- diff_report\n" if $opt_v > 2;
return diff_json_report(@_)
} elsif ($opt_csv or $opt_md) {
print "<- diff_report\n" if $opt_v > 2;
return diff_csv_report(@_)
}
my ($version , # in
$elapsed_sec, # in
$report_type, # in "by language" | "by report file" | "by file"
$rhhh_count , # in count{TYPE}{nFiles|code|blank|comment}{a|m|r|s}
$rh_scale , # in
) = @_;
#use Data::Dumper;
#print "diff_report: ", Dumper($rhhh_count), "\n";
my @results = ();
my $languages = ();
my %sum = (); # sum{nFiles|blank|comment|code}{same|modified|added|removed}
my $max_len = 0;
foreach my $language (keys %{$rhhh_count}) {
foreach my $V (qw(nFiles blank comment code)) {
foreach my $S (qw(added same modified removed)) {
$rhhh_count->{$language}{$V}{$S} = 0 unless
defined $rhhh_count->{$language}{$V}{$S};
$sum{$V}{$S} += $rhhh_count->{$language}{$V}{$S};
}
}
$max_len = length($language) if length($language) > $max_len;
}
my $column_1_offset = 0;
$column_1_offset = $max_len - 17 if $max_len > 17;
$elapsed_sec = 0.5 unless $elapsed_sec;
my $spacing_0 = 23;
my $spacing_1 = 13;
my $spacing_2 = 9;
my $spacing_3 = 17;
if (!$opt_3) {
$spacing_1 = 19;
$spacing_2 = 14;
$spacing_3 = 28;
}
$spacing_0 += $column_1_offset;
$spacing_1 += $column_1_offset;
$spacing_3 += $column_1_offset;
my %Format = (
'1' => { 'xml' => 'name="%s" ',
'txt' => "\%-${spacing_0}s ",
},
'2' => { 'xml' => 'name="%s" ',
'txt' => "\%-${spacing_3}s ",
},
'3' => { 'xml' => 'files_count="%d" ',
'txt' => '%5d ',
},
'4' => { 'xml' => 'blank="%d" comment="%d" code="%d" ',
'txt' => "\%${spacing_2}d \%${spacing_2}d \%${spacing_2}d",
},
'5' => { 'xml' => 'blank="%.2f" comment="%.2f" code="%d" ',
'txt' => "\%3.2f \%3.2f \%${spacing_2}d",
},
'6' => { 'xml' => 'factor="%.2f" scaled="%.2f" ',
'txt' => ' x %6.2f = %14.2f',
},
);
my $Style = "txt";
$Style = "xml" if $opt_xml ;
$Style = "xml" if $opt_yaml; # not a typo; just set to anything but txt
$Style = "xml" if $opt_json; # not a typo; just set to anything but txt
$Style = "xml" if $opt_csv ; # not a typo; just set to anything but txt
my $hyphen_line = sprintf "%s", '-' x (79 + $column_1_offset);
$hyphen_line = sprintf "%s", '-' x (68 + $column_1_offset)
if (!$opt_3) and (68 + $column_1_offset) > 79;
my $data_line = "";
my $first_column;
my $BY_LANGUAGE = 0;
my $BY_FILE = 0;
if ($report_type eq "by language") {
$first_column = "Language";
$BY_LANGUAGE = 1;
} elsif ($report_type eq "by file") {
$first_column = "File";
$BY_FILE = 1;
} else {
$first_column = "Report File";
}
my $header_line = sprintf "%s v %s", $URL, $version;
my $sum_files = 1;
my $sum_lines = 1;
$header_line .= sprintf(" T=%.2f s (%.1f files/s, %.1f lines/s)",
$elapsed_sec ,
$sum_files/$elapsed_sec,
$sum_lines/$elapsed_sec) unless $opt_sum_reports or $opt_hide_rate;
if ($Style eq "txt") {
push @results, output_header($header_line, $hyphen_line, $BY_FILE);
} elsif ($Style eq "csv") {
die "csv";
}
# column headers
if (!$opt_3 and $BY_FILE) {
my $spacing_n = $spacing_1 - 11;
$data_line = sprintf "%-${spacing_n}s" , $first_column;
} else {
$data_line = sprintf "%-${spacing_1}s ", $first_column;
}
if ($BY_FILE) {
$data_line .= sprintf "%${spacing_2}s" , "" ;
} else {
$data_line .= sprintf "%${spacing_2}s " , "files";
}
my $PCT_symbol = "";
$PCT_symbol = " \%" if $opt_by_percent;
$data_line .= sprintf "%${spacing_2}s %${spacing_2}s %${spacing_2}s",
"blank${PCT_symbol}" ,
"comment${PCT_symbol}" ,
"code";
if ($Style eq "txt") {
push @results, $data_line;
push @results, $hyphen_line;
}
####foreach my $lang_or_file (keys %{$rhhh_count}) {
#### $rhhh_count->{$lang_or_file}{'code'} = 0 unless
#### defined $rhhh_count->{$lang_or_file}{'code'};
####}
foreach my $lang_or_file (sort {
$rhhh_count->{$b}{'code'} <=>
$rhhh_count->{$a}{'code'}
}
keys %{$rhhh_count}) {
if ($BY_FILE) {
push @results, rm_leading_tempdir($lang_or_file, \%TEMP_DIR);
} else {
push @results, $lang_or_file;
}
foreach my $S (qw(same modified added removed)) {
my $indent = $spacing_1 - 2;
my $line .= sprintf " %-${indent}s", $S;
if ($BY_FILE) {
$line .= sprintf " ";
} else {
$line .= sprintf " %${spacing_2}s", $rhhh_count->{$lang_or_file}{'nFiles'}{$S};
}
if ($opt_by_percent) {
my $DEN = compute_denominator($opt_by_percent ,
$rhhh_count->{$lang_or_file}{'code'}{$S} ,
$rhhh_count->{$lang_or_file}{'comment'}{$S},
$rhhh_count->{$lang_or_file}{'blank'}{$S} );
if ($rhhh_count->{$lang_or_file}{'code'}{$S} > 0) {
$line .= sprintf " %14.2f %14.2f %${spacing_2}s",
$rhhh_count->{$lang_or_file}{'blank'}{$S} / $DEN * 100,
$rhhh_count->{$lang_or_file}{'comment'}{$S} / $DEN * 100,
$rhhh_count->{$lang_or_file}{'code'}{$S} ;
} else {
$line .= sprintf " %14.2f %14.2f %${spacing_2}s",
0.0, 0.0, $rhhh_count->{$lang_or_file}{'code'}{$S} ;
}
} else {
$line .= sprintf " %${spacing_2}s %${spacing_2}s %${spacing_2}s",
$rhhh_count->{$lang_or_file}{'blank'}{$S} ,
$rhhh_count->{$lang_or_file}{'comment'}{$S} ,
$rhhh_count->{$lang_or_file}{'code'}{$S} ;
}
push @results, $line;
}
}
push @results, $hyphen_line;
push @results, "SUM:";
foreach my $S (qw(same modified added removed)) {
my $indent = $spacing_1 - 2;
my $line .= sprintf " %-${indent}s", $S;
if ($BY_FILE) {
$line .= sprintf " ";
} else {
$line .= sprintf " %${spacing_2}s", $sum{'nFiles'}{$S};
}
if ($opt_by_percent) {
my $DEN = compute_denominator($opt_by_percent,
$sum{'code'}{$S}, $sum{'comment'}{$S}, $sum{'blank'}{$S});
if ($sum{'code'}{$S} > 0) {
$line .= sprintf " %14.2f %14.2f %${spacing_2}s",
$sum{'blank'}{$S} / $DEN * 100,
$sum{'comment'}{$S} / $DEN * 100,
$sum{'code'}{$S} ;
} else {
$line .= sprintf " %14.2f %14.2f %${spacing_2}s",
0.0, 0.0, $sum{'code'}{$S} ;
}
} else {
$line .= sprintf " %${spacing_2}s %${spacing_2}s %${spacing_2}s",
$sum{'blank'}{$S} ,
$sum{'comment'}{$S} ,
$sum{'code'}{$S} ;
}
push @results, $line;
}
push @results, $hyphen_line;
write_xsl_file() if $opt_xsl and $opt_xsl eq $CLOC_XSL;
print "<- diff_report\n" if $opt_v > 2;
return @results;
} # 1}}}
sub xml_yaml_or_json_header { # {{{1
my ($URL, $version, $elapsed_sec, $sum_files, $sum_lines, $by_file) = @_;
print "-> xml_yaml_or_json_header\n" if $opt_v > 2;
my $header = "";
my $file_rate = $sum_files/$elapsed_sec;
my $line_rate = $sum_lines/$elapsed_sec;
my $type = "";
$type = "diff_" if $opt_diff;
my $report_file = "";
if ($opt_report_file) {
if ($opt_sum_reports) {
if ($by_file) {
$report_file = " <report_file>$opt_report_file.file</report_file>"
} else {
$report_file = " <report_file>$opt_report_file.lang</report_file>"
}
} else {
$report_file = " <report_file>$opt_report_file</report_file>"
}
}
if ($opt_xml) {
$header = "<?xml version=\"1.0\"?>";
$header .= "\n<?xml-stylesheet type=\"text/xsl\" href=\"" . $opt_xsl . "\"?>" if $opt_xsl;
$header .= "<${type}results>
<header>
<cloc_url>$URL</cloc_url>
<cloc_version>$version</cloc_version>
<elapsed_seconds>$elapsed_sec</elapsed_seconds>
<n_files>$sum_files</n_files>
<n_lines>$sum_lines</n_lines>
<files_per_second>$file_rate</files_per_second>
<lines_per_second>$line_rate</lines_per_second>";
$header .= "\n$report_file"
if $opt_report_file;
$header .= "\n</header>";
} elsif ($opt_yaml or $opt_json) {
my ($Q, $open_B, $close_B, $start, $C) = yaml_to_json_separators();
$header = "${start}${Q}header${Q} : $open_B
${Q}cloc_url${Q} : ${Q}$URL${Q}${C}
${Q}cloc_version${Q} : ${Q}$version${Q}${C}
${Q}elapsed_seconds${Q} : $elapsed_sec${C}
${Q}n_files${Q} : $sum_files${C}
${Q}n_lines${Q} : $sum_lines${C}
${Q}files_per_second${Q} : $file_rate${C}
${Q}lines_per_second${Q} : $line_rate";
if ($opt_report_file) {
if ($opt_sum_reports) {
if ($by_file) {
$header .= "$C\n ${Q}report_file${Q} : ${Q}$opt_report_file.file${Q}"
} else {
$header .= "$C\n ${Q}report_file${Q} : ${Q}$opt_report_file.lang${Q}"
}
} else {
$header .= "$C\n ${Q}report_file${Q} : ${Q}$opt_report_file${Q}";
}
}
$header .= "${close_B}${C}";
}
print "<- xml_yaml_or_json_header\n" if $opt_v > 2;
return $header;
} # 1}}}
sub diff_yaml_report { # {{{1
# returns an array of lines containing the results
my ($version , # in
$elapsed_sec, # in
$report_type, # in "by language" | "by report file" | "by file"
$rhhh_count , # in count{TYPE}{nFiles|code|blank|comment}{a|m|r|s}
$rh_scale , # in
) = @_;
print "-> diff_yaml_report\n" if $opt_v > 2;
$elapsed_sec = 0.5 unless $elapsed_sec;
my @results = ();
my %sum = ();
my ($sum_lines, $sum_files, $BY_FILE, $BY_LANGUAGE) =
diff_header_sum($report_type, $rhhh_count, \%sum);
if (!$ALREADY_SHOWED_HEADER) {
push @results,
xml_yaml_or_json_header($URL, $version, $elapsed_sec,
$sum_files, $sum_lines, $BY_FILE);
$ALREADY_SHOWED_HEADER = 1;
}
foreach my $S (qw(added same modified removed)) {
push @results, "$S :";
foreach my $F_or_L (keys %{$rhhh_count}) {
push @results, " " . rm_leading_tempdir($F_or_L, \%TEMP_DIR) . " :";
foreach my $k (keys %{$rhhh_count->{$F_or_L}}) {
next if $k eq "lang"; # present only in those cases
# where code exists for action $S
$rhhh_count->{$F_or_L}{$k}{$S} = 0 unless
defined $rhhh_count->{$F_or_L}{$k}{$S};
push @results,
" $k : $rhhh_count->{$F_or_L}{$k}{$S}";
}
}
}
push @results, "SUM :";
foreach my $S (qw(added same modified removed)) {
push @results, " $S :";
foreach my $topic (keys %sum) {
push @results, " $topic : $sum{$topic}{$S}";
}
}
print "<- diff_yaml_report\n" if $opt_v > 2;
return @results;
} # 1}}}
sub diff_json_report { # {{{1
# returns an array of lines containing the results
my ($version , # in
$elapsed_sec, # in
$report_type, # in "by language" | "by report file" | "by file"
$rhhh_count , # in count{TYPE}{nFiles|code|blank|comment}{a|m|r|s}
$rh_scale , # in
) = @_;
print "-> diff_json_report\n" if $opt_v > 2;
$elapsed_sec = 0.5 unless $elapsed_sec;
my @results = ();
my %sum = ();
my ($sum_lines, $sum_files, $BY_FILE, $BY_LANGUAGE) =
diff_header_sum($report_type, $rhhh_count, \%sum);
if (!$ALREADY_SHOWED_HEADER) {
push @results,
xml_yaml_or_json_header($URL, $version, $elapsed_sec,
$sum_files, $sum_lines, $BY_FILE);
$ALREADY_SHOWED_HEADER = 1;
}
foreach my $S (qw(added same modified removed)) {
push @results, " \"$S\" : {";
foreach my $F_or_L (keys %{$rhhh_count}) {
push @results, " \"" . rm_leading_tempdir($F_or_L, \%TEMP_DIR) . "\" : {";
foreach my $k (keys %{$rhhh_count->{$F_or_L}}) {
next if $k eq "lang"; # present only in those cases
# where code exists for action $S
$rhhh_count->{$F_or_L}{$k}{$S} = 0 unless
defined $rhhh_count->{$F_or_L}{$k}{$S};
push @results,
" \"$k\" : $rhhh_count->{$F_or_L}{$k}{$S},";
}
$results[-1] =~ s/,\s*$//;
push @results, " },"
}
$results[-1] =~ s/,\s*$//;
push @results, " },"
}
push @results, " \"SUM\" : {";
foreach my $S (qw(added same modified removed)) {
push @results, " \"$S\" : {";
foreach my $topic (keys %sum) {
push @results, " \"$topic\" : $sum{$topic}{$S},";
}
$results[-1] =~ s/,\s*$//;
push @results, "},";
}
$results[-1] =~ s/,\s*$//;
push @results, "} }";
print "<- diff_json_report\n" if $opt_v > 2;
return @results;
} # 1}}}
sub diff_header_sum { # {{{1
my ($report_type, # in "by language" | "by report file" | "by file"
$rhhh_count , # in count{TYPE}{nFiles|code|blank|comment}{a|m|r|s}
$rhh_sum , # out sum{nFiles|blank|comment|code}{same|modified|added|removed}
) = @_;
my $sum_files = 0;
my $sum_lines = 0;
foreach my $language (keys %{$rhhh_count}) {
foreach my $V (qw(nFiles blank comment code)) {
foreach my $S (qw(added same modified removed)) {
$rhhh_count->{$language}{$V}{$S} = 0 unless
defined $rhhh_count->{$language}{$V}{$S};
$rhh_sum->{$V}{$S} += $rhhh_count->{$language}{$V}{$S};
if ($V eq "nFiles") {
$sum_files += $rhhh_count->{$language}{$V}{$S};
} else {
$sum_lines += $rhhh_count->{$language}{$V}{$S};
}
}
}
}
my $BY_LANGUAGE = 0;
my $BY_FILE = 0;
if ($report_type eq "by language") {
$BY_LANGUAGE = 1;
} elsif ($report_type eq "by file") {
$BY_FILE = 1;
}
return $sum_lines, $sum_files, $BY_FILE, $BY_LANGUAGE;
} # 1}}}
sub diff_xml_report { # {{{1
# returns an array of lines containing the results
my ($version , # in
$elapsed_sec, # in
$report_type, # in "by language" | "by report file" | "by file"
$rhhh_count , # in count{TYPE}{nFiles|code|blank|comment}{a|m|r|s}
$rh_scale , # in
) = @_;
print "-> diff_xml_report\n" if $opt_v > 2;
my ($Q, $open_B, $close_B, $start, $C) = yaml_to_json_separators();
#print "diff_report: ", Dumper($rhhh_count), "\n";
$elapsed_sec = 0.5 unless $elapsed_sec;
my @results = ();
my %sum = ();
my $languages = ();
my ($sum_lines, $sum_files, $BY_FILE, $BY_LANGUAGE) =
diff_header_sum($report_type, $rhhh_count, \%sum);
my $data_line = "";
if (!$ALREADY_SHOWED_HEADER) {
push @results,
xml_yaml_or_json_header($URL, $version, $elapsed_sec,
$sum_files, $sum_lines, $BY_FILE);
$ALREADY_SHOWED_HEADER = 1;
}
foreach my $S (qw(same modified added removed)) {
push @results, " <$S>";
foreach my $lang_or_file (sort {
$rhhh_count->{$b}{'code'} <=>
$rhhh_count->{$a}{'code'}
}
keys %{$rhhh_count}) {
my $L = "";
if ($BY_FILE) {
$L .= sprintf " <file name=\"%s\" files_count=\"1\" ",
xml_metachars(
rm_leading_tempdir($lang_or_file, \%TEMP_DIR));
} else {
$L .= sprintf " <language name=\"%s\" files_count=\"%d\" ",
$lang_or_file ,
$rhhh_count->{$lang_or_file}{'nFiles'}{$S};
}
if ($opt_by_percent) {
my $DEN = compute_denominator($opt_by_percent ,
$rhhh_count->{$lang_or_file}{'code'}{$S} ,
$rhhh_count->{$lang_or_file}{'comment'}{$S},
$rhhh_count->{$lang_or_file}{'blank'}{$S} );
foreach my $T (qw(blank comment)) {
if ($rhhh_count->{$lang_or_file}{'code'}{$S} > 0) {
$L .= sprintf "%s=\"%.2f\" ",
$T, $rhhh_count->{$lang_or_file}{$T}{$S} / $DEN * 100;
} else {
$L .= sprintf "%s=\"0.0\" ", $T;
}
}
foreach my $T (qw(code)) {
$L .= sprintf "%s=\"%d\" ",
$T, $rhhh_count->{$lang_or_file}{$T}{$S};
}
} else {
foreach my $T (qw(blank comment code)) {
$L .= sprintf "%s=\"%d\" ",
$T, $rhhh_count->{$lang_or_file}{$T}{$S};
}
}
push @results, $L . "/>";
}
my $L = sprintf " <total sum_files=\"%d\" ", $sum{'nFiles'}{$S};
if ($opt_by_percent) {
my $DEN = compute_denominator($opt_by_percent,
$sum{'code'}{$S} ,
$sum{'comment'}{$S},
$sum{'blank'}{$S} );
foreach my $V (qw(blank comment)) {
if ($sum{'code'}{$S} > 0) {
$L .= sprintf "%s=\"%.2f\" ", $V, $sum{$V}{$S} / $DEN * 100;
} else {
$L .= sprintf "%s=\"0.0\" ", $V;
}
}
foreach my $V (qw(code)) {
$L .= sprintf "%s=\"%d\" ", $V, $sum{$V}{$S};
}
} else {
foreach my $V (qw(blank comment code)) {
$L .= sprintf "%s=\"%d\" ", $V, $sum{$V}{$S};
}
}
push @results, $L . "/>";
push @results, " </$S>";
}
push @results, "</diff_results>";
write_xsl_file() if $opt_xsl and $opt_xsl eq $CLOC_XSL;
print "<- diff_xml_report\n" if $opt_v > 2;
return @results;
} # 1}}}
sub diff_csv_report { # {{{1
# returns an array of lines containing the results
my ($version , # in
$elapsed_sec, # in
$report_type, # in "by language" | "by report file" | "by file"
$rhhh_count , # in count{TYPE}{nFiles|code|blank|comment}{a|m|r|s}
$rh_scale , # in unused
) = @_;
print "-> diff_csv_report\n" if $opt_v > 2;
#use Data::Dumper;
#print "diff_csv_report: ", Dumper($rhhh_count), "\n";
#die;
my @results = ();
my $languages = ();
my $data_line = "";
my $BY_LANGUAGE = 0;
my $BY_FILE = 0;
if ($report_type eq "by language") {
$BY_LANGUAGE = 1;
} elsif ($report_type eq "by file") {
$BY_FILE = 1;
}
my $DELIM = ",";
$DELIM = $opt_csv_delimiter if defined $opt_csv_delimiter;
$elapsed_sec = 0.5 unless $elapsed_sec;
my $line = "Language${DELIM} ";
$line = "File${DELIM} " if $BY_FILE;
foreach my $item (qw(files blank comment code)) {
next if $BY_FILE and $item eq 'files';
foreach my $symbol ( '==', '!=', '+', '-', ) {
$line .= "$symbol $item${DELIM} ";
}
}
$line .= "\"$URL v $version T=$elapsed_sec s\"";
push @results, $line;
foreach my $lang_or_file (keys %{$rhhh_count}) {
$rhhh_count->{$lang_or_file}{'code'}{'added'} = 0 unless
defined $rhhh_count->{$lang_or_file}{'code'};
}
foreach my $lang_or_file (sort {
$rhhh_count->{$b}{'code'} <=>
$rhhh_count->{$a}{'code'}
}
keys %{$rhhh_count}) {
if ($BY_FILE) {
$line = rm_leading_tempdir($lang_or_file, \%TEMP_DIR) . "$DELIM ";
} else {
$line = $lang_or_file . "${DELIM} ";
}
if ($opt_by_percent) {
foreach my $item (qw(nFiles)) {
next if $BY_FILE and $item eq 'nFiles';
foreach my $symbol (qw(same modified added removed)) {
if (defined $rhhh_count->{$lang_or_file}{$item}{$symbol}) {
$line .= "$rhhh_count->{$lang_or_file}{$item}{$symbol}${DELIM} ";
} else {
$line .= "0${DELIM} ";
}
}
}
foreach my $item (qw(blank comment)) {
foreach my $symbol (qw(same modified added removed)) {
if (defined $rhhh_count->{$lang_or_file}{$item}{$symbol} and
defined $rhhh_count->{$lang_or_file}{'code'}{$symbol} and
$rhhh_count->{$lang_or_file}{'code'}{$symbol} > 0) {
$line .= sprintf("%.2f", $rhhh_count->{$lang_or_file}{$item}{$symbol} / $rhhh_count->{$lang_or_file}{'code'}{$symbol} * 100).${DELIM};
} else {
$line .= "0.00${DELIM} ";
}
}
}
foreach my $item (qw(code)) {
foreach my $symbol (qw(same modified added removed)) {
if (defined $rhhh_count->{$lang_or_file}{$item}{$symbol}) {
$line .= "$rhhh_count->{$lang_or_file}{$item}{$symbol}${DELIM} ";
} else {
$line .= "0${DELIM} ";
}
}
}
} else {
foreach my $item (qw(nFiles blank comment code)) {
next if $BY_FILE and $item eq 'nFiles';
foreach my $symbol (qw(same modified added removed)) {
if (defined $rhhh_count->{$lang_or_file}{$item}{$symbol}) {
$line .= "$rhhh_count->{$lang_or_file}{$item}{$symbol}${DELIM} ";
} else {
$line .= "0${DELIM} ";
}
}
}
}
push @results, $line;
}
print "<- diff_csv_report\n" if $opt_v > 2;
return @results;
} # 1}}}
sub rm_leading_tempdir { # {{{1
my ($in_file, $rh_temp_dirs, ) = @_;
my $clean_filename = $in_file;
foreach my $temp_d (keys %{$rh_temp_dirs}) {
if ($ON_WINDOWS) {
# \ -> / necessary to allow the next if test's
# m{} to work in the presence of spaces in file names
$temp_d =~ s{\\}{/}g;
$clean_filename =~ s{\\}{/}g;
}
if ($clean_filename =~ m{^$temp_d/}) {
$clean_filename =~ s{^$temp_d/}{};
last;
}
}
$clean_filename =~ s{/}{\\}g if $ON_WINDOWS; # then go back from / to \
return $clean_filename;
} # 1}}}
sub generate_sql { # {{{1
my ($elapsed_sec, # in
$rhh_count , # in count{TYPE}{lang|code|blank|comment|scaled}
$rh_scale , # in
) = @_;
print "-> generate_sql\n" if $opt_v > 2;
#print "generate_sql A [$opt_sql_project]\n";
$opt_sql_project = cwd() unless defined $opt_sql_project;
$opt_sql_project = '' unless defined $opt_sql_project; # have seen cwd() fail
#print "generate_sql B [$opt_sql_project]\n";
$opt_sql_project =~ s{/}{\\}g if $ON_WINDOWS;
#print "generate_sql C [$opt_sql_project]\n";
my $schema = undef;
if ($opt_sql_style eq "oracle") {
$schema = "
CREATE TABLE metadata
(
timestamp TIMESTAMP,