Skip to content

Commit

Permalink
implement a way to limit the statistics to commits after a start date
Browse files Browse the repository at this point in the history
This is really useful when computing statistics over a set of
repositories, where some repositories are much older than others.

Signed-off-by: Heikki Hokkanen <hoxu@users.sf.net>
  • Loading branch information
doudou authored and hoxu committed Jun 17, 2014
1 parent e56e7b6 commit 780c0fd
Showing 1 changed file with 12 additions and 5 deletions.
17 changes: 12 additions & 5 deletions gitstats
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ conf = {
'project_name': '',
'merge_authors': {},
'processes': 8,
'start_date': ''
}

def getpipeoutput(cmds, quiet = False):
Expand All @@ -72,6 +73,12 @@ def getpipeoutput(cmds, quiet = False):
exectime_external += (end - start)
return output.rstrip('\n')

def getlogrange(defaultrange = 'HEAD', end_only = True):
    """Return the revision arguments for git log-style commands.

    The commit range is obtained from getcommitrange(). When
    conf['start_date'] is non-empty, a --since option is prepended so
    that only commits after that date are reported.

    Note that end_only defaults to True here, whereas getcommitrange()
    itself defaults to False.

    defaultrange -- passed through to getcommitrange()
    end_only     -- passed through to getcommitrange()

    Returns a string meant to be spliced into a shell command line.
    """
    commit_range = getcommitrange(defaultrange, end_only)
    start_date = conf['start_date']
    if start_date:
        # The result is interpolated into a shell command, so the date is
        # quoted: values containing whitespace (e.g. "2 weeks ago") must
        # reach git as a single --since argument instead of being split
        # into stray revision names.
        return '--since="%s" %s' % (start_date, commit_range)
    return commit_range

def getcommitrange(defaultrange = 'HEAD', end_only = False):
if len(conf['commit_end']) > 0:
if end_only or len(conf['commit_begin']) == 0:
Expand Down Expand Up @@ -280,7 +287,7 @@ class GitDataCollector(DataCollector):
def collect(self, dir):
DataCollector.collect(self, dir)

self.total_authors += int(getpipeoutput(['git shortlog -s %s' % getcommitrange(), 'wc -l']))
self.total_authors += int(getpipeoutput(['git shortlog -s %s' % getlogrange(), 'wc -l']))
#self.total_lines = int(getoutput('git-ls-files -z |xargs -0 cat |wc -l'))

# tags
Expand Down Expand Up @@ -323,7 +330,7 @@ class GitDataCollector(DataCollector):

# Collect revision statistics
# Outputs "<stamp> <date> <time> <timezone> <author> '<' <mail> '>'"
lines = getpipeoutput(['git rev-list --pretty=format:"%%at %%ai %%aN <%%aE>" %s' % getcommitrange('HEAD'), 'grep -v ^commit']).split('\n')
lines = getpipeoutput(['git rev-list --pretty=format:"%%at %%ai %%aN <%%aE>" %s' % getlogrange('HEAD'), 'grep -v ^commit']).split('\n')
for line in lines:
parts = line.split(' ', 4)
author = ''
Expand Down Expand Up @@ -432,7 +439,7 @@ class GitDataCollector(DataCollector):
self.commits_by_timezone[timezone] = self.commits_by_timezone.get(timezone, 0) + 1

# outputs "<stamp> <files>" for each revision
revlines = getpipeoutput(['git rev-list --pretty=format:"%%at %%T" %s' % getcommitrange('HEAD'), 'grep -v ^commit']).strip().split('\n')
revlines = getpipeoutput(['git rev-list --pretty=format:"%%at %%T" %s' % getlogrange('HEAD'), 'grep -v ^commit']).strip().split('\n')
lines = []
revs_to_read = []
time_rev_count = []
Expand Down Expand Up @@ -534,7 +541,7 @@ class GitDataCollector(DataCollector):
extra = ''
if conf['linear_linestats']:
extra = '--first-parent -m'
lines = getpipeoutput(['git log --shortstat %s --pretty=format:"%%at %%aN" %s' % (extra, getcommitrange('HEAD'))]).split('\n')
lines = getpipeoutput(['git log --shortstat %s --pretty=format:"%%at %%aN" %s' % (extra, getlogrange('HEAD'))]).split('\n')
lines.reverse()
files = 0; inserted = 0; deleted = 0; total_lines = 0
author = None
Expand Down Expand Up @@ -590,7 +597,7 @@ class GitDataCollector(DataCollector):
# Similar to the above, but never use --first-parent
# (we need to walk through every commit to know who
# committed what, not just through mainline)
lines = getpipeoutput(['git log --shortstat --date-order --pretty=format:"%%at %%aN" %s' % (getcommitrange('HEAD'))]).split('\n')
lines = getpipeoutput(['git log --shortstat --date-order --pretty=format:"%%at %%aN" %s' % (getlogrange('HEAD'))]).split('\n')
lines.reverse()
files = 0; inserted = 0; deleted = 0
author = None
Expand Down

0 comments on commit 780c0fd

Please sign in to comment.