Skip to content
This repository has been archived by the owner. It is now read-only.
Browse files
Merge pull request #89 from karanjeets/master
Added metric to record the number of files in a repository
  • Loading branch information
chrismattmann committed Jan 18, 2016
2 parents 72068cd + bf5efc2 commit 25f6d9d4ac38a93980c2d835f58afcbee494c198
Showing 2 changed files with 15 additions and 0 deletions.
@@ -64,6 +64,17 @@ def normalize_path(repository):
return tmp

# Recursively count the files under a directory,
# skipping any file whose path contains the given `exclude` substring (e.g. ".svn")
def count_num_files(path, exclude):
    """Count the files below ``path`` recursively, skipping excluded ones.

    A file is skipped when ``exclude`` occurs as a substring of its path
    taken *relative to* ``path``. Matching on the relative path keeps the
    location of ``path`` itself from influencing the result (a checkout
    stored under a directory whose name happens to contain ``exclude``
    would otherwise always count as empty).

    Args:
        path: Directory to walk. A missing directory yields a count of 0,
            because ``os.walk`` silently yields nothing for it.
        exclude: Substring marking files to ignore (e.g. ``".svn"``).
            An empty string or ``None`` disables the exclusion.

    Returns:
        The number of non-excluded files found under ``path``.
    """
    count = 0
    for root, dirs, files in os.walk(path):
        if not exclude:
            # Nothing to filter: every file in this directory counts.
            count += len(files)
            continue

        # Prune excluded directories in place so os.walk never descends
        # into them; every file below them would be excluded anyway.
        dirs[:] = [d for d in dirs if exclude not in d]

        # Path of the current directory relative to the walk root; the top
        # directory itself maps to "" so "." never leaks into the match.
        rel_root = os.path.relpath(root, path)
        if rel_root == os.curdir:
            rel_root = ""

        count += sum(1 for filename in files
                     if exclude not in os.path.join(rel_root, filename))
    return count

# Prints usage of this script
def help():
print >>sys.stderr, "\n\nUsage: python <path to list of repository URLs> <path to output directory>\n"
@@ -229,6 +240,9 @@ def run(repos_list, output_dir):
stats["mime_" + mime_count[i]] = mime_count[i + 1]

# Count the number of files
stats["files"] = count_num_files(repository, ".svn")

# Write data into Solr
stats_data = []
@@ -504,6 +504,7 @@
<field name="map_end" type="date" indexed="true" stored="true" />
<field name="reduce_start" type="date" indexed="true" stored="true" />
<field name="reduce_end" type="date" indexed="true" stored="true" />
<field name="files" type="long" indexed="true" stored="true" />
<dynamicField name="license_*" type="long" indexed="true" stored="true" />
<dynamicField name="mime_*" type="long" indexed="true" stored="true" />

0 comments on commit 25f6d9d

Please sign in to comment.