Skip to content

Commit

Permalink
Git log handling
Browse files Browse the repository at this point in the history
Minor refactoring. Also enhanced vega circle packing bridge to allow one to specify size_column and color_column.
  • Loading branch information
elmotec committed Feb 14, 2019
1 parent e4e957d commit 6c7d400
Show file tree
Hide file tree
Showing 3 changed files with 93 additions and 67 deletions.
64 changes: 44 additions & 20 deletions codemetrics/git.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,42 @@ def __init__(self, git_client='git', **kwargs):
"""
super().__init__(**kwargs)
self.git_client = git_client
self.log_moved_re = re.compile(r"([-\d]+)\s+([-\d]+)\s+(\S*)\{(\S*) => (\S*)\}(\S*)")
self.log_moved_re = \
re.compile(r"([-\d]+)\s+([-\d]+)\s+(\S*)\{(\S*) => (\S*)\}(\S*)")

def parse_path_elem(self, path_elem: str):
    """Parse one per-file statistics line of the git log output.

    The line holds two counters followed by a path. Three path layouts
    are recognized:

    * ``<added> <removed> <path>``: plain change.
    * ``<added> <removed> <old path> => <new path>``: full rename.
    * ``<added> <removed> <prefix>{<old> => <new>}<suffix>``: partial
      rename, matched with ``self.log_moved_re``.

    Args:
        path_elem: path element line.

    Returns:
        Quadruplet of added, removed, relpath, copyfrompath where
        copyfrompath is None unless the line describes a rename. added
        and removed are integers, or np.nan when the counter is ``-``
        (binary files).

    Raises:
        ValueError: when the line cannot be parsed.

    """
    # Only the two counters are split off so that a path containing
    # whitespace stays in one piece instead of breaking the unpacking.
    fields = path_elem.strip().split(None, 2)
    if len(fields) != 3:
        raise ValueError(f'{path_elem} not understood')
    added, removed, path = fields
    copyfrompath = None
    if '{' in path_elem:
        # Partial rename: the braces hold the part of the path that changed.
        match = self.log_moved_re.match(path_elem)
        if not match:
            raise ValueError(f'{path_elem} not understood')
        added = match.group(1)
        removed = match.group(2)
        relpath = match.group(3) + match.group(5) + match.group(6)
        copyfrompath = match.group(3) + match.group(4) + match.group(6)
        # An empty side of the rename leaves two consecutive separators.
        relpath = relpath.replace('//', '/')
        copyfrompath = copyfrompath.replace('//', '/')
    else:
        # Full rename when a whitespace-delimited '=>' separates two paths,
        # otherwise the remainder of the line is the path itself.
        sides = re.split(r'\s+=>\s+', path, maxsplit=1)
        if len(sides) == 2:
            copyfrompath, relpath = sides
        else:
            relpath = path
    # '-' is reported in place of a count for binary files.
    added_as_int = int(added) if added != '-' else np.nan
    removed_as_int = int(removed) if removed != '-' else np.nan
    return added_as_int, removed_as_int, relpath, copyfrompath


def process_entry(self, log_entry):
"""Convert a single xml <logentry/> element to csv rows.
Expand Down Expand Up @@ -62,27 +97,16 @@ def process_entry(self, log_entry):
if not path_elem:
break
# git log shows special characters in paths to indicate moves.
if '{' not in path_elem:
added, removed, relpath = path_elem.split()
else:
match = self.log_moved_re.match(path_elem)
if not match:
log.warning('failed to parse the following line:\n%s\n%s',
log_entry[0], path_elem)
continue
added = match.group(1)
removed = match.group(2)
relpath = match.group(3) + match.group(5) + match.group(6)
relpath = relpath.replace('//', '/')
copyfrompath = match.group(3) + match.group(4) + match.group(6)
copyfrompath = copyfrompath.replace('//', '/')
try:
added, removed, relpath, copyfrompath = \
self.parse_path_elem(path_elem)
except ValueError as err:
log.error(f'failed to parse {path_elem}: {err}')
continue
# - indicate binary files.
added_as_int = int(added) if added != '-' else np.nan
removed_as_int = int(removed) if removed != '-' else np.nan
entry = scm.LogEntry(rev, author=author, date=date, path=relpath,
message=msg, kind='f', added=added_as_int,
removed=removed_as_int,
copyfrompath=copyfrompath)
message=msg, kind='f', added=added,
removed=removed, copyfrompath=copyfrompath)
yield entry

def process_log_entries(self, text):
Expand Down
6 changes: 5 additions & 1 deletion codemetrics/vega.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,13 +203,17 @@ def _vis_generic(df: pd.DataFrame,
def vis_hot_spots(df: pd.DataFrame,
height: int = 300,
width: int = 400,
size_column: str = 'lines',
color_column: str = 'changes',
colorscheme: str = 'yelloworangered') -> dict:
"""Convert get_hot_spots output to a json vega dict.
Args:
df: input data returned by :func:`codemetrics.get_hot_spots`
height: vertical size of the figure.
width: horizontal size of the figure.
size_column: column that drives the size of the circles.
color_column: column that drives the color intensity of the circles.
colorscheme: color scheme. See https://vega.github.io/vega/docs/schemes/
Returns:
Expand All @@ -229,7 +233,7 @@ def vis_hot_spots(df: pd.DataFrame,
.. _Vega circle pack example: https://vega.github.io/editor/#/examples/vega/circle-packing
"""
return _vis_generic(df, size_column='lines', color_column='changes',
return _vis_generic(df, size_column=size_column, color_column=color_column,
colorscheme=colorscheme, width=width,
height=height)

Expand Down
90 changes: 44 additions & 46 deletions tests/test_git.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,57 @@

import tqdm
import pandas as pd
import numpy as np

import tests.utils as utils

import codemetrics as cm
import codemetrics.git as git


class PathElemParser(unittest.TestCase):
    """Unit tests for the parsing of a single path element line."""

    def setUp(self):
        """Create the git collector whose parser is under test."""
        self.git = git._GitLogCollector()

    def test_parse_path_elem(self):
        """A plain path yields both counters and no copy source."""
        line = '21 2 dir/test.py'
        parsed = self.git.parse_path_elem(line)
        added, removed, relpath, copyfrompath = parsed
        self.assertEqual(21, added)
        self.assertEqual(2, removed)
        self.assertEqual('dir/test.py', relpath)
        self.assertIsNone(copyfrompath)

    def test_parse_renamed_path(self):
        """Both sides of a braced rename are expanded with the prefix."""
        line = '1 1 dir/{b/a.py => a/b.py}'
        parsed = self.git.parse_path_elem(line)
        added, removed, relpath, copyfrompath = parsed
        self.assertEqual(1, added)
        self.assertEqual(1, removed)
        self.assertEqual('dir/a/b.py', relpath)
        self.assertEqual('dir/b/a.py', copyfrompath)

    def test_parse_renamed_path_empty_right(self):
        """An empty right side drops the directory from the new path."""
        line = '21 2 dir/{category => }/test.py'
        parsed = self.git.parse_path_elem(line)
        added, removed, relpath, copyfrompath = parsed
        self.assertEqual(21, added)
        self.assertEqual(2, removed)
        self.assertEqual('dir/test.py', relpath)
        self.assertEqual('dir/category/test.py', copyfrompath)

    def test_parse_renamed_path_empty_left(self):
        """An empty left side adds a directory; '-' counters become NaN."""
        line = '- - dir/{ => subdir}/file.py'
        parsed = self.git.parse_path_elem(line)
        added, removed, relpath, copyfrompath = parsed
        self.assertTrue(np.isnan(added))
        self.assertTrue(np.isnan(removed))
        self.assertEqual('dir/subdir/file.py', relpath)
        self.assertEqual('dir/file.py', copyfrompath)



def get_log():
retval = textwrap.dedent('''
[2adcc03] [elmotec] [2018-12-05 23:44:38 -0000] [Fixed Windows specific paths]
Expand Down Expand Up @@ -119,52 +163,6 @@ def test_handling_of_brackets_in_log(self, call):
xxxxxxx,elmotec,2018-12-05 23:44:38+00:00,some/file,bbb [ci skip] [skipci],f,,,,1,1'''))
self.assertEqual(expected, df)

@mock.patch('codemetrics.internals.run', autospec=True,
            return_value=textwrap.dedent("""
[xxxxxxx] [elmotec] [2018-12-05 23:44:38 -0000] [blah]
- - directory/{ => subdir}/file
"""))
def test_handling_of_files_moved(self, call):
    """Handles files that were moved using the new location.

    The patched run returns a log whose only path line moves a file into
    a new sub-directory, with '-' for both counters. The resulting frame
    must report the new location as path, the old one as copyfrompath,
    and leave added and removed empty.

    Args:
        call: mock replacing codemetrics.internals.run.

    """
    actual = git.get_git_log('.', after=self.after)
    # The git command line must carry the collector arguments and the date.
    call.assert_called_with(
        f'git {git._GitLogCollector._args} --after {self.after:%Y-%m-%d} .')
    expected = utils.csvlog_to_dataframe(textwrap.dedent('''\
revision,author,date,path,message,kind,copyfrompath,added,removed
xxxxxxx,elmotec,2018-12-05 23:44:38+00:00,directory/subdir/file,blah,f,directory/file,,'''))
    self.assertEqual(expected, actual)

@mock.patch('codemetrics.internals.run', autospec=True,
            return_value=textwrap.dedent("""
[xxxxxxx] [elmotec] [2018-12-05 23:44:38 -0000] [a]
1 1 dir/{b/a.py => a/b.py}
"""))
def test_handling_of_directory_renamed(self, call):
    """Handles subdirectories that were renamed.

    The patched run returns a log whose only path line renames both the
    sub-directory and the file inside the braces. The resulting frame
    must hold the new path, the old path as copyfrompath, and one line
    added and one removed.

    Args:
        call: mock replacing codemetrics.internals.run.

    """
    df = git.get_git_log('.', after=self.after)
    # The git command line must carry the collector arguments and the date.
    call.assert_called_with(
        f'git {git._GitLogCollector._args} --after {self.after:%Y-%m-%d} .')
    expected = utils.csvlog_to_dataframe(textwrap.dedent('''\
revision,author,date,path,message,kind,action,copyfrompath,added,removed
xxxxxxx,elmotec,2018-12-05 23:44:38+00:00,dir/a/b.py,a,f,,dir/b/a.py,1,1'''))
    self.assertEqual(expected, df)

@mock.patch('codemetrics.internals.run', autospec=True,
            return_value=textwrap.dedent("""
[xxxxxxx] [elmotec] [2018-12-05 23:44:38 -0000] [a]
21 2 dir/{category => }/test.py
"""))
def test_handling_of_removed_directories(self, call):
    """Handles a directory level that was removed from the path.

    The patched run returns a log whose only path line has an empty
    right side inside the braces. The resulting frame must hold the
    shortened path, the original path as copyfrompath, and 21 lines
    added and 2 removed.

    Args:
        call: mock replacing codemetrics.internals.run.

    """
    df = git.get_git_log('.', after=self.after)
    # The git command line must carry the collector arguments and the date.
    call.assert_called_with(
        f'git {git._GitLogCollector._args} --after {self.after:%Y-%m-%d} .')
    expected = utils.csvlog_to_dataframe(textwrap.dedent('''\
revision,author,date,copyfrompath,path,message,kind,action,added,removed
xxxxxxx,elmotec,2018-12-05 23:44:38+00:00,dir/category/test.py,dir/test.py,a,f,,21,2
'''))
    self.assertEqual(expected, df)


class DownloadGitFilesTestCase(unittest.TestCase):
"""Test getting historical files with git."""
Expand Down

0 comments on commit 6c7d400

Please sign in to comment.