Skip to content

Commit

Permalink
- removed only_in_main_branch, since its behaviour was wrong (by default it is True, and even if set to False nothing changed)
Browse files Browse the repository at this point in the history

- only_in_branch created: by default it is the master branch, but one can now set it to a different branch
  • Loading branch information
ishepard committed Oct 31, 2018
1 parent d9ac435 commit 0e6c8f3
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 27 deletions.
8 changes: 4 additions & 4 deletions pydriller/git_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,17 +68,17 @@ def get_head(self) -> Commit:
head_commit = self.repo.head.commit
return Commit(head_commit, self.path, self.main_branch)

def get_list_commits(self, branch: str = None) -> List[Commit]:
    """
    Return the list of the commits in the repo.

    :param str branch: branch to list the commits of; the value is
        forwarded unchanged to `_get_all_commits` (default: None)
    :return: List[Commit], the commits returned by `_get_all_commits`
    """
    return self._get_all_commits(branch)

def _get_all_commits(self) -> List[Commit]:
def _get_all_commits(self, branch: str = None) -> List[Commit]:
all_commits = []
for commit in self.repo.iter_commits():
for commit in self.repo.iter_commits(branch):
all_commits.append(self.get_commit_from_gitpython(commit))
return all_commits

Expand Down
26 changes: 5 additions & 21 deletions pydriller/repository_mining.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,7 @@ def __init__(self, path_to_repo: Union[str, List[str]],
from_commit: str = None, to_commit: str = None,
from_tag: str = None, to_tag: str = None,
reversed_order: bool = False,
only_in_main_branch: bool = False,
only_in_branches: List[str] = None,
only_in_branch: str = None,
only_modifications_with_file_types: List[str] = None,
only_no_merge: bool = False):
"""
Expand All @@ -57,8 +56,7 @@ def __init__(self, path_to_repo: Union[str, List[str]],
:param str from_tag: starting the analysis from specified tag (only if `since` and `from_commit` are None)
:param str to_tag: ending the analysis from specified tag (only if `to` and `to_commit` are None)
:param bool reversed_order: whether the commits should be analyzed in reversed order
:param bool only_in_main_branch: whether only commits in main branch should be analyzed
:param List[str] only_in_branches: only commits in these branches will be analyzed
:param str only_in_branch: only commits in this branch will be analyzed
:param List[str] only_modifications_with_file_types: only modifications with that file types will be analyzed
:param bool only_no_merge: if True, merges will not be analyzed
"""
Expand All @@ -74,14 +72,13 @@ def __init__(self, path_to_repo: Union[str, List[str]],
self._since = since
self._to = to
self._reversed_order = reversed_order
self._only_in_main_branch = only_in_main_branch
self._only_in_branches = only_in_branches
self._only_in_branch = only_in_branch
self._only_modifications_with_file_types = only_modifications_with_file_types
self._only_no_merge = only_no_merge

def _sanity_check_repos(self, path_to_repo):
if not isinstance(path_to_repo, str) and not isinstance(path_to_repo, list):
raise Exception('The path to the repo has to be of type \"string\" or \"list of strings\"!')
raise Exception("The path to the repo has to be of type 'string' or 'list of strings'!")

def _sanity_check_filters(self, git_repo: GitRepository):
# If single is defined, not other filters should be
Expand Down Expand Up @@ -151,7 +148,7 @@ def traverse_commits(self) -> Generator[Commit, None, None]:

logger.info('Analyzing git repository in {}'.format(git_repo.path))

all_cs = self._apply_filters_on_commits(git_repo.get_list_commits())
all_cs = self._apply_filters_on_commits(git_repo.get_list_commits(self._only_in_branch))

if not self._reversed_order:
all_cs.reverse()
Expand All @@ -167,13 +164,6 @@ def traverse_commits(self) -> Generator[Commit, None, None]:
yield commit

def _is_commit_filtered(self, commit: Commit):
if self._only_in_main_branch is True and commit.in_main_branch is False:
logger.debug('Commit filtered for main branch')
return True
if self._only_in_branches is not None:
if not self._commit_branch_in_branches(commit):
logger.debug('Commit filtered for only in branches')
return True
if self._only_modifications_with_file_types is not None:
if not self._has_modification_with_file_type(commit):
logger.debug('Commit filtered for modification types')
Expand All @@ -183,12 +173,6 @@ def _is_commit_filtered(self, commit: Commit):
return True
return False

def _commit_branch_in_branches(self, commit: Commit):
for branch in commit.branches:
if branch in self._only_in_branches:
return True
return False

def _has_modification_with_file_type(self, commit):
for mod in commit.modifications:
if mod.filename.endswith(tuple(self._only_modifications_with_file_types)):
Expand Down
17 changes: 15 additions & 2 deletions tests/integration/test_commit_filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def test_mod_with_file_types():


def test_only_in_main_branch():
lc = list(RepositoryMining('test-repos/git-5/', only_in_main_branch=True).traverse_commits())
lc = list(RepositoryMining('test-repos/git-5/').traverse_commits())

assert 5 == len(lc)
assert '4a17f31c0d1285477a3a467d0bc3cb38e775097d' == lc[0].hash
Expand All @@ -41,7 +41,7 @@ def test_only_in_main_branch():


def test_multiple_filters():
lc = list(RepositoryMining('test-repos/git-5/', only_in_main_branch=True, only_no_merge=True).traverse_commits())
lc = list(RepositoryMining('test-repos/git-5/', only_no_merge=True).traverse_commits())

assert 4 == len(lc)
assert '4a17f31c0d1285477a3a467d0bc3cb38e775097d' == lc[0].hash
Expand All @@ -58,7 +58,20 @@ def test_no_filters():
assert '375de7a8275ecdc0b28dc8de2568f47241f443e9' == lc[1].hash
assert 'b8c2be250786975f1c6f47e96922096f1bb25e39' == lc[2].hash


def test_no_single_commit():
    """Mining git-5 with `single` set to this hash must raise."""
    # NOTE(review): presumably this commit is not reachable from the default
    # branch of the test repo, which is why traversal fails - confirm.
    target_hash = "6fe83d9fbf9a63cc1c51e5fe6fd5230f7fbbce6f"
    with pytest.raises(Exception):
        # Construction stays inside the context manager so that an error
        # raised by the constructor also satisfies the expectation.
        miner = RepositoryMining('test-repos/git-5', single=target_hash)
        for mined in miner.traverse_commits():
            print(mined.hash)


def test_only_in_branch():
    """Mining git-5 restricted to 'branch2' yields these six commits in order."""
    expected_hashes = (
        '4a17f31c0d1285477a3a467d0bc3cb38e775097d',
        'ff663cf1931a67d5e47b75fc77dcea432c728052',
        'fa8217c324e7fb46c80e1ddf907f4e141449637e',
        '5d9d79607d7e82b6f236aa29be4ba89a28fb4f15',
        '377e0f474d70f6205784d0150ee0069a050c29ed',
        '6fe83d9fbf9a63cc1c51e5fe6fd5230f7fbbce6f',
    )
    miner = RepositoryMining('test-repos/git-5/', only_in_branch='branch2')
    lc = list(miner.traverse_commits())
    assert 6 == len(lc)

    # Check position by position so a failure points at the first wrong commit.
    for position, expected in enumerate(expected_hashes):
        assert expected == lc[position].hash

0 comments on commit 0e6c8f3

Please sign in to comment.