Skip to content

Commit

Permalink
Merge pull request #206 from 12f23eddde/include_deleted_files
Browse files Browse the repository at this point in the history
Add the option to include deleted files in repository mining
  • Loading branch information
ishepard authored Feb 13, 2022
2 parents 036978c + 200000e commit 1376678
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 3 deletions.
8 changes: 6 additions & 2 deletions pydriller/git.py
Original file line number Diff line number Diff line change
Expand Up @@ -297,19 +297,23 @@ def _useless_line(line: str):
line.startswith('"""') or \
line.startswith("*")

def get_commits_modified_file(self, filepath: str) -> List[str]:
def get_commits_modified_file(self, filepath: str, include_deleted_files=False) -> List[str]:
"""
Given a filepath, returns all the commits that modified this file
(following renames).
:param str filepath: path to the file
:param bool include_deleted_files: if True, include commits that modify a deleted file
:return: the list of commits' hash
"""
path = str(Path(filepath))

commits = []
try:
commits = self.repo.git.log("--follow", "--format=%H", path).split('\n')
if include_deleted_files:
commits = self.repo.git.log("--follow", "--format=%H", "--", path).split('\n')
else:
commits = self.repo.git.log("--follow", "--format=%H", path).split('\n')
except GitCommandError:
logger.debug(f"Could not find information of file {path}")

Expand Down
9 changes: 8 additions & 1 deletion pydriller/repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ def __init__(self, path_to_repo: Union[str, List[str]],
only_commits: List[str] = None,
only_releases: bool = False,
filepath: str = None,
include_deleted_files: bool = False,
histogram_diff: bool = False,
skip_whitespaces: bool = False,
clone_repo_to: str = None,
Expand Down Expand Up @@ -97,6 +98,7 @@ def __init__(self, path_to_repo: Union[str, List[str]],
:param bool skip_whitespaces: add the "-w" option when asking for the diff
:param bool clone_repo_to: if the repo under analysis is remote, clone the repo to the specified directory
:param str filepath: only commits that modified this file will be analyzed
:param bool include_deleted_files: include commits modifying a deleted file (useful when analyzing a deleted `filepath`)
:param str order: order of commits. It can be one of: 'date-order',
'author-date-order', 'topo-order', or 'reverse'. Default is reverse.
"""
Expand Down Expand Up @@ -130,6 +132,7 @@ def __init__(self, path_to_repo: Union[str, List[str]],
"only_releases": only_releases,
"skip_whitespaces": skip_whitespaces,
"filepath": filepath,
"include_deleted_files": include_deleted_files,
"filepath_commits": None,
"tagged_commits": None,
"histogram": histogram_diff,
Expand Down Expand Up @@ -215,7 +218,11 @@ def traverse_commits(self) -> Generator[Commit, None, None]:
# git rev-list since it doesn't have the option --follow, necessary to follow
# the renames. Hence, we manually call git log instead
if self._conf.get('filepath') is not None:
self._conf.set_value('filepath_commits', git.get_commits_modified_file(self._conf.get('filepath')))
self._conf.set_value(
'filepath_commits',
git.get_commits_modified_file(self._conf.get('filepath'),
self._conf.get('include_deleted_files'))
)

# Gets only the commits that are tagged
if self._conf.get('only_releases'):
Expand Down
9 changes: 9 additions & 0 deletions tests/test_repository_mining.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,3 +281,12 @@ def test_deletion_remotes():

for path in paths:
assert os.path.exists(path) is False


def test_deleted_files():
    """
    Check that mining with a ``filepath`` filter and
    ``include_deleted_files=True`` yields at least one commit.
    """
    # NOTE(review): '.bettercodehub.yml' is presumably a file that no longer
    # exists in the repository's current tree -- confirm against the repo.
    repository = Repository('https://github.com/ishepard/pydriller',
                            filepath='.bettercodehub.yml',
                            include_deleted_files=True)

    # Materialize the generator so the number of matching commits can be checked.
    commits_found = list(repository.traverse_commits())
    assert len(commits_found) > 0

0 comments on commit 1376678

Please sign in to comment.