-
Notifications
You must be signed in to change notification settings - Fork 34
/
Copy pathgit.py
165 lines (134 loc) · 4.08 KB
/
git.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
"""Collection of all git command interactions"""
from __future__ import absolute_import
import os
import re
import subprocess
from detect_secrets_server.constants import IGNORED_FILE_EXTENSIONS
def get_last_commit_hash(directory):
    """Resolve ``HEAD`` of the given repository to a commit hash.

    :type directory: str
    :param directory: repository location, handed to git as ``--git-dir``

    :returns: output of ``git rev-parse HEAD``, decoded and stripped
    """
    rev_parse_args = ('rev-parse', 'HEAD')
    return _git(directory, *rev_parse_args)
def clone_repo_to_location(repo, directory):
    """Create a bare clone of `repo` at `directory`.

    A clone that fails only because the destination path already exists
    is treated as success, because that is expected if the repo has
    already been tracked.

    :type repo: str
    :param repo: git url to clone

    :type directory: str
    :param directory: local directory path

    :raises: subprocess.CalledProcessError, for any clone failure other
        than the destination path already existing
    """
    try:
        # We need to run it through check_output, because we want to trigger
        # a subprocess.CalledProcessError upon failure.
        subprocess.check_output(
            [
                'git', 'clone',
                # We clone a bare repo, because we're not interested in the
                # files themselves. This will be more space efficient.
                '--bare',
                # Everything after `--` is positional, so a repo or
                # directory value that starts with `-` cannot be parsed
                # as a git option.
                '--',
                repo,
                directory,
            ],
            stderr=subprocess.STDOUT
        )
    except subprocess.CalledProcessError as e:
        # Drop undecodable bytes (the same policy `_git` applies to its
        # output), so that unusual output can never replace the real
        # failure with a UnicodeDecodeError.
        error_message = e.output.decode('utf-8', 'ignore')

        # Ignore this message, because it's expected if the repo
        # has already been tracked.
        if not re.match(
            r"fatal: destination path '[^']+' already exists",
            error_message
        ):
            raise
def fetch_new_changes(directory):
    """Quietly fetch, from ``origin``, the branch that HEAD points to.

    :type directory: str
    :param directory: repository location, handed to git as ``--git-dir``
    """
    branch = _get_main_branch(directory)

    # NOTE(review): no destination ref is given, so whether the local
    # branch itself advances presumably depends on the repository's
    # configured fetch refspec -- confirm for bare clones.
    fetch_args = ['fetch', '--quiet', 'origin', branch]
    _git(directory, *fetch_args)
def get_baseline_file(directory, filename):
    """Read the baseline file as it exists at HEAD.

    We take the most updated baseline. Note that this means it's still
    "user-dependent", but at the same time, we want to ignore new explicit
    whitelists.

    Also, this would mean that we **always** get a whitelist, if exists
    (rather than worrying about fixing on a commit that has a whitelist)

    :type directory: str
    :param directory: repository location, handed to git as ``--git-dir``

    :type filename: str
    :param filename: path of the baseline file, relative to the repo root

    :returns: file contents of baseline_file, or None if `filename` does
        not exist at HEAD
    :raises: subprocess.CalledProcessError, for any other git failure
    """
    try:
        return _git(
            directory,
            'show', 'HEAD:{}'.format(filename),
        )
    except subprocess.CalledProcessError as e:
        # Drop undecodable bytes (the same policy `_git` applies to its
        # output), so that unusual output can never replace the real
        # failure with a UnicodeDecodeError.
        error_message = e.output.decode('utf-8', 'ignore')

        # Some repositories may not have baselines.
        # If so, this is a non-breaking error.
        #
        # Older git versions capitalise this message ("Path '...'"),
        # newer ones do not ("path '...'"), so both spellings are accepted.
        if not re.match(
            r"fatal: [Pp]ath '[^']+' does not exist",
            error_message,
        ):
            raise

    # No baseline in this repository.
    return None
def get_diff(directory, last_commit_hash):
    """Returns the git diff between last commit hash, and HEAD.

    The diff is restricted to the files selected by
    `_filter_filenames_from_diff`.

    :type directory: str
    :param directory: repository location, handed to git as ``--git-dir``

    :type last_commit_hash: str
    :param last_commit_hash: commit to diff HEAD against

    :returns: diff output, or an empty string when no changed file passes
        the filter
    """
    filenames = _filter_filenames_from_diff(directory, last_commit_hash)
    if not filenames:
        # With nothing after `--`, git would not restrict the diff at all
        # and every changed file (including the ones we just filtered out)
        # would be returned. Nothing relevant changed, so the diff is empty.
        return ''

    return _git(
        directory,
        'diff',
        last_commit_hash,
        'HEAD',
        '--',
        *filenames
    )
def get_remote_url(directory):
    """Look up the URL configured for the ``origin`` remote.

    :type directory: str
    :param directory: repository location, handed to git as ``--git-dir``

    :returns: output of ``git remote get-url origin``, decoded and stripped
    """
    remote_args = ('remote', 'get-url', 'origin')
    return _git(directory, *remote_args)
def get_blame(directory, filename, line_number):
    """Run ``git blame`` for a single line of a file, on the branch that
    HEAD points to.

    The raw ``--line-porcelain`` output (with ``--show-email``) is returned
    as-is; extracting the author who last changed the line is left to the
    caller.

    :type directory: str
    :param directory: repository location, handed to git as ``--git-dir``

    :type filename: str
    :param filename: file to blame

    :param line_number: the one line to blame; used as both ends of the
        ``-L`` range

    :returns: blame output, decoded and stripped
    """
    branch = _get_main_branch(directory)

    # A range that starts and ends on the same line selects just that line.
    single_line_range = '{},{}'.format(line_number, line_number)

    blame_args = [
        'blame',
        branch,
        '-L', single_line_range,
        '--show-email',
        '--line-porcelain',
        '--',
        filename,
    ]
    return _git(directory, *blame_args)
def _get_main_branch(directory):
    """Name of the branch that HEAD points to.

    While this is `master` most of the time, there are some exceptions,
    so it is asked of git rather than hard-coded.

    :type directory: str
    :param directory: repository location, handed to git as ``--git-dir``

    :returns: output of ``git rev-parse --abbrev-ref HEAD``, decoded and
        stripped
    """
    abbrev_ref_args = ('rev-parse', '--abbrev-ref', 'HEAD')
    return _git(directory, *abbrev_ref_args)
def _filter_filenames_from_diff(directory, last_commit_hash):
    """List the files changed between `last_commit_hash` and HEAD, minus
    those with an ignored file extension.

    Only paths reported under ``--diff-filter ACM`` are considered.

    :type directory: str
    :param directory: repository location, handed to git as ``--git-dir``

    :type last_commit_hash: str
    :param last_commit_hash: commit to diff HEAD against

    :returns: list of filenames, in the order git reported them
    """
    name_only_output = _git(
        directory,
        'diff',
        last_commit_hash,
        'HEAD',
        '--name-only',
        '--diff-filter', 'ACM',
    )

    kept = []
    for path in name_only_output.splitlines():
        # splitext yields (root, extension); only the extension matters.
        _, extension = os.path.splitext(path)
        if extension in IGNORED_FILE_EXTENSIONS:
            continue

        kept.append(path)

    return kept
def _git(directory, *args):
    """Run a git command against the repository at `directory`.

    stderr is merged into stdout, so on failure the error text is
    available on the raised exception's ``output``.

    :type directory: str
    :param directory: repository location, handed to git as ``--git-dir``

    :param args: git subcommand and its arguments

    :returns: command output decoded as UTF-8 (undecodable bytes dropped),
        with surrounding whitespace stripped
    :raises: subprocess.CalledProcessError, if git exits non-zero
    """
    command = [
        'git',
        '--git-dir', directory,

        # Work-tree is required for some git commands, because of bare repos.
        # However, it doesn't hurt to put it for all of them.
        '--work-tree', '.',
    ]
    command.extend(args)

    raw_output = subprocess.check_output(command, stderr=subprocess.STDOUT)
    return raw_output.decode('utf-8', 'ignore').strip()