Skip to content

Commit

Permalink
Replace file detection with identify
Browse files Browse the repository at this point in the history
  • Loading branch information
asottile committed Jan 21, 2018
1 parent 6d3c4a0 commit c5290f2
Show file tree
Hide file tree
Showing 8 changed files with 50 additions and 94 deletions.
62 changes: 9 additions & 53 deletions git_code_debt/metrics/common.py
Original file line number Diff line number Diff line change
@@ -1,56 +1,12 @@
from __future__ import absolute_import
from __future__ import unicode_literals


PYTHON = 'Python'
YAML = 'Yaml'
TEMPLATE = 'Template'
CSS = 'Css'
MAKO_TEMPLATE = 'Mako_Template'
JAVASCRIPT = 'Javascript'
JAVA = 'Java'
ILLUSTRATOR = 'Illustrator'
HTML = 'Html'
CCPP = 'C_C++'
TEXT = 'Text'
SQL = 'SQL'


# Maps file extensions to a human-readable file type name.
# Adding an entry here causes that file type to be tracked by the per-file-type
# metrics (e.g. the LinesOfCode and CurseWords metrics).
FILE_TYPE_MAP = {
b'.py': PYTHON,

b'.yaml': YAML,
b'.yml': YAML,

b'.css': CSS,
b'.scss': CSS,

b'.tmpl': TEMPLATE,

b'.mako': MAKO_TEMPLATE,

b'.js': JAVASCRIPT,

b'.java': JAVA,

b'.ai': ILLUSTRATOR,

b'.htm': HTML,
b'.html': HTML,

b'.h': CCPP,
b'.c': CCPP,
b'.cpp': CCPP,

b'.md': TEXT,
b'.rst': TEXT,
b'.csv': TEXT,
b'.log': TEXT,
b'.json': TEXT,
b'.xml': TEXT,
b'.txt': TEXT,

b'.sql': SQL,
}
from identify import identify

# Fallback tag for files that `identify` returns no tags for; the metric
# parsers use `identify.tags_from_filename(filename) or {UNKNOWN}`.
UNKNOWN = 'unknown'
# Tags that are excluded from the per-tag metrics.
# NOTE(review): these appear to describe the kind of filesystem entry / mode /
# encoding rather than a language or format -- confirm against `identify`.
IGNORED_TAGS = frozenset((
identify.DIRECTORY, identify.SYMLINK, identify.FILE,
identify.EXECUTABLE, identify.NON_EXECUTABLE,
identify.TEXT, identify.BINARY,
))
# Every tag that gets its own per-tag metric: all tags known to `identify`
# minus the ignored ones, plus the UNKNOWN fallback.
ALL_TAGS = frozenset((identify.ALL_TAGS - IGNORED_TAGS) | {UNKNOWN})
24 changes: 13 additions & 11 deletions git_code_debt/metrics/curse.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,12 @@

import collections

from identify import identify

from git_code_debt.metric import Metric
from git_code_debt.metrics.base import DiffParserBase
from git_code_debt.metrics.common import FILE_TYPE_MAP
from git_code_debt.metrics.common import ALL_TAGS
from git_code_debt.metrics.common import UNKNOWN
from git_code_debt.metrics.curse_words import word_list


Expand Down Expand Up @@ -34,20 +37,19 @@ def get_metrics_from_stat(self, _, file_diff_stats):
total_curses = total_curses + curses_changed

# Track by file type; a single file may count toward more than one type/tag
file_type = FILE_TYPE_MAP.get(file_diff_stat.extension, 'unknown')
curses_by_file_type[file_type] += curses_changed
filename = file_diff_stat.filename.decode('UTF-8')
tags = identify.tags_from_filename(filename) or {UNKNOWN}

for tag in tags:
curses_by_file_type[tag] += curses_changed

# Yield the overall metric, then one metric for each expected file type
yield Metric('TotalCurseWords', total_curses)
for file_type in set(FILE_TYPE_MAP.values()) | {'unknown'}:
curses_changed = curses_by_file_type.get(file_type, 0)
yield Metric(
'TotalCurseWords_{}'.format(file_type),
curses_changed,
)
for tag in ALL_TAGS:
curses_changed = curses_by_file_type[tag]
yield Metric('TotalCurseWords_{}'.format(tag), curses_changed)

def get_possible_metric_ids(self):
return ['TotalCurseWords'] + [
'TotalCurseWords_{}'.format(file_type)
for file_type in set(FILE_TYPE_MAP.values()) | {'unknown'}
'TotalCurseWords_{}'.format(tag) for tag in ALL_TAGS
]
25 changes: 13 additions & 12 deletions git_code_debt/metrics/lines.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,12 @@

import collections

from identify import identify

from git_code_debt.metric import Metric
from git_code_debt.metrics.base import DiffParserBase
from git_code_debt.metrics.common import FILE_TYPE_MAP
from git_code_debt.metrics.common import ALL_TAGS
from git_code_debt.metrics.common import UNKNOWN


class LinesOfCodeParser(DiffParserBase):
Expand All @@ -24,21 +27,19 @@ def get_metrics_from_stat(self, _, file_diff_stats):
# Track total overall
total_lines += lines_changed

# Track by file extension -> type mapping
file_type = FILE_TYPE_MAP.get(file_diff_stat.extension, 'unknown')
lines_by_file_type[file_type] += lines_changed
filename = file_diff_stat.filename.decode('UTF-8')
tags = identify.tags_from_filename(filename) or {UNKNOWN}

for tag in tags:
lines_by_file_type[tag] += lines_changed

# Yield the overall metric, then one metric for each expected file type
yield Metric('TotalLinesOfCode', total_lines)
for file_type in set(FILE_TYPE_MAP.values()) | {'unknown'}:
lines_changed = lines_by_file_type.get(file_type, 0)
yield Metric(
'TotalLinesOfCode_{}'.format(file_type),
lines_changed,
)
for tag in ALL_TAGS:
lines_changed = lines_by_file_type[tag]
yield Metric('TotalLinesOfCode_{}'.format(tag), lines_changed)

def get_possible_metric_ids(self):
return ['TotalLinesOfCode'] + [
'TotalLinesOfCode_{}'.format(file_type)
for file_type in set(FILE_TYPE_MAP.values()) | {'unknown'}
'TotalLinesOfCode_{}'.format(tag) for tag in ALL_TAGS
]
16 changes: 6 additions & 10 deletions metric_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,9 @@
# NOTE: metrics and metric_expressions may be omitted

Groups:
- Cheetah:
metrics: ['TotalLinesOfCode_Template']
metric_expressions:
- ^.*Cheetah.*$
- Python:
metric_expressions:
- ^.*Python.*$
- (?i)^.*Python.*$
- CurseWords:
metric_expressions:
- ^TotalCurseWords.*$
Expand Down Expand Up @@ -50,8 +46,8 @@ CommitLinks:
# These denote the metrics to show in the widget.
WidgetMetrics:
TotalLinesOfCode: {}
TotalLinesOfCode_Css: {}
TotalLinesOfCode_Python: {}
TotalLinesOfCode_Javascript: {}
TotalLinesOfCode_Text: {}
TotalLinesOfCode_Yaml: {}
TotalLinesOfCode_css: {}
TotalLinesOfCode_python: {}
TotalLinesOfCode_javascript: {}
TotalLinesOfCode_plain-text: {}
TotalLinesOfCode_yaml: {}
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
},
install_requires=[
'flask',
'identify',
'jsonschema',
'mako',
'pyyaml',
Expand Down
8 changes: 4 additions & 4 deletions tests/metrics/curse_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ def test_curse_words_parser():
parser = CurseWordsParser()
input_stats = [
FileDiffStat(
b'templates/foo.tmpl',
[b'#man seriously, fuck cheetah'],
b'some/file.rb',
[b'#man seriously, fuck ruby'],
[],
None,
),
Expand All @@ -24,5 +24,5 @@ def test_curse_words_parser():
),
]
metrics = list(parser.get_metrics_from_stat(Commit.blank, input_stats))
assert Metric('TotalCurseWords_Template', 1) in metrics
assert Metric('TotalCurseWords_Python', 0) in metrics
assert Metric('TotalCurseWords_ruby', 1) in metrics
assert Metric('TotalCurseWords_python', 0) in metrics
4 changes: 2 additions & 2 deletions tests/metrics/lines_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ def test_lines_of_code_parser():

expected_value = {
'TotalLinesOfCode': 3,
'TotalLinesOfCode_Python': 1,
'TotalLinesOfCode_Yaml': 2,
'TotalLinesOfCode_python': 1,
'TotalLinesOfCode_yaml': 2,
}
for metric in metrics:
assert metric.value == expected_value.get(metric.name, 0)
4 changes: 2 additions & 2 deletions tests/server/servlets/widget_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,11 +47,11 @@ def test_widget_data(server):

def test_widget_data_multiple_values(server):
with metrics_enabled(
{'TotalLinesOfCode': {}, 'TotalLinesOfCode_Text': {}},
{'TotalLinesOfCode': {}, 'TotalLinesOfCode_plain-text': {}},
):
response = server.client.post(
flask.url_for('widget.data'),
data={'diff': file_diff_stat_test.SAMPLE_OUTPUT},
)
response_pq = pyquery.PyQuery(response.json['metrics'])
assert 'TotalLinesOfCode_Text' in response_pq.text()
assert 'TotalLinesOfCode_plain-text' in response_pq.text()

0 comments on commit c5290f2

Please sign in to comment.