Skip to content

Commit

Permalink
Adding the option to remove commands and environments (for now \begin…
Browse files Browse the repository at this point in the history
…{comment}\end{comment})
  • Loading branch information
jponttuset committed Jun 4, 2019
1 parent 53ccbca commit 367d607
Show file tree
Hide file tree
Showing 4 changed files with 122 additions and 50 deletions.
28 changes: 25 additions & 3 deletions arxiv_latex_cleaner.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,19 @@ def _copy_file(filename, params):
os.path.join(params['output_folder'], filename))


def _remove_comments(text):
def _remove_command(text, command):
"""Removes '\\command{*}' from the string 'text'."""
return re.sub(r'\\' + command + r'\{[^{}]*\}', '', text)


def _remove_environment(text, environment):
"""Removes '\\begin{environment}*\\end{environment}' from 'text'."""
return re.sub(
r'\\begin\{' + environment + r'\}[^{}]*\\end\{' + environment + r'\}',
'', text)


def _remove_comments_inline(text):
"""Removes the comments from the string 'text'."""
if 'auto-ignore' in text:
return text
Expand Down Expand Up @@ -120,8 +132,11 @@ def _read_remove_comments_and_write_file(filename, parameters):
os.path.join(parameters['output_folder'], os.path.dirname(filename)))
content = _read_file_content(
os.path.join(parameters['input_folder'], filename))
content_out = [_remove_comments(line) for line in content]
_write_file_content(''.join(content_out),
content = [_remove_comments_inline(line) for line in content]
content = _remove_environment(''.join(content), 'comment')
for command in parameters['commands_to_delete']:
content = _remove_command(content, command)
_write_file_content(content,
os.path.join(parameters['output_folder'], filename))


Expand Down Expand Up @@ -261,6 +276,13 @@ def _handle_arguments():
'--compress_pdf',
action='store_true',
help='Compress PDF images using ghostscript (Linux and Mac only).')
parser.add_argument(
'--commands_to_delete',
nargs='+',
default=[],
help=('LaTeX commands that will be deleted. Useful for e.g. user-defined '
'\\todo commands.'),
required=False)

return vars(parser.parse_args())

Expand Down
136 changes: 89 additions & 47 deletions arxiv_latex_cleaner_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,56 +25,97 @@

class UnitTests(parameterized.TestCase):

@parameterized.named_parameters({
'testcase_name': 'no_comment',
'line_in': 'Foo\n',
'true_output': 'Foo\n'
}, {
'testcase_name': 'auto_ignore',
'line_in': '%auto-ignore\n',
'true_output': '%auto-ignore\n'
}, {
'testcase_name': 'percent',
'line_in': r'100\% accurate\n',
'true_output': r'100\% accurate\n'
}, {
'testcase_name': 'comment',
'line_in': ' % Comment\n',
'true_output': ''
}, {
'testcase_name': 'comment_inline',
'line_in': 'Foo %Comment\n',
'true_output': 'Foo %\n'
})
def test_remove_comments(self, line_in, true_output):
self.assertEqual(arxiv_latex_cleaner._remove_comments(line_in), true_output)

@parameterized.named_parameters({
'testcase_name': 'all_pass',
'inputs': ['abc', 'bca'],
'patterns': ['a'],
'true_outputs': ['abc', 'bca'],
}, {
'testcase_name': 'not_all_pass',
'inputs': ['abc', 'bca'],
'patterns': ['a$'],
'true_outputs': ['bca'],
})
@parameterized.named_parameters(
    {
        # A line without any comment is returned unchanged.
        'testcase_name': 'no_comment',
        'line_in': 'Foo\n',
        'true_output': 'Foo\n'
    }, {
        # A line containing 'auto-ignore' is expected back untouched.
        'testcase_name': 'auto_ignore',
        'line_in': '%auto-ignore\n',
        'true_output': '%auto-ignore\n'
    }, {
        # Raw strings: the trailing \n here is a literal backslash followed by
        # 'n', not a newline. An escaped percent sign is not a comment.
        'testcase_name': 'percent',
        'line_in': r'100\% accurate\n',
        'true_output': r'100\% accurate\n'
    }, {
        # A line holding only a comment is expected to vanish entirely.
        'testcase_name': 'comment',
        'line_in': ' % Comment\n',
        'true_output': ''
    }, {
        # For a trailing comment, the text is dropped but the '%' and the
        # newline are expected to stay.
        'testcase_name': 'comment_inline',
        'line_in': 'Foo %Comment\n',
        'true_output': 'Foo %\n'
    })
def test_remove_comments_inline(self, line_in, true_output):
  """Checks _remove_comments_inline on a single line of LaTeX source."""
  self.assertEqual(
      arxiv_latex_cleaner._remove_comments_inline(line_in), true_output)

@parameterized.named_parameters(
    {
        # Text without any command is returned unchanged.
        'testcase_name': 'no_command',
        'text_in': 'Foo\nFoo2\n',
        'true_output': 'Foo\nFoo2\n'
    }, {
        # A command other than the one being removed must be preserved.
        'testcase_name': 'command_not_removed',
        'text_in': '\\textit{Foo\nFoo2}\n',
        'true_output': '\\textit{Foo\nFoo2}\n'
    }, {
        # The targeted command is removed together with its argument, even
        # when the argument spans two lines; surrounding text is kept.
        'testcase_name': 'command_removed',
        'text_in': 'A\\todo{B\nC}\nD\n\\end{document}',
        'true_output': 'A\nD\n\\end{document}'
    })
def test_remove_command(self, text_in, true_output):
  """Checks _remove_command when asked to delete the 'todo' command."""
  self.assertEqual(
      arxiv_latex_cleaner._remove_command(text_in, 'todo'), true_output)

@parameterized.named_parameters(
    {
        # Text without any environment is returned unchanged.
        'testcase_name': 'no_environment',
        'text_in': 'Foo\n',
        'true_output': 'Foo\n'
    }, {
        # An environment other than the one being removed must be preserved.
        'testcase_name': 'environment_not_removed',
        'text_in': 'Foo\n\\begin{equation}\n3x+2\n\\end{equation}\nFoo',
        'true_output': 'Foo\n\\begin{equation}\n3x+2\n\\end{equation}\nFoo'
    }, {
        # The targeted environment is removed with its multi-line body; the
        # newline that follows the closing delimiter is kept.
        'testcase_name': 'environment_removed',
        'text_in': 'Foo\\begin{comment}\n3x+2\n\\end{comment}\nFoo',
        'true_output': 'Foo\nFoo'
    })
def test_remove_environment(self, text_in, true_output):
  """Checks _remove_environment when asked to delete 'comment' blocks."""
  self.assertEqual(
      arxiv_latex_cleaner._remove_environment(text_in, 'comment'),
      true_output)

@parameterized.named_parameters(
    {
        # Both inputs contain 'a', so both are expected to be kept.
        'testcase_name': 'all_pass',
        'inputs': ['abc', 'bca'],
        'patterns': ['a'],
        'true_outputs': ['abc', 'bca'],
    }, {
        # Only 'bca' ends in 'a'; presumably the patterns are regular
        # expressions searched within each input -- confirm in _keep_pattern.
        'testcase_name': 'not_all_pass',
        'inputs': ['abc', 'bca'],
        'patterns': ['a$'],
        'true_outputs': ['bca'],
    })
def test_keep_pattern(self, inputs, patterns, true_outputs):
  """Checks which of 'inputs' _keep_pattern yields for the given patterns."""
  # The result is wrapped in list() before comparing against the expected list.
  self.assertEqual(
      list(arxiv_latex_cleaner._keep_pattern(inputs, patterns)), true_outputs)

@parameterized.named_parameters({
'testcase_name': 'all_pass',
'inputs': ['abc', 'bca'],
'patterns': ['a'],
'true_outputs': [],
}, {
'testcase_name': 'not_all_pass',
'inputs': ['abc', 'bca'],
'patterns': ['a$'],
'true_outputs': ['abc'],
})
@parameterized.named_parameters(
{
'testcase_name': 'all_pass',
'inputs': ['abc', 'bca'],
'patterns': ['a'],
'true_outputs': [],
}, {
'testcase_name': 'not_all_pass',
'inputs': ['abc', 'bca'],
'patterns': ['a$'],
'true_outputs': ['abc'],
})
def test_remove_pattern(self, inputs, patterns, true_outputs):
self.assertEqual(
list(arxiv_latex_cleaner._remove_pattern(inputs, patterns)),
Expand Down Expand Up @@ -111,7 +152,8 @@ def test_complete(self):
'images/im2_included.jpg': 200
},
'im_size': 100,
'compress_pdf': False
'compress_pdf': False,
'commands_to_delete': ['mytodo']
})

# Checks the set of files is the same as in the true folder.
Expand Down
5 changes: 5 additions & 0 deletions tex/main.tex
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,12 @@
% Whole line comment

Text% Inline comment
\begin{comment}
This is an environment comment.
\end{comment}

This is a percent \%.
% Whole line comment without newline
\includegraphics{images/im1_included.png}

This is a todo command\mytodo{Do this later}
3 changes: 3 additions & 0 deletions tex_arXiv_true/main.tex
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,8 @@

Text%


This is a percent \%.
\includegraphics{images/im1_included.png}

This is a todo command

0 comments on commit 367d607

Please sign in to comment.