-
Notifications
You must be signed in to change notification settings - Fork 6
/
reprex.py
420 lines (352 loc) · 14.5 KB
/
reprex.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
import re
import datetime
import os.path
import asttokens
import nbconvert
import nbformat
import pyperclip
import pyimgur
import pkg_resources
# Helper functions for reprex() ---------------------------
def _get_source_code(code, code_file):
    """Return the source code for the reprex as a single string.

    The sources are tried in order: the ``code`` string, then the contents
    of ``code_file``, and finally the clipboard.

    Parameters
    ----------
    code : str or None
        Source code passed in directly. Used as-is when it is not None.
    code_file : str or None
        Path to a file containing the source code. Only consulted when
        ``code`` is None.

    Returns
    -------
    str
        The source code to render.

    Raises
    ------
    Exception
        If both arguments are None and the clipboard cannot be read.
    """
    # an explicit string wins over every other source
    if code is not None:
        return code

    if code_file is not None:
        with open(code_file) as handle:
            return handle.read()

    # nothing was passed in, so fall back to the clipboard
    try:
        return pyperclip.paste()
    except pyperclip.PyperclipException:
        raise Exception(
            'Could not retrieve code from the clipboard. '
            'Try putting your code in a file and using '
            'the `code_file` parameter instead of using the clipboard.'
        )
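# an "input chunk" is made up of all the lines (including comments/empty
# lines) that come after the preceding python statement, up to and including
# the last line of the python statement in this chunk. lines that come after
# the final statement (e.g., trailing comments) are not part of any chunk.
# each chunk will be placed in its own notebook cell.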
def _get_input_chunks(code_str, si):
    """Split source code into chunks of lines, one chunk per notebook cell.

    A chunk ends on the line where a top-level statement ends and starts
    right after the line where the previous chunk ended, so comments and
    empty lines that precede a statement end up in that statement's chunk.
    Statements that end on the same line share one chunk. Lines that come
    after the final statement are not included in any chunk.

    Parameters
    ----------
    code_str : str
        The source code of the reprex.
    si : bool
        If truthy, a final chunk that prints ``reprexpy.SessionInfo()`` is
        appended.

    Returns
    -------
    list of list of str
        One list of source lines per chunk.
    """
    parsed = asttokens.ASTTokens(code_str, parse=True)
    # line number on which each top-level statement ends. a set is used so
    # that several statements ending on one line yield a single chunk.
    last_lines = sorted(
        set(stmt.last_token.end[0] for stmt in parsed.tree.body)
    )

    source_lines = code_str.splitlines()
    chunks = []
    prev_end = 0
    for end in last_lines:
        # line numbers are 1-based, so lines prev_end + 1 .. end are the
        # slice [prev_end:end]
        chunks.append(source_lines[prev_end:end])
        prev_end = end

    if si:
        chunks.append(['import reprexpy', 'print(reprexpy.SessionInfo())'])
    return chunks
def _get_setup_code():
    """Return the chunks of setup code that are run ahead of the reprex.

    Returns
    -------
    list of list of str
        Two single-line chunks: the ``%matplotlib inline`` magic, followed
        by one line of ``;``-separated Python statements.
    """
    magic_chunk = ['%matplotlib inline']
    statements = (
        # env var lets SessionInfo filter out import statements as needed
        'import os; os.environ["REPREX_RUNNING"] = "true"',
        # settings for displaying plot outputs
        'import IPython.display; IPython.display.set_matplotlib_close(False)',
        'import matplotlib.pyplot; matplotlib.pyplot.ioff();',
    )
    return [magic_chunk, ['; '.join(statements)]]
def _run_nb(statement_chunks, kernel_name):
    """Execute the chunks as the cells of a new notebook.

    The setup chunks from ``_get_setup_code()`` are placed in front of
    ``statement_chunks``; every chunk becomes one code cell whose source is
    the chunk's lines joined by newlines.

    Parameters
    ----------
    statement_chunks : list of list of str
        Lines of source code, grouped by cell.
    kernel_name : str or None
        Name of the kernel to execute the notebook with. When None, no
        kernel name is passed to the preprocessor.

    Returns
    -------
    The first element of what ``ExecutePreprocessor.preprocess`` returns
    (the executed notebook).
    """
    all_chunks = _get_setup_code() + statement_chunks

    notebook = nbformat.v4.new_notebook()
    notebook['cells'] = [
        nbformat.v4.new_code_cell('\n'.join(chunk)) for chunk in all_chunks
    ]

    # allow_errors=True so that a cell that raises doesn't abort the run
    # (see the nbconvert docs for ExecutePreprocessor)
    options = {'timeout': 600, 'allow_errors': True}
    if kernel_name is not None:
        options['kernel_name'] = kernel_name
    executor = nbconvert.preprocessors.ExecutePreprocessor(**options)

    executed, _ = executor.preprocess(notebook, {})
    return executed
def _extract_outputs(cells):
    """Return the outputs of every non-setup cell.

    Parameters
    ----------
    cells : list
        The cells of the executed notebook. Each cell is indexed with
        ``'outputs'``.

    Returns
    -------
    list
        One entry per cell that follows the setup cells. A cell whose
        ``'outputs'`` value is falsy is represented by an empty list.
    """
    # the setup cells come first in the notebook and are not part of the
    # reprex, so they are cut off the front
    n_setup_cells = len(_get_setup_code())

    per_cell = []
    for cell in cells:
        found = cell['outputs']
        per_cell.append(found if found else [])
    return per_cell[n_setup_cells:]
# helper used in _get_code_block_start_stops
def _is_plot_output(el):
    """Return True if the output node ``el`` is an image (PNG) output.

    A node qualifies when its ``output_type`` is ``'display_data'`` and its
    ``data`` attribute exposes ``'image/png'`` as an attribute.
    """
    if el.output_type != 'display_data':
        return False
    if not hasattr(el, 'data'):
        return False
    return hasattr(el.data, 'image/png')
def _any_plot_outputs(lst):
    """Return True if at least one output node in ``lst`` is a plot output."""
    # every node is checked (no short-circuiting) before the flags are
    # combined
    plot_flags = list(map(_is_plot_output, lst))
    return any(plot_flags)
# get the line numbers where 'code blocks' start and stop. a code block is a
# set of source code line(s)/text output(s) that are all inside the same
# fenced-in code block.
def _get_code_block_start_stops(outputs, si):
    """Return the (start, stop) statement indexes of each code block.

    Parameters
    ----------
    outputs : list of list
        The outputs of the reprex, one list of output nodes per statement.
    si : bool
        Whether the last statement is the call to SessionInfo().

    Returns
    -------
    list of tuple of int
        One ``(start, stop)`` pair per code block, where ``stop`` is the
        index of the last statement in the block.
    """
    final_idx = len(outputs) - 1
    # index of the statement right before the call to SessionInfo()
    pre_si_idx = final_idx - 1

    # a statement is the last statement in a block if that statement either
    # returned a plot output or is the statement right before the call to
    # SessionInfo(). the last statement overall always closes a block.
    stop_set = {final_idx}
    for idx, cell_outputs in enumerate(outputs):
        if _any_plot_outputs(cell_outputs) or (si and idx == pre_si_idx):
            stop_set.add(idx)
    stops = sorted(stop_set)

    # the first block always starts at the first statement (index 0). every
    # other block starts right after a stop index, unless that stop index is
    # the last statement in the code.
    starts = [0]
    starts.extend(stop + 1 for stop in stops if stop < final_idx)

    assert len(starts) == len(stops), (
        '\n\nlist of start indexes for code blocks is not'
        ' the same length of as list of stop indexes. starts is {} while '
        'stops is {}'
    ).format(starts, stops)

    return list(zip(starts, stops))
# we need to extract the text output for all output types except display_data.
# during this process we also process some of the text outputs where needed
# (e.g., strip ansi color codes from error traceback text) and add our output
# comment char to the beginning of each text output line.
def _get_one_txt_output(output_el, comment, venue):
    """Convert one output node into its list of text lines.

    Parameters
    ----------
    output_el
        A single output of an executed cell. It has to support attribute
        access for ``output_type`` and item access for its payload
        (``'data'``, ``'text'`` or ``'traceback'``).
    comment : str
        Prefix that is put (followed by a space) in front of every line.
    venue : str
        Venue of the reprex. No prefix is added when this is ``'sx'``.

    Returns
    -------
    list of str or None
        The text lines of the output, or None if ``output_el`` is falsy or
        is a ``display_data`` output (which is not a text output).

    Raises
    ------
    AssertionError
        If the output has an ``output_type`` that is not handled here.
    """
    if not output_el:
        return None

    output_type = output_el.output_type
    if output_type == 'display_data':
        return None

    if output_type == 'execute_result':
        # the repr of a result can span several lines, so it is split to
        # make sure that every line (not just the first one) gets the
        # comment prefix. an empty repr is kept as a single empty line.
        txt = output_el['data']['text/plain'].splitlines() or ['']
    elif output_type == 'stream':
        # note, splitting also strips the trailing newline that usually
        # comes with calling `print`, which is desired behavior in our case.
        txt = output_el['text'].splitlines()
    elif output_type == 'error':
        # error traceback is given in a list, usually with one line of
        # traceback per element. remove ansi color codes from the text and
        # split any element that is actually more than one line.
        ansi_free = [
            re.sub(r'\x1b\[(.*?)([@-~])', '', part)
            for part in output_el['traceback']
        ]
        lines = [line for part in ansi_free for line in part.splitlines()]
        # lines made up of nothing but dashes (and empty lines) are dropped,
        # and the traceback header is normalized to the standard wording
        txt = [
            'Traceback (most recent call last):'
            if re.search(
                r'traceback .+most recent call last', line, re.IGNORECASE
            )
            else line
            for line in lines
            if re.search(r'[^-]', line)
        ]
    else:
        # raised explicitly (instead of `assert False`) so that the check
        # is still in place when python runs with -O
        raise AssertionError('Ran into an unknown output_type')

    if venue == 'sx':
        return txt
    return [comment + ' ' + line for line in txt]
# for each element of the output list (i.e., for each output for a given cell),
# get all the text outputs of that cell and merge them into a single list. all
# outputs are considered "text outputs" except those that correspond to plot
# output.
def _get_txt_outputs(outputs, comment, venue):
    """Return the merged text lines of each cell's outputs.

    Parameters
    ----------
    outputs : list of list
        One list of output nodes per cell.
    comment : str
        Passed on to ``_get_one_txt_output``.
    venue : str
        Passed on to ``_get_one_txt_output``.

    Returns
    -------
    list of list of str
        One flat list of text lines per cell. Outputs for which
        ``_get_one_txt_output`` returns None or an empty list contribute
        nothing.
    """
    merged = []
    for cell_outputs in outputs:
        cell_lines = []
        for output_el in cell_outputs:
            lines = _get_one_txt_output(output_el, comment, venue)
            if lines:
                cell_lines.extend(lines)
        merged.append(cell_lines)
    return merged
def _get_image_urls(node):
    """Upload the PNG image of an output node to imgur and return its link.

    Parameters
    ----------
    node
        An output node whose ``node['data']['image/png']`` is a string
        (presumably the base64 text of the image - TODO confirm).

    Returns
    -------
    The ``'link'`` entry of the first element that
    ``pyimgur.request.send_request`` returns.
    """
    # NOTE(review): the imgur client id is hard-coded here
    client_id = '14fb4fdc5c02a96'
    auth_header = {'Authorization': 'Client-ID ' + client_id}
    png_payload = node['data']['image/png'].encode()

    response = pyimgur.request.send_request(
        'https://api.imgur.com/3/image',
        params={'image': png_payload},
        method='POST',
        authentication=auth_header
    )
    return response[0]['link']
def _get_markedup_urls(one_out, venue):
    """Return the markup that displays the plots found in a cell's outputs.

    Parameters
    ----------
    one_out : list
        The output nodes of a single cell.
    venue : str
        Venue of the reprex. An ``image`` directive is written for ``'sx'``
        and markdown image syntax for every other venue.

    Returns
    -------
    str
        An empty string if there are no plot outputs. Otherwise the
        marked-up image urls, each one preceded by an empty line.
    """
    if not _any_plot_outputs(one_out):
        return ''

    links = [
        _get_image_urls(output)
        for output in one_out if _is_plot_output(output)
    ]
    if venue == 'sx':
        marked_up = [' .. image:: ' + link for link in links]
    else:
        marked_up = ['![](' + link + ')' for link in links]
    return '\n\n' + '\n\n'.join(marked_up)
def _get_advertisement():
    """Return the "created by reprexpy" note, stamped with today's date."""
    today = datetime.datetime.now().strftime('%Y-%m-%d')
    template = (
        '<sup>Created on {} by the '
        '[reprexpy package](https://github.com/crew102/reprexpy)</sup>'
    )
    return template.format(today)
def reprex_ex(file):
    r"""Get the path to an example reprex file.

    Parameters
    ----------
    file : str
        Name of the example file whose path you want (e.g.,
        ``'basic-example.py'``, ``'error.py'``, or ``'plotting.py'``).

    Returns
    -------
    str
        A path to the example file, which is looked up inside the
        ``examples`` directory of the reprexpy package.

    """
    relative_path = os.path.join('examples', file)
    return pkg_resources.resource_filename('reprexpy', relative_path)
# reprex() ---------------------------
def reprex(code=None, code_file=None, venue='gh', kernel_name=None,
           comment='#>', si=False, advertise=True):
    r"""Render a reproducible example of Python code (a reprex).

    Runs Python code inside a fresh IPython session, captures the results, and
    marks everything up using the appropriate markdown syntax (determined
    by ``venue``). The code for your reprex can come from one of three places:

    1. **The clipboard** (the default). Code for the reprex will be taken from
       the clipboard if you leave ``code=None`` and ``code_file=None``.
    2. **A string.** Use the ``code`` parameter to pass in a string of code.
    3. **A file.** Use the ``code_file`` parameter to specify a path to a file
       containing reprex code.

    Parameters
    ----------
    code : str, optional
        The code that makes up your reprex (e.g.,
        ``'x = "hi there"\nprint(x)'``).
    code_file : str, optional
        Path to a file that contains your reprex.
    venue : {'gh', 'so', 'sx'}, optional
        The venue that your reprex is bound for. Choose 'gh' if your reprex
        will be posted to GitHub, 'so' if it's bound for Stack Overflow, or
        'sx' if you will be inserting it into Sphinx docs.
    kernel_name : str, optional
        The name of the IPython kernel that you want to use to execute your
        reprex. Choosing ``kernel_name=None`` (the default) means you want to
        use the default kernel. See the IPython docs `kernels for
        different environments
        <https://ipython.readthedocs.io/en/stable/install/kernel_install.html#kernels-for-different-environments>`_
        for details on how to create/use a custom kernel.
    comment : str, optional
        String that should be used to comment out your code's outputs. This
        parameter is ignored if ``venue='sx'``.
    si : bool, optional
        Do you want to display your IPython kernel's session info at the end of
        the reprex? See :py:class:`reprexpy.session_info.SessionInfo` for
        details on session info. This parameter is ignored if ``venue='sx'``.
    advertise : bool, optional
        Do you want to include a note at the bottom of your reprex that says
        that it was produced by the reprexpy package? This parameter is ignored
        if ``venue='sx'``.

    Returns
    -------
    str
        A string containing your rendered reprex. ``reprex()`` also tries to
        copy the rendered reprex to the clipboard.

    Raises
    ------
    ValueError
        If there is nothing to render, i.e., the code doesn't contain any
        Python statements and session info was not requested.

    Examples
    --------
    Render a simple reprex for GitHub:

    >>> import reprexpy
    >>> code = 'x = "hi there"\ny = " old friend"\nprint(x + y)'
    >>> print(reprexpy.reprex(code, advertise=False))
    ```python
    x = "hi there"
    y = " old friend"
    print(x + y)
    #> hi there old friend
    ```

    Render same reprex, except pull the code from a file and use
    Stack Overflow markdown instead of GitHub markdown (hence the leading
    spaces in the rendered result):

    >>> import reprexpy
    >>> file_path = reprexpy.reprex_ex('basic-example.py')
    >>> print(reprexpy.reprex(code_file=file_path, venue='so', advertise=False))
    # <!-- language-all: lang-py -->
    <BLANKLINE>
        x = "hi there"
        y = " old friend"
        print(x + y)
        #> hi there old friend

    Render a code example to insert into Sphinx docs:

    >>> import reprexpy
    >>> file_path = reprexpy.reprex_ex('sphinx-venue.py')
    >>> print(reprexpy.reprex(code_file=file_path, venue='sx'))
     >>> import matplotlib.pyplot as plt
     >>> x = "hi there\nold friend"
     >>> x
     'hi there\nold friend'
     >>> print(x)
     hi there
     old friend
     >>> data = [1, 2, 3, 4]
     >>> # i'm creating a plot here
     >>> plt.plot(data);
     >>> plt.ylabel('some numbers');
     >>> plt.show()
    <BLANKLINE>
     .. image:: https://i.imgur.com/TRv5sNK.png
    <BLANKLINE>
     >>> plt.close()

    """
    code_str = _get_source_code(code, code_file)

    if venue == 'sx':
        # session info and the advertisement are never rendered for sphinx
        si = False
        advertise = False

    print('Rendering reprex...')

    input_chunks = _get_input_chunks(code_str, si=si)
    if not input_chunks:
        # fail early with a clear message. without this check a kernel is
        # started for nothing and the run then dies with an IndexError when
        # the outputs of the (non-existent) last statement are looked up.
        raise ValueError(
            'Could not find any Python statements in the code for your '
            'reprex.'
        )

    node_out = _run_nb(input_chunks, kernel_name)
    outputs = _extract_outputs(node_out.cells)
    start_stops = _get_code_block_start_stops(outputs, si=si)
    txt_outputs = _get_txt_outputs(outputs, comment=comment, venue=venue)

    # add txt_outputs to the source code (input_chunks) to create txt_chunks
    if venue == 'sx':
        input_chunks = [[j for j in i if j != ''] for i in input_chunks]
        input_chunks = [['>>> ' + j for j in i] for i in input_chunks]
    txt_chunks = [
        i + j if j else i
        for i, j in zip(input_chunks, txt_outputs)
    ]
    txt_chunks = ['\n'.join(i) for i in txt_chunks]

    # stack overflow only renders lines as code if they are indented by (at
    # least) four spaces. the indentation for sphinx is left at one space.
    if venue == 'so':
        indent = '    '
    elif venue == 'sx':
        indent = ' '
    else:
        indent = ''
    if indent:
        # indent after joining so that every physical line is indented, even
        # when a single output element contains embedded newlines
        txt_chunks = [
            '\n'.join(indent + line for line in chunk.split('\n'))
            for chunk in txt_chunks
        ]

    # group the txt_chunks into code_blocks
    code_blocks = [txt_chunks[i[0]:(i[1] + 1)] for i in start_stops]
    code_blocks = ['\n'.join(i) for i in code_blocks]
    if venue == 'gh':
        code_blocks = ['```python\n' + i + '\n```' for i in code_blocks]

    # extract the urls to any plots and add the marked-up version of these
    # urls at the end of the corresponding code block
    markedup_urls = [
        _get_markedup_urls(outputs[i[1]], venue=venue)
        for i in start_stops
    ]
    final_blocks = [i + j for i, j in zip(code_blocks, markedup_urls)]

    # add misc markup items to the first/last block
    if venue == 'gh' and si:
        final_blocks[-1] = '<details><summary>Session info</summary>\n\n' + \
            final_blocks[-1] + '\n\n</details>'
    if advertise:
        if si:
            # the session info stays at the very bottom, so the
            # advertisement goes right above it
            final_blocks[-1] = _get_advertisement() + '\n\n' + final_blocks[-1]
        else:
            final_blocks[-1] = final_blocks[-1] + '\n\n' + _get_advertisement()
    if venue == 'so':
        final_blocks[0] = '# <!-- language-all: lang-py -->\n\n' + \
            final_blocks[0]

    # convert the list of blocks to a string
    out = '\n\n'.join(final_blocks)
    # on python 3 the joined text is always a str, so this only ever applies
    # on python 2 (presumably to turn unicode text into a byte string)
    if not isinstance(out, str):
        out = out.encode('utf8')

    # copying to the clipboard is best effort: the rendered reprex is
    # returned either way
    try:
        pyperclip.copy(out)
    except RuntimeError:
        print('Could not copy rendered reprex to the clipboard.\n')
    else:
        print('Rendered reprex is on the clipboard.\n')

    return out