Permalink
Browse files

Initial draft of more formal notebook format.

* Basic Python notebook uses a simple Struct subclass,
  NotebookNode, for representing the notebook components.
* XML and JSON readers/writers work in a full round-trip manner.
* Python reader/writer works for code cells.
* Everything is tested.
  • Loading branch information...
1 parent 8012152 commit f3a5072fc60b4502521f27f1bdf0a2ec67cf0668 @ellisonbg ellisonbg committed Jul 28, 2011
@@ -1,76 +1,79 @@
"""The basic dict based notebook format."""
+import pprint
import uuid
+from IPython.utils.ipstruct import Struct
+
+
+class NotebookNode(Struct):
+ pass
+
def new_code_cell(input=None, prompt_number=None, output_text=None, output_png=None,
output_html=None, output_svg=None, output_latex=None, output_json=None,
output_javascript=None):
"""Create a new code cell with input and output"""
- cell = {}
- cell['cell_type'] = 'code'
+ cell = NotebookNode()
+ cell.cell_type = 'code'
if input is not None:
- cell['input'] = unicode(input)
+ cell.input = unicode(input)
if prompt_number is not None:
- cell['prompt_number'] = int(prompt_number)
+ cell.prompt_number = int(prompt_number)
- output = {}
+ output = NotebookNode()
if output_text is not None:
- output['text/plain'] = unicode(output_text)
+ output.text = unicode(output_text)
if output_png is not None:
- output['image/png'] = bytes(output_png)
+ output.png = bytes(output_png)
if output_html is not None:
- output['text/html'] = unicode(output_html)
+ output.html = unicode(output_html)
if output_svg is not None:
- output['image/svg+xml'] = unicode(output_svg)
+ output.svg = unicode(output_svg)
if output_latex is not None:
- output['text/latex'] = unicode(output_latex)
+ output.latex = unicode(output_latex)
if output_json is not None:
- output['application/json'] = unicode(output_json)
+ output.json = unicode(output_json)
if output_javascript is not None:
- output['application/javascript'] = unicode(output_javascript)
+ output.javascript = unicode(output_javascript)
- cell['output'] = output
+ cell.output = output
return cell
def new_text_cell(text=None):
"""Create a new text cell."""
- cell = {}
+ cell = NotebookNode()
if text is not None:
- cell['text'] = unicode(text)
- cell['cell_type'] = 'text'
+ cell.text = unicode(text)
+ cell.cell_type = 'text'
return cell
def new_worksheet(name=None, cells=None):
"""Create a worksheet by name with with a list of cells."""
- ws = {}
+ ws = NotebookNode()
if name is not None:
- ws['name'] = unicode(name)
- else:
- ws['name'] = u''
+ ws.name = unicode(name)
if cells is None:
- ws['cells'] = []
+ ws.cells = []
else:
- ws['cells'] = list(cells)
+ ws.cells = list(cells)
return ws
def new_notebook(name=None, id=None, worksheets=None):
"""Create a notebook by name, id and a list of worksheets."""
- nb = {}
+ nb = NotebookNode()
if name is not None:
- nb['name'] = unicode(name)
- else:
- nb['name'] = u''
+ nb.name = unicode(name)
if id is None:
- nb['id'] = unicode(uuid.uuid4())
+ nb.id = unicode(uuid.uuid4())
else:
- nb['id'] = unicode(id)
+ nb.id = unicode(id)
if worksheets is None:
- nb['worksheets'] = []
+ nb.worksheets = []
else:
- nb['worksheets'] = list(worksheets)
+ nb.worksheets = list(worksheets)
return nb
@@ -1,7 +1,7 @@
"""Read and write notebooks in JSON format."""
from base64 import encodestring
-from .base import NotebookReader, NotebookWriter, base64_decode
+from .rwbase import NotebookReader, NotebookWriter, base64_decode
import json
@@ -14,15 +14,15 @@ def default(self, obj):
class JSONReader(NotebookReader):
- def reads(s, **kwargs):
+ def reads(self, s, **kwargs):
nb = json.loads(s, **kwargs)
nb = base64_decode(nb)
return nb
class JSONWriter(NotebookWriter):
- def writes(nb, **kwargs):
+ def writes(self, nb, **kwargs):
kwargs['cls'] = BytesEncoder
kwargs['indent'] = 4
return json.dumps(nb, **kwargs)
View
@@ -1,19 +1,19 @@
"""Read and write notebooks as regular .py files."""
-from .base import NotebookReader, NotebookWriter
-from .nbdict import new_code_cell, new_worksheet, new_notebook
+from .rwbase import NotebookReader, NotebookWriter
+from .nbbase import new_code_cell, new_worksheet, new_notebook
class PyReader(NotebookReader):
- def reads(s, **kwargs):
+ def reads(self, s, **kwargs):
lines = s.splitlines()
cells = []
cell_lines = []
for line in lines:
- if line.startswith('# <codecell>'):
- code = '\n'.join(cell_lines)
- code = code.strip('\n')
+ if line.startswith(u'# <codecell>'):
+ code = u'\n'.join(cell_lines)
+ code = code.strip(u'\n')
if code:
cells.append(new_code_cell(input=code))
cell_lines = []
@@ -26,15 +26,16 @@ def reads(s, **kwargs):
class PyWriter(NotebookWriter):
- def writes(nb, **kwargs):
+ def writes(self, nb, **kwargs):
lines = []
- for ws in nb['worksheets']:
- for cell in ws['cells']:
- if cell['cell_type'] == 'code':
- input = cell['input']
+ for ws in nb.worksheets:
+ for cell in ws.cells:
+ if cell.cell_type == 'code':
+ input = cell.input
+ lines.extend([u'# <codecell>',u''])
lines.extend(input.splitlines())
- lines.extend(['','# <codecell>',''])
- return ''.join(lines)
+ lines.append(u'')
+ return unicode('\n'.join(lines))
_reader = PyReader()
View
@@ -2,43 +2,136 @@
from xml.etree import ElementTree as ET
-from .base import NotebookReader, NotebookWriter
-from .nbdict import new_code_cell, new_worksheet, new_notebook
+from .rwbase import NotebookReader, NotebookWriter
+from .nbbase import new_code_cell, new_text_cell, new_worksheet, new_notebook
+
+
+def indent(elem, level=0):
+ i = "\n" + level*" "
+ if len(elem):
+ if not elem.text or not elem.text.strip():
+ elem.text = i + " "
+ if not elem.tail or not elem.tail.strip():
+ elem.tail = i
+ for elem in elem:
+ indent(elem, level+1)
+ if not elem.tail or not elem.tail.strip():
+ elem.tail = i
+ else:
+ if level and (not elem.tail or not elem.tail.strip()):
+ elem.tail = i
+
+
+def _get_text(e, tag):
+ sub_e = e.find(tag)
+ if sub_e is None:
+ return None
+ else:
+ return sub_e.text
class XMLReader(NotebookReader):
- def reads(s, **kwargs):
- pass
+ def reads(self, s, **kwargs):
+ root = ET.fromstring(s)
+
+ nbname = _get_text(root,'name')
+ nbid = _get_text(root,'id')
+
+ worksheets = []
+ for ws_e in root.getiterator('worksheet'):
+ wsname = _get_text(ws_e,'name')
+ cells = []
+ for cell_e in ws_e.getiterator():
+ if cell_e.tag == 'codecell':
+ input = _get_text(cell_e,'input')
+ output_e = cell_e.find('output')
+ if output_e is not None:
+ output_text = _get_text(output_e,'text')
+ output_png = _get_text(output_e,'png')
+ output_svg = _get_text(output_e,'svg')
+ output_html = _get_text(output_e,'html')
+ output_latex = _get_text(output_e,'latex')
+ output_json = _get_text(output_e,'json')
+ output_javascript = _get_text(output_e,'javascript')
+ cc = new_code_cell(input=input,output_png=output_png,
+ output_text=output_text,output_svg=output_svg,
+ output_html=output_html,output_latex=output_latex,
+ output_json=output_json,output_javascript=output_javascript
+ )
+ cells.append(cc)
+ if cell_e.tag == 'textcell':
+ text = _get_text(cell_e,'text')
+ cells.append(new_text_cell(text=text))
+ ws = new_worksheet(name=wsname,cells=cells)
+ worksheets.append(ws)
+
+ nb = new_notebook(name=nbname,id=nbid,worksheets=worksheets)
+ return nb
class XMLWriter(NotebookWriter):
- def writes(nb, **kwargs):
+ def writes(self, nb, **kwargs):
nb_e = ET.Element('notebook')
- name_e = ET.SubElement(nb_e, 'name')
- name_e.text = nb.get('name','')
- id_e = ET.SubElement(nb_e, 'id')
- id_e.text = nb.get('id','')
- for ws in nb['worksheets']:
+ if 'name' in nb:
+ name_e = ET.SubElement(nb_e, 'name')
+ name_e.text = nb.name
+ if 'id' in nb:
+ id_e = ET.SubElement(nb_e, 'id')
+ id_e.text = nb.id
+ for ws in nb.worksheets:
ws_e = ET.SubElement(nb_e, 'worksheet')
- ws_name_e = ET.SubElement(ws_e, 'name')
- ws_name_e.text = ws.get('name','')
- for cell in ws['cells']:
- cell_type = cell['cell_type']
+ if 'name' in ws:
+ ws_name_e = ET.SubElement(ws_e, 'name')
+ ws_name_e.text = ws.name
+ for cell in ws.cells:
+ cell_type = cell.cell_type
if cell_type == 'code':
- output = cell['output']
- cell_e = ET.SubElement(ws_e, 'cell')
- input_e = ET.SubElement(cell_e, 'input')
- input_e.text = cell.get('input','')
+ output = cell.output
+ cell_e = ET.SubElement(ws_e, 'codecell')
output_e = ET.SubElement(cell_e, 'output')
- text_e = ET.SubElement(output_e, 'text')
- text_e.text = cell.output
- elif cell_type == 'text':
- pass
-
+ if 'input' in cell:
+ input_e = ET.SubElement(cell_e, 'input')
+ input_e.text = cell.input
+ if 'prompt_number' in cell:
+ prompt_number_e = ET.SubElement(cell_e, 'prompt_number')
+ input_e.text = cell.prompt_number
+ if 'text' in output:
+ text_e = ET.SubElement(output_e, 'text')
+ text_e.text = output.text
+ if 'png' in output:
+ png_e = ET.SubElement(output_e, 'png')
+ png_e.text = output.png
+ if 'html' in output:
+ html_e = ET.SubElement(output_e, 'html')
+ html_e.text = output.html
+ if 'svg' in output:
+ svg_e = ET.SubElement(output_e, 'svg')
+ svg_e.text = output.svg
+ if 'latex' in output:
+ latex_e = ET.SubElement(output_e, 'latex')
+ latex_e.text = output.latex
+ if 'json' in output:
+ json_e = ET.SubElement(output_e, 'json')
+ json_e.text = output.json
+ if 'javascript' in output:
+ javascript_e = ET.SubElement(output_e, 'javascript')
+ javascript_e.text = output.javascript
+ elif cell_type == 'text':
+ cell_e = ET.SubElement(ws_e, 'textcell')
+ if 'text' in cell:
+ cell_text_e = ET.SubElement(cell_e, 'text')
+ cell_text_e.text = cell.text
+
+ indent(nb_e)
+ txt = ET.tostring(nb_e, encoding="utf-8")
+ txt = '<?xml version="1.0" encoding="utf-8"?>\n' + txt
+ return txt
+
+
_reader = XMLReader()
_writer = XMLWriter()
@@ -1,7 +1,7 @@
from base64 import encodestring, decodestring
-def base64_decode(self, nb):
+def base64_decode(nb):
"""Base64 encode all bytes objects in the notebook."""
for ws in nb['worksheets']:
for cell in ws['cells']:
@@ -11,7 +11,7 @@ def base64_decode(self, nb):
return nb
-def base64_encode(self, nb):
+def base64_encode(nb):
"""Base64 decode all binary objects in the notebook."""
for ws in nb['worksheets']:
for cell in ws['cells']:
@@ -43,3 +43,4 @@ def write(self, nb, fp, **kwargs):
return fp.write(self.dumps(nb,**kwargs))
+
No changes.
Oops, something went wrong.

0 comments on commit f3a5072

Please sign in to comment.