## How to Convert HTML to .ipynb
This is the code example used for the blog post [https://www.marsja.se/converting-html-to-a-jupyter-notebook/](https://www.marsja.se/converting-html-to-a-jupyter-notebook/) in which we learn how to convert code chunks from a webpage to a Jupyter notebook.

In [1]:
import json
import urllib
import urllib.request

from bs4 import BeautifulSoup

# Web page whose highlighted code blocks are turned into notebook cells.
url = 'https://blog.keras.io/keras-as-a-simplified-interface-to-tensorflow-tutorial.html'

# Browser-like headers sent with the download request
# (presumably so the server answers as it would for a regular browser).
headers = {
    # Adjacent string literals are concatenated as-is, so the space that
    # separates the "AppleWebKit/537.11" token from "(KHTML, like Gecko)"
    # has to be written explicitly at the end of the first literal.
    'User-Agent': ('Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 '
                   '(KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11'),
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
    'Accept-Encoding': 'none',
    'Accept-Language': 'en-US,en;q=0.8',
    'Connection': 'keep-alive',
}

In [2]:
# Request the page with the custom headers and read the complete response body.
# The `with` block closes the HTTP response as soon as the body has been read,
# instead of leaving the connection open for the lifetime of the kernel.
req = urllib.request.Request(url, headers=headers)
with urllib.request.urlopen(req) as page:
    text = page.read()

In [3]:
# Parse the downloaded page content with the lxml parser.
soup = BeautifulSoup(markup=text, features='lxml')

In [4]:
# Skeleton of the notebook to generate: format version 4.2, an empty cell list
# (filled in later) and metadata declaring a Python 3 kernel.
create_nb = {
    'nbformat': 4,
    'nbformat_minor': 2,
    'cells': [],
    'metadata': {
        'kernelspec': {
            'display_name': 'Python 3',
            'language': 'python',
            'name': 'python3',
        },
    },
}

def get_data(soup, content_class, notebook=None):
    """Append one code cell per highlighted code chunk found in ``soup``.

    Every ``<div>`` whose class matches ``content_class`` is searched for
    ``<div class="highlight">`` elements; the text of each one becomes the
    source of a new, unexecuted code cell.

    Parameters
    ----------
    soup
        Parsed document exposing ``find_all`` (a BeautifulSoup object in this
        notebook).
    content_class : str
        Class of the container ``<div>`` elements to search.
    notebook : dict, optional
        Notebook dict whose ``'cells'`` list receives the new cells. When
        omitted, the module-level ``create_nb`` is used, so existing
        two-argument calls behave as before.

    Returns
    -------
    None
        The notebook dict is modified in place.
    """
    if notebook is None:
        notebook = create_nb

    for container in soup.find_all('div', attrs={'class': content_class}):
        for chunk in container.find_all('div', attrs={'class': 'highlight'}):
            notebook['cells'].append({
                'metadata': {},
                'outputs': [],
                # The whole chunk is stored as a single source string.
                'source': [chunk.get_text()],
                'execution_count': None,
                'cell_type': 'code',
            })

# Fill create_nb['cells'] from the 'entry-content' containers of the page.
get_data(soup, 'entry-content')

# Serialise the assembled notebook dict as JSON directly into the .ipynb file.
with open('keras_tensorflow.ipynb', 'w') as jynotebook:
    json.dump(create_nb, jynotebook)

In [5]:
# Echo the assembled notebook dict so the generated cells can be inspected.
create_nb

{'cells': [{'cell_type': 'code',
   'execution_count': None,
   'metadata': {},
   'outputs': [],
   'source': ['import tensorflow as tf\nsess = tf.Session()\n\nfrom keras import backend as K\nK.set_session(sess)\n']},
  {'cell_type': 'code',
   'execution_count': None,
   'metadata': {},
   'outputs': [],
   'source': ['# this placeholder will contain our input digits, as flat vectors\nimg = tf.placeholder(tf.float32, shape=(None, 784))\n']},
  {'cell_type': 'code',
   'execution_count': None,
   'metadata': {},
   'outputs': [],
   'source': ["from keras.layers import Dense\n\n# Keras layers can be called on TensorFlow tensors:\nx = Dense(128, activation='relu')(img)  # fully-connected layer with 128 units and ReLU activation\nx = Dense(128, activation='relu')(x)\npreds = Dense(10, activation='softmax')(x)  # output layer with 10 units and a softmax activation\n"]},
  {'cell_type': 'code',
   'execution_count': None,
   'metadata': {},
   'outputs': [],
   'source': ['labels = tf.pla