In [64]:
from bs4 import BeautifulSoup
import urllib
import pandas as pd
from IPython.display import clear_output
import os

# Path of the HTML export that the rest of the notebook parses.
file_path = "example_input.html"

# Decode explicitly as UTF-8 so the result does not depend on the platform's
# default locale encoding (which is not UTF-8 everywhere).
with open(file_path, "r", encoding="utf-8") as file:
    contents = file.read()


'<h1 id="breast-surgery-clinical-pathway">Breast Surgery Clinical\nPathway</h1>\n<h2 id="preoperative">Preoperative</h2>\n<h3 id="npo-guidelines">NPO Guidelines</h3>\n<figure>\n<img src="static/img/vector-1.svg" alt="icon" />\n<figcaption aria-hidden="true">icon</figcaption>\n</figure>\n<ul>\n<li>Stop full liquids 8 hrs prior to surgery</li>\n<li>Stop water 2 hrs prior to surgery</li>\n<li>Consider allowing breast patients who arrive early to pre-op for\nlympho or mammography to drink water until 2 hrs prior to surgery</li>\n</ul>\n<h3 id="labs">Labs</h3>\n<figure>\n<img src="static/img/vector-2.svg" alt="icon" />\n<figcaption aria-hidden="true">icon</figcaption>\n</figure>\n<ul>\n<li>b-HCG for females patients under 66</li>\n</ul>\n<h3 id="pain-medications">Pain Medications</h3>\n<figure>\n<img src="static/img/vector-3.svg" alt="icon" />\n<figcaption aria-hidden="true">icon</figcaption>\n</figure>\n<p>Non-opioid analgesia adjuncts to consider: - Acetaminophen 1000mg\n(caution with hep

In [65]:
# Parse the raw HTML with the parser that ships with the standard library.
# (Call soup.prettify() here to inspect the parsed tree while debugging.)
soup = BeautifulSoup(contents, features="html.parser")

In [66]:
# List the text of every <h2> in the document, one per line.
for heading in soup.find_all("h2"):
    print(heading.get_text())

Preoperative
Intraoperative
Postoperative


In [75]:
# Keep a handle on the first <h2> in the document for inspection below.
clue = soup.find("h2")

In [87]:
# Show the first <h2> serialized back to indented markup, using the "html"
# output formatter.
clue.prettify(formatter="html")

'<h2 id="preoperative">\n Preoperative\n</h2>\n'

In [68]:
# Hop two siblings forward from the first <h2> and look at that node's
# children. In the sample input the first sibling appears to be the newline
# text node between the tags, so the second hop lands on the following tag.
# `.children` is an iterator, so the cell displays an iterator object rather
# than the nodes themselves.
soup.h2.next_sibling.next_sibling.children

<list_iterator at 0x122983a00>

In [69]:
# Keys of every section record, and the column order of `df`.
SECTION_COLUMNS = ["header", "subheader", "image_url", "content_block"]


def _new_section(header, subheader=""):
    """Return an empty section record filed under `header` / `subheader`."""
    return {
        "header": header,
        "subheader": subheader,
        "image_url": "",
        "content_block": "",
    }


def _is_blank(section):
    """Return True when a section has no subheader, image or content."""
    return not (
        section["subheader"] or section["image_url"] or section["content_block"]
    )


def parse_sections(start):
    """Collect one record per subsection from the siblings following `start`.

    The siblings are walked in document order:

    * ``<h2>`` starts a new group; its text becomes ``header``.
    * ``<h3>`` starts a new subsection in the current group; its text becomes
      ``subheader``.
    * ``<figure>`` supplies ``image_url`` from the ``src`` of its ``<img>``.
    * ``<p>`` and ``<ul>`` are appended, as prettified HTML, to
      ``content_block``.

    The section being built is closed *before* an ``<h2>`` or ``<h3>`` changes
    the header or subheader, and once more after the last sibling, so:

    * the last subsection of a group stays under its own group header,
    * the final subsection of the document is not lost,
    * a subsection without a ``<figure>`` gets an empty ``image_url`` rather
      than the previous subsection's icon.

    Content that appears before any ``<h3>`` is kept as a record with an empty
    ``subheader``. Completely empty records are skipped.

    Parameters
    ----------
    start : bs4 element
        Element whose ``next_siblings`` are scanned (here, the ``<h1>``).

    Returns
    -------
    list of dict
        Records with the keys listed in ``SECTION_COLUMNS``.
    """
    sections = []
    current = _new_section(header="")

    for sibling in start.next_siblings:
        tag = getattr(sibling, "name", None)  # text nodes carry no tag name

        if tag in ("h2", "h3"):
            # Close the running section before the heading changes anything.
            if not _is_blank(current):
                sections.append(current)
            if tag == "h2":
                current = _new_section(header=sibling.text)
            else:
                current = _new_section(
                    header=current["header"], subheader=sibling.text
                )
        elif tag in ("p", "ul"):
            current["content_block"] += sibling.prettify(formatter="html")
        elif tag == "figure":
            img = sibling.find("img")
            if img is not None and img.get("src"):
                current["image_url"] = img["src"]

    # Close the section still open when the document ends.
    if not _is_blank(current):
        sections.append(current)

    return sections


h1 = soup.find('h1')
if h1 is None:
    raise ValueError("input document has no <h1> element to start parsing from")

title = h1.text
rows = parse_sections(h1)

# Passing the columns explicitly keeps the queries below valid even when no
# sections were found.
df = pd.DataFrame(rows, columns=SECTION_COLUMNS)

pre = df.query("header == 'Preoperative'").to_dict(orient="records")
intra = df.query("header == 'Intraoperative'").to_dict(orient="records")
post = df.query("header == 'Postoperative'").to_dict(orient="records")


In [70]:
import jinja2

# No loader is configured: templates for this environment are built from
# in-memory strings via `from_string`.
environment = jinja2.Environment()

# Page template. It expects three context variables -- `pre`, `intra` and
# `post` -- each a list of section records exposing `image_url`, `subheader`
# and `content_block`. Each group renders under its own header.
template_string = '''
{# templates/results.html #}
<!DOCTYPE html>
<html lang="en">
  <head>
    <meta charset="utf-8" />
    <link rel="stylesheet" href="human.css" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
  </head>

<body>
  <div class="title-banner">Breast Clinical Care Pathway</div>
  <div class="frame">

  <div class="group-holder" role="list">

    <!------------------------- preop group ------------------------->
    <div class="group preop" role="listitem">
      <div class="groupheader">Preoperative</div>
      <div class="groupbody">
        {% for section in pre %}
        <div class="subsection">
          <div class="dot"><img class="vicon" src="{{ section.image_url }}"></div>
          <div class="subsection-header">{{ section.subheader }}</div>
          <div class="subsection-body">{{ section.content_block }}</div>
        </div>
        {% endfor %}
      </div>
    </div>
      
      <!------------------------- intra group ------------------------->
    <div class="group intra" role="listitem">
      <div class="groupheader">Intraoperative</div>
      <div class="groupbody">
        {% for section in intra %}
        <div class="subsection">
          <div class="dot"><img class="vicon" src="{{ section.image_url }}"></div>
          <div class="subsection-header">{{ section.subheader }}</div>
          <div class="subsection-body">{{ section.content_block }}</div>
        </div>
        {% endfor %}
      </div>
    </div>
    
      <!------------------------- post group ------------------------->
    <div class="group postop" role="listitem">
      <div class="groupheader">Postoperative</div>
      <div class="groupbody">
        {% for section in post %}
        <div class="subsection">
          <div class="dot"><img class="vicon" src="{{ section.image_url }}"></div>
          <div class="subsection-header">{{ section.subheader }}</div>
          <div class="subsection-body">{{ section.content_block }}</div>
        </div>
        {% endfor %}
      </div>
    </div>

      
    <div class="foot">
      Version XYZ. Last updated Feb 31, 2023. Available online at <a href="https://pathways.stanes.link/breast">https://pathways.stanes.link/breast</a>. Submit edits or comments there.
    </div>
  </div>
  </div>


</body>

</html>

'''
template = environment.from_string(template_string)


In [71]:
# Variables handed to the template: the three section lists plus the title.
context = dict(pre=pre, intra=intra, post=post, title=title)

# Leaving the render call as the last expression makes the notebook display
# the generated HTML string.
template.render(context)





'\n\n<!DOCTYPE html>\n<html lang="en">\n  <head>\n    <meta charset="utf-8" />\n    <link rel="stylesheet" href="human.css" />\n    <meta name="viewport" content="width=device-width, initial-scale=1.0">\n  </head>\n\n<body>\n  <div class="title-banner">Breast Clinical Care Pathway</div>\n  <div class="frame">\n\n  <div class="group-holder" role="list">\n\n    <!------------------------- preop group ------------------------->\n    <div class="group preop" role="listitem">\n      <div class="groupheader">Preoperative</div>\n      <div class="groupbody">\n        \n        <div class="subsection">\n          <div class="dot"><img class="vicon" src="static/img/vector-1.svg"></div>\n          <div class="subsection-header">NPO Guidelines</div>\n          <div class="subsection-body">\nStop full liquids 8 hrs prior to surgery\nStop water 2 hrs prior to surgery\nConsider allowing breast patients who arrive early to pre-op for\nlympho or mammography to drink water until 2 hrs prior to surgery\

In [ ]:
# `environment` was created without a loader, so it can only build templates
# from strings and `environment.get_template(...)` has nowhere to look files
# up. Use a separate environment backed by a filesystem loader for on-disk
# templates.
# NOTE(review): the "templates" directory is assumed from the
# "templates/results.html" comment inside `template_string` -- confirm the
# actual location before relying on this cell.
file_environment = jinja2.Environment(loader=jinja2.FileSystemLoader("templates"))
results_template = file_environment.get_template("results.html")