# Preparation

Following the instructions at https://jcubic.github.io/chat-gpt/, install the ChatGPT-saving bookmarklet in your browser. Open the ChatGPT conversation you wish to download, then either click the bookmarklet or type its name into the browser's address bar and select it. You will be prompted to save an HTML file containing the conversation. Upload that HTML file to JupyterLab and replace the HTML file names in the examples below with your own.

# Imports

In [1]:
import pandas as pd
import io
import re

# If you do not yet have BeautifulSoup4 and python-docx installed you will have to run:
# pip3 install beautifulsoup4 python-docx
# or
# conda install -c conda-forge beautifulsoup4 python-docx
from bs4 import BeautifulSoup
from docx import Document

# Bsoup Parsing Example Chat 1

## Parse ChatGPT

In [2]:
# File name of the saved ChatGPT HTML export for this example; replace it with
# the name of the file you uploaded.
chatgpt_html_1 = "chat-gpt-heidegger-technicity-meaning.html"

In [3]:
# Load the saved export into memory as one string. The encoding is given
# explicitly so the read does not depend on the platform default.
with open(chatgpt_html_1, "r", encoding="utf-8") as f:
    html_content = f.read()

In [4]:
# Block-level tags visited in document order. "div" is included only so that
# the user's own messages (divs whose class mentions "user-message") are seen.
tags_to_find = ["h1", "h2", "h3", "p", "table", "blockquote", "div"]
text_tags = ("h1", "h2", "h3", "p")
# Parallel lists consumed by the Word-doc cell: paracodes[i] names the kind of
# entry ("User Query", "h1", "h2", "h3", "p" or "table") and paras[i] holds its
# content (an inner-HTML string, or a pandas DataFrame for tables).
paracodes = []
paras = []


def _inner_html(element):
    """Return element's inner HTML with every attribute removed.

    Works on a re-parsed copy so the main soup is left untouched.
    """
    fragment = BeautifulSoup(str(element), "html.parser")
    for subtag in fragment.find_all(True):
        subtag.attrs = {}
    # decode_contents() drops the outer tag and keeps the inline markup.
    return fragment.find(element.name).decode_contents()


soup = BeautifulSoup(html_content, "html.parser")
conversation = soup.find("body")
if conversation is None:
    # html.parser does not invent a missing <body>; search the whole document.
    conversation = soup

# The first "whitespace-pre-wrap" div is printed as the title; an export
# without one yields an empty title instead of an AttributeError.
title_div = conversation.find("div", attrs={"class": "whitespace-pre-wrap"})
title = title_div.get_text() if title_div is not None else ""
print(title)

for tag in conversation.find_all(tags_to_find):
    # Preserving User Queries; every other div carries no content of its own.
    if tag.name == "div":
        if any("user-message" in cls for cls in (tag.get("class") or [])):
            querytext = tag.get_text()
            paracodes.append("User Query")
            paras.append(" " * 40 + "User Query: " + querytext)
        continue

    # Handle Block Quotes. find_all() also yields every paragraph, heading and
    # table nested in a blockquote, so those are handled on their own turn and
    # recognised as quoted through their blockquote ancestor. This keeps the
    # quote marker on every paragraph of a multi-paragraph quote.
    code = tag.name
    bare_quote = False
    if tag.name == "blockquote":
        if tag.find(["h1", "h2", "h3", "p", "table"]) is not None:
            continue
        # A blockquote holding text directly (no <p>) becomes one quote paragraph.
        code = "p"
        bare_quote = True
    in_quote = bare_quote or tag.find_parent("blockquote") is not None

    # parse down to header, paragraph, or table sections
    if code in text_tags:
        text = _inner_html(tag)
        if bare_quote:
            text = text.strip()
        if in_quote and code == "p":
            # marker the Word-doc cell uses to apply the "Quote" style
            text = "QuoteStart" + text

        print(f"Tag name: {code}, Text content: {text}")
        paracodes.append(code)
        paras.append(text)
    elif code == "table":
        # use pandas to convert a chatgpt html table into a pandas dataframe
        table = pd.read_html(io.StringIO(str(tag)))[0]

        print(tag.name, table)
        paracodes.append(tag.name)
        paras.append(table)

What does Heidegger mean in his interview "Only a god can save us" when he says "technicity"?
Tag name: p, Text content: In his 1966 <em>Der Spiegel</em> interview “Only a God Can Save Us” (<em>Nur noch ein Gott kann uns retten</em>), Martin Heidegger uses the term <strong>“Technicity”</strong> (<em>die Technik</em> or more precisely <em>die Wesensbestimmung der Technik</em>, “the essence of technology”) to refer <strong>not simply to machines or technological devices</strong>, but to a <strong>mode of revealing</strong> (<em>eine Weise des Entbergens</em>)—a fundamental way in which the modern world discloses or interprets Being.
Tag name: p, Text content: Here’s a breakdown of what Heidegger means:
Tag name: h3, Text content: 1. <strong>Technicity ≠ Technology</strong>
Tag name: p, Text content: Heidegger distinguishes between:
Tag name: p, Text content: <strong>Technology as tools or machines</strong> (<em>instrumental, anthropological definition</em>) — the ever

## Preparing Word Doc

In [5]:
# Build the Word document from the parallel lists produced by the parse cell:
# paracodes[i] says how to render paras[i].
document = Document()
# Word heading level per code; user queries are rendered as level-1 headings.
heading_levels = {"User Query": 1, "h1": 1, "h2": 2, "h3": 3}
for paracode, para in zip(paracodes, paras):
    if paracode == "table":
        # para is a DataFrame; the extra row holds the column headers.
        n_rows, n_cols = para.shape
        table = document.add_table(n_rows + 1, n_cols)
        table.style = 'Table Grid'

        # add the header row; labels may be numbers or tuples, so stringify
        for j, column in enumerate(para.columns):
            table.cell(0, j).text = str(column)

        # add the rest of the data frame, leaving empty (NaN) cells blank
        for i in range(n_rows):
            for j in range(n_cols):
                value = para.iat[i, j]
                table.cell(i + 1, j).text = "" if pd.isna(value) else str(value)
        continue

    # Create the empty heading/paragraph that will receive the formatted runs.
    if paracode in heading_levels:
        target = document.add_heading("", level=heading_levels[paracode])
    elif para.startswith("QuoteStart"):
        # handle quotes: drop only the leading marker added by the parse cell
        para = para[len("QuoteStart"):]
        target = document.add_paragraph("", style="Quote")
    else:
        target = document.add_paragraph("")

    if paracode == "User Query":
        # User queries were captured with get_text(), so they are plain text;
        # any "<" or ">" the user typed must be kept verbatim.
        pieces = [(para, False, False, False)]
    else:
        # Let the HTML parser walk the fragment instead of splitting on "<" and
        # ">": entities are decoded, nested formatting is tracked through each
        # text node's ancestors, and tags other than bold/italic/underline
        # contribute no stray text. Font size is not handled (that needs CSS).
        pieces = []
        for node in BeautifulSoup(para, "html.parser").find_all(string=True):
            ancestors = {parent.name for parent in node.parents}
            pieces.append((
                str(node),
                bool(ancestors & {"strong", "b"}),
                bool(ancestors & {"em", "i"}),
                "u" in ancestors,
            ))

    for text, strong, emphasis, underline in pieces:
        run = target.add_run(text)
        # Explicit True/False (never None) so the run does not inherit
        # bold/italic/underline from the paragraph or heading style.
        run.bold = strong
        run.italic = emphasis
        run.underline = underline

In [6]:
# Write the assembled document to a .docx file; the path is relative, so it
# resolves against the current working directory.
document.save('Heidegger_Technicity.docx')

# Bsoup Parsing Example Chat 2

## Parse ChatGPT

In [7]:
# File name of the saved ChatGPT HTML export for the second example.
# NOTE: the variable name chatgpt_html_1 is reused from the first example and
# is simply reassigned here.
chatgpt_html_1 = "chat-gpt-naval-shipyard-locations.html"

In [8]:
# Load the saved export into memory as one string. The encoding is given
# explicitly so the read does not depend on the platform default.
with open(chatgpt_html_1, "r", encoding="utf-8") as f:
    html_content = f.read()

In [9]:
# Block-level tags visited in document order. "div" is included only so that
# the user's own messages (divs whose class mentions "user-message") are seen.
tags_to_find = ["h1", "h2", "h3", "p", "table", "blockquote", "div"]
text_tags = ("h1", "h2", "h3", "p")
# Parallel lists consumed by the Word-doc cell: paracodes[i] names the kind of
# entry ("User Query", "h1", "h2", "h3", "p" or "table") and paras[i] holds its
# content (an inner-HTML string, or a pandas DataFrame for tables).
paracodes = []
paras = []


def _inner_html(element):
    """Return element's inner HTML with every attribute removed.

    Works on a re-parsed copy so the main soup is left untouched.
    """
    fragment = BeautifulSoup(str(element), "html.parser")
    for subtag in fragment.find_all(True):
        subtag.attrs = {}
    # decode_contents() drops the outer tag and keeps the inline markup.
    return fragment.find(element.name).decode_contents()


soup = BeautifulSoup(html_content, "html.parser")
conversation = soup.find("body")
if conversation is None:
    # html.parser does not invent a missing <body>; search the whole document.
    conversation = soup

# The first "whitespace-pre-wrap" div is printed as the title; an export
# without one yields an empty title instead of an AttributeError.
title_div = conversation.find("div", attrs={"class": "whitespace-pre-wrap"})
title = title_div.get_text() if title_div is not None else ""
print(title)

for tag in conversation.find_all(tags_to_find):
    # Preserving User Queries; every other div carries no content of its own.
    if tag.name == "div":
        if any("user-message" in cls for cls in (tag.get("class") or [])):
            querytext = tag.get_text()
            paracodes.append("User Query")
            paras.append(" " * 40 + "User Query: " + querytext)
        continue

    # Handle Block Quotes. find_all() also yields every paragraph, heading and
    # table nested in a blockquote, so those are handled on their own turn and
    # recognised as quoted through their blockquote ancestor. This keeps the
    # quote marker on every paragraph of a multi-paragraph quote.
    code = tag.name
    bare_quote = False
    if tag.name == "blockquote":
        if tag.find(["h1", "h2", "h3", "p", "table"]) is not None:
            continue
        # A blockquote holding text directly (no <p>) becomes one quote paragraph.
        code = "p"
        bare_quote = True
    in_quote = bare_quote or tag.find_parent("blockquote") is not None

    # parse down to header, paragraph, or table sections
    if code in text_tags:
        text = _inner_html(tag)
        if bare_quote:
            text = text.strip()
        if in_quote and code == "p":
            # marker the Word-doc cell uses to apply the "Quote" style
            text = "QuoteStart" + text

        print(f"Tag name: {code}, Text content: {text}")
        paracodes.append(code)
        paras.append(text)
    elif code == "table":
        # use pandas to convert a chatgpt html table into a pandas dataframe
        table = pd.read_html(io.StringIO(str(tag)))[0]

        print(tag.name, table)
        paracodes.append(tag.name)
        paras.append(table)

Are San Diego, Charleston, and Groton Naval Shipyards? 
Tag name: p, Text content: Short answer: <strong>Only Groton is an active naval shipyard today.</strong>
San Diego is not a shipyard, and Charleston <em>was</em> one but closed decades ago.
Tag name: p, Text content: Here’s the breakdown:
Tag name: h2, Text content: <strong>San Diego</strong>
Tag name: p, Text content: <strong>Not a naval shipyard.</strong>
Tag name: p, Text content: San Diego has <em>Naval Base San Diego</em>, which is a major <strong>fleet homeport</strong>, not a construction or heavy-repair shipyard.
Tag name: p, Text content: Some private yards (e.g., NASSCO) do shipbuilding/repair in San Diego, but they are <strong>not naval shipyards</strong>.
Tag name: h2, Text content: <strong>Charleston (South Carolina)</strong>
Tag name: p, Text content: <strong>Was a naval shipyard, but no longer.</strong>
Tag name: p, Text content: The <strong>Charleston Naval Shipyard</strong> operated from 1901 to 1996.
Tag name: 

## Preparing Word Doc

In [10]:
# Build the Word document from the parallel lists produced by the parse cell:
# paracodes[i] says how to render paras[i].
document = Document()
# Word heading level per code; user queries are rendered as level-1 headings.
heading_levels = {"User Query": 1, "h1": 1, "h2": 2, "h3": 3}
for paracode, para in zip(paracodes, paras):
    if paracode == "table":
        # para is a DataFrame; the extra row holds the column headers.
        n_rows, n_cols = para.shape
        table = document.add_table(n_rows + 1, n_cols)
        table.style = 'Table Grid'

        # add the header row; labels may be numbers or tuples, so stringify
        for j, column in enumerate(para.columns):
            table.cell(0, j).text = str(column)

        # add the rest of the data frame, leaving empty (NaN) cells blank
        for i in range(n_rows):
            for j in range(n_cols):
                value = para.iat[i, j]
                table.cell(i + 1, j).text = "" if pd.isna(value) else str(value)
        continue

    # Create the empty heading/paragraph that will receive the formatted runs.
    if paracode in heading_levels:
        target = document.add_heading("", level=heading_levels[paracode])
    elif para.startswith("QuoteStart"):
        # handle quotes: drop only the leading marker added by the parse cell
        para = para[len("QuoteStart"):]
        target = document.add_paragraph("", style="Quote")
    else:
        target = document.add_paragraph("")

    if paracode == "User Query":
        # User queries were captured with get_text(), so they are plain text;
        # any "<" or ">" the user typed must be kept verbatim.
        pieces = [(para, False, False, False)]
    else:
        # Let the HTML parser walk the fragment instead of splitting on "<" and
        # ">": entities are decoded, nested formatting is tracked through each
        # text node's ancestors, and tags other than bold/italic/underline
        # contribute no stray text. Font size is not handled (that needs CSS).
        pieces = []
        for node in BeautifulSoup(para, "html.parser").find_all(string=True):
            ancestors = {parent.name for parent in node.parents}
            pieces.append((
                str(node),
                bool(ancestors & {"strong", "b"}),
                bool(ancestors & {"em", "i"}),
                "u" in ancestors,
            ))

    for text, strong, emphasis, underline in pieces:
        run = target.add_run(text)
        # Explicit True/False (never None) so the run does not inherit
        # bold/italic/underline from the paragraph or heading style.
        run.bold = strong
        run.italic = emphasis
        run.underline = underline

In [11]:
# Write the assembled document to a .docx file; the path is relative, so it
# resolves against the current working directory.
document.save('Naval_Shipyards.docx')

# Bsoup Parsing Example Chat 3

## Parse ChatGPT

In [12]:
# File name of the saved ChatGPT HTML export for the third example.
# NOTE: the variable name chatgpt_html_1 is reused from the first example and
# is simply reassigned here.
chatgpt_html_1 = "chat-gpt-spontaneous-moral-heroism.html"

In [13]:
# Load the saved export into memory as one string. The encoding is given
# explicitly so the read does not depend on the platform default.
with open(chatgpt_html_1, "r", encoding="utf-8") as f:
    html_content = f.read()

In [14]:
# Block-level tags visited in document order. "div" is included only so that
# the user's own messages (divs whose class mentions "user-message") are seen.
tags_to_find = ["h1", "h2", "h3", "p", "table", "blockquote", "div"]
text_tags = ("h1", "h2", "h3", "p")
# Parallel lists consumed by the Word-doc cell: paracodes[i] names the kind of
# entry ("User Query", "h1", "h2", "h3", "p" or "table") and paras[i] holds its
# content (an inner-HTML string, or a pandas DataFrame for tables).
paracodes = []
paras = []


def _inner_html(element):
    """Return element's inner HTML with every attribute removed.

    Works on a re-parsed copy so the main soup is left untouched.
    """
    fragment = BeautifulSoup(str(element), "html.parser")
    for subtag in fragment.find_all(True):
        subtag.attrs = {}
    # decode_contents() drops the outer tag and keeps the inline markup.
    return fragment.find(element.name).decode_contents()


soup = BeautifulSoup(html_content, "html.parser")
conversation = soup.find("body")
if conversation is None:
    # html.parser does not invent a missing <body>; search the whole document.
    conversation = soup

# The first "whitespace-pre-wrap" div is printed as the title; an export
# without one yields an empty title instead of an AttributeError.
title_div = conversation.find("div", attrs={"class": "whitespace-pre-wrap"})
title = title_div.get_text() if title_div is not None else ""
print(title)

for tag in conversation.find_all(tags_to_find):
    # Preserving User Queries; every other div carries no content of its own.
    if tag.name == "div":
        if any("user-message" in cls for cls in (tag.get("class") or [])):
            querytext = tag.get_text()
            paracodes.append("User Query")
            paras.append(" " * 40 + "User Query: " + querytext)
        continue

    # Handle Block Quotes. find_all() also yields every paragraph, heading and
    # table nested in a blockquote, so those are handled on their own turn and
    # recognised as quoted through their blockquote ancestor. This keeps the
    # quote marker on every paragraph of a multi-paragraph quote.
    code = tag.name
    bare_quote = False
    if tag.name == "blockquote":
        if tag.find(["h1", "h2", "h3", "p", "table"]) is not None:
            continue
        # A blockquote holding text directly (no <p>) becomes one quote paragraph.
        code = "p"
        bare_quote = True
    in_quote = bare_quote or tag.find_parent("blockquote") is not None

    # parse down to header, paragraph, or table sections
    if code in text_tags:
        text = _inner_html(tag)
        if bare_quote:
            text = text.strip()
        if in_quote and code == "p":
            # marker the Word-doc cell uses to apply the "Quote" style
            text = "QuoteStart" + text

        print(f"Tag name: {code}, Text content: {text}")
        paracodes.append(code)
        paras.append(text)
    elif code == "table":
        # use pandas to convert a chatgpt html table into a pandas dataframe
        table = pd.read_html(io.StringIO(str(tag)))[0]

        print(tag.name, table)
        paracodes.append(tag.name)
        paras.append(table)

There is a term for acts of instinctive heroism undertaken in situations one could never have foreseen happening before the moment that required it. The passengers who attempted to storm the cockpit on Flight 93 embody it. So did Liviu Librescu in barricading the door to protect his students. So also every soldier who jumped on a grenade to save his comrades. What is this term and what other examples do you know of that embody it? 
Tag name: p, Text content: The term you’re reaching for is most often <strong>“moral courage”</strong>, but in the very specific sense you’re describing—<em>unpremeditated, instinctive self-sacrifice in an unforeseeable crisis</em>—the phrase that fits best is:
Tag name: h2, Text content: <strong>“Spontaneous moral heroism”</strong>
Tag name: p, Text content: (also sometimes called <strong>“situational heroism”</strong>)
Tag name: p, Text content: This isn’t a single canonical word in everyday English, but it <em>is</em> a well-establis

## Preparing Word Doc

In [15]:
# Build the Word document from the parallel lists produced by the parse cell:
# paracodes[i] says how to render paras[i].
document = Document()
# Word heading level per code; user queries are rendered as level-1 headings.
heading_levels = {"User Query": 1, "h1": 1, "h2": 2, "h3": 3}
for paracode, para in zip(paracodes, paras):
    if paracode == "table":
        # para is a DataFrame; the extra row holds the column headers.
        n_rows, n_cols = para.shape
        table = document.add_table(n_rows + 1, n_cols)
        table.style = 'Table Grid'

        # add the header row; labels may be numbers or tuples, so stringify
        for j, column in enumerate(para.columns):
            table.cell(0, j).text = str(column)

        # add the rest of the data frame, leaving empty (NaN) cells blank
        for i in range(n_rows):
            for j in range(n_cols):
                value = para.iat[i, j]
                table.cell(i + 1, j).text = "" if pd.isna(value) else str(value)
        continue

    # Create the empty heading/paragraph that will receive the formatted runs.
    if paracode in heading_levels:
        target = document.add_heading("", level=heading_levels[paracode])
    elif para.startswith("QuoteStart"):
        # handle quotes: drop only the leading marker added by the parse cell
        para = para[len("QuoteStart"):]
        target = document.add_paragraph("", style="Quote")
    else:
        target = document.add_paragraph("")

    if paracode == "User Query":
        # User queries were captured with get_text(), so they are plain text;
        # any "<" or ">" the user typed must be kept verbatim.
        pieces = [(para, False, False, False)]
    else:
        # Let the HTML parser walk the fragment instead of splitting on "<" and
        # ">": entities are decoded, nested formatting is tracked through each
        # text node's ancestors, and tags other than bold/italic/underline
        # contribute no stray text. Font size is not handled (that needs CSS).
        pieces = []
        for node in BeautifulSoup(para, "html.parser").find_all(string=True):
            ancestors = {parent.name for parent in node.parents}
            pieces.append((
                str(node),
                bool(ancestors & {"strong", "b"}),
                bool(ancestors & {"em", "i"}),
                "u" in ancestors,
            ))

    for text, strong, emphasis, underline in pieces:
        run = target.add_run(text)
        # Explicit True/False (never None) so the run does not inherit
        # bold/italic/underline from the paragraph or heading style.
        run.bold = strong
        run.italic = emphasis
        run.underline = underline

In [16]:
# Write the assembled document to a .docx file; the path is relative, so it
# resolves against the current working directory.
document.save('spontaneous_heroism.docx')