In [2]:
import os
from unstructured.partition.pdf import partition_pdf

# Folder holding the sample documents, and the file names available in it.
EXAMPLE_DOCS_DIRECTORY = "data"

layoutpaper = "layout-parser-paper-fast.pdf"
nistdoc = "NIST.SP.800-52r2.pdf"

# Location of the NIST guideline inside the sample-data folder.
filename = os.path.join(EXAMPLE_DOCS_DIRECTORY, nistdoc)

# Partition the PDF from an open binary stream using the "fast" strategy,
# then show the first ten elements separated by blank lines.
with open(filename, "rb") as pdf_stream:
    elements = partition_pdf(file=pdf_stream, include_page_breaks=False, url=None, strategy="fast")
    preview = "\n\n".join(str(el) for el in elements[:10])
    print(preview)

NIST Special Publication 800-52 Revision 2

Guidelines for the Selection, Configuration, and Use of Transport Layer Security (TLS) Implementations

Kerry A. McKay David A. Cooper

This publication is available free of charge from: https://doi.org/10.6028/NIST.SP.800-52r2

C O M P U T E R S E C U R I T Y

NIST Special Publication 800-52 Revision 2

Guidelines for the Selection, Configuration, and Use of Transport Layer Security (TLS) Implementations

Kerry A. McKay David A. Cooper Computer Security Division Information Technology Laboratory

This publication is available free of charge from: https://doi.org/10.6028/NIST.SP.800-52r2

August 2019


In [4]:
from unstructured.cleaners.core import replace_unicode_quotes

# Sample containing a garbled quote sequence; the cleaner's return value is the
# cell's last expression, so the notebook displays it directly.
garbled_quote_text = "Philadelphia Eaglesâ\x80\x99 victory"
replace_unicode_quotes(garbled_quote_text)

"Philadelphia Eagles' victory"

In [5]:
from unstructured.documents.elements import Text

# Same garbled sample, this time wrapped in a Text element; apply() runs the
# cleaner on the element, and printing it afterwards shows the cleaned text.
garbled_quote_source = "Philadelphia Eaglesâ\x80\x99 victory"
element = Text(garbled_quote_source)
element.apply(replace_unicode_quotes)
print(element)

Philadelphia Eagles' victory


In [7]:
import re

def remove_citations(text: str) -> str:
    """Return ``text`` with bracketed numeric citation markers removed.

    A marker is one to three digits enclosed in square brackets, e.g. ``[1]``
    or ``[123]``. Only the marker itself is deleted; surrounding whitespace is
    left untouched, so a leading ``"[1] "`` leaves a leading space behind.

    Args:
        text: The string to clean.

    Returns:
        The input with every matching marker replaced by the empty string.
    """
    # Raw string: in a plain literal "\[" and "\d" are invalid escape sequences,
    # which Python reports with a warning at compile time.
    return re.sub(r"\[\d{1,3}\]", "", text)

# Sentence that opens with a numeric citation marker; the custom cleaner is
# passed to apply() just like a built-in one.
cited_sentence = "[1] Geolocated combat footage has confirmed Russian gains in the Dvorichne area northwest of Svatove."
element = Text(cited_sentence)
element.apply(remove_citations)
print(element)

 Geolocated combat footage has confirmed Russian gains in the Dvorichne area northwest of Svatove.


In [8]:
from unstructured.cleaners.core import group_broken_paragraphs

# Two paragraphs separated by one blank line, each wrapped over two lines.
text = """The big brown fox
was walking down the lane.

At the end of the lane, the
fox met a bear."""

# Regroup with the cleaner's default paragraph splitting and show the result.
regrouped = group_broken_paragraphs(text)
print(regrouped)

The big brown fox was walking down the lane.

At the end of the lane, the fox met a bear.


In [9]:
import re
from unstructured.cleaners.core import group_broken_paragraphs

# Custom paragraph boundary: three consecutive line breaks, each optionally
# surrounded by other whitespace.
para_split_re = re.compile(r"(\s*\n\s*){3}")

# Here every wrapped line is followed by a blank line, and the two paragraphs
# are separated by two blank lines.
text = """The big brown fox

was walking down the lane.


At the end of the lane, the

fox met a bear."""

# Regroup using the custom boundary pattern instead of the default one.
regrouped = group_broken_paragraphs(text, paragraph_split=para_split_re)
print(regrouped)

The big brown fox was walking down the lane.

At the end of the lane, the fox met a bear.


In [1]:
from unstructured.staging.base import convert_to_dict
import os
from unstructured.partition.pdf import partition_pdf
import json

# Folder holding the sample documents, and the file names available in it.
EXAMPLE_DOCS_DIRECTORY = "data"

layoutpaper = "layout-parser-paper-fast.pdf"
nistdoc = "NIST.SP.800-52r2.pdf"

# Location of the NIST guideline inside the sample-data folder.
filename = os.path.join(EXAMPLE_DOCS_DIRECTORY, nistdoc)

# Partition the same PDF again, this time requesting the "hi_res" strategy.
with open(filename, "rb") as pdf_stream:
    elements = partition_pdf(file=pdf_stream, include_page_breaks=False, url=None, strategy="hi_res")

In [2]:
# Render the first ten partitioned elements as text, separated by blank lines.
preview = "\n\n".join(str(el) for el in elements[:10])
print(preview)

NIST Special Publication 800-52 Revision 2

Guidelines for the Selection, Configuration, and Use of Transport Layer Security (TLS) Implementations

Kerry A. McKay David A. Cooper

This publication is available free of charge from: https://doi.org/10.6028/NIST.SP.800-52r2

C O M P U T E R S E C U R I T Y

NUST National Institute of Standards and Technology U.S. Department of Commerce

NIST Special Publication 800-52 Revision 2

Guidelines for the Selection, Configuration, and Use of Transport Layer Security (TLS) Implementations

Kerry A. McKay David A. Cooper Computer Security Division Information Technology Laboratory

This publication is available free of charge from: https://doi.org/10.6028/NIST.SP.800-52r2


In [3]:

from unstructured.chunking.title import chunk_by_title

# Combine the partitioned elements into chunks, with multipage_sections enabled.
chunks = chunk_by_title(elements, multipage_sections=True)

# Print a bounded preview instead of pausing on input() after every chunk:
# a blocking prompt stalls "Restart & Run All" and fails when the notebook is
# executed headlessly, where no stdin is available. Raise the limit to see more.
MAX_PREVIEW_CHUNKS = 5

for chunk in chunks[:MAX_PREVIEW_CHUNKS]:
    print(chunk)
    print("\n\n" + "-" * 80)

[5


--------------------------------------------------------------------------------
8] Rescorla E, Oku K, Sullivan N, Wood C (2019) Encrypted Server Name Indication for TLS 1.3. (Internet Engineering Task Force (IETF) Transport Layer Security Working Group), Internet-Draft draft-ietf-tls-esni-04. https://datatracker.ietf.org/doc/draft-ietf-tls- esni/

[59] Rescorla E, Ray M, Dispensa S, Oskov N (2010) Transport Layer Security (TLS) Renegotiation Indication Extension. (Internet Engineering Task Force (IETF)), IETF Request for Comments (RFC) 5746. https://doi.org/10.17487/RFC5746


--------------------------------------------------------------------------------
These guidelines do not


--------------------------------------------------------------------------------
 give specific recommendations on steps that can be taken to make this determination. There are tools available (such as the Data Analytics Program [69]) that can provide information to system administrators that can be use