# Load KnoWhys

Convert KnoWhy article content from raw HTML to markdown format and extract key information. Write the resulting documents in JSONL format.

In [1]:
# Enable IPython's autoreload extension in mode 2, which re-imports all
# modules before each cell runs, so edits to the local `models.*` helpers
# imported in this notebook are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
from datetime import datetime

import os

from models.load_know import KnowhyLoader
from models.load_utils import save_docs_to_jsonl
from bs4 import BeautifulSoup

In [3]:
# Configuration: raw-input and output locations, relative to this notebook.
input_dir = "../data/load/raw/knowhys/"
output_dir = "../data/load/output/knowhys/"

# Date stamp (YYYY-MM-DD) for this run; used to name the output file.
today = f"{datetime.today():%Y-%m-%d}"

In [7]:
# Build the loader over input_dir and load the docs.
# NOTE(review): with verbose=True the saved output contains a dump of every
# raw page (url + full HTML) interleaved with the progress bar -- confirm
# whether that flag is the cause and consider verbose=False before committing.
loader = KnowhyLoader(input_dir)
docs = loader.load(verbose=True)

# Display only the number of loaded docs instead of echoing the whole
# collection, which would embed every document in the notebook output.
# Assumes `docs` is a sized collection (e.g. a list) -- TODO confirm.
len(docs)

  0%|          | 0/10 [00:00<?, ?it/s]

{'url': 'https://knowhy.bookofmormoncentral.org/knowhy/are-there-mistakes-in-the-book-of-mormon', 'html': '<!DOCTYPE html>\n<html lang="en" dir="ltr" prefix="og: http://ogp.me/ns# content: http://purl.org/rss/1.0/modules/content/ dc: http://purl.org/dc/terms/ foaf: http://xmlns.com/foaf/0.1/ rdfs: http://www.w3.org/2000/01/rdf-schema# sioc: http://rdfs.org/sioc/ns# sioct: http://rdfs.org/sioc/types# skos: http://www.w3.org/2004/02/skos/core# xsd: http://www.w3.org/2001/XMLSchema#">\n<head>\n  <link rel="profile" href="http://www.w3.org/1999/xhtml/vocab" />\n  <meta name="viewport" content="width=device-width, initial-scale=1.0">\n  <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />\n<link rel="shortcut icon" href="https://knowhy.bookofmormoncentral.org/sites/all/themes/knowhy_subtheme/favicon.ico" type="image/vnd.microsoft.icon" />\n<meta name="description" content="Critics of the Book of Mormon have assailed the book&#039;s authenticity on the grounds that it has un

 10%|█         | 1/10 [00:00<00:03,  2.37it/s]

{'url': 'https://knowhy.bookofmormoncentral.org/knowhy/did-a-magic-world-view-influence-the-coming-forth-of-the-book-of-mormon', 'html': '<!DOCTYPE html>\n<html lang="en" dir="ltr" prefix="og: http://ogp.me/ns# content: http://purl.org/rss/1.0/modules/content/ dc: http://purl.org/dc/terms/ foaf: http://xmlns.com/foaf/0.1/ rdfs: http://www.w3.org/2000/01/rdf-schema# sioc: http://rdfs.org/sioc/ns# sioct: http://rdfs.org/sioc/types# skos: http://www.w3.org/2004/02/skos/core# xsd: http://www.w3.org/2001/XMLSchema#">\n<head>\n  <link rel="profile" href="http://www.w3.org/1999/xhtml/vocab" />\n  <meta name="viewport" content="width=device-width, initial-scale=1.0">\n  <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />\n<link rel="shortcut icon" href="https://knowhy.bookofmormoncentral.org/sites/all/themes/knowhy_subtheme/favicon.ico" type="image/vnd.microsoft.icon" />\n<meta name="description" content="Did a &quot;magic world view&quot; influence Joseph Smith and the comin

 20%|██        | 2/10 [00:00<00:03,  2.11it/s]

{'url': 'https://knowhy.bookofmormoncentral.org/knowhy/can-textual-studies-help-readers-understand-the-isaiah-chapters-in-2-nephi', 'html': '<!DOCTYPE html>\n<html lang="en" dir="ltr" prefix="og: http://ogp.me/ns# content: http://purl.org/rss/1.0/modules/content/ dc: http://purl.org/dc/terms/ foaf: http://xmlns.com/foaf/0.1/ rdfs: http://www.w3.org/2000/01/rdf-schema# sioc: http://rdfs.org/sioc/ns# sioct: http://rdfs.org/sioc/types# skos: http://www.w3.org/2004/02/skos/core# xsd: http://www.w3.org/2001/XMLSchema#">\n<head>\n  <link rel="profile" href="http://www.w3.org/1999/xhtml/vocab" />\n  <meta name="viewport" content="width=device-width, initial-scale=1.0">\n  <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />\n<link rel="shortcut icon" href="https://knowhy.bookofmormoncentral.org/sites/all/themes/knowhy_subtheme/favicon.ico" type="image/vnd.microsoft.icon" />\n<meta name="description" content="Textual criticism is the careful analysis of a text&#039;s manuscrip

 30%|███       | 3/10 [00:01<00:02,  2.76it/s]

{'url': 'https://knowhy.bookofmormoncentral.org/knowhy/an-apostles-witness', 'html': '<!DOCTYPE html>\n<html lang="en" dir="ltr" prefix="og: http://ogp.me/ns# content: http://purl.org/rss/1.0/modules/content/ dc: http://purl.org/dc/terms/ foaf: http://xmlns.com/foaf/0.1/ rdfs: http://www.w3.org/2000/01/rdf-schema# sioc: http://rdfs.org/sioc/ns# sioct: http://rdfs.org/sioc/types# skos: http://www.w3.org/2004/02/skos/core# xsd: http://www.w3.org/2001/XMLSchema#">\n<head>\n  <link rel="profile" href="http://www.w3.org/1999/xhtml/vocab" />\n  <meta name="viewport" content="width=device-width, initial-scale=1.0">\n  <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />\n<link rel="shortcut icon" href="https://knowhy.bookofmormoncentral.org/sites/all/themes/knowhy_subtheme/favicon.ico" type="image/vnd.microsoft.icon" />\n<meta name="description" content="Elder Jeffrey R. Holland bore powerful testimony of the divine authenticity of the Book of Mormon during the October 2009 G

 40%|████      | 4/10 [00:01<00:01,  3.02it/s]

{'url': 'https://knowhy.bookofmormoncentral.org/knowhy/are-there-other-ancient-records-like-the-book-of-mormon', 'html': '<!DOCTYPE html>\n<html lang="en" dir="ltr" prefix="og: http://ogp.me/ns# content: http://purl.org/rss/1.0/modules/content/ dc: http://purl.org/dc/terms/ foaf: http://xmlns.com/foaf/0.1/ rdfs: http://www.w3.org/2000/01/rdf-schema# sioc: http://rdfs.org/sioc/ns# sioct: http://rdfs.org/sioc/types# skos: http://www.w3.org/2004/02/skos/core# xsd: http://www.w3.org/2001/XMLSchema#">\n<head>\n  <link rel="profile" href="http://www.w3.org/1999/xhtml/vocab" />\n  <meta name="viewport" content="width=device-width, initial-scale=1.0">\n  <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />\n<link rel="shortcut icon" href="https://knowhy.bookofmormoncentral.org/sites/all/themes/knowhy_subtheme/favicon.ico" type="image/vnd.microsoft.icon" />\n<meta name="description" content="For some, the story of the coming forth of the Book of Mormon has seemed too bizarre an

 60%|██████    | 6/10 [00:01<00:01,  3.89it/s]

{'url': 'https://knowhy.bookofmormoncentral.org/knowhy/are-there-really-only-two-churches', 'html': '<!DOCTYPE html>\n<html lang="en" dir="ltr" prefix="og: http://ogp.me/ns# content: http://purl.org/rss/1.0/modules/content/ dc: http://purl.org/dc/terms/ foaf: http://xmlns.com/foaf/0.1/ rdfs: http://www.w3.org/2000/01/rdf-schema# sioc: http://rdfs.org/sioc/ns# sioct: http://rdfs.org/sioc/types# skos: http://www.w3.org/2004/02/skos/core# xsd: http://www.w3.org/2001/XMLSchema#">\n<head>\n  <link rel="profile" href="http://www.w3.org/1999/xhtml/vocab" />\n  <meta name="viewport" content="width=device-width, initial-scale=1.0">\n  <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />\n<link rel="shortcut icon" href="https://knowhy.bookofmormoncentral.org/sites/all/themes/knowhy_subtheme/favicon.ico" type="image/vnd.microsoft.icon" />\n<meta name="description" content="Today there are many different churches, but Nephi says there are only two—one of God and the other of the d

 70%|███████   | 7/10 [00:02<00:00,  4.32it/s]

{'url': 'https://knowhy.bookofmormoncentral.org/knowhy/acquiring-spiritual-knowledge-act-in-faith', 'html': '<!DOCTYPE html>\n<html lang="en" dir="ltr" prefix="og: http://ogp.me/ns# content: http://purl.org/rss/1.0/modules/content/ dc: http://purl.org/dc/terms/ foaf: http://xmlns.com/foaf/0.1/ rdfs: http://www.w3.org/2000/01/rdf-schema# sioc: http://rdfs.org/sioc/ns# sioct: http://rdfs.org/sioc/types# skos: http://www.w3.org/2004/02/skos/core# xsd: http://www.w3.org/2001/XMLSchema#">\n<head>\n  <link rel="profile" href="http://www.w3.org/1999/xhtml/vocab" />\n  <meta name="viewport" content="width=device-width, initial-scale=1.0">\n  <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />\n<link rel="shortcut icon" href="https://knowhy.bookofmormoncentral.org/sites/all/themes/knowhy_subtheme/favicon.ico" type="image/vnd.microsoft.icon" />\n<meta name="description" content="The process of sincerely asking for and willingly receiving spiritual knowledge plays out in numerou

 80%|████████  | 8/10 [00:02<00:00,  4.03it/s]

{'url': 'https://knowhy.bookofmormoncentral.org/knowhy/are-the-accounts-of-the-golden-plates-believable', 'html': '<!DOCTYPE html>\n<html lang="en" dir="ltr" prefix="og: http://ogp.me/ns# content: http://purl.org/rss/1.0/modules/content/ dc: http://purl.org/dc/terms/ foaf: http://xmlns.com/foaf/0.1/ rdfs: http://www.w3.org/2000/01/rdf-schema# sioc: http://rdfs.org/sioc/ns# sioct: http://rdfs.org/sioc/types# skos: http://www.w3.org/2004/02/skos/core# xsd: http://www.w3.org/2001/XMLSchema#">\n<head>\n  <link rel="profile" href="http://www.w3.org/1999/xhtml/vocab" />\n  <meta name="viewport" content="width=device-width, initial-scale=1.0">\n  <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />\n<link rel="shortcut icon" href="https://knowhy.bookofmormoncentral.org/sites/all/themes/knowhy_subtheme/favicon.ico" type="image/vnd.microsoft.icon" />\n<meta name="description" content="The reported weight, dimensions, and composition of the gold plates are believable. The story 

 90%|█████████ | 9/10 [00:02<00:00,  3.32it/s]

{'url': 'https://knowhy.bookofmormoncentral.org/knowhy/at-baptism-what-do-we-covenant-to-do', 'html': '<!DOCTYPE html>\n<html lang="en" dir="ltr" prefix="og: http://ogp.me/ns# content: http://purl.org/rss/1.0/modules/content/ dc: http://purl.org/dc/terms/ foaf: http://xmlns.com/foaf/0.1/ rdfs: http://www.w3.org/2000/01/rdf-schema# sioc: http://rdfs.org/sioc/ns# sioct: http://rdfs.org/sioc/types# skos: http://www.w3.org/2004/02/skos/core# xsd: http://www.w3.org/2001/XMLSchema#">\n<head>\n  <link rel="profile" href="http://www.w3.org/1999/xhtml/vocab" />\n  <meta name="viewport" content="width=device-width, initial-scale=1.0">\n  <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />\n<link rel="shortcut icon" href="https://knowhy.bookofmormoncentral.org/sites/all/themes/knowhy_subtheme/favicon.ico" type="image/vnd.microsoft.icon" />\n<meta name="description" content="From ancient times, covenants have been understood as mutual or bilateral promises. If baptism, in the Boo

100%|██████████| 10/10 [00:02<00:00,  3.33it/s]




In [8]:
# Write the loaded docs to a date-stamped JSONL file under output_dir.
# Create the output directory up front so a fresh checkout does not depend on
# the path already existing; whether save_docs_to_jsonl creates missing
# directories itself is not visible from this notebook.
os.makedirs(output_dir, exist_ok=True)
output_filename = os.path.join(output_dir, f"{today}.jsonl")

save_docs_to_jsonl(docs, output_filename)