In [25]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
import os
import time
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
import glob
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
import re
from datetime import datetime, timedelta

In [26]:
# Work from the project folder so files are accessed and saved relative to it.
project_dir = 'd:\\Faith and Religion Stuff\\Come, Follow Me\\come-follow-me-breakdown-builder'
os.chdir(project_dir)

So before I can begin adapting my conference calculator into a Come, Follow Me Calculator, I need to scrape and adjust the starting and ending dates from the Come, Follow Me page. 

The initial link I'm starting with is the link to the 2024 Come, Follow Me landing page. 
* link: https://www.churchofjesuschrist.org/study/manual/come-follow-me-for-home-and-church-book-of-mormon-2024?lang=eng

Having inspected the HTML, I found a few relevant elements, at least on the landing page:
* `<div class="sc-42v4-0 caNosj heading-Vx_DR heading-Vx_DR label">`: `div` elements with these classes mark the month divisions on the landing page. 
* `<li data-date-end="2024-01-07" data-date-start="2024-01-01">`: these lines mark the start and end dates for each reading
* `<a href="/study/manual/come-follow-me-for-home-and-church-book-of-mormon-2024/01?lang=eng"`: these are the links to each of the dedicated webpages for each week's readings and lessons. 
* `<p class="primaryMeta">January 1–7</p>`: these `p` elements list the start and end dates in a more readable format
* `<div class="sc-omeqik-7 iwWCCo">` : this div element marks the location in the html code of the "title" of the reading assignment linked above
    * `<h4 class="sc-12mz36o-0 jSCFto sc-omeqik-9 dbmmCm">`: this is an additional marker for the reading assignment title
    * `<p class="title">Introductory Pages of the Book of Mormon</p></h4></div>`: this `p` element holds the title of the weekly reading assignment

All of these will be useful as I begin this process. 

I'll start, though, by just getting a response and confirming that my driver is up and running. 

In [27]:
# Landing page of the 2024 Come, Follow Me manual.
cfm_link = 'https://www.churchofjesuschrist.org/study/manual/come-follow-me-for-home-and-church-book-of-mormon-2024?lang=eng'

# requests applies no timeout by default, so set one explicitly; otherwise a
# stalled connection would leave this cell hanging indefinitely.
response = requests.get(cfm_link, timeout=30)

# Last expression of the cell: displays the response (and its status code).
response

<Response [200]>

In [28]:
# Parse the bytes returned by requests with the built-in "html.parser" backend.
soup = BeautifulSoup(response.content, "html.parser")
content = soup.prettify()  # This formats the HTML in a (more) readable way

# Print only the beginning of the document: the full page is very long and
# would bury the rest of the notebook in output. `content` still holds the
# complete prettified HTML for anything that needs it.
preview_chars = 3000
print(content[:preview_chars])

<!DOCTYPE html>
<html lang="en">
 <head>
  <meta charset="utf-8"/>
  <title data-react-helmet="true">
   Book of Mormon 2024
  </title>
  <meta content="4.20.0" name="version"/>
  <meta content="width=device-width, initial-scale=1, minimum-scale=1" name="viewport"/>
  <meta content="" data-react-helmet="true" name="Search.doc-aid">
   <meta content="Book of Mormon 2024" data-react-helmet="true" name="title">
    <meta content="This manual contains lesson ideas and study materials for use at home and at church." data-react-helmet="true" name="description">
     <meta content="Book of Mormon 2024" data-react-helmet="true" property="og:title"/>
     <meta content="website" data-react-helmet="true" property="og:type"/>
     <meta content="https://www.churchofjesuschrist.org/study/eng/manual/come-follow-me-for-home-and-church-book-of-mormon-2024/_manifest" data-react-helmet="true" property="og:url"/>
     <link as="script" href="https://www.churchofjesuschrist.org/services/platform/v4/index

A 200 response indicates that the page is reachable, so I can use requests and a Selenium driver to get information from it. Now to set up my driver. 

In [29]:
# Folder that contains chromedriver.exe.
# This is a raw string, so each backslash is written once: doubling them
# inside r'...' would put two backslashes between every path component.
chrome_driver_dir = r'D:\Faith and Religion Stuff\Come, Follow Me\chromedriver-win64'
chrome_driver_path = os.path.join(chrome_driver_dir, 'chromedriver.exe')

# Point Selenium at that specific driver binary and start a Chrome session.
service = Service(chrome_driver_path)
driver = webdriver.Chrome(service=service)

In [30]:
# NOTE: this cell closes `driver`, so the driver setup cell has to be re-run
# before this cell can be executed again.
try:
    driver.get(cfm_link)

    # Find all li elements with data-date-start and data-date-end attributes
    li_elements = driver.find_elements(By.CSS_SELECTOR, "li[data-date-start][data-date-end]")

    # Extract the attributes while the browser session is still open;
    # the element handles cannot be queried after quit().
    start_dates = [li.get_attribute("data-date-start") for li in li_elements]
    end_dates = [li.get_attribute("data-date-end") for li in li_elements]
finally:
    # Always shut the browser down, even if loading or scraping raised,
    # so no Chrome/chromedriver process is left running.
    driver.quit()

print(start_dates)
print(end_dates)

['2024-01-01', '2024-01-08', '2024-01-15', '2024-01-22', '2024-01-29', '2024-02-05', '2024-02-12', '2024-02-19', '2024-02-26', '2024-03-04', '2024-03-11', '2024-03-18', '2024-03-25', '2024-04-01', '2024-04-08', '2024-04-15', '2024-04-22', '2024-04-29', '2024-05-06', '2024-05-13', '2024-05-20', '2024-05-27', '2024-06-03', '2024-06-10', '2024-06-17', '2024-06-24', '2024-07-01', '2024-07-08', '2024-07-15', '2024-07-22', '2024-07-29', '2024-08-05', '2024-08-12', '2024-08-19', '2024-08-26', '2024-09-02', '2024-09-09', '2024-09-16', '2024-09-23', '2024-09-30', '2024-10-07', '2024-10-14', '2024-10-21', '2024-10-28', '2024-11-04', '2024-11-11', '2024-11-18', '2024-11-25', '2024-12-02', '2024-12-09', '2024-12-16', '2024-12-23']
['2024-01-07', '2024-01-14', '2024-01-21', '2024-01-28', '2024-02-04', '2024-02-11', '2024-02-18', '2024-02-25', '2024-03-03', '2024-03-10', '2024-03-17', '2024-03-24', '2024-03-31', '2024-04-07', '2024-04-14', '2024-04-21', '2024-04-28', '2024-05-05', '2024-05-12', '202