In [1]:
import os
import logging

In [2]:
def _load_content():
    """
    Load and process all .htm files from the base directory, including nested directories.
    """
    # set the base directory as the current file directory.
    base_directory = os.getcwd()
    logging.info(f"Starting to load .htm files from {base_directory}.")

    html_files = []

    # list all the files and folders in this directory
    files = os.listdir(base_directory)

    print("The list of files in this directory is: ")
    print(files)

    try:
        # Walk through the directory structure
        for root, _, files in os.walk(base_directory):
            # if the file is a folder
            

            # Find all .htm files in the current directory
            subfolder_htm_files = [file for file in files if file.endswith(".htm")]

            html_files.append(subfolder_htm_files)

            return html_files

        logging.info("Finished processing all .htm files.")
    except Exception as e:
        logging.error(f"An error occurred while traversing the directory: {e}")

In [3]:
# Run the loader and keep its return value so the next cell can display it.
# The call also prints the top-level directory listing as a side effect.
file_contents = _load_content()

The list of files in this directory is: 
['assets', 'Computed_Curve_Templates', 'Contact_us.htm', 'Create_Curve_Data', 'csh-redirect.htm', 'Curve_Data', 'Curve_Display', 'Curve_Shading,_Splicing_and_Depth_Shifting', 'Curve_Shading_and_Splicing', 'file_process.ipynb', 'First_Topic.htm', 'Free_Format_Text', 'GEOExport.chm', 'GEOGraph', 'GEOGraph.chm', 'GeoRegEd.chm', 'Headers_and_Trailers', 'Help_Missing.htm', 'Horizontal_Track_Text_-_Lithology_Descriptions_or_Remarks', 'index.htm', 'index.ppf', 'Introduction', 'Lines', 'Load_Curve_Data', 'Log_Presentation_and_Layout', 'Log_Structure_and_Presentation', 'Modifiers_and_Qualifiers', 'Navigate_GEO', 'parentdata.js', 'Percent_Lithology', 'Print', 'projectdata.js', 'Qualitative_Data', 'Rich_Format_Text', 'screendata.js', 'Sharing', 'Sidetrack', 'sitemap.xml', 'Symbols', 'Tables', 'template', 'Text_and_Annotations', 'topic.htm', 'Touch_Screen_Devices', 'TVD', 'Useful_Videos', 'VectDraw.chm', 'Vertical_Track_Text_-_Chronological or Stratigraphic

In [4]:
# Display the value returned by _load_content() in the previous cell.
file_contents

[['Contact_us.htm',
  'csh-redirect.htm',
  'First_Topic.htm',
  'Help_Missing.htm',
  'index.htm',
  'topic.htm']]

In [5]:
import os
import logging

def _load_content():
    """
    Load and process all .htm files from the base directory,
    including files in nested directories. Also, list all folders and files.
    """
    base_directory = os.getcwd()  # Ensure this is set to your root directory
    logging.info(f"Starting to load content from base directory: {base_directory}")
    
    all_folders = []
    all_files = []
    htm_files = []

    try:
        # Walk through the directory structure
        for root, dirs, files in os.walk(base_directory):
            all_folders.append(root)  # Collect all folders
            all_files.extend([os.path.join(root, file) for file in files])  # Collect all files

            # Filter .htm files specifically
            htm_files.extend([os.path.join(root, file) for file in files if file.endswith(".htm")])

        logging.info(f"Total folders found: {len(all_folders)}")
        logging.info(f"Total files found: {len(all_files)}")
        logging.info(f"Total .htm files found: {len(htm_files)}")

    except Exception as e:
        logging.error(f"An error occurred while traversing the directory: {e}")

    # Return lists of folders and files for further use or inspection
    return all_folders, all_files


In [6]:
# Unpack the two lists returned by _load_content(): visited folders and file paths.
all_folders, all_files = _load_content()

In [7]:
# Display every folder path collected by the walk.
# NOTE(review): this renders the whole list; consider a slice such as all_folders[:10].
all_folders

['c:\\Program Files (x86)\\SDC Software\\Geo Suite 8\\Geo\\Help',
 'c:\\Program Files (x86)\\SDC Software\\Geo Suite 8\\Geo\\Help\\assets',
 'c:\\Program Files (x86)\\SDC Software\\Geo Suite 8\\Geo\\Help\\assets\\css',
 'c:\\Program Files (x86)\\SDC Software\\Geo Suite 8\\Geo\\Help\\assets\\images',
 'c:\\Program Files (x86)\\SDC Software\\Geo Suite 8\\Geo\\Help\\Computed_Curve_Templates',
 'c:\\Program Files (x86)\\SDC Software\\Geo Suite 8\\Geo\\Help\\Computed_Curve_Templates\\Drilling',
 'c:\\Program Files (x86)\\SDC Software\\Geo Suite 8\\Geo\\Help\\Computed_Curve_Templates\\Gas',
 'c:\\Program Files (x86)\\SDC Software\\Geo Suite 8\\Geo\\Help\\Computed_Curve_Templates\\Gas\\Formation_Fluid_Evaluation',
 'c:\\Program Files (x86)\\SDC Software\\Geo Suite 8\\Geo\\Help\\Computed_Curve_Templates\\Gas\\Gas_Quality',
 'c:\\Program Files (x86)\\SDC Software\\Geo Suite 8\\Geo\\Help\\Computed_Curve_Templates\\Petrophysical',
 'c:\\Program Files (x86)\\SDC Software\\Geo Suite 8\\Geo\\Help\\C

In [8]:
# Display every file path collected by the walk.
# NOTE(review): this renders the whole list; consider a slice such as all_files[:10].
all_files

['c:\\Program Files (x86)\\SDC Software\\Geo Suite 8\\Geo\\Help\\Contact_us.htm',
 'c:\\Program Files (x86)\\SDC Software\\Geo Suite 8\\Geo\\Help\\csh-redirect.htm',
 'c:\\Program Files (x86)\\SDC Software\\Geo Suite 8\\Geo\\Help\\file_process.ipynb',
 'c:\\Program Files (x86)\\SDC Software\\Geo Suite 8\\Geo\\Help\\First_Topic.htm',
 'c:\\Program Files (x86)\\SDC Software\\Geo Suite 8\\Geo\\Help\\GEOExport.chm',
 'c:\\Program Files (x86)\\SDC Software\\Geo Suite 8\\Geo\\Help\\GEOGraph.chm',
 'c:\\Program Files (x86)\\SDC Software\\Geo Suite 8\\Geo\\Help\\GeoRegEd.chm',
 'c:\\Program Files (x86)\\SDC Software\\Geo Suite 8\\Geo\\Help\\Help_Missing.htm',
 'c:\\Program Files (x86)\\SDC Software\\Geo Suite 8\\Geo\\Help\\index.htm',
 'c:\\Program Files (x86)\\SDC Software\\Geo Suite 8\\Geo\\Help\\index.ppf',
 'c:\\Program Files (x86)\\SDC Software\\Geo Suite 8\\Geo\\Help\\parentdata.js',
 'c:\\Program Files (x86)\\SDC Software\\Geo Suite 8\\Geo\\Help\\projectdata.js',
 'c:\\Program Files (x8