In [7]:
import os

def group_lines_by_page(lines):
    """Group raw input lines by their ``<volume>_<page>`` key.

    Each non-blank line is expected to look like ``<volume> <page>[:...] <text>``,
    with fields separated by single spaces. The page is the part of the second
    field before the first ``:``; everything after the second field is the text.

    Args:
        lines: Iterable of strings (for example an open text file).

    Returns:
        Dict mapping ``'<volume>_<page>'`` to the list of text contents for
        that page, in input order. A line with no text contributes ``''``.
        Lines with fewer than two fields are reported on stdout and skipped.
    """
    page_content = {}
    for line in lines:
        line = line.strip()
        if not line:
            continue

        # maxsplit=2 keeps the text content (including inner runs of spaces)
        # intact without having to split and re-join it.
        parts = line.split(' ', 2)
        if len(parts) < 2:
            # Handle lines that don't match the expected format
            print(f"Skipping line: {line}")
            continue

        volume = parts[0]
        page = parts[1].split(':')[0]
        content = parts[2] if len(parts) > 2 else ''

        page_content.setdefault(f'{volume}_{page}', []).append(content)
    return page_content


def collapse_blank_lines(lines):
    """Yield each line left-stripped, collapsing runs of blank lines to one.

    Args:
        lines: Iterable of strings without trailing newlines.

    Yields:
        The left-stripped lines; a blank line is emitted only when the line
        before it was not blank.
    """
    # Must start out defined (and fresh for every page): reading this flag
    # before its first assignment raised NameError when a page began with an
    # empty line, and its value used to leak from one page into the next.
    previous_line_empty = False
    for line in lines:
        line = line.lstrip()
        if line or not previous_line_empty:
            yield line
        previous_line_empty = not line


def split_into_pages(large_file_path, output_directory, *, encoding=None):
    """Split one large text file into one ``<volume>_<page>.txt`` file per page.

    Args:
        large_file_path: Path of the text file to read.
        output_directory: Directory that receives the page files; created if
            it does not exist.
        encoding: Text encoding passed to ``open`` for both reading and
            writing. ``None`` keeps the platform default.

    Returns:
        The dict built by ``group_lines_by_page`` for the input file.

    Raises:
        OSError: If the input cannot be read or an output cannot be written.
    """
    # Create the output directory if it doesn't exist
    os.makedirs(output_directory, exist_ok=True)

    with open(large_file_path, 'r', encoding=encoding) as file:
        page_content = group_lines_by_page(file)

    for page_key, content in page_content.items():
        # The key already is the file stem. Splitting it on '_' and joining it
        # back dropped every page whose volume or page contained an underscore.
        output_file_path = os.path.join(output_directory, f'{page_key}.txt')
        with open(output_file_path, 'w', encoding=encoding) as file:
            file.writelines(
                line + '\n' for line in collapse_blank_lines(content)
            )
    return page_content


# Path to the large text file
large_file_path = '/path/to/large/text/file.txt'

# Directory to store the smaller page files
output_directory = '/path/to/output/folder'

# Runs when executed as a script or notebook cell (where __name__ is
# '__main__'); importing the functions above does not touch the filesystem.
if __name__ == '__main__':
    # Dictionary holding the text content of each page
    page_content = split_into_pages(large_file_path, output_directory)