In [None]:
#Project Title:
#Automated Book Publication Workflow



In [None]:
#Overview:
'''This Python project automates content capture, AI rewriting, human-in-the-loop revision, and content storage 
with smart retrieval—just like a real publishing pipeline.'''

In [None]:
pip install selenium
pip install pillow
pip install chromadb
pip install playwright

In [16]:
#import 
import os  #Handles folder creation and file paths.
import re  # Whole-word matching for the simulated AI writer/reviewer.
import time #Adds delay for webpage loading.
import uuid # Generates unique IDs.
import webbrowser #Opens files like PDFs in the default browser.

from chromadb import PersistentClient #Stores and retrieves text using AI-based vector search.
from PIL import Image # Converts and processes screenshots into a PDF.
from selenium import webdriver # Automates browser tasks like scrolling and screenshots.

In [17]:
# Step 1: Take Screenshots
def take_screenshots(
    url="https://en.wikisource.org/wiki/The_Gates_of_Morning/Book_1/Chapter_1",
    screenshot_dir="ai_writer/screenshots",
    view_height=900,
):
    """Capture a web page as a series of scrolled PNG screenshots.

    Opens ``url`` in headless Chrome, scrolls down in steps of ``view_height``
    pixels and saves one screenshot per step as ``chapter1_NN.png`` inside
    ``screenshot_dir`` (created if missing).

    Args:
        url: Page to capture. Defaults to chapter 1 of "The Gates of Morning".
        screenshot_dir: Folder that receives the PNG files.
        view_height: Scroll step in pixels between consecutive screenshots.

    Returns:
        list[str]: Paths of the screenshots written, in capture order.

    Raises:
        ValueError: If ``view_height`` is not a positive number.
    """
    if view_height <= 0:
        raise ValueError("view_height must be a positive number of pixels")

    print(" Taking screenshots...")

    os.makedirs(screenshot_dir, exist_ok=True)

    options = webdriver.ChromeOptions()
    options.add_argument("--headless")
    # NOTE(review): the window is 3000px tall while the scroll step is
    # view_height (900px by default), so consecutive screenshots presumably
    # overlap - confirm whether that is intended.
    options.add_argument("--window-size=1920,3000")

    saved_paths = []
    driver = webdriver.Chrome(options=options)
    try:
        driver.get(url)
        time.sleep(3)  # Give the page time to finish loading.

        total_height = driver.execute_script("return document.body.scrollHeight")
        scrolls = total_height // view_height + 1

        for i in range(scrolls):
            driver.execute_script(f"window.scrollTo(0, {i * view_height});")
            time.sleep(1)  # Let the scroll settle before capturing.
            screenshot_path = os.path.join(screenshot_dir, f"chapter1_{i+1:02d}.png")
            driver.save_screenshot(screenshot_path)
            saved_paths.append(screenshot_path)
            print(f" Saved: {screenshot_path}")
    finally:
        # Always shut the browser down, even if loading or capturing failed;
        # otherwise a headless Chrome process is left running.
        driver.quit()

    return saved_paths


In [18]:
# Step 2: Convert Screenshots to PDF
# Converts the .png screenshots to a single .pdf file.
def screenshots_to_pdf(
    screenshot_dir="ai_writer/screenshots",
    pdf_path="ai_writer/final_chapter_screenshots.pdf",
):
    """Combine the PNG screenshots in a folder into one multi-page PDF.

    Files are added in sorted filename order, one page per image. If the
    folder does not exist or holds no PNG files, a message is printed and no
    PDF is written.

    Args:
        screenshot_dir: Folder containing the ``.png`` screenshots.
        pdf_path: Destination of the generated PDF.

    Returns:
        None.
    """
    print(" Converting screenshots to PDF...")

    # A missing folder is treated like an empty one instead of crashing in
    # os.listdir().
    if os.path.isdir(screenshot_dir):
        png_names = sorted(
            name for name in os.listdir(screenshot_dir)
            if name.lower().endswith(".png")
        )
    else:
        png_names = []

    if not png_names:
        print(" No PNG images found.")
        return

    pages = []
    for name in png_names:
        # convert() returns an independent in-memory image, so the source
        # file can be closed right away.
        with Image.open(os.path.join(screenshot_dir, name)) as source:
            pages.append(source.convert("RGB"))

    # Make sure the destination folder exists when a custom pdf_path is used.
    os.makedirs(os.path.dirname(pdf_path) or ".", exist_ok=True)

    pages[0].save(pdf_path, save_all=True, append_images=pages[1:])
    print(f" PDF created at: {pdf_path}")


In [19]:
# Step 3: Simulate AI Writing and Review
# Simulates how an AI writer rewrites content by changing a name.
# It creates a tool (function) that will take a sentence or paragraph and modify it — like an AI writing assistant.
def ai_writer(text):
    """Simulate an AI rewrite by renaming the character "Dick" to "Taori".

    Only the whole word is replaced, so longer words that merely contain
    the name (e.g. "Dickens") are left untouched.

    Args:
        text: Source text to rewrite.

    Returns:
        str: The text with every standalone "Dick" replaced by "Taori".
    """
    return re.sub(r"\bDick\b", "Taori", text)

# Simulates how an AI reviewer edits or refines content further.
# It acts like an AI content editor — it reviews and slightly modifies the text to improve or refine it (e.g., updating names, titles, or tone).
def ai_reviewer(text):
    """Simulate an AI review pass by giving "Taori" the title "Chief Taori".

    The replacement matches the whole word only and skips occurrences that
    already carry the title, so reviewing the same text twice does not
    produce "Chief Chief Taori".

    Args:
        text: Text to review.

    Returns:
        str: The text with every untitled, standalone "Taori" replaced by
        "Chief Taori".
    """
    # The negative lookbehind leaves names already preceded by "Chief " alone.
    return re.sub(r"(?<!Chief )\bTaori\b", "Chief Taori", text)


In [20]:
# Step 4: Save Final Text
# This step rewrites the sample chapter text using AI-like logic by replacing names and saves the final reviewed version to a text file.
def save_reviewed_text():
    """Rewrite and review the sample chapter, save it to disk, and return it.

    The built-in sample text is passed through ``ai_writer`` and then
    ``ai_reviewer``; the result is written as UTF-8 to
    ``ai_writer/final_reviewed_chapter.txt`` (the folder is created if needed).

    Returns:
        str: The reviewed chapter text that was written to the file.
    """
    output_dir = "ai_writer"
    path = "ai_writer/final_reviewed_chapter.txt"
    sample_chapter = """The Gates of Morning\nCHAPTER I – The Canoe Builder\nDick stood on a coral ledge looking out to sea..."""

    # Writer pass first, reviewer pass on its output.
    final_text = ai_reviewer(ai_writer(sample_chapter))

    os.makedirs(output_dir, exist_ok=True)
    with open(path, mode="w", encoding="utf-8") as out_file:
        out_file.write(final_text)

    print(f" Final text saved to: {path}")
    return final_text


In [21]:
# Step 5: Save to ChromaDB
# Stores the reviewed chapter text in ChromaDB for versioning and search.
def save_to_chromadb(text, chapter="1", doc_id="ch1-v1"):
    """Store a chapter's text in the persistent ChromaDB "chapters" collection.

    The record is written with ``upsert`` so that re-running the notebook
    replaces the document stored under ``doc_id`` instead of leaving the
    previous text in place.

    Args:
        text: Chapter text to store.
        chapter: Value recorded under the "chapter" metadata key.
        doc_id: Identifier of the record inside the collection.
    """
    print(" Saving to ChromaDB...")
    client = PersistentClient(path="chromadb_store")  # On-disk store, created on first use.
    collection = client.get_or_create_collection("chapters")
    collection.upsert(
        documents=[text],
        metadatas=[{"chapter": chapter}],
        ids=[doc_id],
    )
    print(" Text saved to ChromaDB.")


In [22]:
# Step 6: Reinforcement Learning (RL) Search Simulation
# Performs a similarity search in ChromaDB to find the most relevant document for a given query.
def rl_search(query):
    """Return the stored document that best matches ``query``.

    Runs a single-result query against the "chapters" collection in the
    persistent ChromaDB store. Despite the name, this block performs a plain
    collection query; no learning step is involved here.

    Args:
        query: Free-text search query.

    Returns:
        str | None: The top matching document, or ``None`` when the query
        yields no documents (for example, when the collection is empty).
    """
    client = PersistentClient(path="chromadb_store")
    collection = client.get_or_create_collection("chapters")
    results = collection.query(query_texts=[query], n_results=1)

    # "documents" holds one list of matches per query text; guard against a
    # missing or empty list instead of raising IndexError.
    matches = results.get("documents") or []
    if not matches or not matches[0]:
        return None
    return matches[0][0]


In [23]:
# Step 7: Run Everything
# Executes the complete end-to-end workflow from scraping to search.
def run_pipeline():
    """Run the whole workflow once, from page capture to search.

    Steps, in order: capture screenshots, merge them into a PDF, produce and
    save the reviewed chapter text, store that text in ChromaDB, then query
    the store for "canoe builder" and print the top match.
    """
    print(" Starting full workflow...")

    # Capture stage: page screenshots, then a single PDF built from them.
    take_screenshots()
    screenshots_to_pdf()

    # Text stage: reviewed chapter is saved to disk and indexed.
    reviewed_chapter = save_reviewed_text()
    save_to_chromadb(reviewed_chapter)

    # Retrieval stage: look the chapter up again by a phrase from its title.
    top_match = rl_search("canoe builder")
    print(f"\n RL Search Output: {top_match}")


In [24]:
# Execute the full pipeline: screenshots -> PDF -> reviewed text -> ChromaDB -> search.
# This launches headless Chrome and loads the chapter URL, so it needs a
# Chrome installation and network access.
run_pipeline()

 Starting full workflow...
 Taking screenshots...
 Saved: ai_writer/screenshots\chapter1_01.png
 Saved: ai_writer/screenshots\chapter1_02.png
 Saved: ai_writer/screenshots\chapter1_03.png
 Saved: ai_writer/screenshots\chapter1_04.png
 Saved: ai_writer/screenshots\chapter1_05.png
 Saved: ai_writer/screenshots\chapter1_06.png
 Converting screenshots to PDF...
 PDF created at: ai_writer/final_chapter_screenshots.pdf
 Final text saved to: ai_writer/final_reviewed_chapter.txt
 Saving to ChromaDB...
 Text saved to ChromaDB.

 RL Search Output: The Gates of Morning
CHAPTER I – The Canoe Builder
Chief Taori stood on a coral ledge looking out to sea...


In [26]:
import os
import webbrowser
from pathlib import Path

# Open final_chapter_screenshots.pdf with the system's default browser/viewer.
# Path.as_uri() builds a well-formed, percent-encoded file:// URL; simply
# prefixing "file://" to an absolute path yields a malformed URL on Windows
# (drive letter and backslashes) and leaves spaces unescaped.
pdf_path = os.path.abspath("ai_writer/final_chapter_screenshots.pdf")
webbrowser.open(Path(pdf_path).as_uri())

True

In [27]:
import os
import webbrowser
from pathlib import Path

# Open the reviewed chapter text file with the system's default handler.
# Path.as_uri() builds a well-formed, percent-encoded file:// URL; simply
# prefixing "file://" to an absolute path yields a malformed URL on Windows
# (drive letter and backslashes) and leaves spaces unescaped.
text_path = os.path.abspath("ai_writer/final_reviewed_chapter.txt")
webbrowser.open(Path(text_path).as_uri())


True

In [13]:
import os
import webbrowser
from pathlib import Path

# Open final_chapter_screenshots.pdf with the system's default browser/viewer.
# Path.as_uri() builds a well-formed, percent-encoded file:// URL; simply
# prefixing "file://" to an absolute path yields a malformed URL on Windows
# (drive letter and backslashes) and leaves spaces unescaped.
pdf_path = os.path.abspath("ai_writer/final_chapter_screenshots.pdf")
webbrowser.open(Path(pdf_path).as_uri())


True