In [1]:
from docx import Document
import os

def remove_asterisks_from_docx(input_file, output_file=None):
    """
    Removes all '*' characters from a .docx file while preserving formatting.

    Asterisks are stripped run by run, so every run keeps its own character
    formatting. Body paragraphs and paragraphs inside tables are processed,
    including tables nested inside table cells.

    Args:
        input_file (str): Path to the input .docx file
        output_file (str): Path to the output .docx file. When omitted, the
            result is written next to the input as "<name>_cleaned.docx".

    Returns:
        None. Progress and errors are reported with print(); any exception
        raised while loading, cleaning or saving is caught and printed.
    """
    if not input_file.lower().endswith('.docx'):
        print("Error: This function only works with .docx files")
        return

    if output_file is None:
        output_file = f"{os.path.splitext(input_file)[0]}_cleaned.docx"

    def clean_paragraph(paragraph):
        """Strip '*' from one paragraph and return how many were removed."""
        removed = 0

        # Edit each run in place: assigning run.text keeps the run's formatting,
        # whereas clearing the paragraph would flatten it into one plain run.
        for run in paragraph.runs:
            run_text = run.text
            if '*' in run_text:
                removed += run_text.count('*')
                run.text = run_text.replace('*', '')

        # Fallback: the paragraph still reports asterisks, so they live outside
        # its direct runs (a paragraph without runs, or presumably hyperlink
        # text in python-docx versions whose paragraph.text includes it --
        # TODO confirm). Rewrite the paragraph text so the "removes all '*'"
        # guarantee still holds; this path does drop run-level formatting.
        leftover = paragraph.text
        if '*' in leftover:
            removed += leftover.count('*')
            paragraph.clear()
            paragraph.add_run(leftover.replace('*', ''))

        return removed

    def clean_tables(tables):
        """Strip '*' from every cell of the given tables, nested ones included."""
        removed = 0
        for table in tables:
            for row in table.rows:
                for cell in row.cells:
                    for paragraph in cell.paragraphs:
                        removed += clean_paragraph(paragraph)
                    # doc.tables only lists top-level tables; tables placed
                    # inside a cell are reached through cell.tables.
                    removed += clean_tables(cell.tables)
        return removed

    try:
        # Load the document
        doc = Document(input_file)

        # Process body paragraphs, then every table
        asterisk_count = sum(clean_paragraph(paragraph) for paragraph in doc.paragraphs)
        asterisk_count += clean_tables(doc.tables)

        # Save the cleaned document
        doc.save(output_file)

        print(f"Successfully removed '*' symbols from '{input_file}'")
        print(f"Cleaned document saved as: '{output_file}'")
        print(f"Removed {asterisk_count} '*' characters")

    except FileNotFoundError:
        print(f"Error: File '{input_file}' not found.")
    except Exception as e:
        print(f"Error: {e}")

def remove_asterisks_preserve_formatting(input_file, output_file=None):
    """
    Advanced version that better preserves original formatting by working with runs.

    Only the text of individual runs is rewritten; paragraphs are never
    cleared or rebuilt. Body paragraphs and table cells are processed,
    including tables nested inside other table cells.

    Args:
        input_file (str): Path to the input .docx file
        output_file (str): Path to the output .docx file. When omitted, the
            result is written as "<name>_cleaned_advanced.docx".

    Returns:
        None. Results are reported with print(); any exception raised while
        loading, cleaning or saving is caught and printed as "Error: ...".
    """
    if not input_file.lower().endswith('.docx'):
        print("Error: This function only works with .docx files")
        return

    if output_file is None:
        name_parts = input_file.rsplit('.', 1)
        output_file = f"{name_parts[0]}_cleaned_advanced.docx"

    def strip_runs(paragraphs):
        """Remove '*' from every run of the given paragraphs; return the count."""
        removed = 0
        for paragraph in paragraphs:
            for run in paragraph.runs:
                run_text = run.text
                hits = run_text.count('*')
                if hits:
                    # Only touch runs that actually contain an asterisk so
                    # untouched runs are left exactly as they were loaded.
                    run.text = run_text.replace('*', '')
                    removed += hits
        return removed

    def strip_tables(tables):
        """Apply strip_runs to every cell of the given tables, recursively."""
        removed = 0
        for table in tables:
            for row in table.rows:
                for cell in row.cells:
                    removed += strip_runs(cell.paragraphs)
                    # Tables nested inside a cell are not part of doc.tables,
                    # so descend into them explicitly.
                    removed += strip_tables(cell.tables)
        return removed

    try:
        doc = Document(input_file)

        # Process paragraphs with run-level precision, then all tables
        asterisk_count = strip_runs(doc.paragraphs) + strip_tables(doc.tables)

        doc.save(output_file)
        print(f"Advanced cleaning completed: '{output_file}'")
        print(f"Removed {asterisk_count} '*' characters")

    except Exception as e:
        print(f"Error: {e}")

# Interactive entry point: ask for a document, then for the cleaning strategy.
if __name__ == "__main__":
    for banner_line in ("DOCX Asterisk Remover", "=" * 30):
        print(banner_line)

    filename = input("Enter the .docx filename: ").strip()

    if os.path.exists(filename):
        for menu_line in (
            "\nChoose processing method:",
            "1. Standard cleaning",
            "2. Advanced formatting preservation",
        ):
            print(menu_line)

        choice = input("Enter choice (1 or 2): ").strip()

        # Anything other than "2" falls back to the standard cleaner.
        cleaner = (
            remove_asterisks_preserve_formatting
            if choice == "2"
            else remove_asterisks_from_docx
        )
        cleaner(filename)
    else:
        print(f"File '{filename}' not found!")

ModuleNotFoundError: No module named 'docx'

In [2]:
import os

def debug_file_location():
    """
    Prompt for a filename and print where Python is looking for it.

    Reports the name that was entered (whitespace-stripped), the current
    working directory, whether the name exists relative to it, and then
    every entry of the current directory.
    """
    target = input("Enter filename: ").strip()

    summary = (
        f"Looking for: {target}",
        f"Current directory: {os.getcwd()}",
        f"File exists: {os.path.exists(target)}",
    )
    for line in summary:
        print(line)

    # Show what is actually in the working directory for comparison
    print("\nFiles in current directory:")
    entries = os.listdir('.')
    for entry in entries:
        print(f"  - {entry}")

# Run this to debug: prompts for a filename, then prints the current working
# directory, whether that filename exists there, and the directory's contents.
debug_file_location()

Looking for: 
Current directory: /Users/leoferrer/Desktop/"*" Remover
File exists: False

Files in current directory:
  - Module IV_cleaned_advanced.docx
  - Screenshot
  - Reaction Paper.docx
  - .DS_Store
  - output_images
  - Untitled.ipynb
  - Reaction Paper2.pdf
  - Module3.docx
  - scalla.docx
  - Usability Evaluation of Arngren.docx
  - Asterisk_Remover.ipynb
  - HCI_Ferrer.pdf
  - .ipynb_checkpoints
  - Reaction Paper_cleaned_advanced.docx
  - HCI_Ferrer.docx
  - Usability Evaluation of Arngren_cleaned_advanced.docx
  - Module IV.docx
