In [3]:
import os
import zipfile

def convert_xlsx_to_txt(xlsx_path, output_txt_path):
    """
    Convert an XLSX file to a single text file with XML data using <file> tags.

    An XLSX file is opened as a ZIP archive. Every member listed by the
    archive is written to the text file as one section: an opening
    ``<file name="MEMBER">`` line, the member's content decoded as UTF-8,
    and a closing ``</file>`` line.

    Bytes that are not valid UTF-8 are silently dropped while decoding, so
    binary members (images, printer settings, ...) are NOT preserved
    faithfully in the text dump.

    :param xlsx_path: Path to the input XLSX file
    :param output_txt_path: Path to save the resulting text file
    """
    # newline='' switches off newline translation on write, so the dump is
    # byte-identical on every platform. With the default mode, Windows would
    # turn each '\n' into '\r\n' and an existing '\r\n' into '\r\r\n'.
    with zipfile.ZipFile(xlsx_path, 'r') as zipf, \
            open(output_txt_path, 'w', encoding='utf-8', newline='') as txt_file:
        for file_name in zipf.namelist():
            # errors='ignore' drops undecodable bytes instead of raising.
            content = zipf.read(file_name).decode('utf-8', errors='ignore')
            txt_file.write(f'<file name="{file_name}">\n{content}\n</file>\n')

    print(f"Conversion complete. Data saved to {output_txt_path}")

# Example usage: dump every member of test.xlsx into output.txt
# (both paths are relative to the current working directory).
convert_xlsx_to_txt('test.xlsx', 'output.txt')


Conversion complete. Data saved to output.txt


In [6]:
import os
import re
import zipfile
import tempfile
import shutil

def convert_txt_to_xlsx(input_txt_path, output_xlsx_path):
    """
    Convert a text file with XML-tagged file contents back to an XLSX file.

    The input text is scanned for ``<file name="...">...</file>`` sections.
    Each section becomes one member of the output ZIP archive (an XLSX file
    is a ZIP archive): the ``name`` attribute is the member name and the
    section body, with surrounding whitespace stripped and encoded as UTF-8,
    is the member content. Sections are matched non-greedily, so a body that
    itself contains the text ``</file>`` is cut at that point.

    :param input_txt_path: Path to the input text file with XML tags
    :param output_xlsx_path: Path to save the resulting XLSX file
    """
    # Read the text file
    with open(input_txt_path, 'r', encoding='utf-8') as txt_file:
        txt_content = txt_file.read()

    # DOTALL lets the body group span multiple lines.
    file_pattern = re.compile(r'<file name="(.*?)">(.*?)</file>', re.DOTALL)

    # Create a new zip file (XLSX is essentially a zip archive)
    with zipfile.ZipFile(output_xlsx_path, 'w', zipfile.ZIP_DEFLATED) as xlsx_file:
        for file_name, content in file_pattern.findall(txt_content):
            # Write the bytes straight into the archive instead of staging
            # them on disk first. The name taken from the text file is never
            # used as a filesystem path, so a name containing '..' or an
            # absolute path cannot create files outside the archive, and a
            # name ending in '/' (a directory entry) no longer fails when
            # opened for writing.
            xlsx_file.writestr(file_name, content.strip().encode('utf-8'))

    print(f"Conversion complete. XLSX file saved to {output_xlsx_path}")

# Example usage: rebuild an XLSX archive from the tagged text in output.txt
# and save it as reconstructed.xlsx in the current working directory.
convert_txt_to_xlsx('output.txt', 'reconstructed.xlsx')

Conversion complete. XLSX file saved to reconstructed.xlsx
