In [1]:
# This script allows the user to convert TXT glossaries into two-column XLSX glossaries.
# It also adds a third column "admitted" or "preferred" depending on the input file name.
# Customer provides these TXT files and this script prepares them for import into Verifika.
# Input file: Tab-delimited TXT files
# Output file: XLSX with three columns: lang1/lang2/note

In [None]:
import pandas as pd
import glob
import os

In [None]:
# Define the directory path (edit this to point at the folder that holds the TXT glossaries)
directory_path = r'C:\Users\User\TXT2XLSX'


def note_for_filename(txt_file):
    """Return the third-column note for a glossary file.

    The note is derived from the file NAME only (not the directories above
    it), compared case-insensitively, so a parent folder that happens to be
    called "admitted" or "preferred" cannot mislabel the file.

    Args:
        txt_file: Path to the TXT glossary.

    Returns:
        'admitted' if the file name contains "admitted", otherwise
        'preferred' if it contains "preferred", otherwise an empty string.
    """
    file_name = os.path.basename(txt_file).lower()
    if 'admitted' in file_name:
        return 'admitted'
    if 'preferred' in file_name:
        return 'preferred'
    return ''


def read_term_pairs(txt_file):
    """Read a tab-delimited glossary and collect its two-column rows.

    Args:
        txt_file: Path to the TXT glossary (UTF-8, with or without a BOM).

    Returns:
        A tuple ``(rows, skipped)`` where ``rows`` is a list of
        ``[term1, term2]`` lists and ``skipped`` is the number of non-blank
        lines that did not split into exactly two columns.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    rows = []
    skipped = 0
    # 'utf-8-sig' drops a leading byte-order mark if present; plain 'utf-8'
    # would leave '\ufeff' glued to the first term of the glossary.
    with open(txt_file, 'r', encoding='utf-8-sig') as file:
        for line in file:
            stripped = line.strip()
            # Split the line by tab character to get two columns
            columns = stripped.split('\t')
            if len(columns) == 2:
                rows.append(columns)
            elif stripped:
                # Blank lines are ignored silently; malformed ones are counted.
                skipped += 1
    return rows, skipped


def convert_txt_to_xlsx(txt_file):
    """Convert one TXT glossary into a headerless three-column XLSX file.

    The XLSX is written next to the input file, with the ``.txt`` extension
    replaced by ``.xlsx``. Columns are: term 1, term 2, note.

    Args:
        txt_file: Path to the TXT glossary.

    Returns:
        The path of the XLSX file that was written.
    """
    rows, skipped = read_term_pairs(txt_file)
    if skipped:
        print(f"Warning: skipped {skipped} line(s) without exactly two tab-separated columns in {txt_file}")
    if not rows:
        print(f"Warning: no term pairs found in {txt_file}; the XLSX will be empty")

    # Create a DataFrame and add the note column based on the file name
    df = pd.DataFrame(rows)
    df['Column C'] = note_for_filename(txt_file)

    # Define the output file path (same directory, replacing .txt with .xlsx)
    output_file_path = os.path.splitext(txt_file)[0] + '.xlsx'
    print(f"Saving to: {output_file_path}")

    # Save the DataFrame to an XLSX file without headers
    df.to_excel(output_file_path, index=False, header=False)
    return output_file_path


# Find all TXT files in the directory (sorted so the processing order is deterministic)
txt_files = sorted(glob.glob(os.path.join(directory_path, '*.txt')))

# Check if any TXT files were found
if not txt_files:
    print(f"No TXT files found in {directory_path}")
else:
    failed_files = []
    # Process each TXT file; one bad file must not stop the rest of the batch
    for txt_file in txt_files:
        try:
            print(f"Processing file: {txt_file}")
            output_file_path = convert_txt_to_xlsx(txt_file)
            print(f"Successfully saved: {output_file_path}")
        except Exception as e:
            failed_files.append(txt_file)
            print(f"Failed to process file {txt_file}. Error: {e}")

    # Only claim full success when every file was actually converted
    if failed_files:
        print(f"Finished with errors: {len(failed_files)} of {len(txt_files)} file(s) failed.")
    else:
        print("All TXT files have been processed.")