In [4]:
import pandas as pd
import os

def _process_file(input_file: str, output_file: str) -> bool:
    """Clean one pipe-separated file and write the result to *output_file*.

    The file is de-duplicated, fully blank rows are dropped, every value in
    the 'ImageType' column is set to 'Passenger_list', and that column is
    moved to the first position.

    Args:
        input_file: Path of the pipe-separated file to read.
        output_file: Path the cleaned pipe-separated file is written to.

    Returns:
        True if the output file was written, False if the input has no
        'ImageType' column (nothing is written in that case).

    Raises:
        Whatever ``pandas.read_csv`` / ``DataFrame.to_csv`` raise for
        unreadable, empty, or malformed files; the caller handles these.
    """
    # Read every column as text and treat only empty fields as missing.
    # Letting pandas infer types would silently rewrite the data on the round
    # trip: '007' -> 7, integer columns containing a blank -> '7.0', and
    # literal values such as 'NA' or 'null' -> empty.
    df = pd.read_csv(
        input_file,
        sep='|',
        dtype=str,
        keep_default_na=False,
        na_values=[''],
    )
    print(f"File '{input_file}' read successfully.")

    # NOTE(review): duplicates are removed before 'ImageType' is overwritten,
    # so rows that differed only in 'ImageType' stay in the output as
    # identical rows. This ordering is kept from the original; confirm it is
    # intended.
    df = df.drop_duplicates()
    print("Duplicate rows removed.")

    if 'ImageType' not in df.columns:
        print(f"Error: 'ImageType' column not found in the file '{input_file}'.")
        return False

    # A row is "blank" when every field in it is empty.
    df = df.dropna(how='all')
    print("Blank rows removed.")

    # Replace the column with the constant label and put it first.
    df = df.drop(columns='ImageType')
    print("'ImageType' column updated.")
    df.insert(0, 'ImageType', 'Passenger_list')
    print("'ImageType' column reinserted at the first position.")

    df.to_csv(output_file, index=False, sep='|')
    print(f"File saved to '{output_file}'.")
    return True


def process_files(input_folder: str, output_folder: str) -> None:
    """Clean every ``.txt`` file in *input_folder* into *output_folder*.

    Each pipe-separated text file is processed independently and written to
    *output_folder* under the same file name. Progress and errors are
    reported with ``print``; the function itself never raises.

    Args:
        input_folder: Directory scanned (non-recursively) for ``.txt`` files.
            The extension match is case-insensitive.
        output_folder: Directory that receives the cleaned files; created if
            it does not exist.
    """
    try:
        # Ensure the output folder exists
        os.makedirs(output_folder, exist_ok=True)
        print(f"Output folder '{output_folder}' is ready.")
        # Sorted so that the processing order (and the log) is reproducible.
        filenames = sorted(os.listdir(input_folder))
    except Exception as e:
        print(f"An error occurred: {e}")
        return

    for filename in filenames:
        if not filename.lower().endswith('.txt'):
            print(f"Skipping '{filename}' as it is not a text file.")
            continue

        input_file = os.path.join(input_folder, filename)
        output_file = os.path.join(output_folder, filename)
        print(f"Processing file '{input_file}'...")

        # Errors are handled per file so that one bad file does not stop the
        # remaining files from being processed.
        try:
            _process_file(input_file, output_file)
        except Exception as e:
            print(f"An error occurred while processing '{input_file}': {e}")

# Example usage: clean every .txt file found in the input folder and write
# the results, under the same file names, to the output folder.
input_folder, output_folder = (
    r'H:\WORKSPACE\test_output\in',
    r'H:\WORKSPACE\test_output\New folder',
)
process_files(input_folder=input_folder, output_folder=output_folder)


Output folder 'H:\WORKSPACE\test_output\New folder' is ready.
Processing file 'H:\WORKSPACE\test_output\in\IIMI_2236.txt'...
File 'H:\WORKSPACE\test_output\in\IIMI_2236.txt' read successfully.
Duplicate rows removed.
Blank rows removed.
'ImageType' column updated.
'ImageType' column reinserted at the first position.
File saved to 'H:\WORKSPACE\test_output\New folder\IIMI_2236.txt'.
Processing file 'H:\WORKSPACE\test_output\in\IIMI_8569.txt'...
File 'H:\WORKSPACE\test_output\in\IIMI_8569.txt' read successfully.
Duplicate rows removed.
Blank rows removed.
'ImageType' column updated.
'ImageType' column reinserted at the first position.
File saved to 'H:\WORKSPACE\test_output\New folder\IIMI_8569.txt'.
