In [2]:
import os
import pandas as pd
import warnings
from LimeNDAX import get_records, ndax_basic
import time
import tempfile

# Register status code 25 in LimeNDAX's state lookup table before any file is read.
# NOTE(review): presumably get_records fails on records whose status code is
# absent from state_dict -- confirm against the LimeNDAX version in use.
ndax_basic.state_dict[25] = "Unknown_State_25"  # placeholder label for the otherwise unmapped code

def safe_convert_ndax(file_path, output_path):
    """
    Convert a single NDAX file to an Excel workbook, reporting errors instead of raising.

    The file is read with ``get_records``; missing values in object-typed
    columns are replaced with empty strings and the frame is written to
    ``output_path`` without the index column.

    While the conversion runs, the ``TEMP`` environment variable points at a
    throwaway directory. Its previous state (including "not set") is restored
    afterwards, whether the conversion succeeded or not.

    Args:
        file_path: Path of the NDAX file to read.
        output_path: Path of the Excel file to write.

    Returns:
        True if the workbook was written. False if the file yielded no
        records or if any exception occurred; the reason is printed rather
        than raised so a batch run can continue with the next file.
    """
    try:
        with tempfile.TemporaryDirectory() as temp_dir:
            # Remember the exact previous state: None means "variable was not set".
            original_temp = os.environ.get('TEMP')
            os.environ['TEMP'] = temp_dir

            try:
                df = get_records(file_path)

                if df is None or df.empty:
                    # Nothing to write; report it explicitly instead of
                    # silently falling through with an implicit None.
                    print(f"No records found in {os.path.basename(file_path)}")
                    return False

                # Work on a copy so the frame returned by get_records is untouched.
                df_clean = df.copy()
                for column in df_clean.columns:
                    if df_clean[column].dtype == 'object':
                        df_clean[column] = df_clean[column].fillna('')

                df_clean.to_excel(output_path, index=False)
                return True

            finally:
                # Always undo the override. If TEMP was unset before, remove it
                # again; otherwise it would keep pointing at the directory that
                # TemporaryDirectory deletes on exit.
                if original_temp is None:
                    os.environ.pop('TEMP', None)
                else:
                    os.environ['TEMP'] = original_temp

    except Exception as e:
        # Deliberately broad: one unreadable file must not abort a batch run.
        print(f"Error converting {os.path.basename(file_path)}: {e}")
        return False

def convert_ndax_files(input_folder, output_folder):
    """
    Convert every NDAX file in ``input_folder`` to an Excel file in ``output_folder``.

    Files are selected by a case-insensitive ``.ndax`` extension and processed
    in sorted name order. Each one is written to ``output_folder`` under the
    same base name with an ``.xlsx`` extension. Progress and a final summary
    (including the names of any failed files) are printed.

    Args:
        input_folder: Directory scanned (non-recursively) for NDAX files.
        output_folder: Directory receiving the workbooks; created if missing.
    """
    os.makedirs(output_folder, exist_ok=True)

    # Track conversions by file name for the summary below.
    successful = []
    failed = []

    # Silence FutureWarning/UserWarning only for the duration of the batch.
    # catch_warnings restores the previous filter state on exit, so the
    # rest of the process keeps seeing these warnings.
    with warnings.catch_warnings():
        warnings.filterwarnings('ignore', category=FutureWarning)
        warnings.filterwarnings('ignore', category=UserWarning)

        # os.listdir returns entries in arbitrary order; sort for a
        # reproducible processing order and summary.
        for filename in sorted(os.listdir(input_folder)):
            if not filename.lower().endswith('.ndax'):
                continue

            input_path = os.path.join(input_folder, filename)
            output_path = os.path.join(output_folder, os.path.splitext(filename)[0] + '.xlsx')

            print(f"\nProcessing: {filename}")
            if safe_convert_ndax(input_path, output_path):
                successful.append(filename)
                print(f"Successfully converted: {filename}")
            else:
                failed.append(filename)
                print(f"Failed to convert: {filename}")

    # Print summary
    print("\nConversion Summary:")
    print(f"Total files processed: {len(successful) + len(failed)}")
    print(f"Successfully converted: {len(successful)}")
    print(f"Failed conversions: {len(failed)}")

    if failed:
        print("\nFailed Files:")
        for file in failed:
            print(f"- {file}")

if __name__ == "__main__":
    # Hard-coded, relative locations: edit both before running.
    # NOTE(review): the input path uses a backslash separator, so it is only
    # treated as a nested path on Windows.
    input_folder = r'anonymized_data_package\machine_1' # folder scanned for .ndax files
    output_folder = r'airflow/project_data/anonymized_data_package/machine_1' # folder receiving the .xlsx files (created if missing)
    
    convert_ndax_files(input_folder, output_folder)


Processing: 1001.ndax
Successfully converted: 1001.ndax

Processing: 1002.ndax
Successfully converted: 1002.ndax

Processing: 1003.ndax
Successfully converted: 1003.ndax

Processing: 1004.ndax
Successfully converted: 1004.ndax

Processing: 1005.ndax
Successfully converted: 1005.ndax

Processing: 1006.ndax
Successfully converted: 1006.ndax

Processing: 1007.ndax
Successfully converted: 1007.ndax

Processing: 1008.ndax
Successfully converted: 1008.ndax

Processing: 1009.ndax
Successfully converted: 1009.ndax

Processing: 1010.ndax
Successfully converted: 1010.ndax

Conversion Summary:
Total files processed: 10
Successfully converted: 10
Failed conversions: 0
