# In [1]:
#!/usr/bin/env python
# coding: utf-8

# In[29]:


import struct
import csv

def decode_char(byte_data):
    """Decode a CHAR field to text.

    The bytes are decoded as UTF-8 and leading/trailing whitespace is
    stripped.

    Args:
        byte_data: Raw bytes of the field.

    Returns:
        The decoded, stripped string. Bytes that are not valid UTF-8 are
        replaced with U+FFFD instead of raising, so a single corrupt byte in
        one field does not abort the conversion of the whole file.
    """
    return byte_data.decode('utf-8', errors='replace').strip()

def decode_int(byte_data, length):
    """Read an unsigned big-endian integer from the start of a field.

    Args:
        byte_data: Raw bytes of the field.
        length: Number of leading bytes of ``byte_data`` to interpret.

    Returns:
        The integer value of the first ``length`` bytes, most significant
        byte first.
    """
    leading = byte_data[:length]
    return int.from_bytes(leading, 'big')

def process_binary_file(file_path, byte_fields, chunk_size=76):
    """Yield one decoded record per fixed-length chunk of a binary file.

    Args:
        file_path: Path of the binary file to read.
        byte_fields: Mapping of field name to a
            ``(start_byte, length, data_type)`` tuple. ``start_byte`` is
            1-based within the record and ``data_type`` is ``'char'`` or
            ``'int'``.
        chunk_size: Record length in bytes. Defaults to 76, the fixed record
            length for ICA.

    Yields:
        A dict mapping every field name in ``byte_fields`` to its decoded
        value, one dict per complete record.

    Raises:
        ValueError: If ``chunk_size`` is not positive or a field declares a
            data type other than ``'char'`` or ``'int'``. Because this is a
            generator, the error surfaces when iteration starts.
    """
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be positive, got {chunk_size!r}")

    # Resolve the layout once, not once per record. Rejecting unknown data
    # types here keeps a typo in the layout from silently producing records
    # with a missing column.
    layout = []
    for field_name, (start_byte, length, data_type) in byte_fields.items():
        if data_type not in ('char', 'int'):
            raise ValueError(
                f"Unsupported data type {data_type!r} for field {field_name!r}"
            )
        offset = start_byte - 1  # start_byte is 1-based
        layout.append((field_name, slice(offset, offset + length), length, data_type))

    with open(file_path, 'rb') as binary_file:
        while chunk := binary_file.read(chunk_size):  # Read file in chunks
            if len(chunk) < chunk_size:
                # A short read is reported and skipped rather than decoded.
                print(f"Warning: Incomplete chunk of size {len(chunk)}")
                continue

            record = {}
            for field_name, span, length, data_type in layout:
                field_bytes = chunk[span]
                if data_type == 'char':
                    record[field_name] = decode_char(field_bytes)
                else:
                    record[field_name] = decode_int(field_bytes, length)

            yield record

# Record layout for ICA.
# Each entry is field_name=(start_byte, length, data_type), where start_byte is
# 1-based within the record and data_type is 'char' or 'int'.
# The fields are contiguous and cover bytes 1-75; the reader uses 76-byte
# records, so byte 76 is not mapped to any field.
byte_fields = dict(
    week=(1, 4, 'int'),
    ica_number=(5, 6, 'int'),
    store=(11, 5, 'int'),
    st_override=(16, 1, 'char'),
    event=(17, 4, 'int'),
    system=(21, 2, 'char'),
    vendor=(23, 6, 'int'),
    item=(29, 6, 'int'),
    generation=(35, 1, 'char'),
    upc_man_override=(36, 1, 'char'),
    keycat=(37, 4, 'int'),
    dominance_code=(41, 1, 'char'),
    source_code=(42, 1, 'char'),
    description=(43, 32, 'char'),
    qc_flag=(75, 1, 'char'),
)

# Convert the generator output to a CSV file.
binary_file_path = 'ICA.W2376.U0007.S34109.D250310.T213645'
output_csv_path = 'ICA_UK_Converted_Data.csv'

# Write UTF-8 explicitly: CHAR fields are decoded from UTF-8, and the platform
# default encoding is not guaranteed to be able to represent that text.
with open(output_csv_path, 'w', newline='', encoding='utf-8') as csvfile:
    # The CSV columns are the record fields in layout order, so take them from
    # byte_fields instead of keeping a second hand-maintained list in sync.
    fieldnames = list(byte_fields)
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()

    # Records are streamed straight from the generator to the writer.
    writer.writerows(process_binary_file(binary_file_path, byte_fields))

print(f"Data has been written to {output_csv_path}")


# Output: Data has been written to ICA_UK_Converted_Data.csv
