In [8]:
import struct

file_name = 'example.bin'

# Four sample unsigned 32-bit integers
value1 = 12345
value2 = 67890
value3 = 1000
value4 = 2000
sample_values = (value1, value2, value3, value4)

# '<' selects little-endian byte order (use '>' for big-endian if needed) and
# 'I' is one unsigned integer (4 bytes). The repeat count is taken from the
# number of values, so the format cannot drift out of sync with the data.
binary_data = struct.pack(f'<{len(sample_values)}I', *sample_values)

with open(file_name, 'wb') as f:
    f.write(binary_data)

print(f"Created '{file_name}' with values {value1},{value2},{value3},{value4} (u4 format).")

Created 'example.bin' with values 12345,67890,1000,2000 (u4 format).


In [9]:
import pandas as pd

# Load the word-definition spreadsheet into a DataFrame
idd_df = pd.read_excel('IDD_WORDS.xlsx')

# Preview the first rows, then list the column labels
print("First 5 rows of the DataFrame:", idd_df.head(), sep="\n")
print("\nColumns in the DataFrame:", idd_df.columns, sep="\n")

First 5 rows of the DataFrame:
   WordID  StrtBit  EndBit FieldName VALUES
0       0        0       3       a_0  aa,bb
1       0        4       7       b_0    NaN
2       1        0       2       a_1     xx

Columns in the DataFrame:
Index(['WordID', 'StrtBit', 'EndBit', 'FieldName', 'VALUES'], dtype='object')


In [10]:
import struct
import numpy as np

# Read the binary file back as little-endian unsigned 32-bit words.
# Assuming 'file_name' is defined from the previous step ('example.bin')
binary_words = []
word_size = 4  # bytes per unsigned 32-bit word ('<I')

try:
    with open(file_name, 'rb') as f:
        raw_bytes = f.read()

    # A truncated file leaves fewer than word_size bytes at the end. struct cannot
    # decode a partial word, so report and drop those bytes instead of failing
    # half-way through the read.
    trailing_byte_count = len(raw_bytes) % word_size
    if trailing_byte_count:
        print(f"Warning: '{file_name}' ends with {trailing_byte_count} byte(s) that do not form a complete {word_size}-byte word; they were ignored.")
        raw_bytes = raw_bytes[:-trailing_byte_count]

    # iter_unpack yields one 1-tuple per word
    binary_words = [word for (word,) in struct.iter_unpack('<I', raw_bytes)]

    print(f"Successfully re-read {len(binary_words)} unsigned 32-bit words from '{file_name}'.")
    print("Binary Words:", binary_words)
except FileNotFoundError:
    print(f"Error: The file '{file_name}' was not found. Please ensure the path is correct.")
except (OSError, struct.error) as e:
    # Other I/O or decoding problems are reported without stopping the notebook
    print(f"An error occurred while reading the binary file: {e}")

Successfully re-read 4 unsigned 32-bit words from 'example.bin'.
Binary Words: [12345, 67890, 1000, 2000]


### Implement Bit Field Extraction

This function extracts a bit field from a 32-bit word given the start bit and end bit (inclusive, 0-indexed from LSB).

In [11]:
def extract_bit_field(word, start_bit, end_bit):
    """
    Extracts a bit field from a 32-bit word.

    The two bit positions may be passed in either order; the lower one is
    always treated as the start of the field.

    Args:
        word (int): The 32-bit integer from which to extract the field.
        start_bit (int): The starting bit position (0-indexed from LSB).
        end_bit (int): The ending bit position (0-indexed from LSB, inclusive).

    Returns:
        int: The extracted value, as a plain Python int.

    Raises:
        TypeError: If an argument is not an integer type (for example a float
            or NaN coming from an empty spreadsheet cell).
        ValueError: If a bit position is negative.
    """
    import operator

    # Normalise to Python ints. NumPy integer scalars are accepted here, but
    # mixing their dtypes in shifts can fail (uint64 >> int64 raises TypeError)
    # and negative NumPy shifts do not raise at all.
    word = operator.index(word)
    start_bit = operator.index(start_bit)
    end_bit = operator.index(end_bit)

    if start_bit > end_bit:
        start_bit, end_bit = end_bit, start_bit # Ensure start is less than or equal to end

    if start_bit < 0:
        raise ValueError(
            f"Bit positions must be non-negative, got {start_bit} and {end_bit}."
        )

    # Width of the field, bounds inclusive
    num_bits = end_bit - start_bit + 1

    # Create a mask with 'num_bits' set to 1
    mask = (1 << num_bits) - 1

    # Shift the word right by 'start_bit' to align the field to LSB
    # Then apply the mask to isolate the bits
    return (word >> start_bit) & mask

print("Bit field extraction function defined.")

Bit field extraction function defined.


In [12]:
import pandas as pd
import numpy as np # Ensure numpy is imported

# Parse every dwell of the binary data into its own column (DWL0, DWL1, ...).
# Assume idd_df, binary_words and the extract_bit_field function are available from previous steps

# 1. Determine words per dwell from idd_df
# This assumes WordIDs are 0-indexed and consecutive within a 'dwell' block.
# For example, if WordIDs are 0, 1, 2, then words_per_dwell will be 3.
# int() keeps this a plain Python integer for the range()/index arithmetic below.
words_per_dwell = int(idd_df['WordID'].max()) + 1 if not idd_df.empty else 0
print(f"Determined words per dwell based on idd_df WordIDs: {words_per_dwell}")

# Remove results left by earlier runs: the old ParsedValue column and any DWL<n>
# columns. Without this, re-running against a shorter binary file would keep
# dwell columns that no longer correspond to data in the file.
stale_columns = [
    col for col in idd_df.columns
    if col == 'ParsedValue'
    or (isinstance(col, str) and col.startswith('DWL') and col[3:].isdigit())
]
if stale_columns:
    idd_df = idd_df.drop(columns=stale_columns)

if not binary_words:
    print("No binary words were read. Cannot perform dwell-based parsing. Please ensure the binary file was read correctly.")
elif words_per_dwell == 0:
    print("Warning: No WordIDs found in idd_df to determine words per dwell. Cannot perform dwell-based parsing.")
else:
    num_binary_words = len(binary_words)
    num_dwells = num_binary_words // words_per_dwell
    print(f"Total binary words: {num_binary_words}, calculated number of dwells: {num_dwells}")

    if num_dwells == 0 and num_binary_words > 0:
        print("Warning: Number of binary words is less than words per dwell. No full dwells can be parsed.")
    elif num_binary_words % words_per_dwell != 0:
        print(f"Warning: Binary file length ({num_binary_words}) is not a multiple of words per dwell ({words_per_dwell}). Extra binary words might be ignored.")

    # Build every dwell column first and attach them in one step; inserting
    # columns one at a time fragments the DataFrame when there are many dwells.
    dwell_values_by_column = {}
    for dwell_idx in range(num_dwells):
        first_word_of_dwell = dwell_idx * words_per_dwell
        current_dwell_values = []

        # Iterate the three columns directly so each value keeps its own dtype
        # (iterrows would coerce a whole row to one common dtype).
        for word_id_in_dwell, start_bit, end_bit in zip(
            idd_df['WordID'], idd_df['StrtBit'], idd_df['EndBit']
        ):
            # Calculate the actual index in the binary_words list for the current dwell
            actual_binary_word_index = first_word_of_dwell + word_id_in_dwell

            # The lower bound stops a negative WordID from silently wrapping
            # around to the end of the list.
            if 0 <= actual_binary_word_index < num_binary_words:
                target_word = binary_words[actual_binary_word_index]
                current_dwell_values.append(extract_bit_field(target_word, start_bit, end_bit))
            else:
                # This case should ideally be caught by num_dwells calculation, but good for robustness
                current_dwell_values.append(np.nan)
                print(f"Error: Attempted to access WordID {word_id_in_dwell} in Dwell {dwell_idx} at binary_words index {actual_binary_word_index} which is out of bounds for binary data length {num_binary_words}.")

        dwell_values_by_column[f'DWL{dwell_idx}'] = current_dwell_values

    if dwell_values_by_column:
        dwell_df = pd.DataFrame(dwell_values_by_column, index=idd_df.index)
        idd_df = pd.concat([idd_df, dwell_df], axis=1)

    print("\nUpdated DataFrame with Dwell-based parsing:")
    print(idd_df)

Determined words per dwell based on idd_df WordIDs: 2
Total binary words: 4, calculated number of dwells: 2

Updated DataFrame with Dwell-based parsing:
   WordID  StrtBit  EndBit FieldName VALUES  DWL0  DWL1
0       0        0       3       a_0  aa,bb     9     8
1       0        4       7       b_0    NaN     3    14
2       1        0       2       a_1     xx     2     0


In [13]:
# Save the table, including the dwell columns, without writing the row index
idd_df.to_excel(excel_writer='IDD_WORDS_PARSED.xlsx', index=False)

# Task
Load the `IDD_WORDS_PARSED.xlsx` Excel file into a pandas DataFrame.

## Load parsed Excel data

### Subtask:
Read the `IDD_WORDS_PARSED.xlsx` file into a pandas DataFrame.


**Reasoning**:
The subtask requires reading an Excel file into a pandas DataFrame. The first step is to import the pandas library and then use `pd.read_excel()` to load the data.



In [15]:
import pandas as pd

# Read the saved parse results back from disk
parsed_idd_df = pd.read_excel('IDD_WORDS_PARSED.xlsx')

# Confirm the load and preview the first rows
print(
    "Successfully loaded 'IDD_WORDS_PARSED.xlsx' into parsed_idd_df.",
    "First 5 rows of parsed_idd_df:",
    parsed_idd_df.head(),
    sep="\n",
)

Successfully loaded 'IDD_WORDS_PARSED.xlsx' into parsed_idd_df.
First 5 rows of parsed_idd_df:
   WordID  StrtBit  EndBit FieldName VALUES  DWL0  DWL1
0       0        0       3       a_0  aa,bb     9     8
1       0        4       7       b_0    NaN     3    14
2       1        0       2       a_1     xx     2     0


## Identify dwell columns

### Subtask:
Determine which columns in the DataFrame contain the parsed dwell values (e.g., 'DWL0', 'DWL1').


**Reasoning**:
To identify the dwell columns, I will access the column names of the `parsed_idd_df` DataFrame, filter them to find those starting with 'DWL', and store the result in a list.



In [16]:
# Dwell columns are the ones whose label starts with 'DWL'. Column labels read
# from a spreadsheet are not guaranteed to be strings (a numeric header cell may
# come back as a number), so check the type before calling startswith().
dwell_columns = [
    col for col in parsed_idd_df.columns
    if isinstance(col, str) and col.startswith('DWL')
]

print(f"Identified dwell columns: {dwell_columns}")

Identified dwell columns: ['DWL0', 'DWL1']


## Reconstruct binary words from dwell data

### Subtask:
Iterate through each dwell column. For each dwell, reconstruct the 32-bit words by shifting every field's parsed value left to its start bit and OR-ing together all fields that share a WordID; bits not covered by any field remain 0. Collect the reconstructed words of each dwell in ascending WordID order.


**Reasoning**:
To reconstruct the binary words, I will iterate through each dwell column, and for each dwell, I will reconstruct the full 32-bit words by combining the bit field values from the parsed DataFrame. The process involves shifting the parsed values to their correct bit positions and using a bitwise OR operation to combine them into 32-bit words, grouped by their original WordID.



In [18]:
# Rebuild the 32-bit words of every dwell from the parsed bit-field values.
# Result: one list per dwell column, holding that dwell's words ordered by WordID.
reconstructed_binary_words_per_dwell = []

# Every dwell must contain a word for each WordID from 0 to the largest one, even
# when a WordID has no field rows; otherwise the following words would shift
# position when the dwells are written out back to back.
words_in_dwell = int(parsed_idd_df['WordID'].max()) + 1 if not parsed_idd_df.empty else 0

for dwell_column in dwell_columns:
    # Start every expected word at 0; fields are OR-ed in below
    current_dwell_reconstructed_words = {word_id: 0 for word_id in range(words_in_dwell)}

    for index, row in parsed_idd_df.iterrows():
        word_id = int(row['WordID'])
        parsed_value = row[dwell_column]

        # Keep unexpected WordIDs (e.g. negative ones) instead of dropping them
        current_dwell_reconstructed_words.setdefault(word_id, 0)

        # A missing value leaves the bits of this field at 0
        if pd.isna(parsed_value):
            continue

        # The bit bounds may be stored in either order; the field always starts
        # at the lower one, which is how the extraction step treats them.
        low_bit = int(min(row['StrtBit'], row['EndBit']))
        high_bit = int(max(row['StrtBit'], row['EndBit']))
        field_mask = (1 << (high_bit - low_bit + 1)) - 1

        # Ensure parsed_value is an integer for bitwise operations
        field_value = int(parsed_value)
        if field_value != field_value & field_mask:
            # A value wider than its field would overwrite neighbouring fields
            print(f"Warning: value {field_value} in column '{dwell_column}' does not fit bits {low_bit}-{high_bit} of WordID {word_id}; extra bits were discarded.")
            field_value &= field_mask

        # Shift the value to its position in the word and combine
        current_dwell_reconstructed_words[word_id] |= field_value << low_bit

    # Collect and sort the reconstructed words for the current dwell by WordID
    # This ensures the order of words for each dwell is consistent
    sorted_reconstructed_words = [
        current_dwell_reconstructed_words[k]
        for k in sorted(current_dwell_reconstructed_words)
    ]
    reconstructed_binary_words_per_dwell.append(sorted_reconstructed_words)

print("Reconstructed binary words per dwell:")
for i, dwell_words in enumerate(reconstructed_binary_words_per_dwell):
    print(f"Dwell {i} (from column '{dwell_columns[i]}'): {dwell_words}")

Reconstructed binary words per dwell:
Dwell 0 (from column 'DWL0'): [57, 2]
Dwell 1 (from column 'DWL1'): [232, 0]


In [19]:
import struct

output_file_name = 'example2.bin'

# Concatenate the per-dwell word lists into one flat list, keeping dwell order
all_reconstructed_words = [
    word
    for dwell_words_list in reconstructed_binary_words_per_dwell
    for word in dwell_words_list
]

# Little-endian ('<') followed by one 'I' (unsigned 4-byte int) per word
word_count = len(all_reconstructed_words)
format_string = '<' + word_count * 'I'

# Serialise all words into a single bytes object
binary_output_data = struct.pack(format_string, *all_reconstructed_words)

# Store the bytes on disk, reporting any failure instead of raising
try:
    with open(output_file_name, 'wb') as out_file:
        out_file.write(binary_output_data)
    print(f"Successfully wrote {len(all_reconstructed_words)} unsigned 32-bit words to '{output_file_name}'.")
except Exception as e:
    print(f"An error occurred while writing to the binary file: {e}")


Successfully wrote 4 unsigned 32-bit words to 'example2.bin'.
