Binary File Reader - Modular Tutorial (Standalone Script Version)
==================================================================

Run this script to execute all lessons sequentially.
Each lesson demonstrates a core concept in a modular way.

Usage:
    python binary_reader_tutorial.py

In [2]:
import struct
import os
import re
from pathlib import Path

LESSON 1: BASIC BINARY READING

In [7]:
def lesson_1_basic_reading():
    """Lesson 1: Learn basic binary file reading with struct module.

    Packs three sample records into ``lesson1_sample.bin`` (created in the
    current working directory), reads the file back one fixed-size record at
    a time, prints the decoded records, and deletes the file again.  The file
    is removed even when a step in between raises.

    Returns:
        None. All results are printed to stdout.
    """

    print("\n" + "="*70)
    print("LESSON 1: BASIC BINARY READING")
    print("="*70)

    # Define format. The '<' prefix selects little-endian, standard sizes and
    # no alignment padding, so the record size is the plain sum of the fields.
    RECORD_FORMAT = '<id10s'
    RECORD_SIZE = struct.calcsize(RECORD_FORMAT)

    print(f"\nRecord format: {RECORD_FORMAT}")
    print(f"Record size: {RECORD_SIZE} bytes")
    print(f"Breakdown: 4 (int) + 8 (double) + 10 (string) = {RECORD_SIZE} bytes")

    # Sample data: (id, value, code)
    sample_records = [
        (1, 123.45, b'CODE001'),
        (2, 678.90, b'CODE002'),
        (3, 111.11, b'CODE003'),
    ]

    filename = 'lesson1_sample.bin'
    try:
        # Create sample file
        with open(filename, 'wb') as f:
            for record_id, value, code in sample_records:
                # '10s' pads codes shorter than 10 bytes with NUL bytes.
                f.write(struct.pack(RECORD_FORMAT, record_id, value, code))

        print(f"\n✓ Created '{filename}' with {len(sample_records)} records")

        # Read the file back, one record-sized chunk per iteration
        records = []
        with open(filename, 'rb') as f:
            while True:
                binary_data = f.read(RECORD_SIZE)
                if not binary_data:
                    break  # end of file

                record_id, value, code_bytes = struct.unpack(RECORD_FORMAT, binary_data)
                # Drop the NUL padding added by '10s', then surrounding blanks.
                code = code_bytes.decode('ascii').rstrip('\x00').strip()

                records.append({
                    'id': record_id,
                    'value': value,
                    'code': code,
                })

        print(f"✓ Read {len(records)} records:")
        for record in records:
            print(f"  ID: {record['id']}, Value: {record['value']:.2f}, Code: '{record['code']}'")

        print("\n📝 Key Takeaway:")
        print("   - struct.pack() converts Python data to binary")
        print("   - struct.unpack() converts binary to Python data")
        print("   - Format: '<' = little-endian, 'i' = int, 'd' = double, '10s' = 10-byte string")
    finally:
        # Always clean up the sample file, including on failure.
        if os.path.exists(filename):
            os.remove(filename)

In [None]:
# Runs Lesson 1 immediately when this statement is reached.
# NOTE(review): main() calls this lesson as well, so running the file as a
# script would execute it twice - confirm this top-level call is intended.
lesson_1_basic_reading()

LESSON 2: ERROR HANDLING MODES

In [9]:
def lesson_2_error_handling():
    """Lesson 2: Three error handling strategies.

    Writes ``lesson2_corrupted.bin`` (current working directory) containing
    three full records with 10 stray bytes inserted before the last one, then
    reads it three times:

    * STRICT - raise on the first incomplete record and stop.
    * SKIP_INVALID - count bad records and carry on where possible.
    * COLLECT_ERRORS - keep a list describing every bad record.

    The file is removed afterwards, even when a step raises.

    Returns:
        None. All results are printed to stdout.
    """

    print("\n" + "="*70)
    print("LESSON 2: ERROR HANDLING MODES")
    print("="*70)

    RECORD_FORMAT = '<id10s'
    RECORD_SIZE = struct.calcsize(RECORD_FORMAT)

    filename = 'lesson2_corrupted.bin'
    try:
        # Create corrupted file
        with open(filename, 'wb') as f:
            f.write(struct.pack(RECORD_FORMAT, 1, 100.0, b'GOOD001'))
            f.write(struct.pack(RECORD_FORMAT, 2, 200.0, b'GOOD002'))
            # Only 10 bytes instead of a full record. Because the file is read
            # in fixed-size chunks, the third chunk is these bytes plus the
            # start of the next record, and the final chunk comes up short.
            f.write(b'\x00' * 10)  # Corrupted record
            f.write(struct.pack(RECORD_FORMAT, 4, 400.0, b'GOOD004'))

        print(f"\n✓ Created '{filename}' with mixed valid/invalid records")

        # Mode 1: STRICT
        print("\n--- Mode 1: STRICT (fail on first error) ---")
        try:
            with open(filename, 'rb') as f:
                records = []
                record_num = 0
                while True:
                    binary_data = f.read(RECORD_SIZE)
                    if not binary_data:
                        break
                    record_num += 1

                    if len(binary_data) < RECORD_SIZE:
                        raise IOError(f"Incomplete record at {record_num}")

                    record_id, value, _ = struct.unpack(RECORD_FORMAT, binary_data)
                    records.append({'id': record_id, 'value': value})
            print(f"✓ Read {len(records)} records")
        except (IOError, struct.error) as e:
            # Only I/O and unpacking problems count as "bad data" here;
            # anything else is a programming error and should propagate.
            print(f"❌ ERROR: {e} → Processing stopped")

        # Mode 2: SKIP_INVALID
        print("\n--- Mode 2: SKIP_INVALID (skip bad records) ---")
        with open(filename, 'rb') as f:
            records = []
            skipped = 0
            record_num = 0
            while True:
                binary_data = f.read(RECORD_SIZE)
                if not binary_data:
                    break
                record_num += 1

                if len(binary_data) < RECORD_SIZE:
                    print(f"  ⚠ Skipping incomplete record {record_num}")
                    skipped += 1
                    break  # a short read can only happen at end of file

                try:
                    record_id, value, _ = struct.unpack(RECORD_FORMAT, binary_data)
                except struct.error as e:
                    # Was a bare `except:` that hid every failure silently.
                    print(f"  ⚠ Skipping unreadable record {record_num}: {e}")
                    skipped += 1
                else:
                    records.append({'id': record_id, 'value': value})

        print(f"✓ Valid: {len(records)}, Skipped: {skipped}")

        # Mode 3: COLLECT_ERRORS
        print("\n--- Mode 3: COLLECT_ERRORS (collect all errors) ---")
        with open(filename, 'rb') as f:
            records = []
            errors = []
            record_num = 0
            while True:
                binary_data = f.read(RECORD_SIZE)
                if not binary_data:
                    break
                record_num += 1

                if len(binary_data) < RECORD_SIZE:
                    errors.append({'record': record_num, 'error': 'incomplete'})
                    break

                try:
                    record_id, value, _ = struct.unpack(RECORD_FORMAT, binary_data)
                except struct.error as e:
                    errors.append({'record': record_num, 'error': str(e)})
                else:
                    records.append({'id': record_id, 'value': value})

        print(f"✓ Valid: {len(records)}, Errors: {len(errors)}")
        for error in errors:
            print(f"  Record {error['record']}: {error['error']}")

        print("\n📝 Key Takeaway:")
        print("   - STRICT: Stop on first error (for critical data)")
        print("   - SKIP: Skip bad records, continue (for best effort)")
        print("   - COLLECT: Process all, collect errors (for auditing)")
    finally:
        # Always clean up the sample file, including on failure.
        if os.path.exists(filename):
            os.remove(filename)

In [None]:
# Runs Lesson 2 immediately when this statement is reached.
# NOTE(review): main() calls this lesson as well, so running the file as a
# script would execute it twice - confirm this top-level call is intended.
lesson_2_error_handling()

LESSON 3: DATA VALIDATION

In [11]:
def lesson_3_validation():
    """Lesson 3: Validate record fields for data quality.

    Writes four records to ``lesson3_validation.bin`` (current working
    directory) - one valid, and one each with a bad ID, value and code -
    then reads them back, validates every field and prints the valid and
    invalid records separately.  The file is removed afterwards, even when
    a step raises.

    Returns:
        None. All results are printed to stdout.
    """

    print("\n" + "="*70)
    print("LESSON 3: DATA VALIDATION")
    print("="*70)

    RECORD_FORMAT = '<id10s'
    RECORD_SIZE = struct.calcsize(RECORD_FORMAT)

    # Compiled once; used with fullmatch() so the whole code must match.
    CODE_PATTERN = re.compile(r'[A-Z]{4}\d{3}')

    # Simple validator
    def validate_record(record, min_id=1, max_id=1000, min_val=0.0, max_val=1000.0):
        """Check one record dict and return ``(is_valid, error_messages)``.

        ``record`` must provide the keys 'id', 'value' and 'code'.
        """
        errors = []
        if not (min_id <= record['id'] <= max_id):
            errors.append(f"ID {record['id']} out of range [{min_id}-{max_id}]")
        # Written as a positive range test so that NaN (for which every
        # comparison is False) is rejected instead of slipping through.
        if not (min_val <= record['value'] <= max_val):
            errors.append(f"Value {record['value']} out of range [{min_val}-{max_val}]")
        # fullmatch() rather than match() with '$': '$' also matches just
        # before a trailing newline, which would let 'GOOD001\n' pass.
        if not CODE_PATTERN.fullmatch(record['code']):
            errors.append(f"Code '{record['code']}' doesn't match pattern")
        return not errors, errors

    filename = 'lesson3_validation.bin'
    test_records = [
        (1, 100.0, b'GOOD001'),     # Valid
        (1500, 200.0, b'GOOD002'),  # Invalid ID
        (3, 9999.0, b'GOOD003'),    # Invalid value
        (4, 400.0, b'BAD4'),        # Invalid code
    ]

    try:
        # Create test file
        with open(filename, 'wb') as f:
            for record_id, value, code in test_records:
                f.write(struct.pack(RECORD_FORMAT, record_id, value, code))

        print(f"\n✓ Created test file with {len(test_records)} records")
        print("\nValidation rules:")
        print("  - ID range: 1-1000")
        print("  - Value range: 0.0-1000.0")
        print("  - Code pattern: 4 uppercase letters + 3 digits (e.g., 'GOOD001')")

        # Read and validate
        valid = []
        invalid = []
        with open(filename, 'rb') as f:
            record_num = 0
            while True:
                binary_data = f.read(RECORD_SIZE)
                if not binary_data:
                    break

                record_num += 1
                record_id, value, code_bytes = struct.unpack(RECORD_FORMAT, binary_data)

                record = {
                    'num': record_num,
                    'id': record_id,
                    'value': value,
                    # Strip the NUL padding added by the '10s' field.
                    'code': code_bytes.decode('ascii').rstrip('\x00'),
                }

                is_valid, errors = validate_record(record)
                if is_valid:
                    valid.append(record)
                else:
                    record['errors'] = errors
                    invalid.append(record)

        print(f"\n✓ Results: {len(valid)} valid, {len(invalid)} invalid\n")

        print("Valid records:")
        for rec in valid:
            print(f"  Record {rec['num']}: ID={rec['id']}, Value={rec['value']:.2f}, Code='{rec['code']}'")

        print("\nInvalid records:")
        for rec in invalid:
            print(f"  Record {rec['num']}: ID={rec['id']}, Value={rec['value']:.2f}, Code='{rec['code']}'")
            for error in rec['errors']:
                print(f"    ❌ {error}")

        print("\n📝 Key Takeaway:")
        print("   - Define clear validation rules")
        print("   - Validate each field (range, pattern, required)")
        print("   - Separate valid from invalid records")
        print("   - Collect detailed error messages")
    finally:
        # Always clean up the sample file, including on failure.
        if os.path.exists(filename):
            os.remove(filename)

In [None]:
# Runs Lesson 3 immediately when this statement is reached.
# NOTE(review): main() calls this lesson as well, so running the file as a
# script would execute it twice - confirm this top-level call is intended.
lesson_3_validation()

LESSON 4: FILE INTEGRITY

In [14]:
def lesson_4_integrity():
    """Lesson 4: Check file integrity before reading.

    Creates a well-formed file (``lesson4_good.bin``) and one with 10 extra
    bytes after its only record (``lesson4_truncated.bin``), both in the
    current working directory, runs the integrity check on each and prints
    the outcome.  Both files are removed afterwards, even when a step raises.

    Returns:
        None. All results are printed to stdout.
    """

    print("\n" + "="*70)
    print("LESSON 4: FILE INTEGRITY CHECKING")
    print("="*70)

    RECORD_FORMAT = '<id10s'
    RECORD_SIZE = struct.calcsize(RECORD_FORMAT)

    def check_integrity(filepath, record_size):
        """Check file integrity.

        Returns ``(is_ok, issues)`` where ``issues`` is a list of messages.
        A missing path, a non-file or an empty file ends the check at once;
        a size that is not a multiple of ``record_size`` is reported as
        trailing bytes.
        """
        issues = []
        filepath = Path(filepath)

        if not filepath.exists():
            issues.append("File does not exist")
            return False, issues

        if not filepath.is_file():
            issues.append("Not a file")
            return False, issues

        file_size = filepath.stat().st_size
        if file_size == 0:
            issues.append("File is empty")
            return False, issues

        remainder = file_size % record_size
        if remainder:
            issues.append(f"File has {remainder} trailing bytes (may be truncated)")

        return not issues, issues

    def report(title, path):
        """Run the integrity check on *path* and print the result under *title*."""
        print(f"\n{title}")
        is_valid, issues = check_integrity(path, RECORD_SIZE)
        print(f"  Result: {'✓ PASSED' if is_valid else '❌ FAILED'}")
        # Both tests now list issues the same way (with the warning marker).
        for issue in issues:
            print(f"  ⚠ {issue}")

    good_file = 'lesson4_good.bin'
    truncated_file = 'lesson4_truncated.bin'
    try:
        # Test 1: Good file (three complete records)
        with open(good_file, 'wb') as f:
            for i in range(1, 4):
                f.write(struct.pack(RECORD_FORMAT, i, float(i * 100), f'CODE{i:03d}'.encode()))
        report("Test 1: Good file", good_file)

        # Test 2: Truncated file (one record followed by a partial one)
        with open(truncated_file, 'wb') as f:
            f.write(struct.pack(RECORD_FORMAT, 1, 100.0, b'CODE001'))
            f.write(b'\x00' * 10)  # Incomplete record
        report("Test 2: Truncated file", truncated_file)

        print("\n📝 Key Takeaway:")
        print("   - Check file exists and is readable")
        print("   - Verify file size aligns with record size")
        print("   - Detect truncation early")
        print("   - Fail fast on critical issues")
    finally:
        # Always clean up both sample files, including on failure.
        for path in (good_file, truncated_file):
            if os.path.exists(path):
                os.remove(path)

In [None]:
# Runs Lesson 4 immediately when this statement is reached.
# NOTE(review): main() calls this lesson as well, so running the file as a
# script would execute it twice - confirm this top-level call is intended.
lesson_4_integrity()

LESSON 5: CHARACTER ENCODING

In [16]:
def lesson_5_encoding():
    """Lesson 5: Handle different character encodings.

    Writes two records to ``lesson5_encoding.bin`` (current working
    directory): one with a pure-ASCII code and one containing bytes above
    0x7F.  The file is then read twice - once decoding strictly as ASCII and
    once with a Latin-1 fallback - and the outcome of each pass is printed.
    The file is removed afterwards, even when a step raises.

    Returns:
        None. All results are printed to stdout.
    """

    print("\n" + "="*70)
    print("LESSON 5: CHARACTER ENCODING")
    print("="*70)

    RECORD_FORMAT = '<id10s'
    RECORD_SIZE = struct.calcsize(RECORD_FORMAT)

    def decode_with_fallback(byte_data, primary='ascii', fallback='latin-1'):
        """Decode with fallback support.

        Tries ``primary`` first, then ``fallback``; if both fail, decodes
        with ``fallback`` again using ``errors='replace'``.  Returns
        ``(text, label)`` where ``label`` names the encoding that worked.
        Trailing NUL padding is stripped from the text.
        """
        for encoding in (primary, fallback):
            try:
                return byte_data.decode(encoding).rstrip('\x00'), encoding
            except UnicodeDecodeError:
                continue
        # Last resort: never raises, undecodable bytes become U+FFFD.
        text = byte_data.decode(fallback, errors='replace').rstrip('\x00')
        return text, f'{fallback}(replace)'

    filename = 'lesson5_encoding.bin'
    test_codes = [
        b'ASCII123',      # Pure ASCII
        b'Test\x80\x90',  # Extended ASCII
    ]

    try:
        # Create test file with different encodings
        with open(filename, 'wb') as f:
            for i, code in enumerate(test_codes, 1):
                f.write(struct.pack(RECORD_FORMAT, i, float(i*100), code))

        print("\n✓ Created file with mixed encodings")

        # Test ASCII-only (will fail on the second record)
        print("\n--- Test 1: ASCII-only decoding ---")
        with open(filename, 'rb') as f:
            record_num = 0
            while True:
                binary_data = f.read(RECORD_SIZE)
                if not binary_data:
                    break
                record_num += 1
                _, _, code_bytes = struct.unpack(RECORD_FORMAT, binary_data)

                try:
                    code = code_bytes.decode('ascii').rstrip('\x00')
                except UnicodeDecodeError:
                    print(f"  ❌ Record {record_num}: Failed to decode with ASCII")
                else:
                    print(f"  ✓ Record {record_num}: '{code}'")

        # Test with fallback
        print("\n--- Test 2: With fallback to Latin-1 ---")
        with open(filename, 'rb') as f:
            record_num = 0
            while True:
                binary_data = f.read(RECORD_SIZE)
                if not binary_data:
                    break
                record_num += 1
                _, _, code_bytes = struct.unpack(RECORD_FORMAT, binary_data)

                code, encoding_used = decode_with_fallback(code_bytes)
                print(f"  ✓ Record {record_num}: '{code}' (decoded with {encoding_used})")

        print("\n📝 Key Takeaway:")
        print("   - ASCII: 0-127 (basic English)")
        print("   - Latin-1: 0-255 (Western European)")
        print("   - UTF-8: Variable length (international)")
        print("   - Always have a fallback encoding")
        print("   - Track when fallback is used")
    finally:
        # Always clean up the sample file, including on failure.
        if os.path.exists(filename):
            os.remove(filename)

In [None]:
# Runs Lesson 5 immediately when this statement is reached.
# NOTE(review): main() calls this lesson as well, so running the file as a
# script would execute it twice - confirm this top-level call is intended.
lesson_5_encoding()

LESSON 6: COMPLETE MINI READER

In [18]:
def lesson_6_complete_reader():
    """Lesson 6: Complete mini reader putting it all together.

    Defines ``MiniBinaryReader``, writes five records to
    ``lesson6_complete.bin`` (current working directory), reads them back
    through the reader and prints the statistics plus the first three
    records.  The file is removed afterwards, even when a step raises.

    Returns:
        None. All results are printed to stdout.
    """

    print("\n" + "="*70)
    print("LESSON 6: COMPLETE MINI READER")
    print("="*70)

    class MiniBinaryReader:
        """Compact but complete binary file reader.

        Records are expected to unpack into ``(id, value, code_bytes)``.
        In 'strict' mode the first bad record raises; in every other mode
        bad records are collected in ``errors`` and reading continues.
        """

        # Mode names follow the three strategies of Lesson 2.
        ERROR_MODES = ('strict', 'skip', 'collect')

        def __init__(self, record_format='<id10s', error_mode='skip'):
            """Store the struct format and error mode.

            Raises:
                ValueError: If ``error_mode`` is not one of ``ERROR_MODES``
                    (case-insensitive), so a typo cannot silently fall
                    back to non-strict behaviour.
            """
            mode = error_mode.lower()
            if mode not in self.ERROR_MODES:
                raise ValueError(
                    f"Unknown error_mode {error_mode!r}; expected one of {self.ERROR_MODES}"
                )
            self.record_format = record_format
            self.record_size = struct.calcsize(record_format)
            self.error_mode = mode
            self.errors = []

        def read_file(self, filepath):
            """Read the binary file.

            Returns a dict with the keys 'records', 'errors', 'total',
            'valid' and 'invalid'.

            Raises:
                IOError: If the file does not exist, or - in strict mode -
                    when the file ends with an incomplete record.
            """
            self.errors = []
            records = []
            record_num = 0
            path = Path(filepath)

            # Check integrity
            if not path.exists():
                raise IOError("File does not exist")

            trailing = path.stat().st_size % self.record_size
            if trailing:
                print(f"  ⚠ Warning: File has {trailing} trailing bytes")

            # Read file
            with open(path, 'rb') as f:
                while True:
                    binary_data = f.read(self.record_size)
                    if not binary_data:
                        break

                    record_num += 1

                    if len(binary_data) < self.record_size:
                        self.errors.append({'record': record_num, 'error': 'incomplete'})
                        if self.error_mode == 'strict':
                            raise IOError(f"Incomplete record {record_num}")
                        break  # a short read can only happen at end of file

                    try:
                        record_id, value, code_bytes = struct.unpack(self.record_format, binary_data)
                    except (struct.error, ValueError) as e:
                        # ValueError covers a format whose field count does
                        # not unpack into exactly three values.
                        self.errors.append({'record': record_num, 'error': str(e)})
                        if self.error_mode == 'strict':
                            raise
                        continue

                    # errors='replace' never raises; bad bytes become U+FFFD.
                    code = code_bytes.decode('ascii', errors='replace').rstrip('\x00')
                    records.append({'id': record_id, 'value': value, 'code': code})

            return {
                'records': records,
                'errors': self.errors,
                'total': record_num,
                'valid': len(records),
                'invalid': len(self.errors),
            }

    reader = MiniBinaryReader(error_mode='skip')

    filename = 'lesson6_complete.bin'
    try:
        # Create test file using the reader's own format so the two agree.
        with open(filename, 'wb') as f:
            for i in range(1, 6):
                f.write(struct.pack(reader.record_format, i, i * 111.11, f'CODE{i:03d}'.encode()))

        print("\n✓ Created test file")

        # Use the complete reader
        result = reader.read_file(filename)

        print("\n✓ Read complete!")
        print(f"  Total processed: {result['total']}")
        print(f"  Valid records: {result['valid']}")
        print(f"  Invalid records: {result['invalid']}")

        print("\nSample records:")
        for record in result['records'][:3]:
            print(f"  ID: {record['id']}, Value: {record['value']:.2f}, Code: '{record['code']}'")

        print("\n📝 Key Takeaway:")
        print("   - Combine all techniques: integrity, errors, encoding")
        print("   - Provide comprehensive results and statistics")
        print("   - Make it reusable and configurable")
        print("   - Always track errors and warnings")
    finally:
        # Always clean up the sample file, including on failure.
        if os.path.exists(filename):
            os.remove(filename)

In [None]:
# Runs Lesson 6 immediately when this statement is reached.
# NOTE(review): main() calls this lesson as well, so running the file as a
# script would execute it twice - confirm this top-level call is intended.
lesson_6_complete_reader()

MAIN EXECUTION

In [None]:
def main():
    """Run all lessons sequentially.

    Prints a banner, then waits for Enter before each of the six lessons and
    finishes with a summary.  When stdin is closed or exhausted (for example
    when the script is piped), the pauses are skipped instead of crashing
    with EOFError.
    """

    def pause(prompt):
        """Wait for Enter; carry on without waiting if stdin has no more input."""
        try:
            input(prompt)
        except EOFError:
            # input() printed the prompt without a newline; finish the line.
            print()

    print("\n" + "="*70)
    print("BINARY FILE READER - MODULAR TUTORIAL")
    print("="*70)
    print("\nThis tutorial teaches binary file reading concepts in 6 lessons.")
    print("Each lesson is self-contained and demonstrates a key concept.")

    lessons = (
        lesson_1_basic_reading,
        lesson_2_error_handling,
        lesson_3_validation,
        lesson_4_integrity,
        lesson_5_encoding,
        lesson_6_complete_reader,
    )
    for number, lesson in enumerate(lessons, 1):
        action = "start" if number == 1 else "continue to"
        pause(f"\nPress Enter to {action} Lesson {number}...")
        lesson()

    print("\n" + "="*70)
    print("TUTORIAL COMPLETE!")
    print("="*70)
    print("\nYou've learned:")
    print("  ✓ Basic binary reading with struct")
    print("  ✓ Three error handling modes")
    print("  ✓ Data validation techniques")
    print("  ✓ File integrity checking")
    print("  ✓ Character encoding with fallback")
    print("  ✓ Complete reader implementation")
    print("\nNext steps:")
    print("  • Review the Jupyter notebook for interactive practice")
    print("  • Study the full implementation in read_binary_file_improved.py")
    print("  • Adapt the MiniBinaryReader for your specific needs")
    print("\n" + "="*70 + "\n")


# Only run the interactive tutorial when executed as a script, not on import.
if __name__ == '__main__':
    main()