# MD5 Checksum Verification Tests

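This notebook exercises the `VerifyMD5` class from `verify_md5sum_file`: it creates temporary test files, writes a checksum file for them, verifies the files against both correct and deliberately invalid checksums, and inspects the JSON state file written by each run.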

In [18]:
import hashlib
import json
import os
import shutil
import tempfile

from verify_md5sum_file import VerifyMD5

## Create Test Files

In [19]:
# Create a scratch directory that holds every file produced by this notebook.
temp_dir = tempfile.mkdtemp()
print(f"Created temporary directory: {temp_dir}")

# Test files with known content, keyed by file name.
test_files = {
    "file1.txt": "This is test file 1.",
    "file2.txt": "This is test file 2 with different content."
}

for filename, content in test_files.items():
    file_path = os.path.join(temp_dir, filename)
    # The expected digests are computed from content.encode() (UTF-8), so the
    # bytes on disk must be exactly those bytes: pin the encoding and disable
    # newline translation instead of relying on platform defaults.
    with open(file_path, 'w', encoding='utf-8', newline='') as f:
        f.write(content)
    print(f"Created test file: {file_path}")

Created temporary directory: /tmp/tmpe8ish1ib
Created test file: /tmp/tmpe8ish1ib/file1.txt
Created test file: /tmp/tmpe8ish1ib/file2.txt


## Generate MD5 Checksums

In [20]:
# Compute the expected MD5 digest of each test file from its in-memory content.
# str.encode() defaults to UTF-8, so these digests describe the files on disk
# only as long as the files hold exactly those bytes.
checksums = {}
for filename, content in test_files.items():
    md5 = hashlib.md5(content.encode()).hexdigest()
    checksums[filename] = md5
    print(f"{filename}: {md5}")

# Write the checksum file: one "<md5> <filename>" entry per line,
# separated by a single space.
checksum_file = os.path.join(temp_dir, "checksums.md5")
with open(checksum_file, 'w') as f:
    for filename, md5 in checksums.items():
        f.write(f"{md5} {filename}\n")

print(f"Created checksum file: {checksum_file}")

file1.txt: 2eeac9eefe630ce583207a174cee109d
file2.txt: b4858e2ea85a0a5bd234eddba9fcfb66
Created checksum file: /tmp/tmpe8ish1ib/checksums.md5


## Test the `VerifyMD5` Class

In [21]:
# Path of the JSON state file the verifier writes its results to.
state_file = os.path.join(temp_dir, "download_state.json")

# Initialize the VerifyMD5 class with an explicit tool label.
verifier = VerifyMD5(state_file_path=state_file, max_threads=2, tool="test")

# Load checksums from file
loaded_checksums = verifier.load_checksum_file(checksum_file)
print("Loaded checksums:")
print(loaded_checksums)

# Round-trip check: parsing the checksum file must give back exactly the
# filename -> digest mapping that was written to it, so a parsing regression
# fails the notebook instead of only changing the printed output.
assert loaded_checksums == checksums, (
    "load_checksum_file did not return the checksums written to the file"
)

Loaded checksums:
{'file1.txt': '2eeac9eefe630ce583207a174cee109d', 'file2.txt': 'b4858e2ea85a0a5bd234eddba9fcfb66'}


In [22]:
# Check every loaded checksum against the matching file under temp_dir.
verifier.verify_files(
    loaded_checksums,
    base_dir=temp_dir,
)

Thread 161447: Starting verification of file1.txt
Thread 161448: Starting verification of file2.txt
Thread 161447: Finished verification of file1.txt. Valid: True
Thread 161448: Finished verification of file2.txt. Valid: True
All verifications completed. Results saved to /tmp/tmpe8ish1ib/download_state.json


## Examine the Results

In [23]:
# Read back the state file written by the verification run.
with open(state_file, 'r') as f:
    state = json.load(f)

print(json.dumps(state, indent=4))

# Turn the visual inspection into a real test: every test file must have an
# entry, and each entry must record a successful verification with the
# expected digest.
assert set(state) == set(test_files), "state file does not cover every test file"
for name, entry in state.items():
    assert entry["status"] == "completed", f"{name}: unexpected status {entry['status']!r}"
    assert entry["checksum_valid"] is True, f"{name}: checksum reported as invalid"
    assert entry["md5"] == checksums[name], f"{name}: recorded md5 differs from expected"

{
    "file1.txt": {
        "status": "completed",
        "timestamp": "2025-03-11 11:32:00.991708",
        "md5": "2eeac9eefe630ce583207a174cee109d",
        "path": "/tmp/tmpe8ish1ib/file1.txt",
        "verified_with_md5": true,
        "checksum_valid": true,
        "tool": "test"
    },
    "file2.txt": {
        "status": "completed",
        "timestamp": "2025-03-11 11:32:00.992025",
        "md5": "b4858e2ea85a0a5bd234eddba9fcfb66",
        "path": "/tmp/tmpe8ish1ib/file2.txt",
        "verified_with_md5": true,
        "checksum_valid": true,
        "tool": "test"
    }
}


## Test with Invalid Checksums

In [24]:
# Map every test file to a value that cannot match its real MD5 digest.
invalid_checksums = dict.fromkeys(test_files, "invalid_md5_checksum")

# Use a second verifier with its own state file so this run does not touch
# the results of the valid run. `tool` is deliberately left unset here.
invalid_state_file = os.path.join(temp_dir, "invalid_state.json")
invalid_verifier = VerifyMD5(
    state_file_path=invalid_state_file,
    max_threads=2,
)

# Verification is expected to report every file as invalid.
invalid_verifier.verify_files(
    invalid_checksums,
    base_dir=temp_dir,
)

Thread 161451: Starting verification of file1.txt
Thread 161452: Starting verification of file2.txt
Thread 161451: Finished verification of file1.txt. Valid: False
Thread 161452: Finished verification of file2.txt. Valid: False
All verifications completed. Results saved to /tmp/tmpe8ish1ib/invalid_state.json


In [25]:
# Read back the state file written by the run with invalid checksums.
with open(invalid_state_file, 'r') as f:
    invalid_state = json.load(f)

print(json.dumps(invalid_state, indent=4))

# Every test file must have an entry, and each entry must record a failed
# verification. The recorded md5 is still the digest of the file itself,
# not the bogus expected value.
assert set(invalid_state) == set(test_files), "state file does not cover every test file"
for name, entry in invalid_state.items():
    assert entry["status"] == "failed", f"{name}: unexpected status {entry['status']!r}"
    assert entry["checksum_valid"] is False, f"{name}: invalid checksum was accepted"
    assert entry["md5"] == checksums[name], f"{name}: recorded md5 differs from the file's digest"

{
    "file1.txt": {
        "status": "failed",
        "timestamp": "2025-03-11 11:32:01.017682",
        "md5": "2eeac9eefe630ce583207a174cee109d",
        "path": "/tmp/tmpe8ish1ib/file1.txt",
        "verified_with_md5": true,
        "checksum_valid": false,
        "tool": "lftp"
    },
    "file2.txt": {
        "status": "failed",
        "timestamp": "2025-03-11 11:32:01.019032",
        "md5": "b4858e2ea85a0a5bd234eddba9fcfb66",
        "path": "/tmp/tmpe8ish1ib/file2.txt",
        "verified_with_md5": true,
        "checksum_valid": false,
        "tool": "lftp"
    }
}


## Cleanup

In [26]:
# Remove the scratch directory together with the test files, the checksum
# file and both state files. After this cell, temp_dir no longer exists, so
# earlier cells must be re-run from the top before any of them is repeated.
shutil.rmtree(temp_dir)
print(f"Removed temporary directory: {temp_dir}")

Removed temporary directory: /tmp/tmpe8ish1ib
