In [1]:
import os
import fnmatch
import gzip
import bz2
import re

# Generator to find all files matching a pattern in a directory tree
def gen_find(filepat, top):
    '''
    Walk the directory tree rooted at `top` and lazily yield the full path
    of every file whose name matches the shell wildcard `filepat`.

    Only file names are tested against the pattern (directory names are
    not); each result is the walked directory joined with the file name.
    '''
    for dirpath, _subdirs, filenames in os.walk(top):
        matching = fnmatch.filter(filenames, filepat)
        yield from (os.path.join(dirpath, fname) for fname in matching)

# Generator to open files one at a time
def gen_opener(filenames):
    '''
    Open a sequence of filenames one at a time, producing a file object.

    Names ending in '.gz' are opened with gzip.open, names ending in '.bz2'
    with bz2.open, and everything else with the built-in open(); all are
    opened in text mode ('rt').

    The file is closed as soon as the consumer proceeds to the next
    iteration. It is also closed if the consumer abandons the iteration
    early (the generator is closed) or an exception propagates through the
    generator, so no file handle is leaked.
    '''
    for filename in filenames:
        # Pick the opener from the file extension.
        if filename.endswith('.gz'):
            opener = gzip.open
        elif filename.endswith('.bz2'):
            opener = bz2.open
        else:
            opener = open
        # Yielding inside the with-block guarantees close() runs even when
        # the generator is resumed with GeneratorExit or another exception,
        # which a plain close() placed after the yield would skip.
        with opener(filename, 'rt') as f:
            yield f

# Generator to concatenate multiple iterators into a single sequence
def gen_concatenate(iterators):
    '''
    Flatten a sequence of iterables into one continuous stream of items.

    Each iterable is consumed completely, in order, before the next one is
    requested from `iterators`.
    '''
    for source in iterators:
        # Hand control to the current iterable until it is exhausted.
        yield from source

# Generator to filter lines matching a regex pattern
def gen_grep(pattern, lines):
    '''
    Lazily yield, unchanged, every line from `lines` in which the regular
    expression `pattern` matches somewhere (re.search semantics).
    '''
    # Compile once and keep the bound search method for the filter below.
    search = re.compile(pattern).search
    yield from (text for text in lines if search(text))

# Main pipeline
if __name__ == '__main__':
    # Demo: build three small sample logs (gzip, bz2 and plain text) under
    # ./logs, then run them through the generator pipeline to total the
    # byte counts of every request whose line mentions "python".

    # Create a sample directory structure and log files
    log_dir = 'logs'
    os.makedirs(log_dir, exist_ok=True)

    # Sample log file 1: access-log-012007.gz
    with gzip.open(os.path.join(log_dir, 'access-log-012007.gz'), 'wt') as f:
        f.write('124.115.6.12 - - [10/Jul/2012:00:18:50 -0500] "GET /robots.txt ..." 200 71\n')
        f.write('210.212.209.67 - - [10/Jul/2012:00:18:51 -0500] "GET /ply/ ..." 200 11875\n')
        f.write('210.212.209.67 - - [10/Jul/2012:00:18:51 -0500] "GET /python/ ..." 200 369\n')

    # Sample log file 2: access-log-022007.bz2
    with bz2.open(os.path.join(log_dir, 'access-log-022007.bz2'), 'wt') as f:
        f.write('61.135.216.105 - - [10/Jul/2012:00:20:04 -0500] "GET /blog/atom.xml ..." 304 -\n')
        f.write('210.212.209.67 - - [10/Jul/2012:00:18:51 -0500] "GET /python/tutorial ..." 200 1234\n')

    # Sample log file 3: access-log-032007 (uncompressed)
    with open(os.path.join(log_dir, 'access-log-032007'), 'wt') as f:
        f.write('124.115.6.12 - - [10/Jul/2012:00:18:50 -0500] "GET /robots.txt ..." 200 71\n')
        f.write('210.212.209.67 - - [10/Jul/2012:00:18:51 -0500] "GET /python/docs ..." 200 5678\n')

    # Define the directory and file pattern
    file_pattern = 'access-log*'  # Pattern to match log files

    # Step 1: Find all log files matching the pattern
    lognames = gen_find(file_pattern, log_dir)

    # Step 2: Open each file one at a time
    files = gen_opener(lognames)

    # Step 3: Concatenate all lines from all files into a single sequence
    lines = gen_concatenate(files)

    # Step 4: Filter lines containing the word "python" (case-insensitive)
    pylines = gen_grep('(?i)python', lines)

    # Step 5: Extract the byte count from each line (last column)
    bytecolumn = (line.rsplit(None, 1)[1] for line in pylines)

    # Step 6: Convert byte counts to integers (ignore lines with '-').
    # Named byte_counts rather than `bytes` so the builtin type is not
    # shadowed at module level.
    byte_counts = (int(x) for x in bytecolumn if x != '-')

    # Step 7: Sum the total bytes transferred
    total_bytes = sum(byte_counts)

    # Output the result
    print(f'Total bytes transferred: {total_bytes}')

Total bytes transferred: 7281
