In [26]:
import random
import string
from datetime import datetime
from typing import Generator

class LogGenerator:
    """Produce synthetic log lines of several kinds.

    Supported ``log_type`` values are the keys of ``LOG_TYPES``:
    ``"access"``, ``"error"`` and ``"security"``. All random values come from
    the module-level ``random`` generator, so seeding ``random`` makes the
    access and error output repeatable (security lines also embed the
    current wall-clock time).
    """

    # Maps each supported log_type to the name of the private method that
    # builds a single line of that type.
    LOG_TYPES = {
        "access": "_access_line",
        "error": "_error_line",
        "security": "_security_line",
    }

    def generate(self, log_type: str, number: int) -> Generator[str, None, None]:
        """Lazily yield ``number`` random log lines of the requested type.

        Args:
            log_type: One of the keys of ``LOG_TYPES``.
            number: How many lines to produce; zero or a negative value
                produces no lines.

        Yields:
            One formatted log line (without a trailing newline) at a time.

        Raises:
            ValueError: If ``log_type`` is not supported. Because this is a
                generator, the error surfaces when iteration starts; it is
                raised even when ``number`` is zero.
        """
        # Resolve the builder once instead of re-checking the type per line.
        build_line = self._builder_for(log_type)
        for _ in range(number):
            yield build_line()

    def to_file(self, log_type, number, file_name):
        """Write ``number`` generated log lines to ``file_name``, one per line.

        The file is opened in ``"w"`` mode, so existing content is replaced.

        Raises:
            ValueError: If ``log_type`` is not supported. The check runs
                before the file is opened, so an invalid type never creates
                or truncates ``file_name``.
        """
        self._builder_for(log_type)
        with open(file_name, "w") as f:
            f.writelines(f"{log}\n" for log in self.generate(log_type, number))

    def _builder_for(self, log_type):
        """Return the zero-argument line builder for ``log_type`` or raise ValueError."""
        try:
            method_name = self.LOG_TYPES[log_type]
        except (KeyError, TypeError):
            # TypeError covers unhashable values, which are just as invalid.
            raise ValueError("Invalid log_type") from None
        return getattr(self, method_name)

    @staticmethod
    def _access_line() -> str:
        """Build one line: method, endpoint, status code, response time, browser."""
        method = random.choice(['GET', 'POST', 'PUT', 'DELETE'])
        endpoint = random.choice(['/home', '/about', '/contact'])
        status_code = random.randint(200, 500)
        response_time = random.randint(100, 10000)
        browser = random.choice(['Chrome', 'Firefox', 'Safari', 'Edge'])
        return f"{method} {endpoint} {status_code} {response_time} {browser}"

    @staticmethod
    def _error_line() -> str:
        """Build one line of the form ``LEVEL: message at line N in file``."""
        level = random.choice(['ERROR', 'WARNING', 'INFO'])
        error_type = random.choice(['File not found', 'Syntax error', 'Database connection error'])
        line_number = random.randint(1, 100)
        file_name = random.choice(['app.py', 'database.py', 'utils.py'])
        return f"{level}: {error_type} at line {line_number} in {file_name}"

    @staticmethod
    def _security_line() -> str:
        """Build one line: event type, 8-character user id, IPv4-style address, timestamp."""
        event_type = random.choice(['LOGIN', 'LOGOUT', 'PASSWORD_CHANGE'])
        user_id = ''.join(random.choices(string.ascii_uppercase + string.digits, k=8))
        ip_address = '.'.join(str(random.randint(0, 255)) for _ in range(4))
        # The timestamp is the local time at which this line is generated.
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        return f"{event_type} {user_id} {ip_address} {timestamp}"

log_gen = LogGenerator()

# Preview ten sample lines of each supported log type: access, error, security.
for log_type in ("access", "error", "security"):
    for log in log_gen.generate(log_type, 10):
        print(log)

# Write a large batch of error logs to disk; the parsing code later in this
# notebook reads the same file back.
log_gen.to_file("error", 100_000, "large_log_file.log")

POST /contact 459 1930 Chrome
done
DELETE /contact 489 5686 Safari
done
DELETE /home 398 2438 Firefox
done
PUT /about 440 1227 Firefox
done
DELETE /about 355 6852 Edge
done
POST /contact 337 3475 Chrome
done
GET /about 208 8365 Firefox
done
DELETE /about 222 8541 Chrome
done
GET /contact 405 8105 Safari
done
POST /home 305 9200 Safari
done
ERROR: Syntax error at line 17 in database.py
ERROR: Database connection error at line 46 in database.py
ERROR: File not found at line 57 in app.py
ERROR: Database connection error at line 95 in app.py
ERROR: Database connection error at line 83 in database.py
ERROR: Database connection error at line 68 in app.py
ERROR: File not found at line 37 in utils.py
LOGIN 6VKURXLL 66.178.155.198 2023-08-20 00:55:02
LOGIN NSAW0DEZ 192.56.150.198 2023-08-20 00:55:02
LOGOUT S7WIFC2A 169.126.78.41 2023-08-20 00:55:02
PASSWORD_CHANGE U1LIIICM 215.6.189.28 2023-08-20 00:55:02
PASSWORD_CHANGE 60UV52HC 11.146.209.109 2023-08-20 00:55:02
LOGOUT E75Y5HEG 134.193.141.71

In [37]:
from pathlib import Path

def yield_lines_from(file_path: Path | str) -> Generator[str, None, None]:
    with open(file_path, 'r') as file:
        for line in file:
            yield line.strip()

def parse_error_log(log_line: str) -> Generator[dict, None, None]:
    """Parse one error-log line of the form ``LEVEL: message at line N in file``.

    This is a generator that yields exactly one dict, so callers consume it
    with ``yield from`` or by iterating.

    Args:
        log_line: A single log line without its trailing newline.

    Yields:
        A dict with the keys ``"level"`` (str), ``"error_type"`` (str),
        ``"line_number"`` (int) and ``"file_name"`` (str).

    Raises:
        IndexError: If the ``": "``, ``" at line "`` or ``" in "`` separator
            is missing.
        ValueError: If the line number is not an integer.
    """
    # Split on the first ": " only, so a message such as "Config: missing key"
    # stays intact instead of being cut at its own colon.
    level_and_rest = log_line.split(": ", 1)
    level = level_and_rest[0]
    remainder = level_and_rest[1]

    # The location suffix comes last, so anchor on the final " at line ";
    # the message itself may contain that phrase.
    message_and_location = remainder.rsplit(" at line ", 1)
    error_type = message_and_location[0]
    location = message_and_location[1]

    # Split on the first " in " only, so file names containing " in " are
    # not truncated.
    number_and_file = location.split(" in ", 1)
    line_number = int(number_and_file[0])
    file_name = number_and_file[1]

    yield {
        "level": level,
        "error_type": error_type,
        "line_number": line_number,
        "file_name": file_name,
    }

def process_lines(lines: Generator[str, None, None]) -> Generator[dict, None, None]:
    """Parse every raw error-log line and stream out the resulting records.

    Args:
        lines: An iterable of error-log lines, consumed lazily.

    Yields:
        Whatever ``parse_error_log`` yields for each input line, in input
        order.
    """
    for raw_line in lines:
        for record in parse_error_log(raw_line):
            yield record
    

# lines_generator = yield_lines_from('large_log_file.log')
# processed_lines_generator = process_lines(lines_generator)
# for i, data in enumerate(processed_lines_generator):
#     if i > 10:
#         break
#     # Process data further
#     print(data)

# Print only the first 11 parsed records (indices 0 through 10), then stop
# pulling lines from the file.
for i, data in enumerate(process_lines(yield_lines_from('large_log_file.log'))):
    if i <= 10:
        # Process data further
        print(data)
        continue
    break


{'level': 'ERROR', 'error_type': 'Syntax error', 'line_number': 51, 'file_name': 'database.py'}
{'level': 'INFO', 'error_type': 'Database connection error', 'line_number': 60, 'file_name': 'app.py'}
{'level': 'ERROR', 'error_type': 'Syntax error', 'line_number': 59, 'file_name': 'utils.py'}
{'level': 'INFO', 'error_type': 'Syntax error', 'line_number': 18, 'file_name': 'database.py'}
{'level': 'ERROR', 'error_type': 'File not found', 'line_number': 2, 'file_name': 'database.py'}
{'level': 'INFO', 'error_type': 'File not found', 'line_number': 12, 'file_name': 'app.py'}
{'level': 'ERROR', 'error_type': 'Syntax error', 'line_number': 12, 'file_name': 'utils.py'}
