In [None]:
import os
import re
import sys
import zipfile
from collections import defaultdict

In [None]:
def extract_zip(zip_path, extract_to="logs"):
    """
    Extracts a ZIP file to the specified directory and returns its log file.

    The returned path comes from the archive's own member list rather than
    from a listing of ``extract_to``, so files that were already present in
    that directory (or sub-directories inside the archive) are never mistaken
    for the log file.

    :param zip_path: Path to the ZIP file
    :param extract_to: Directory to extract files to (created if missing)
    :return: Path of the first regular file stored in the archive
             (assumes a single large log file)
    :raises SystemExit: With exit code 1 if the archive contains no files
    """
    os.makedirs(extract_to, exist_ok=True)

    extracted_files = []
    with zipfile.ZipFile(zip_path, "r") as zip_ref:
        for member in zip_ref.infolist():
            # extract() returns the sanitised on-disk path of the member,
            # which is exactly the path the caller has to open afterwards.
            target = zip_ref.extract(member, extract_to)
            if not member.is_dir():
                extracted_files.append(target)

    if not extracted_files:
        print("❌ No files found in the ZIP archive.")
        sys.exit(1)

    # Assume the first file stored in the archive is the log file (adjust if needed)
    log_file = extracted_files[0]
    print(f"✅ Log file extracted: {log_file}")

    return log_file

In [None]:
def extract_logs_by_date(log_file, output_dir="output"):
    """
    Reads a large log file (~1TB) and extracts logs for each day into separate output files.

    The file is streamed line by line. A line belongs to the date given by a
    leading ``YYYY-MM-DD`` stamp; lines without such a stamp (blank lines,
    multi-line messages, stack traces) are kept with the most recent dated
    entry. Undated lines that appear before the first dated entry are written
    to ``output_undated.txt`` so that nothing is dropped.

    Only a bounded number of output files is kept open at once; when the
    limit is reached the least recently used one is closed and, if its date
    shows up again, reopened in append mode.

    I/O and decoding errors are reported on stdout and not re-raised.

    :param log_file: Path to the extracted log file
    :param output_dir: Directory that receives the ``output_<date>.txt`` files
    """
    os.makedirs(output_dir, exist_ok=True)  # Ensure output directory exists

    # Leading date stamp; the lookahead rejects longer digit runs such as "2024-01-015".
    date_pattern = re.compile(r"\d{4}-\d{2}-\d{2}(?!\d)")
    max_open = 64  # Stay well below typical per-process file descriptor limits

    open_handles = {}  # date -> handle, ordered from least to most recently used
    started = set()  # Dates whose output file has already been created in this run

    try:
        with open(log_file, "r", encoding="utf-8") as infile:
            date = "undated"
            active_date = None
            active_file = None

            for line in infile:
                match = date_pattern.match(line)
                if match:
                    date = match.group(0)

                # Only touch the handle table when the date actually changes;
                # consecutive lines of the same day reuse the cached handle.
                if date != active_date:
                    active_file = open_handles.pop(date, None)
                    if active_file is None:
                        if len(open_handles) >= max_open:
                            oldest = next(iter(open_handles))
                            open_handles.pop(oldest).close()
                        # Truncate on first use, append when reopening after eviction
                        mode = "a" if date in started else "w"
                        active_file = open(
                            os.path.join(output_dir, f"output_{date}.txt"),
                            mode,
                            encoding="utf-8",
                        )
                        started.add(date)
                    open_handles[date] = active_file  # (Re)insert as most recently used
                    active_date = date

                active_file.write(line)  # Write log entry to the corresponding file

    except (OSError, UnicodeError) as e:
        print(f"❌ Error processing log file: {e}")

    else:
        print(f"✅ Logs for all days saved in '{output_dir}' directory.")

    finally:
        # Close all open file handles
        for handle in open_handles.values():
            handle.close()

In [None]:
if __name__ == "__main__":
    # The command-line form (python process_logs.py <zip_file>, reading the
    # path from sys.argv[1]) is disabled here; the archive path is set
    # directly instead.
    zip_file = "your_zip_file.zip"  # Replace with the actual path to your ZIP file

    if os.path.exists(zip_file):
        # Unpack the archive, then split the extracted log into per-day files
        log_file = extract_zip(zip_file)
        extract_logs_by_date(log_file)
    else:
        print(f"❌ Error: ZIP file '{zip_file}' not found.")
        sys.exit(1)