**Before Optimization**

In [None]:
import csv
import time
import psutil
import os

# Path of the input CSV that process_data_linearly() reads.
csv_file = 'cleaned_data.csv'
# Path of the CSV file the benchmark statistics are written to (overwritten on each run).
output_csv = 'performance_before.csv'

def process_data_linearly(path=None):
    """Read a CSV file sequentially and return its normalised data rows.

    The first row is treated as a header and discarded. Every field of every
    remaining row is stripped of surrounding whitespace and lower-cased.

    Args:
        path: Path of the CSV file to read. Defaults to the module-level
            ``csv_file`` when omitted, which preserves the original
            zero-argument call.

    Returns:
        A list of rows, each row being a list of cleaned field strings.
        An empty file (no header at all) yields an empty list.
    """
    if path is None:
        path = csv_file
    # newline='' lets the csv module do its own line-ending handling, which
    # is required for quoted fields that contain embedded newlines.
    with open(path, mode='r', newline='', encoding='utf-8') as file:
        reader = csv.reader(file)
        # Skip the header; the default avoids StopIteration on an empty file.
        next(reader, None)
        # Simulate a basic "processing" step on every field.
        return [[field.strip().lower() for field in row] for row in reader]

if __name__ == "__main__":
    # Benchmark driver: time one run of process_data_linearly(), sample CPU
    # and memory around it, print the figures and save them to output_csv.

    # Label written to the "Optimization Stage" column of the report.
    optimization_stage = "Before Optimization"

    process = psutil.Process(os.getpid())
    # Priming call: with interval=None, cpu_percent() reports utilisation
    # since the *previous* call, so this first value is meaningless and is
    # deliberately discarded. The next call then covers exactly the run.
    psutil.cpu_percent(interval=None)
    memory_start = process.memory_info().rss / (1024 * 1024)  # MB
    # perf_counter() is monotonic and high-resolution, unlike time.time().
    start_time = time.perf_counter()

    data = process_data_linearly()

    # Measure system resources and time after processing
    end_time = time.perf_counter()
    # System-wide CPU utilisation (%) over the processing window.
    cpu_usage = psutil.cpu_percent(interval=None)
    memory_end = process.memory_info().rss / (1024 * 1024)  # MB

    # Calculate stats
    total_rows = len(data)
    total_time = end_time - start_time
    memory_usage = memory_end - memory_start  # growth in resident set size
    # Guard against a zero elapsed time (tiny input / coarse clock).
    throughput = total_rows / total_time if total_time > 0 else 0.0
    # Same metric; reported under a second column name in the output.
    records_per_second = throughput

    # Print the results with two decimal points
    print(f"\n✅ Data Processing Complete ({optimization_stage})")
    print(f"📄 Total Rows: {total_rows}")
    print(f"🕒 Total Processing Time: {total_time:.2f} seconds")
    print(f"🧠 CPU Usage: {cpu_usage:.2f}%")
    print(f"💾 Memory Usage: {memory_usage:.2f} MB")
    print(f"⚡ Throughput: {throughput:.2f} records/second")
    print(f"🔹 Data processed: {total_rows} rows")
    print(f"⏱️ Time taken (before optimization): {total_time:.2f} seconds")
    print(f"📈 Records/sec: {records_per_second:.2f}")

    # Write the results to a CSV file (header row + one statistics row).
    with open(output_csv, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)

        # Write the header row with column names
        writer.writerow([
            "Optimization Stage", "Total Rows", "Total Processing Time (seconds)",
            "CPU Usage (%)", "Memory Usage (MB)", "Throughput (records/second)",
            "Data Processed (rows)", "Time Taken (seconds)", "Records per second",
        ])

        # Write the statistics row; floats are stored with two decimals.
        writer.writerow([
            optimization_stage,
            total_rows,
            f"{total_time:.2f}",
            f"{cpu_usage:.2f}",
            f"{memory_usage:.2f}",
            f"{throughput:.2f}",
            total_rows,
            f"{total_time:.2f}",
            f"{records_per_second:.2f}",
        ])

    print(f"\n📂 Statistics written to {output_csv}")



✅ Data Processing Complete (Before Optimization)
📄 Total Rows: 175545
🕒 Total Processing Time: 3.82 seconds
🧠 CPU Usage: 59.10%
💾 Memory Usage: 283.59 MB
⚡ Throughput: 45932.92 records/second
🔹 Data processed: 175545 rows
⏱️ Time taken (before optimization): 3.82 seconds
📈 Records/sec: 45932.92

📂 Statistics written to performance_before.csv
