In [20]:
import os
import gzip
import pickle
import io
import time
import tempfile

class DummyJob:
    """Minimal stand-in for a job: holds a payload and knows its output paths."""

    def __init__(self, data):
        # Payload that is serialized together with the instance.
        self.data = data

    def _sis_path(self, path_type):
        """Return the temp-directory path of the gzipped pickle for ``path_type``."""
        file_name = f"dummy_job_{path_type}.pkl.gz"
        return os.path.join(tempfile.gettempdir(), file_name)

def original_approach(job):
    """Pickle ``job`` directly into a gzip file on disk and time the operation.

    The gzip stream is opened on ``job._sis_path("original")`` and the pickle
    is written through it, so compression and disk writes are interleaved.

    Args:
        job: Picklable object exposing ``_sis_path(path_type)``.

    Returns:
        Elapsed time in seconds (float).
    """
    # perf_counter() is monotonic and high-resolution; time.time() follows the
    # wall clock and can jump if the system clock is adjusted mid-measurement.
    start_time = time.perf_counter()
    with gzip.open(job._sis_path("original"), "wb") as f:
        pickle.dump(job, f)
    return time.perf_counter() - start_time

def new_approach(job):
    """Build the gzipped pickle of ``job`` in memory, then write it in one go.

    The pickle is compressed into an in-memory buffer first; the finished
    bytes are then written to ``job._sis_path("new")`` with a single write.

    Args:
        job: Picklable object exposing ``_sis_path(path_type)``.

    Returns:
        Elapsed time in seconds (float), covering both the in-memory
        compression and the write to disk.
    """
    # perf_counter() is monotonic and high-resolution; time.time() follows the
    # wall clock and can jump if the system clock is adjusted mid-measurement.
    start_time = time.perf_counter()

    # Create gzipped pickle in memory
    with io.BytesIO() as memory_file:
        with gzip.GzipFile(fileobj=memory_file, mode='wb', compresslevel=9) as gzip_file:
            pickle.dump(job, gzip_file, protocol=pickle.HIGHEST_PROTOCOL)
        # Read the buffer only after the GzipFile is closed, so the gzip
        # trailer has been flushed into it.
        gzipped_pickle = memory_file.getvalue()

    # Write to disk
    with open(job._sis_path("new"), 'wb') as f:
        f.write(gzipped_pickle)

    return time.perf_counter() - start_time

# Benchmark payload: a single job wrapping ten million integers
dummy_job = DummyJob(list(range(10_000_000)))

# Time each approach num_runs times (currently a single round), running the
# original approach first and the new approach second within every round
num_runs = 1
original_times, new_times = [], []

for _ in range(num_runs):
    original_times.append(original_approach(dummy_job))
    new_times.append(new_approach(dummy_job))

# Report the mean wall time per approach
print(f"Original approach average time: {sum(original_times) / num_runs:.4f} seconds")
print(f"New approach average time: {sum(new_times) / num_runs:.4f} seconds")

# Round-trip check: load both files back and compare the unpickled payloads
# (both files were written by this cell, so unpickling them is trusted)
with gzip.open(dummy_job._sis_path("original"), "rb") as f:
    original_data = pickle.load(f)

with gzip.open(dummy_job._sis_path("new"), "rb") as f:
    new_data = pickle.load(f)

print(f"Outputs are identical: {original_data.data == new_data.data}")

# Cleanup is intentionally left disabled so the files can be inspected
# afterwards; uncomment to delete them
# os.remove(dummy_job._sis_path("original"))
# os.remove(dummy_job._sis_path("new"))

Original approach average time: 0.1288 seconds
New approach average time: 0.1279 seconds
Outputs are identical: True


In [19]:
# Display the temp-dir path used for the "original" gzipped pickle
dummy_job._sis_path("original")

'/var/folders/zr/1470x5fs5ndbwlp_skwdntcc0000gn/T/dummy_job_original.pkl.gz'

In [12]:
# os.remove(dummy_job._sis_path("original"))