In [1]:
from py7zlib import Archive7z
from zipfile import ZipFile

import os
import time

In [2]:
# Directory that holds both copies of the archive being compared.
base_path = os.path.join('/data', 'LCI', 'Ecoinvent', '3.2')

# Shared file stem: the 7z and zip copies differ only in their extension.
filename = 'current_Version_3.2_apos_lcia_ecoSpold02'
file_7z, file_zip = (
    os.path.join(base_path, filename + extension)
    for extension in ('.7z', '.zip')
)

In [3]:
# Print the on-disk size of each archive so the two containers can be compared.
for archive_path in (file_7z, file_zip):
    size_in_bytes = os.stat(archive_path).st_size
    print('filesize: %d bytes [%s]' % (size_in_bytes, archive_path))

filesize: 108591748 bytes [/data/LCI/Ecoinvent/3.2/current_Version_3.2_apos_lcia_ecoSpold02.7z]
filesize: 591290984 bytes [/data/LCI/Ecoinvent/3.2/current_Version_3.2_apos_lcia_ecoSpold02.zip]


In [4]:
# File names of the four archive members that the cells below read and time.
fnames = ('007891bb-6fc0-497e-802f-eebee2a1dfd6_359d6943-6e60-416b-93dc-9c58398b0179.spold',
          '82eeb0f7-77c4-486b-bece-e608f86f22de_f7a8981c-e94e-4fb7-908d-2786f6f0749c.spold',
          'b2c33b9b-1091-4bed-92a3-decec3b8bd78_8b2dfb58-660d-44eb-b83f-5d9e28e467db.spold',
          'f9694b1a-14f5-4c8c-863a-71132450b70f_5df09014-2ad5-467a-b48b-82e51d951f93.spold',)

# Archive member names are '/'-separated regardless of the host OS, so the
# member path is built with a literal '/' instead of os.path.join (which would
# produce a '\\' separator on Windows and fail to match the member).
#
# NOTE: `targets` is a single-use iterator. Every `next(targets)` advances it,
# so re-run this cell before re-running any cell that consumes it.
targets = ('datasets/' + fname for fname in fnames)

In [5]:
def profile(j):
    """Time reading one member from the 7z copy and the zip copy of the archive.

    Prints the member name and then, for each container in turn (7z first,
    zip second), a banner, the number of bytes read and the elapsed
    wall-clock seconds. Each measurement starts before the banner is printed
    and therefore includes opening the archive as well as reading the member.

    Parameters
    ----------
    j : str
        Name of the member to read; it must exist under the same name in
        both ``file_7z`` and ``file_zip``.

    Returns
    -------
    bool
        True if both archives yielded identical bytes for the member.

    Raises
    ------
    KeyError
        If the member is not present in one of the archives.
    """
    print(j)

    started = time.time()
    print('%30.30s' % 'Opening 7z')
    with open(file_7z, 'rb') as fp:
        member = Archive7z(fp).getmember(j)
        if member is None:
            # getmember() reports an unknown name by returning None; raise the
            # same exception type the zip branch raises instead of letting an
            # opaque AttributeError escape from None.read().
            raise KeyError('There is no item named %r in the 7z archive' % j)
        data_7z = member.read()
    print('%20.20s: %d' % ('Bytes read', len(data_7z)))
    print('%30.30s (%6f s)' % ('7z finished', time.time() - started))

    started = time.time()
    print('\n%30.30s ' % 'Opening zip')
    with ZipFile(file_zip) as archive_zip:
        # ZipFile.read() opens, reads and closes the member in a single call,
        # so no member file handle is left open.
        data_zip = archive_zip.read(j)
    print('%20.20s: %d' % ('Bytes read', len(data_zip)))
    print('%30.30s (%6f s)' % ('zip finished', time.time() - started))

    return data_7z == data_zip

In [6]:
# Pull the next member name off the `targets` iterator and time reading it
# from both archives; the returned bool is shown as this cell's output.
profile(next(targets))

datasets/007891bb-6fc0-497e-802f-eebee2a1dfd6_359d6943-6e60-416b-93dc-9c58398b0179.spold
                    Opening 7z
          Bytes read: 372040
                   7z finished (1.141800 s)

                   Opening zip 
          Bytes read: 372040
                  zip finished (0.190654 s)


True

In [7]:
# Pull the next member name off the `targets` iterator and time reading it
# from both archives; the returned bool is shown as this cell's output.
profile(next(targets))

datasets/82eeb0f7-77c4-486b-bece-e608f86f22de_f7a8981c-e94e-4fb7-908d-2786f6f0749c.spold
                    Opening 7z
          Bytes read: 364343
                   7z finished (48.180813 s)

                   Opening zip 
          Bytes read: 364343
                  zip finished (0.166550 s)


True

In [8]:
# Pull the next member name off the `targets` iterator and time reading it
# from both archives; the returned bool is shown as this cell's output.
profile(next(targets))

datasets/b2c33b9b-1091-4bed-92a3-decec3b8bd78_8b2dfb58-660d-44eb-b83f-5d9e28e467db.spold
                    Opening 7z
          Bytes read: 362035
                   7z finished (52.042006 s)

                   Opening zip 
          Bytes read: 362035
                  zip finished (0.459998 s)


True

In [9]:
# Pull the next member name off the `targets` iterator and time reading it
# from both archives; the returned bool is shown as this cell's output.
profile(next(targets))

datasets/f9694b1a-14f5-4c8c-863a-71132450b70f_5df09014-2ad5-467a-b48b-82e51d951f93.spold
                    Opening 7z
          Bytes read: 363595
                   7z finished (2.881122 s)

                   Opening zip 
          Bytes read: 363595
                  zip finished (0.134454 s)


True