Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

initial commit

  • Loading branch information...
commit 5f2d0f9b1e59a5865f1b429b9094b66cc7ea1659 0 parents
@rfk rfk authored
8 .gitignore
@@ -0,0 +1,8 @@
+*.pyc
+*.pyo
+*~
+.*.sw*
+MANIFEST
+build
+dist
+zipimportx/tests/lib*
5 ChangeLog.txt
@@ -0,0 +1,5 @@
+
+v0.1.0:
+
+ * initial release; you might say *everything* has changed.
+
54 README.txt
@@ -0,0 +1,54 @@
+
+
+zipimportx: faster zip imports using pre-processed index files
+===============================================================
+
+
+This package aims to speed up imports from zipfiles, by pre-computing the
+"directory information" dictionary and storing it in a separate index file.
+This reduces the time spent parsing information out of the zipfile.
+
+It exports a single useful name, zipimporter, which is a drop-in replacement
+for the standard zipimporter class.
+
+To create an index for a given zipfile, do the following::
+
+ from zipimportx import zipimporter
+ zipimporter("mylib.zip").write_index()
+
+This will create two files, "mylib.zip.win32.idx" and "mylib.zip.posix.idx",
+containing the zipfile directory information pre-parsed and formatted to
+different path-naming conventions. (Specifically, they contain a marshalled
+dictionary similar to those found in zipimport._zip_directory_cache.)
+
+To enable use of these index files, simply replace the builtin zipimport
+mechanism with zipimportx by doing the following::
+
+ import zipimportx
+ zipimportx.zipimporter.install()
+
+In my tests, use of these indexes speeds up the loading of a zipfile by about
+a factor of 3 on Linux, and a factor of 5 on Windows.
+
+Note that this package uses nothing but builtin modules. To bootstrap zipfile
+imports for a frozen application, you can inline the module's code directly
+into your application's startup script. Do this somewhere in your build::
+
+ import zipimportx
+ import inspect
+
+ SCRIPT = '''
+ %s
+ zipimporter.install()
+ import myapp
+ myapp.main()
+ ''' % (inspect.getsource(zipimportx),)
+
+ freeze_this_script_somehow(SCRIPT)
+ zipimportx.zipimporter("path/to/frozen/library.zip").write_index()
+
+Note also that imports will almost certainly *break* if the index does not
+reflect the actual contents of the zipfile. This module is therefore most
+useful for frozen apps and other situations where the zipfile is not expected
+to change.
+
67 setup.py
@@ -0,0 +1,67 @@
+# Copyright (c) 2009-2010, Cloud Matrix Pty. Ltd.
+# All rights reserved; available under the terms of the BSD License.
+
import sys

# Extra keyword arguments for setup(); only populated on Python 3.
setup_kwds = {}
if sys.version_info > (3,):
    # On Python 3 the sources are converted with 2to3, which needs setuptools.
    from setuptools import setup
    setup_kwds["test_suite"] = "zipimportx.tests"
    setup_kwds["use_2to3"] = True
else:
    from distutils.core import setup

# This awfulness is all in aid of grabbing the version number out
# of the source code, rather than having to repeat it here.  Basically,
# we collect every line of the package source up to and including the
# run of lines mentioning "__version__", and execute them.  Because the
# module docstring comes before the version lines, this also yields
# "__doc__", which is reused as the long description.
info = {}
lines = []
found_version = False
with open("zipimportx/__init__.py") as src:
    for ln in src:
        if "__version__" in ln:
            found_version = True
        elif found_version:
            # First line after the version block: nothing more is needed.
            break
        lines.append(ln)
if not found_version:
    raise RuntimeError("no __version__ line found in zipimportx/__init__.py")
exec("".join(lines), info)


NAME = "zipimportx"
VERSION = info["__version__"]
DESCRIPTION = "faster zip imports using pre-processed index files"
AUTHOR = "Ryan Kelly"
AUTHOR_EMAIL = "rfk@cloudmatrix.com.au"
URL = "http://github.com/cloudmatrix/zipimportx/"
LICENSE = "BSD"
KEYWORDS = "zipfile zip import"
LONG_DESC = info["__doc__"]

PACKAGES = ["zipimportx", "zipimportx.tests"]
EXT_MODULES = []
PKG_DATA = {}

setup(name=NAME,
      version=VERSION,
      author=AUTHOR,
      author_email=AUTHOR_EMAIL,
      url=URL,
      description=DESCRIPTION,
      long_description=LONG_DESC,
      keywords=KEYWORDS,
      packages=PACKAGES,
      ext_modules=EXT_MODULES,
      package_data=PKG_DATA,
      license=LICENSE,
      **setup_kwds
      )

+
192 zipimportx/__init__.py
@@ -0,0 +1,192 @@
+# Copyright (c) 2009-2010, Cloud Matrix Pty. Ltd.
+# All rights reserved; available under the terms of the BSD License.
+"""
+
+zipimportx: faster zip imports using pre-processed index files
+===============================================================
+
+
+This package aims to speed up imports from zipfiles, by pre-computing the
+"directory information" dictionary and storing it in a separate index file.
+This reduces the time spent parsing information out of the zipfile.
+
+It exports a single useful name, zipimporter, which is a drop-in replacement
+for the standard zipimporter class.
+
+To create an index for a given zipfile, do the following::
+
+ from zipimportx import zipimporter
+ zipimporter("mylib.zip").write_index()
+
+This will create two files, "mylib.zip.win32.idx" and "mylib.zip.posix.idx",
+containing the zipfile directory information pre-parsed and formatted to
+different path-naming conventions. (Specifically, they contain a marshalled
+dictionary similar to those found in zipimport._zip_directory_cache.)
+
+To enable use of these index files, simply replace the builtin zipimport
+mechanism with zipimportx by doing the following::
+
+ import zipimportx
+ zipimportx.zipimporter.install()
+
+In my tests, use of these indexes speeds up the loading of a zipfile by about
+a factor of 3 on Linux, and a factor of 5 on Windows.
+
+Note that this package uses nothing but builtin modules. To bootstrap zipfile
+imports for a frozen application, you can inline the module's code directly
+into your application's startup script. Do this somewhere in your build::
+
+ import zipimportx
+ import inspect
+
+ SCRIPT = '''
+ %s
+ zipimporter.install()
+ import myapp
+ myapp.main()
+ ''' % (inspect.getsource(zipimportx),)
+
+ freeze_this_script_somehow(SCRIPT)
+ zipimportx.zipimporter("path/to/frozen/library.zip").write_index()
+
+Note also that imports will almost certainly *break* if the index does not
+reflect the actual contents of the zipfile. This module is therefore most
+useful for frozen apps and other situations where the zipfile is not expected
+to change.
+
+"""
+
+__ver_major__ = 0
+__ver_minor__ = 1
+__ver_patch__ = 0
+__ver_sub__ = ""
+__ver_tuple__ = (__ver_major__,__ver_minor__,__ver_patch__,__ver_sub__)
+__version__ = "%d.%d.%d%s" % __ver_tuple__
+
+
+import sys
+import marshal
+import zipimport
+
+
class zipimporter(zipimport.zipimporter):
    """A zipimporter that can use pre-processed index files.

    This behaves like the standard zipimport.zipimporter, except that when
    an index file written by write_index() sits next to the archive, the
    archive's entry in zipimport._zip_directory_cache is pre-populated from
    that index before the base class is initialised.
    """

    def __init__(self, archivepath):
        """Create the importer, seeding the directory cache from an index.

        If 'archivepath' has no entry in zipimport._zip_directory_cache, the
        platform-specific index file ("<archivepath>.win32.idx" on win32,
        "<archivepath>.posix.idx" elsewhere) is loaded when possible.  A
        missing, unreadable or malformed index is ignored, leaving the base
        class to handle the archive as usual.
        """
        if archivepath not in zipimport._zip_directory_cache:
            # Pre-populate the zip directory cache using the index file.
            if sys.platform == "win32":
                idxfile = archivepath + ".win32.idx"
            else:
                idxfile = archivepath + ".posix.idx"
            try:
                with open(idxfile, "rb") as f:
                    index = marshal.load(f)
            except (EnvironmentError, EOFError, ValueError, TypeError):
                # EnvironmentError: the index is missing or unreadable.
                # The others are what marshal.load raises for truncated or
                # corrupt data; treat that the same as having no index.
                index = None
            if isinstance(index, dict):
                zipimport._zip_directory_cache[archivepath] = index
        super(zipimporter, self).__init__(archivepath)

    def load_module(self, fullname):
        """load_module(fullname) -> module.

        Load the module specified by 'fullname'. 'fullname' must be the
        fully qualified (dotted) module name. It returns the imported
        module, or raises ZipImportError if it wasn't found.
        """
        self._fix_filename(fullname)
        return super(zipimporter, self).load_module(fullname)

    def get_code(self, fullname):
        """get_code(fullname) -> code object.

        Return the code object for the specified module. Raise ZipImportError
        if the module couldn't be found.
        """
        self._fix_filename(fullname)
        return super(zipimporter, self).get_code(fullname)

    def _get_filename(self, fullname):
        """_get_filename(fullname) -> filename string.

        Return the filename for the specified module.
        """
        self._fix_filename(fullname)
        return super(zipimporter, self)._get_filename(fullname)

    def _fix_filename(self, fullname):
        """Fix the __file__ entry in the TOC for the given module.

        Since the pre-processed index doesn't store filename information,
        this must be added back into the TOC when it's needed.  Fortunately
        it's trivial to calculate.

        The TOC key is the importer's prefix followed by the last component
        of 'fullname', matching how zipimport itself locates a module.
        """
        sep = "\\" if sys.platform == "win32" else "/"
        modpath = self.prefix
        # An empty prefix means the archive root; adding a separator there
        # would produce keys that never appear in the TOC.
        if modpath and not modpath.endswith(sep):
            modpath += sep
        modpath += fullname.rpartition(".")[2]
        for suffix in (".py", ".pyc", ".pyo"):
            for extra in ("", sep + "__init__"):
                path = modpath + extra + suffix
                try:
                    info = self._files[path]
                except KeyError:
                    continue
                if info[0] != "":
                    return  # already fixed
                self._files[path] = (self.archive + sep + path,) + info[1:]

    def write_index(self):
        """Create pre-processed index files for this zipimport archive.

        This method creates files <archive>.posix.idx and <archive>.win32.idx
        containing a pre-processed index of the zipfile contents found in the
        file <archive>.  This index can then be used to speed up loading of
        the zipfile.
        """
        cached = zipimport._zip_directory_cache[self.archive]
        # Don't store the __file__ field, it won't be correct.
        # Besides, we can re-create it as needed.
        index = {}
        for (key, info) in cached.items():
            # Entries that carry no metadata tuple are stored unchanged.
            index[key] = (("",) + info[1:]) if info else info
        # Store separate index for win32-format and posix-format paths.
        # Yuck, but fixing them up at import time is a big performance hit.
        if sys.platform == "win32":
            win32_index = index
            posix_index = {}
            for (key, info) in index.items():
                posix_index[key.replace("\\", "/")] = info
        else:
            posix_index = index
            win32_index = {}
            for (key, info) in index.items():
                win32_index[key.replace("/", "\\")] = info
        with open(self.archive + ".win32.idx", "wb") as f:
            marshal.dump(win32_index, f)
        with open(self.archive + ".posix.idx", "wb") as f:
            marshal.dump(posix_index, f)

    @classmethod
    def install(cls):
        """Install this class into the import machinery.

        This class method installs the custom zipimporter class into the import
        machinery of the running process, replacing any of its superclasses
        that may be there.  If no superclass is found in sys.path_hooks, the
        class is appended instead.
        """
        installed = False
        for i, imp in enumerate(sys.path_hooks):
            try:
                if issubclass(cls, imp):
                    sys.path_hooks[i] = cls
                    installed = True
            except TypeError:
                # Path hooks need not be classes; skip plain callables.
                pass
        if not installed:
            sys.path_hooks.append(cls)

+
+
118 zipimportx/tests/__init__.py
@@ -0,0 +1,118 @@
+
+import os
+import unittest
+import timeit
+import zipimport
+import zipfile
+
+import zipimportx
+
+# I don't actually use these, I just add them to a zipfile.
+# They're here so I can grab __file__ off them.
+import distutils
+import logging
+import email
+import sqlite3
+import ctypes
+
# Directory holding the standard library modules that get zipped up below.
# Located via the "os" module, which is a single file in the library
# directory; "unittest" is a package on newer Pythons, so the directory of
# its __file__ would be the unittest package rather than the library itself.
LIBHOME = os.path.dirname(os.__file__)
+
+
class TestZipImportX(unittest.TestCase):
    """Timing, size and README-sync checks for zipimportx index files.

    The test zipfiles are built on demand next to this module and reused
    on later runs if they already exist.
    """

    def _libpath(self, libnm):
        """Return the absolute path of test zipfile 'libnm' beside this file."""
        return os.path.abspath(os.path.join(os.path.dirname(__file__), libnm))

    def setUp(self):
        """Build libsmall.zip, libmedium.zip and liblarge.zip if missing."""
        lib = self._libpath("libsmall.zip")
        if not os.path.exists(lib):
            zf = zipfile.PyZipFile(lib, "w")
            try:
                zf.writepy(os.path.dirname(zipimportx.__file__))
                zf.writepy(os.path.dirname(distutils.__file__))
                zf.writepy(os.path.dirname(logging.__file__))
                zf.writepy(os.path.dirname(email.__file__))
                zf.writepy(os.path.dirname(sqlite3.__file__))
                zf.writepy(os.path.dirname(ctypes.__file__))
            finally:
                zf.close()
        lib = self._libpath("libmedium.zip")
        if not os.path.exists(lib):
            zf = zipfile.PyZipFile(lib, "w")
            try:
                zf.writepy(LIBHOME)
            finally:
                zf.close()
        lib = self._libpath("liblarge.zip")
        if not os.path.exists(lib):
            zf = zipfile.PyZipFile(lib, "w")
            try:
                zf.writepy(LIBHOME)
                # NOTE(review): the nesting of this loop was reconstructed;
                # it assumes writepy() runs only for package directories.
                for (dirnm, subdirs, files) in os.walk(LIBHOME):
                    if "__init__.pyc" in files:
                        # Don't descend into a package after adding it.
                        del subdirs[:]
                        zf.writepy(dirnm)
            finally:
                zf.close()

    def test_performance_increase(self):
        """Indexed loading must beat plain loading by a per-library ratio."""
        ratios = {
            "libsmall.zip": 2.5,
            "libmedium.zip": 3,
            "liblarge.zip": 3.5,
        }
        for libnm in ratios:
            lib = self._libpath(libnm)
            (zt, xt) = self._do_timeit_compare(lib)
            self.assertTrue(zt / xt > ratios[libnm])

    def test_space_overhead(self):
        """Both index files have equal size, under 1/40th of the zipfile."""
        for libnm in ("libsmall.zip", "libmedium.zip", "liblarge.zip"):
            lib = self._libpath(libnm)
            zipimportx.zipimporter(lib).write_index()
            z_size = os.stat(lib).st_size
            x_size_p = os.stat(lib + ".posix.idx").st_size
            x_size_w = os.stat(lib + ".win32.idx").st_size
            self.assertEqual(x_size_p, x_size_w)
            self.assertTrue(z_size / x_size_p > 40)

    def _do_timeit_compare(self, lib):
        """Return a pair (ztime,xtime) giving unindexed and indexed times."""
        z_setupcode = "import zipimport"
        z_testcode = "zipimport._zip_directory_cache.clear(); " \
                     "zipimport.zipimporter(%r)" % (lib,)
        z_timer = timeit.Timer(z_testcode, z_setupcode)
        z_time = min(self._do_timeit3(z_timer))
        x_setupcode = "import zipimport; import zipimportx; " \
                      "zipimportx.zipimporter(%r).write_index()" % (lib,)
        x_testcode = "zipimport._zip_directory_cache.clear(); " \
                     "zipimportx.zipimporter(%r)" % (lib,)
        x_timer = timeit.Timer(x_testcode, x_setupcode)
        x_time = min(self._do_timeit3(x_timer))
        return (z_time, x_time)

    def _do_timeit3(self, t):
        """Return three independent per-call timings for timer 't'."""
        return [self._do_timeit(t) for _ in range(3)]

    def _do_timeit(self, t):
        """Return the time per call, growing the loop count tenfold until
        one run takes at least 0.2 seconds."""
        number = 10
        n = t.timeit(number)
        while n < 0.2:
            number = number * 10
            n = t.timeit(number)
        return n / number

    def test_README(self):
        """Ensure that the README is in sync with the docstring.

        This test should always pass; if the README is out of sync it just
        updates it with the contents of zipimportx.__doc__.
        """
        dirname = os.path.dirname
        readme = os.path.join(dirname(dirname(dirname(__file__))), "README.txt")
        # Compare bytes with bytes; the file is read and written in binary.
        expected = zipimportx.__doc__.encode()
        current = None
        if os.path.isfile(readme):
            with open(readme, "rb") as f:
                current = f.read()
        if current != expected:
            with open(readme, "wb") as f:
                f.write(expected)

+
Please sign in to comment.
Something went wrong with that request. Please try again.