Skip to content
This repository has been archived by the owner on Feb 25, 2018. It is now read-only.

Commit

Permalink
initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
rfk committed Jul 21, 2010
0 parents commit 5f2d0f9
Show file tree
Hide file tree
Showing 6 changed files with 444 additions and 0 deletions.
8 changes: 8 additions & 0 deletions .gitignore
@@ -0,0 +1,8 @@
*.pyc
*.pyo
*~
.*.sw*
MANIFEST
build
dist
zipimportx/tests/lib*
5 changes: 5 additions & 0 deletions ChangeLog.txt
@@ -0,0 +1,5 @@

v0.1.0:

* initial release; you might say *everything* has changed.

54 changes: 54 additions & 0 deletions README.txt
@@ -0,0 +1,54 @@


zipimportx: faster zip imports using pre-processed index files
===============================================================


This package aims to speed up imports from zipfiles by pre-computing the
"directory information" dictionary and storing it in a separate index file.
This reduces the time spent parsing information out of the zipfile.

It exports a single useful name, zipimporter, which is a drop-in replacement
for the standard zipimporter class.

To create an index for a given zipfile, do the following::

from zipimportx import zipimporter
zipimporter("mylib.zip").write_index()

This will create two files, "mylib.zip.win32.idx" and "mylib.zip.posix.idx",
containing the zipfile directory information pre-parsed and formatted to
different path-naming conventions. (Specifically, they contain a marshalled
dictionary similar to those found in zipimport._zip_directory_cache.)

To enable use of these index files, simply replace the builtin zipimport
mechanism with zipimportx by doing the following::

import zipimportx
zipimportx.zipimporter.install()

In my tests, use of these indexes speeds up the loading of a zipfile by about
a factor of 3 on Linux, and a factor of 5 on Windows.

Note that this package uses nothing but builtin modules. To bootstrap zipfile
imports for a frozen application, you can inline the module's code directly
into your application's startup script. Do this somewhere in your build::

import zipimportx
import inspect

SCRIPT = '''
%s
zipimporter.install()
import myapp
myapp.main()
''' % (inspect.getsource(zipimportx),)

freeze_this_script_somehow(SCRIPT)
zipimportx.zipimporter("path/to/frozen/library.zip").write_index()

Note also that imports will almost certainly *break* if the index does not
reflect the actual contents of the zipfile. This module is therefore most
useful for frozen apps and other situations where the zipfile is not expected
to change.

67 changes: 67 additions & 0 deletions setup.py
@@ -0,0 +1,67 @@
# Copyright (c) 2009-2010, Cloud Matrix Pty. Ltd.
# All rights reserved; available under the terms of the BSD License.

import sys

# Extra keyword arguments for setup() that only apply on some interpreters.
setup_kwds = {}
if sys.version_info > (3,):
    # On Python 3 the sources are converted with 2to3 at build time, which
    # requires setuptools rather than plain distutils.
    from setuptools import setup
    setup_kwds["test_suite"] = "zipimportx.tests"
    setup_kwds["use_2to3"] = True
else:
    from distutils.core import setup

# This awfulness is all in aid of grabbing the version number out
# of the source code, rather than having to repeat it here.  Basically,
# we collect every line of the package source up to and including the
# (contiguous run of) lines mentioning "__version__", and execute them.
# That also evaluates the module docstring, which becomes info["__doc__"].
info = {}
lines = []
with open("zipimportx/__init__.py") as src:
    seen_version = False
    for ln in src:
        if "__version__" in ln:
            seen_version = True
        elif seen_version:
            # First line after the version assignment(s): nothing below
            # here is needed, and it may not be safe to execute at build time.
            break
        lines.append(ln)
exec("".join(lines), info)


# Static distribution metadata.  VERSION and LONG_DESC are taken from the
# namespace produced by executing the head of zipimportx/__init__.py.
NAME = "zipimportx"
VERSION = info["__version__"]
DESCRIPTION = "faster zip imports using pre-processed index files"
AUTHOR = "Ryan Kelly"
AUTHOR_EMAIL = "rfk@cloudmatrix.com.au"
URL = "http://github.com/cloudmatrix/zipimportx/"
LICENSE = "BSD"
KEYWORDS = "zipfile zip import"
LONG_DESC = info["__doc__"]

# Pure-python distribution: two packages, no extensions, no data files.
PACKAGES = ["zipimportx", "zipimportx.tests"]
EXT_MODULES = []
PKG_DATA = {}

# Gather the fixed arguments first, then layer the interpreter-specific
# extras on top before handing everything to setup().
setup_args = dict(
    name=NAME,
    version=VERSION,
    author=AUTHOR,
    author_email=AUTHOR_EMAIL,
    url=URL,
    description=DESCRIPTION,
    long_description=LONG_DESC,
    keywords=KEYWORDS,
    packages=PACKAGES,
    ext_modules=EXT_MODULES,
    package_data=PKG_DATA,
    license=LICENSE,
)
setup_args.update(setup_kwds)
setup(**setup_args)

192 changes: 192 additions & 0 deletions zipimportx/__init__.py
@@ -0,0 +1,192 @@
# Copyright (c) 2009-2010, Cloud Matrix Pty. Ltd.
# All rights reserved; available under the terms of the BSD License.
"""
zipimportx: faster zip imports using pre-processed index files
===============================================================
This package aims to speed up imports from zipfiles by pre-computing the
"directory information" dictionary and storing it in a separate index file.
This reduces the time spent parsing information out of the zipfile.
It exports a single useful name, zipimporter, which is a drop-in replacement
for the standard zipimporter class.
To create an index for a given zipfile, do the following::
from zipimportx import zipimporter
zipimporter("mylib.zip").write_index()
This will create two files, "mylib.zip.win32.idx" and "mylib.zip.posix.idx",
containing the zipfile directory information pre-parsed and formatted to
different path-naming conventions. (Specifically, they contain a marshalled
dictionary similar to those found in zipimport._zip_directory_cache.)
To enable use of these index files, simply replace the builtin zipimport
mechanism with zipimportx by doing the following::
import zipimportx
zipimportx.zipimporter.install()
In my tests, use of these indexes speeds up the loading of a zipfile by about
a factor of 3 on Linux, and a factor of 5 on Windows.
Note that this package uses nothing but builtin modules. To bootstrap zipfile
imports for a frozen application, you can inline the module's code directly
into your application's startup script. Do this somewhere in your build::
import zipimportx
import inspect
SCRIPT = '''
%s
zipimporter.install()
import myapp
myapp.main()
''' % (inspect.getsource(zipimportx),)
freeze_this_script_somehow(SCRIPT)
zipimportx.zipimporter("path/to/frozen/library.zip").write_index()
Note also that imports will almost certainly *break* if the index does not
reflect the actual contents of the zipfile. This module is therefore most
useful for frozen apps and other situations where the zipfile is not expected
to change.
"""

# Version information, split into components and then formatted into the
# public version string on the last line of this group.
#
# NOTE: setup.py does not import this module; it reads this file as text,
# collects every line up to and including the line that assigns the final
# version string, and executes just that prefix.  Keep the formatted version
# assignment as the last line of this group, keep everything above it
# executable without side effects, and do not spell the dunder name of the
# version string in any line above its assignment (comments included), or
# setup.py will stop collecting too early.
__ver_major__ = 0
__ver_minor__ = 1
__ver_patch__ = 0
__ver_sub__ = ""
__ver_tuple__ = (__ver_major__,__ver_minor__,__ver_patch__,__ver_sub__)
__version__ = "%d.%d.%d%s" % __ver_tuple__


import sys
import marshal
import zipimport


class zipimporter(zipimport.zipimporter):
    """A zipimporter that can use pre-processed index files.

    On construction, if the archive is not yet present in
    ``zipimport._zip_directory_cache`` and a platform-specific index file
    ("<archive>.win32.idx" or "<archive>.posix.idx") can be loaded, the
    cache is seeded from that index so the base class does not need to
    parse the zipfile directory itself.
    """

    def __init__(self,archivepath):
        """Create an importer for 'archivepath', using its index if present.

        A missing, unreadable or corrupt index file is silently ignored and
        the base class falls back to reading the zipfile directory.
        """
        if archivepath not in zipimport._zip_directory_cache:
            #  Pre-populate the zip directory cache using the index file.
            if sys.platform == "win32":
                idxfile = archivepath+".win32.idx"
            else:
                idxfile = archivepath+".posix.idx"
            try:
                with open(idxfile,"rb") as f:
                    index = marshal.load(f)
            except (EnvironmentError,EOFError,ValueError,TypeError):
                #  EnvironmentError: no index file, or it can't be read.
                #  The others are what marshal raises for truncated or
                #  malformed data; using the index is only an optimisation,
                #  so a bad one must not stop the import from working.
                pass
            else:
                #  Only a dictionary is a usable directory cache entry.
                if isinstance(index,dict):
                    zipimport._zip_directory_cache[archivepath] = index
        super(zipimporter,self).__init__(archivepath)

    def load_module(self,fullname):
        """load_module(fullname) -> module.

        Load the module specified by 'fullname'. 'fullname' must be the
        fully qualified (dotted) module name. It returns the imported
        module, or raises ZipImportError if it wasn't found.
        """
        self._fix_filename(fullname)
        return super(zipimporter,self).load_module(fullname)

    def get_code(self,fullname):
        """get_code(fullname) -> code object.

        Return the code object for the specified module. Raise ZipImportError
        if the module couldn't be found.
        """
        self._fix_filename(fullname)
        return super(zipimporter,self).get_code(fullname)

    def _get_filename(self,fullname):
        """_get_filename(fullname) -> filename string.

        Return the filename for the specified module.
        """
        self._fix_filename(fullname)
        return super(zipimporter,self)._get_filename(fullname)

    def _fix_filename(self,fullname):
        """Fix the __file__ entry in the TOC for the given module.

        Since the pre-processed index doesn't store filename information,
        this must be added back into the TOC when it's needed.  Fortunately
        it's trivial to calculate.

        Every TOC entry that could provide 'fullname' (source or bytecode,
        plain module or package __init__) is updated in place in
        ``self._files``.  Entries that already carry a filename are left
        untouched.
        """
        SEP = "\\" if sys.platform == "win32" else "/"
        modpath = self.prefix
        #  An empty prefix means the archive root; adding a separator there
        #  would produce keys like "/foo.py" that never match the TOC.
        if modpath and not modpath.endswith(SEP):
            modpath += SEP
        #  The prefix already accounts for the parent packages, so only
        #  the last component of the dotted name is appended to it.
        modpath += fullname.rsplit(".",1)[-1]
        for suffix in (".py",".pyc",".pyo"):
            for extra in ("",SEP+"__init__"):
                path = modpath + extra + suffix
                try:
                    info = self._files[path]
                except KeyError:
                    continue
                if info[0] != "":
                    return  # already fixed
                self._files[path] = (self.archive + SEP + path,) + info[1:]

    def write_index(self):
        """Create pre-processed index files for this zipimport archive.

        This method creates files <archive>.posix.idx and <archive>.win32.idx
        containing a pre-processed index of the zipfile contents found in the
        file <archive>.  This index can then be used to speed up loading of
        the zipfile.

        Raises KeyError if the archive has no entry in
        ``zipimport._zip_directory_cache``, and EnvironmentError if an index
        file cannot be written.
        """
        toc = zipimport._zip_directory_cache[self.archive]
        #  Don't store the __file__ field, it won't be correct.
        #  Besides, we can re-create it as needed.
        #  Build a fresh dict rather than rewriting one while iterating it.
        index = dict((key,("",) + info[1:]) for (key,info) in toc.items())
        #  Store separate index for win32-format and posix-format paths.
        #  Yuck, but fixing them up at import time is a big performance hit.
        if sys.platform == "win32":
            win32_index = index
            posix_index = dict((key.replace("\\","/"),info)
                               for (key,info) in index.items())
        else:
            posix_index = index
            win32_index = dict((key.replace("/","\\"),info)
                               for (key,info) in index.items())
        with open(self.archive+".win32.idx","wb") as f:
            marshal.dump(win32_index,f)
        with open(self.archive+".posix.idx","wb") as f:
            marshal.dump(posix_index,f)

    @classmethod
    def install(cls):
        """Install this class into the import machinery.

        This class method installs the custom zipimporter class into the
        import machinery of the running process, replacing any of its
        superclasses that may be there.  If no superclass is found in
        sys.path_hooks, the class is appended to it instead.
        """
        installed = False
        for i,imp in enumerate(sys.path_hooks):
            try:
                if issubclass(cls,imp):
                    sys.path_hooks[i] = cls
                    installed = True
            except TypeError:
                #  Path hooks may be arbitrary callables, not just classes.
                pass
        if not installed:
            sys.path_hooks.append(cls)


0 comments on commit 5f2d0f9

Please sign in to comment.