# 构建一个模块的层级包

```
graphics/
    __init__.py
    primitive/
        __init__.py
        line.py
        fill.py
        text.py
    formats/
        __init__.py
        png.py
        jpg.py
```

 文件`__init__.py`的目的是要包含不同运行级别的包的可选的初始化代码。

# 控制模块被全部导入的内容

尽管强烈反对使用 `from module import *`, 但是在定义了大量变量名的模块中频繁使用。 如果你不做任何事, 这样的导入将会导入所有不以下划线开头的。 另一方面,如果定义了` __all__` , 那么只有被列举出的东西会被导出。

如果你将 `__all__` 定义成一个空列表, 没有东西将被导入。 如果 `__all__` 包含未定义的名字, 在导入时引起`AttributeError`。

In [1]:
# somemodule.py
def spam():
    pass

def grok():
    pass

blah = 42
# Only export 'spam' and 'grok'
__all__ = ['spam', 'grok']

# 使用相对路径名导入包中子模块

```
mypackage/
    __init__.py
    A/
        __init__.py
        spam.py
        grok.py
    B/
        __init__.py
        bar.py
```

如果模块`mypackage.A.spam`要导入同目录下的模块`grok`，它应该包括的`import`语句如下：
```python
# mypackage/A/spam.py
from . import grok
```

如果模块`mypackage.A.spam`要导入不同目录下的模块`B.bar`，它应该使用的`import`语句如下：
```python
# mypackage/A/spam.py
from ..B import bar
```

`import`语句的 `.` 和 `..` 看起来很滑稽, 但它指定目录名`.`为当前目录，`..B`为目录`../B`。这种语法只适用于`from`。
```python
from . import grok # OK
import .grok # ERROR
```

相对导入只适用于在合适的包中的模块。尤其是在顶层的脚本的简单模块中，它们将不起作用。如果包的部分被作为脚本直接执行，那它们将不起作用

使用Python的-m选项来执行先前的脚本，相对导入将会正确运行

# 将模块分割成多个文件

```python
# mymodule.py
class A:
    def spam(self):
        print('A.spam')

class B(A):
    def bar(self):
        print('B.bar')
```

```
mymodule/
    __init__.py
    a.py
    b.py
```

```python
# a.py
class A:
    def spam(self):
        print('A.spam')
```

```python
# b.py
from .a import A
class B(A):
    def bar(self):
        print('B.bar')
```

```python
# __init__.py
from .a import A
from .b import B
```

# 利用命名空间导入目录分散的代码

```python
import sys
sys.path.extend(['foo-package', 'bar-package'])
```

# 重新加载模块

```python
import imp
imp.reload(spam)
```

`reload()`没有更新像`from module import name`这样使用`import`语句导入的定义

# 运行目录或压缩文件

如果你的应用程序已经有多个文件，你可以把你的应用程序放进它自己的目录并添加一个`__main__.py`文件。 

```python
myapplication/
    spam.py
    bar.py
    grok.py
    __main__.py
```

如果`__main__.py`存在，你可以简单地在顶级目录运行Python解释器：
```bash
bash % python3 myapplication
```

将你的代码打包成zip文件，这种技术同样也适用

# 读取位于包中的数据文件

```python
mypackage/
    __init__.py
    somedata.dat
    spam.py
```

```python
# spam.py
import pkgutil
data = pkgutil.get_data(__package__, 'somedata.dat')
```

`get_data()`的第一个参数是包含包名的字符串。你可以直接使用包名，也可以使用特殊的变量，比如`__package__`。第二个参数是包内文件的相对名称。

# 文件夹加入到`sys.path`

## 使用PYTHONPATH环境变量来添加

```bash
bash % env PYTHONPATH=/some/dir:/other/dir python3
```

## 创建一个.pth文件，将目录列举出来

```python
# myapplication.pth
/some/dir
/other/dir
```

## 硬编码

```python
import sys
from os.path import abspath, join, dirname
sys.path.insert(0, join(abspath(dirname(__file__)), 'src'))
```

# 通过字符串名导入模块

In [12]:
import importlib
math = importlib.import_module('math')
math.sin(2)

0.9092974268256817

正在使用的包，`import_module()`也可用于相对导入
```python
import importlib
# Same as 'from . import b'
b = importlib.import_module('.b', __package__)
```

# 通过钩子远程加载模块

```python
testcode/
    spam.py
    fib.py
    grok/
        __init__.py
        blah.py
```

在`testcode`目录中像下面这样运行`Python`:
```bash
bash % cd testcode
bash % python3 -m http.server 15000
Serving HTTP on 0.0.0.0 port 15000 ...
```

```python
from urllib.request import urlopen
u = urlopen('http://localhost:15000/fib.py')
data = u.read().decode('utf-8')
```

## 显式的加载函数

In [14]:
import imp
import urllib.request
import sys

def load_module(url):
    u = urllib.request.urlopen(url)
    source = u.read().decode('utf-8')
    mod = sys.modules.setdefault(url, imp.new_module(url))
    code = compile(source, url, 'exec')
    mod.__file__ = url
    mod.__package__ = ''
    exec(code, mod.__dict__)
    return mod

## 元路径导入器

In [15]:
# urlimport.py
import sys
import importlib.abc
import imp
from urllib.request import urlopen
from urllib.error import HTTPError, URLError
from html.parser import HTMLParser

# Debugging
import logging
log = logging.getLogger(__name__)

# Get links from a given URL
def _get_links(url):
    class LinkParser(HTMLParser):
        def handle_starttag(self, tag, attrs):
            if tag == 'a':
                attrs = dict(attrs)
                links.add(attrs.get('href').rstrip('/'))
    links = set()
    try:
        log.debug('Getting links from %s' % url)
        u = urlopen(url)
        parser = LinkParser()
        parser.feed(u.read().decode('utf-8'))
    except Exception as e:
        log.debug('Could not get links. %s', e)
    log.debug('links: %r', links)
    return links

class UrlMetaFinder(importlib.abc.MetaPathFinder):
    def __init__(self, baseurl):
        self._baseurl = baseurl
        self._links = { }
        self._loaders = { baseurl : UrlModuleLoader(baseurl) }

    def find_module(self, fullname, path=None):
        log.debug('find_module: fullname=%r, path=%r', fullname, path)
        if path is None:
            baseurl = self._baseurl
        else:
            if not path[0].startswith(self._baseurl):
                return None
            baseurl = path[0]
        parts = fullname.split('.')
        basename = parts[-1]
        log.debug('find_module: baseurl=%r, basename=%r', baseurl, basename)

        # Check link cache
        if basename not in self._links:
            self._links[baseurl] = _get_links(baseurl)

        # Check if it's a package
        if basename in self._links[baseurl]:
            log.debug('find_module: trying package %r', fullname)
            fullurl = self.base_url + '/' + basename
            # Attempt to load the package (which accesses __init__.py)
            loader = UrlPackageLoader(fullurl)
            try:
                loader.load_module(fullname)
                self._links[fullurl] = _get_links(fullurl)
                self._loaders[fullurl] = UrlModuleLoader(fullurl)
                log.debug('find_module: package %r loaded', fullname)
            except ImportError as e:
                log.debug('find_module: package failed. %s', e)
                loader = None
            return loader
        # A normal module
        filename = basename + '.py'
        if filename in self._links[baseurl]:
            log.debug('find_module: module %r found', fullname)
            return self._loaders[baseurl]
        else:
            log.debug('find_module: module %r not found', fullname)
            return None

    def invalidate_caches(self):
        log.debug('invalidating link cache')
        self._links.clear()

# Module Loader for a URL
class UrlModuleLoader(importlib.abc.SourceLoader):
    def __init__(self, baseurl):
        self._baseurl = baseurl
        self._source_cache = {}

    def module_repr(self, module):
        return '<urlmodule %r from %r>' % (module.__name__, module.__file__)

    # Required method
    def load_module(self, fullname):
        code = self.get_code(fullname)
        mod = sys.modules.setdefault(fullname, imp.new_module(fullname))
        mod.__file__ = self.get_filename(fullname)
        mod.__loader__ = self
        mod.__package__ = fullname.rpartition('.')[0]
        exec(code, mod.__dict__)
        return mod

    # Optional extensions
    def get_code(self, fullname):
        src = self.get_source(fullname)
        return compile(src, self.get_filename(fullname), 'exec')

    def get_data(self, path):
        pass

    def get_filename(self, fullname):
        return self._baseurl + '/' + fullname.split('.')[-1] + '.py'

    def get_source(self, fullname):
        filename = self.get_filename(fullname)
        log.debug('loader: reading %r', filename)
        if filename in self._source_cache:
            log.debug('loader: cached %r', filename)
            return self._source_cache[filename]
        try:
            u = urlopen(filename)
            source = u.read().decode('utf-8')
            log.debug('loader: %r loaded', filename)
            self._source_cache[filename] = source
            return source
        except (HTTPError, URLError) as e:
            log.debug('loader: %r failed. %s', filename, e)
            raise ImportError("Can't load %s" % filename)

    def is_package(self, fullname):
        return False

# Package loader for a URL
class UrlPackageLoader(UrlModuleLoader):
    def load_module(self, fullname):
        mod = super().load_module(fullname)
        mod.__path__ = [ self._baseurl ]
        mod.__package__ = fullname

    def get_filename(self, fullname):
        return self._baseurl + '/' + '__init__.py'

    def is_package(self, fullname):
        return True

# Utility functions for installing/uninstalling the loader
_installed_meta_cache = { }
def install_meta(address):
    if address not in _installed_meta_cache:
        finder = UrlMetaFinder(address)
        _installed_meta_cache[address] = finder
        sys.meta_path.append(finder)
        log.debug('%r installed on sys.meta_path', finder)

def remove_meta(address):
    if address in _installed_meta_cache:
        finder = _installed_meta_cache.pop(address)
        sys.meta_path.remove(finder)
        log.debug('%r removed from sys.meta_path', finder)

## 编写一个钩子直接嵌入到 sys.path 变量中

In [16]:
# urlimport.py
# ... include previous code above ...
# Path finder class for a URL
class UrlPathFinder(importlib.abc.PathEntryFinder):
    def __init__(self, baseurl):
        self._links = None
        self._loader = UrlModuleLoader(baseurl)
        self._baseurl = baseurl

    def find_loader(self, fullname):
        log.debug('find_loader: %r', fullname)
        parts = fullname.split('.')
        basename = parts[-1]
        # Check link cache
        if self._links is None:
            self._links = [] # See discussion
            self._links = _get_links(self._baseurl)

        # Check if it's a package
        if basename in self._links:
            log.debug('find_loader: trying package %r', fullname)
            fullurl = self._baseurl + '/' + basename
            # Attempt to load the package (which accesses __init__.py)
            loader = UrlPackageLoader(fullurl)
            try:
                loader.load_module(fullname)
                log.debug('find_loader: package %r loaded', fullname)
            except ImportError as e:
                log.debug('find_loader: %r is a namespace package', fullname)
                loader = None
            return (loader, [fullurl])

        # A normal module
        filename = basename + '.py'
        if filename in self._links:
            log.debug('find_loader: module %r found', fullname)
            return (self._loader, [])
        else:
            log.debug('find_loader: module %r not found', fullname)
            return (None, [])

    def invalidate_caches(self):
        log.debug('invalidating link cache')
        self._links = None

# Check path to see if it looks like a URL
_url_path_cache = {}
def handle_url(path):
    if path.startswith(('http://', 'https://')):
        log.debug('Handle path? %s. [Yes]', path)
        if path in _url_path_cache:
            finder = _url_path_cache[path]
        else:
            finder = UrlPathFinder(path)
            _url_path_cache[path] = finder
        return finder
    else:
        log.debug('Handle path? %s. [No]', path)

def install_path_hook():
    sys.path_hooks.append(handle_url)
    sys.path_importer_cache.clear()
    log.debug('Installing handle_url')

def remove_path_hook():
    sys.path_hooks.remove(handle_url)
    sys.path_importer_cache.clear()
    log.debug('Removing handle_url')

## 导入机制解析

In [19]:
# 创建一个新的模块对象
import imp
m = imp.new_module('spam')
m.__name__

'spam'

模块会被解释器缓存起来。模块缓存可以在字典 `sys.modules` 中被找到

In [20]:
import sys
import imp
m = sys.modules.setdefault('spam',imp.new_module('spam'))
m

<module 'spam'>

In [21]:
import math
m = sys.modules.setdefault('math',imp.new_module('math'))
m

<module 'math' from '/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/lib-dynload/math.cpython-38-darwin.so'>

最高层上，导入操作被一个位于`sys.meta_path`列表中的“元路径”查找器处理。

In [22]:
from pprint import pprint
pprint(sys.meta_path)

[<class '_frozen_importlib.BuiltinImporter'>,
 <class '_frozen_importlib.FrozenImporter'>,
 <class '_frozen_importlib_external.PathFinder'>,
 <six._SixMetaPathImporter object at 0x7fec43c4ad90>]


In [32]:
class Finder:
    def find_module(self, fullname, path):
        print('Looking for', fullname, path)
        return None
import sys
sys.meta_path.insert(0,Finder())
imp.reload(xml.etree.ElementTree)

Looking for xml.etree.ElementTree ['/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/xml/etree']
Looking for xml.etree.ElementTree ['/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/xml/etree']
Looking for xml.etree.ElementTree ['/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/xml/etree']
Looking for xml.etree.ElementTree ['/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/xml/etree']
Looking for xml.etree.ElementTree ['/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/xml/etree']
Looking for xml.etree.ElementTree ['/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/xml/etree']
Looking for xml.etree.ElementTree ['/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/xml/etree']
Looking for xml.etree.ElementTree ['/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/xml/etree']


<module 'xml.etree.ElementTree' from '/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/xml/etree/ElementTree.py'>

In [33]:
pprint(sys.path)

['/Users/huzhenyu/Documents/personal/python_personal/python/cookbook',
 '/Library/Frameworks/Python.framework/Versions/3.8/lib/python38.zip',
 '/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8',
 '/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/lib-dynload',
 '',
 '/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages',
 '/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/IPython/extensions',
 '/Users/huzhenyu/.ipython']


In [34]:
pprint(sys.path_importer_cache)

{'/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8': FileFinder('/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8'),
 '/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/asyncio': FileFinder('/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/asyncio'),
 '/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/collections': FileFinder('/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/collections'),
 '/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/concurrent': FileFinder('/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/concurrent'),
 '/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/concurrent/futures': FileFinder('/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/concurrent/futures'),
 '/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/ctypes': FileFinder('/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/ctypes'),
 '/L

要执行 `import fib` ，会顺序检查 `sys.path` 中的目录。 对于每个目录，名称`fib`会被传给相应的 `sys.path_importer_cache` 中的查找器。

In [36]:
class Finder:
    def find_loader(self, name):
        print('Looking for', name)
        return (None, [])

In [40]:
import sys
sys.path_importer_cache.clear()
sys.path_importer_cache['debug'] = Finder()
sys.path.insert(0, 'debug')
import fib

Looking for fib None
Looking for fib None
Looking for fib None
Looking for fib None
Looking for fib None
Looking for fib None
Looking for fib None
Looking for fib None
Looking for fib
Looking for fib
Looking for fib
Looking for fib


ModuleNotFoundError: No module named 'fib'

`sys.path_importer_cache` 的使用被一个存储在 `sys.path_hooks` 中的函数列表控制

In [41]:
sys.path_importer_cache.clear()
def check_path(path):
    print('Checking', path)
    raise ImportError()

In [42]:
sys.path_hooks.insert(0, check_path)
import fib

Looking for fib None
Looking for fib None
Looking for fib None
Looking for fib None
Looking for fib None
Looking for fib None
Looking for fib None
Looking for fib None
Checking debug
Checking /Users/huzhenyu/Documents/personal/python_personal/python/cookbook
Checking /Library/Frameworks/Python.framework/Versions/3.8/lib/python38.zip
Checking /Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8
Checking /Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/lib-dynload
Checking /Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages
Checking /Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/IPython/extensions
Checking /Users/huzhenyu/.ipython


ModuleNotFoundError: No module named 'fib'

一个新的 `UrlPathFinder` 实例被创建并被放入 `sys.path_importer_cache`. 之后，所有需要检查 `sys.path` 的导入语句都会使用你的自定义查找器。

对于简单模块，`find_loader()` 返回一个元组`(loader, None)`， 其中的`loader`是一个用于导入模块的加载器实例。

对于一个普通的包，`find_loader()` 返回一个元组`(loader, path)`， 其中的`loader`是一个用于导入包（并执行`__init__.py`）的加载器实例， `path`是一个会初始化包的 `__path__` 属性的目录列表。

`find_loader()` 还要能处理一个命名空间包。 一个命名空间包中有一个合法的包目录名，但是不存在`__init__.py`文件。   
这样的话，`find_loader()` 必须返回一个元组`(None, path)`， `path`是一个目录列表，由它来构建包的定义有`__init__.py`文件的`__path__`属性。   
对于这种情况，导入机制会继续前行去检查`sys.path`中的目录。 如果找到了命名空间包，所有的结果路径被加到一起来构建最终的命名空间包。  

所有的包都包含了一个内部路径设置，可以在`__path__`属性中看到

In [44]:
import xml.etree.ElementTree
xml.__path__

['/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/xml']

# 导入模块的同时修改模块

In [51]:
# postimport.py
import importlib
import sys
from collections import defaultdict

_post_import_hooks = defaultdict(list)

class PostImportFinder:
    def __init__(self):
        self._skip = set()

    def find_module(self, fullname, path=None):
        if fullname in self._skip:
            return None
        self._skip.add(fullname)
        return PostImportLoader(self)

class PostImportLoader:
    def __init__(self, finder):
        self._finder = finder

    def load_module(self, fullname):
        importlib.import_module(fullname)
        module = sys.modules[fullname]
        for func in _post_import_hooks[fullname]:
            func(module)
        self._finder._skip.remove(fullname)
        return module

def when_imported(fullname):
    def decorate(func):
        if fullname in sys.modules:
            func(sys.modules[fullname])
        else:
            _post_import_hooks[fullname].append(func)
        return func
    return decorate

sys.meta_path.insert(0, PostImportFinder())

In [50]:
@when_imported('threading')
def warn_threads(mod):
    print('Threads? Are you crazy?')

Threads? Are you crazy?


In [52]:
import threading

In [74]:
from functools import wraps

def logged(func):
    @wraps(func)
    def wrapper(*args, **kwargs):
        print('Calling', func.__name__, args, kwargs)
        return func(*args, **kwargs)
    return wrapper

# Example
@when_imported('math')
def add_logging(mod):
    mod.cos = logged(mod.cos)
    mod.sin = logged(mod.sin)

In [75]:
math.sin(2)

Calling sin (2,) {}
Calling sin (2,) {}
Calling sin (2,) {}
Calling sin (2,) {}
Calling sin (2,) {}
Calling sin (2,) {}


0.9092974268256817

In [63]:
_post_import_hooks

defaultdict(list, {'math': []})

# 安装私有的包

```bash
python3 setup.py install --user
pip install --user packagename
```

# 创建新的Python环境

```bash
bash % pyvenv Spam
```

# 分发包

目录结构

```python
projectname/
    README.txt
    Doc/
        documentation.txt
    projectname/
        __init__.py
        foo.py
        bar.py
        utils/
            __init__.py
            spam.py
            grok.py
    examples/
        helloworld.py
        ...
```

要让你的包可以发布出去，首先你要编写一个 `setup.py`

```python
# setup.py

from distutils.core import setup

setup(name='projectname',
    version='1.0',
    author='Your Name',
    author_email='you@youraddress.com',
    url='http://www.you.com/projectname',
    packages=['projectname', 'projectname.utils'],
)
```

创建一个 `MANIFEST.in` 文件，列出所有在你的包中需要包含进来的非源码文件

```python
# MANIFEST.in
include *.txt
recursive-include examples *
recursive-include Doc *
```

确保 `setup.py` 和 `MANIFEST.in` 文件放在你的包的最顶级目录中

```bash
% bash python3 setup.py sdist
```