# import_hook

所谓`import hook`就是指直接自定义finder和loader,并将finder放入`sys.meta_path`中的过程.

利用这个可以做到很多非常神奇的事情,比如

+ import某个特定模块时触发某个回调函数来通知我们
+ import一个远程服务器上的模块
+ 直接import其他语言(比如fortran)的模块来使用

本节需要的先验知识包括:

+ [模块的导入方式]()
+ [使用f2py为python嵌入fortran代码]()


## import hook的基本形式

import hook通常是以一个单文件模块的形式出现的,其中的过程说白了就是自定义finder和loader,因此自定义这两个类都是必须的,然后就是将定义的finder实例化,并将这个实例加入`sys.meta_path`.下面是模板代码.

```python
import importlib
from importlib.abc import (
    MetaPathFinder, 
    PathEntryFinder,
    Loader
)
from importlib.machinery import ModuleSpec
import sys
from collections import defaultdict


class ClientImportLoader(Loader):
    """Template loader: one hook creates the module object, the other runs on import/reload."""

    @classmethod
    def create_module(clz,spec):
        """Create the module object for ``spec``; a falsy result is returned as ``None``."""
        # NOTE(review): `__create_module_from_spec` is a placeholder that this
        # template never defines. Because the name starts with two underscores
        # and is used inside a class body, Python name-mangles it to
        # `_ClientImportLoader__create_module_from_spec` -- pick a helper name
        # without the double-underscore prefix when filling in the template.
        module = __create_module_from_spec(spec)
        return module or None

    @classmethod
    def exec_module (clz, module):
        """Operations performed each time the module is imported or reloaded (empty in this template)."""
        pass

# Module-level loader instance; the finder template below passes it to ModuleSpec.
loader= ClientImportLoader()
    

class ClientImportFinder(MetaPathFinder):
    """Template meta-path finder that returns specs bound to the module-level ``loader``."""

    @classmethod
    def find_spec (klass, full_name, paths=None, target=None):
        """Module lookup logic goes here; returns a ``ModuleSpec`` for ``full_name``."""
        pass
        # NOTE(review): `module_full_path` is a placeholder that this template
        # never defines; the real lookup logic above must compute it.
        return ModuleSpec(full_name, loader, origin=module_full_path)
    

# Insert the finder at index 0 so it is consulted before the entries already on sys.meta_path.
sys.meta_path.insert(0, ClientImportFinder())

```


当这个定义import hook的模块被加载后,它就可以正常地执行自己的功能了,因此通常这个import hook的模块需要优先加载.


## import某个特定模块时触发某个回调函数来通知我们

这个例子来自python cookbook,不过上面的代码已经比较过时了,这边给出python3.5+推荐的写法

In [1]:
import importlib
from importlib.abc import (
    MetaPathFinder, 
    PathEntryFinder,
    Loader
)
from importlib.machinery import ModuleSpec
import sys
from collections import defaultdict

# Registry: fully-qualified module name -> callbacks to run once that module
# has been imported. Each callback receives the module object.
_post_import_hooks = defaultdict(list)


class ClientImportLoader(Loader):
    """Loader wrapper that runs post-import hooks after the real loader.

    Args:
        finder: The ``ClientImportFinder`` that created this loader; its
            ``_skip`` set is cleaned up once execution finishes.
        loader: The real loader found for the module. When omitted, no module
            code is executed and only the registered hooks run (this keeps the
            one-argument constructor working).
    """

    def __init__(self, finder, loader=None):
        self._finder = finder
        self._loader = loader

    def create_module(self, spec):
        """Delegate module creation to the real loader.

        Returns ``None`` (i.e. "use the default module creation") when there
        is no real loader or it does not implement ``create_module``.
        """
        if self._loader is None:
            return super().create_module(spec)
        create = getattr(self._loader, "create_module", None)
        return create(spec) if create is not None else None

    def exec_module(self, module):
        """Execute the real module, then call every hook registered for it."""
        name = module.__name__
        try:
            if self._loader is not None:
                # Hand the module back to its real loader before executing it,
                # so `__loader__` / `__spec__.loader` do not keep pointing at
                # this wrapper once the import is done.
                spec = getattr(module, "__spec__", None)
                if spec is not None and spec.loader is self:
                    spec.loader = self._loader
                if getattr(module, "__loader__", None) is self:
                    module.__loader__ = self._loader
                self._loader.exec_module(module)
            # `.get` avoids creating an empty registry entry as a side effect;
            # the tuple copy tolerates hooks that register further hooks.
            for func in tuple(_post_import_hooks.get(name, ())):
                func(module)
        finally:
            # Always release the name, even when the module body or a hook raises.
            self._finder._skip.discard(name)


class ClientImportFinder(MetaPathFinder):
    """Meta-path finder that attaches post-import hooks to selected modules.

    Only names that have at least one callback in ``_post_import_hooks`` are
    intercepted; every other import is left to the regular finders.
    """

    def __init__(self):
        # Names whose real spec is currently being resolved by this finder;
        # used to avoid recursing into ourselves.
        self._skip = set()

    def find_spec(self, full_name, paths=None, target=None):
        """Return the real spec for ``full_name`` with its loader wrapped.

        Args:
            full_name: Fully-qualified name of the module being imported.
            paths: Parent package ``__path__`` (``None`` for top-level
                modules); forwarded unchanged to the other finders.
            target: Module object passed by the import system on reload;
                forwarded unchanged.

        Returns:
            ``None`` when the name has no hooks, is already being resolved, or
            no other finder knows it. Otherwise the spec produced by the first
            other finder, with its loader wrapped in ``ClientImportLoader``
            (left unwrapped if that loader lacks ``exec_module``).
        """
        if full_name in self._skip or not _post_import_hooks.get(full_name):
            return None
        self._skip.add(full_name)
        try:
            spec = self._find_real_spec(full_name, paths, target)
        finally:
            self._skip.discard(full_name)
        if spec is None or not hasattr(spec.loader, "exec_module"):
            return spec
        spec.loader = ClientImportLoader(self, spec.loader)
        return spec

    def _find_real_spec(self, full_name, paths, target):
        """Ask every other meta-path finder, in order, for the module's spec."""
        for candidate in sys.meta_path:
            if candidate is self:
                continue
            find = getattr(candidate, "find_spec", None)
            if find is None:
                continue
            spec = find(full_name, paths, target)
            if spec is not None:
                return spec
        return None
        
        
def when_imported(fullname):
    """Build a decorator that runs the decorated callback for module ``fullname``.

    If the module is already present in ``sys.modules`` the callback is called
    immediately with that entry; otherwise it is queued in
    ``_post_import_hooks`` under ``fullname``. The decorated function is
    returned unchanged either way.
    """
    def decorate(func):
        try:
            already_loaded = sys.modules[fullname]
        except KeyError:
            # Not imported yet: remember the callback for later.
            _post_import_hooks[fullname].append(func)
        else:
            func(already_loaded)
        return func

    return decorate

# Create a single finder instance and put it at the front of sys.meta_path so
# it is asked before the finders that are already registered. The instance is
# kept in `finder` so its `_skip` set can be inspected later.
finder = ClientImportFinder()
sys.meta_path.insert(0, finder)

In [2]:
@when_imported('numpy')
def warn_numpy(mod):
    """Demo callback registered for 'numpy'; prints a fixed message and ignores ``mod``."""
    print('numpy? Are you crazy?')


In [3]:
import numpy

numpy? Are you crazy?


In [4]:
finder._skip

set()

为了避免陷入无限循环,ClientImportFinder维护了一个记录"正在加载中的模块名"的集合`_skip`:模块名在`find_spec`中被加入,在加载完成后被移除.如果一个模块在加载过程中又有另一个地方来加载它,那么这个finder会直接返回`None`跳过自己,交给后面的finder处理.

## import一个远程服务器上的模块



我们以导入github上我原来写的一个模块<https://raw.githubusercontent.com/Python-Tools/pypartten/master/pypartten/proxy.py>作为例子.

这个例子主要是复写finder,使其可以查找到目标服务器上的模块文件;同时复写loader的`create_module`方法,用远端的代码生成模块.

In [1]:
import requests

In [3]:
n = requests.get("https://raw.githubusercontent.com/Python-Tools/pypartten/master/pypartten/proxy.py")

In [4]:
n.status_code

200

In [6]:
n.content.decode("utf-8")

'import abc\nfrom typing import (\n    Any\n)\n\n\nclass Proxy:\n    """代理.\n\n    用于作为占位符,在初始化后本身没有任何功能,但为其指定一个被代理对象后就可以通过访问代理来访问对象的内容了.\n\n    本代码来自peewee中的代理实现,个人认为非常简练好用,其意义在于:\n    1. 屏蔽对对象直接的写操作,避免对象被篡改\n    2. 重写__getattr__以提供一定的访问控制\n\n    用法:\n\n    >>> p = Proxy()\n    >>> class A:\n    ...     x=1\n    ...     y=2\n    >>> a = A()\n    >>> p.attach_callback(lambda x: print(x.x**2))\n    >>> p.attach_callback(lambda x: print(x.y**2))\n    >>> p.initialize(a)\n    1\n    4\n    >>> print(p.x)\n    1\n\n    protected:\n        _callbacks (List[function]): 保存要在调用了`initialize`后执行的回调函数,回调函数的参数都是`initialize`的参数.\n\n    """\n\n    __slots__ = (\'obj\', \'_callbacks\')\n\n    def __init__(self):\n        self._callbacks = []\n        self.initialize(None)\n\n    def initialize(self, obj: Any):\n        """[summary]\n\n        Args:\n            obj ([type]): [description]\n        """\n\n        self.obj = obj\n        for callback in self._callbacks:\n            callback(obj)\n\n  

In [1]:
import sys
import importlib.abc
import imp
import requests


# Module Loader for a URL
class UrlModuleLoader(importlib.abc.SourceLoader):
    """Loader that downloads a module's source from a base URL and executes it.

    Args:
        baseurl: URL of the "directory" that contains the module files; a
            module ``a.b.mod`` is fetched from ``<baseurl>/mod.py``.
    """

    def __init__(self, baseurl):
        self._baseurl = baseurl
        # Maps a source URL to the source text that was already downloaded.
        self._source_cache = {}

    def create_module(self, spec):
        """Create the module for ``spec`` and execute the remote source in it.

        The module is registered in ``sys.modules`` (an existing entry with
        the same name is reused) and returned fully initialised.

        Raises:
            ImportError: If the source cannot be downloaded.
        """
        # Local import: `imp.new_module` no longer exists on Python 3.12+,
        # `types.ModuleType` is its direct replacement.
        from types import ModuleType

        fullname = spec.name
        # Fetch and compile first so a download/syntax failure happens before
        # anything is registered in sys.modules.
        code = self.get_code(fullname)
        mod = sys.modules.setdefault(fullname, ModuleType(fullname))
        mod.__file__ = self.get_filename(fullname)
        mod.__loader__ = self
        mod.__package__ = fullname.rpartition('.')[0]
        # NOTE(security): this executes code downloaded over the network;
        # only install this loader for URLs you fully trust.
        exec(code, mod.__dict__)
        return mod

    def exec_module(self, module):
        """Do nothing: ``create_module`` has already executed the source."""
        return module

    # Optional extensions
    def get_code(self, fullname):
        """Return the compiled code object for ``fullname``."""
        src = self.get_source(fullname)
        return compile(src, self.get_filename(fullname), 'exec')

    def get_data(self, path):
        """Unused hook required by ``SourceLoader``; always returns ``None``."""
        return None

    def get_filename(self, fullname):
        """Return the URL of the ``.py`` file for the last component of ``fullname``."""
        return self._baseurl + '/' + fullname.split('.')[-1] + '.py'

    def get_source(self, fullname):
        """Return the source text of ``fullname``, downloading it at most once.

        Raises:
            ImportError: If the request fails or the status code is not 200.
        """
        filename = self.get_filename(fullname)
        if filename in self._source_cache:
            return self._source_cache[filename]
        try:
            u = requests.get(filename)
        except Exception as e:
            # Keep the original failure attached as the cause for debugging.
            raise ImportError("Can't load %s" % filename) from e
        if u.status_code != 200:
            raise ImportError("Can't load %s" % filename)
        source = u.content.decode("utf-8")
        self._source_cache[filename] = source
        return source

    def is_package(self, fullname):
        """Plain modules are never packages."""
        return False

# Package loader for a URL
class UrlPackageLoader(UrlModuleLoader):
    """Loader for a package whose ``__init__.py`` lives directly under ``baseurl``."""

    def create_module(self, spec):
        """Create the package module via the parent class, then mark it as a package.

        ``__path__`` is set to the package URL and ``__package__`` to the
        package's own name.
        """
        mod = super().create_module(spec)
        mod.__path__ = [self._baseurl]
        # The original referenced an undefined `fullname` here; the package
        # name is available as `spec.name`.
        mod.__package__ = spec.name
        return mod

    def exec_module(self, module):
        """Do nothing: ``create_module`` has already executed the source."""
        return module

    def get_filename(self, fullname):
        """Return the URL of the package's ``__init__.py`` (``fullname`` is not used)."""
        return self._baseurl + '/' + '__init__.py'

    def is_package(self, fullname):
        """This loader only ever handles packages."""
        return True
    
    

class UrlMetaFinder(importlib.abc.MetaPathFinder):
    """Meta-path finder that resolves modules and packages below a base URL.

    Args:
        baseurl: Root URL under which top-level modules/packages are looked up.
    """

    def __init__(self, baseurl):
        self._baseurl = baseurl
        # Directory URL -> result of `_get_links(url)` for that URL.
        self._links = {}
        # Directory URL -> module loader reused for every plain module there.
        self._loaders = {baseurl: UrlModuleLoader(baseurl)}

    def find_spec(self, full_name, paths=None, target=None):
        """Return a ``ModuleSpec`` for ``full_name`` if it exists under the URL.

        Args:
            full_name: Fully-qualified module name.
            paths: Parent package ``__path__``; ``None`` for top-level imports.
                Only its first entry is considered, and only if it starts with
                this finder's base URL.
            target: Unused.

        Returns:
            A spec bound to a ``UrlPackageLoader`` (name listed as-is among
            the directory's links) or to a ``UrlModuleLoader``
            (``<name>.py`` listed), otherwise ``None``.
        """
        # Local import so the cell works even when ModuleSpec was not imported
        # at module level.
        from importlib.machinery import ModuleSpec

        if paths is None:
            baseurl = self._baseurl
        else:
            # An empty path, or one that points outside our URL, is not ours.
            if not paths or not paths[0].startswith(self._baseurl):
                return None
            baseurl = paths[0]
        basename = full_name.rpartition('.')[2]

        # Check link cache (keyed by directory URL, so look it up by URL).
        # NOTE(review): `_get_links` is not defined in the visible code;
        # presumably it returns the collection of names linked from `baseurl`.
        if baseurl not in self._links:
            self._links[baseurl] = _get_links(baseurl)
        links = self._links[baseurl]

        # Check if it's a package
        if basename in links:
            # Build the URL from the directory being searched so nested
            # packages resolve below their parent, not below the root URL.
            fullurl = baseurl + '/' + basename
            loader = UrlPackageLoader(fullurl)
            spec = ModuleSpec(
                full_name,
                loader,
                origin=loader.get_filename(full_name),
                is_package=True,
            )
            spec.submodule_search_locations.append(fullurl)
            return spec

        # A normal module
        filename = basename + '.py'
        if filename not in links:
            return None
        if baseurl not in self._loaders:
            self._loaders[baseurl] = UrlModuleLoader(baseurl)
        # find_spec must return a spec, not the bare loader.
        return ModuleSpec(
            full_name,
            self._loaders[baseurl],
            origin=baseurl + '/' + filename,
        )

    def invalidate_caches(self):
        """Forget every cached directory listing."""
        self._links.clear()



# Utility functions for installing/uninstalling the loader
# Address -> UrlMetaFinder that install_meta() registered for it.
_installed_meta_cache = {}


def install_meta(address):
    """Register a ``UrlMetaFinder`` for ``address`` on ``sys.meta_path``.

    Does nothing when a finder for the same address is already cached.
    """
    if address in _installed_meta_cache:
        return
    new_finder = UrlMetaFinder(address)
    sys.meta_path.append(new_finder)
    _installed_meta_cache[address] = new_finder

def remove_meta(address):
    """Unregister the finder cached for ``address``; no-op if none is cached."""
    try:
        cached_finder = _installed_meta_cache.pop(address)
    except KeyError:
        return
    sys.meta_path.remove(cached_finder)

In [2]:
install_meta('https://raw.githubusercontent.com/Python-Tools/pypartten/master/pypartten')

In [3]:
import proxy

ModuleNotFoundError: No module named 'proxy'