Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Fetching contributors…

Cannot retrieve contributors at this time

file 191 lines (145 sloc) 6.571 kb
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191
# urllib3/poolmanager.py
# Copyright 2008-2012 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
#
# This module is part of urllib3 and is released under
# the MIT License: http://www.opensource.org/licenses/mit-license.php

import logging

from ._collections import RecentlyUsedContainer
from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool
from .connectionpool import connection_from_url, port_by_scheme
from .request import RequestMethods
from .util import parse_url


__all__ = ['PoolManager', 'ProxyManager', 'proxy_from_url']


pool_classes_by_scheme = {
    'http': HTTPConnectionPool,
    'https': HTTPSConnectionPool,
}

log = logging.getLogger(__name__)

SSL_KEYWORDS = ('key_file', 'cert_file', 'cert_reqs', 'ca_certs',
                'ssl_version')


class PoolManager(RequestMethods):
    """
Allows for arbitrary requests while transparently keeping track of
necessary connection pools for you.

:param num_pools:
Number of connection pools to cache before discarding the least
recently used pool.

:param headers:
Headers to include with all requests, unless other headers are given
explicitly.

:param \**connection_pool_kw:
Additional parameters are used to create fresh
:class:`urllib3.connectionpool.ConnectionPool` instances.

Example: ::

>>> manager = PoolManager(num_pools=2)
>>> r = manager.request('GET', 'http://google.com/')
>>> r = manager.request('GET', 'http://google.com/mail')
>>> r = manager.request('GET', 'http://yahoo.com/')
>>> len(manager.pools)
2

"""

    def __init__(self, num_pools=10, headers=None, **connection_pool_kw):
        RequestMethods.__init__(self, headers)
        self.connection_pool_kw = connection_pool_kw
        self.pools = RecentlyUsedContainer(num_pools,
                                           dispose_func=lambda p: p.close())

    def _new_pool(self, scheme, host, port, source_address):
        """
Create a new :class:`ConnectionPool` based on host, port and scheme.

This method is used to actually create the connection pools handed out
by :meth:`connection_from_url` and companion methods. It is intended
to be overridden for customization.
"""
        pool_cls = pool_classes_by_scheme[scheme]
        kwargs = self.connection_pool_kw
        if scheme == 'http':
            kwargs = self.connection_pool_kw.copy()
            for kw in SSL_KEYWORDS:
                kwargs.pop(kw, None)

        return pool_cls(host, port, source_address=source_address, **kwargs)

    def clear(self):
        """
Empty our store of pools and direct them all to close.

This will not affect in-flight connections, but they will not be
re-used after completion.
"""
        self.pools.clear()

    def connection_from_host(self, host, port=None, scheme='http', source_address=None):
        """
Get a :class:`ConnectionPool` based on the host, port, and scheme.

If ``port`` isn't given, it will be derived from the ``scheme`` using
``urllib3.connectionpool.port_by_scheme``.
"""
        scheme = scheme or 'http'
        port = port or port_by_scheme.get(scheme, 80)

        pool_key = (scheme, host, port, source_address)

        # If the scheme, host, or port doesn't match existing open connections,
        # open a new ConnectionPool.
        pool = self.pools.get(pool_key)
        if pool:
            return pool

        # Make a fresh ConnectionPool of the desired type
        pool = self._new_pool(scheme, host, port, source_address)
        self.pools[pool_key] = pool
        return pool

    def connection_from_url(self, url, source_address=None):
        """
Similar to :func:`urllib3.connectionpool.connection_from_url` but
doesn't pass any additional parameters to the
:class:`urllib3.connectionpool.ConnectionPool` constructor.

Additional parameters are taken from the :class:`.PoolManager`
constructor.
"""
        u = parse_url(url)
        return self.connection_from_host(u.host, port=u.port, scheme=u.scheme, source_address=source_address)

    def urlopen(self, method, url, redirect=True, **kw):
        """
Same as :meth:`urllib3.connectionpool.HTTPConnectionPool.urlopen`
with custom cross-host redirect logic and only sends the request-uri
portion of the ``url``.

The given ``url`` parameter must be absolute, such that an appropriate
:class:`urllib3.connectionpool.ConnectionPool` can be chosen for it.
"""
        u = parse_url(url)
        conn = self.connection_from_host(u.host, port=u.port, scheme=u.scheme)

        kw['assert_same_host'] = False
        kw['redirect'] = False
        if 'headers' not in kw:
            kw['headers'] = self.headers

        response = conn.urlopen(method, u.request_uri, **kw)

        redirect_location = redirect and response.get_redirect_location()
        if not redirect_location:
            return response

        if response.status == 303:
            method = 'GET'

        log.info("Redirecting %s -> %s" % (url, redirect_location))
        kw['retries'] = kw.get('retries', 3) - 1 # Persist retries countdown
        return self.urlopen(method, redirect_location, **kw)


class ProxyManager(RequestMethods):
    """
Given a ConnectionPool to a proxy, the ProxyManager's ``urlopen`` method
will make requests to any url through the defined proxy. The ProxyManager
class will automatically set the 'Host' header if it is not provided.
"""

    def __init__(self, proxy_pool):
        self.proxy_pool = proxy_pool

    def _set_proxy_headers(self, url, headers=None):
        """
Sets headers needed by proxies: specifically, the Accept and Host
headers. Only sets headers not provided by the user.
"""
        headers_ = {'Accept': '*/*'}

        host = parse_url(url).host
        if host:
            headers_['Host'] = host

        if headers:
            headers_.update(headers)

        return headers_

    def urlopen(self, method, url, **kw):
        "Same as HTTP(S)ConnectionPool.urlopen, ``url`` must be absolute."
        kw['assert_same_host'] = False
        kw['headers'] = self._set_proxy_headers(url, headers=kw.get('headers'))
        return self.proxy_pool.urlopen(method, url, **kw)


def proxy_from_url(url, **pool_kw):
    proxy_pool = connection_from_url(url, **pool_kw)
    return ProxyManager(proxy_pool)
Something went wrong with that request. Please try again.