/
tab_pool.py
132 lines (101 loc) · 4.21 KB
/
tab_pool.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
import logging
import urllib.parse
import os
import contextlib
import cachetools
import threading
from ChromeController.manager import ChromeRemoteDebugInterface
class _TabStore(cachetools.LRUCache):
    '''
    LRU cache mapping a pool key to a `(lock, tab)` tuple.

    Missing keys are populated on demand by calling `new_tab()` on the interface
    handed to the constructor, and the tab of every evicted entry is closed.
    '''

    def __init__(self, chrome_interface, *args, **kwargs):
        '''
        Create the store.

        `chrome_interface` is the object whose `new_tab()` method is called to
        create a tab for a key that is not cached yet.
        A truthy `maxsize` keyword argument is mandatory; all remaining arguments
        are forwarded unchanged to `cachetools.LRUCache`.
        '''
        assert "maxsize" in kwargs
        assert kwargs['maxsize']
        super().__init__(*args, **kwargs)
        assert self.maxsize

        self.chrome_interface = chrome_interface
        self.log = logging.getLogger("Main.ChromeController.TabPool.Store")

    def __getitem__(self, key):
        '''
        Return the `(lock, tab)` tuple stored for `key`.

        The lookup is delegated to the parent class; `__missing__` below supplies
        the value for keys that are not cached yet.
        '''
        self.log.debug("__getitem__: %s", key)
        assert key is not None, "You have to pass a key to __getitem__!"
        return super().__getitem__(key)

    def __missing__(self, key):
        '''
        Create, store and return a fresh `(lock, tab)` tuple for an uncached `key`.
        '''
        self.log.debug("__missing__: %s", key)
        assert key is not None, "You have to pass a key to __missing__!"
        value = (threading.Lock(), self.chrome_interface.new_tab())
        # Storing the value may evict another entry (see `popitem`).
        self[key] = value
        # Hand back the value we just built rather than looking it up again.
        return value

    def popitem(self):
        '''
        Remove the entry selected by the parent class, close its tab, and return
        the removed `(key, value)` pair.
        '''
        key, value = super().popitem()
        self.log.debug('Key "%s" evicted with value "%s"', key, value)
        dummy_lock, tab = value
        tab.close()
        # Return the removed pair, as the mapping `popitem()` contract requires,
        # instead of discarding it.
        return key, value
class TabPooledChromium(object):
    '''
    A chromium instance plus a least-recently-used pool of tabs created from it.

    Tabs are checked out with the `tab()` context manager.
    '''

    def __init__(self, *args, tab_pool_max_size = None, **kwargs):
        '''
        Create a chromium tab pool instance.

        This will start a chromium instance, from which new tabs will be created as
        needed with the tab() context manager.

        `tab_pool_max_size` is the maximum number of pooled tabs (10 when it is None).
        All other positional and keyword arguments are forwarded to
        `ChromeRemoteDebugInterface`.

        Note that the destruction of the `TabPooledChromium` object will kill the associated chromium
        execution. This will render any checked-out tabs invalid (though saving the tabs considering
        they're constructed in a context-manager is pretty obviously wrong anyways).
        '''
        if tab_pool_max_size is None:
            tab_pool_max_size = 10

        self.chrome_interface = ChromeRemoteDebugInterface(*args, **kwargs)
        self.tab_pool_max_size = tab_pool_max_size
        self.log = logging.getLogger("Main.ChromeController.TabPool")

        # We pass a tab to the tabstore, because otherwise it might wind up evicting the root tab,
        # which would take the entire chrome instance down with it when it's closed.
        self.__tab_cache = _TabStore(maxsize=tab_pool_max_size, chrome_interface=self.chrome_interface.new_tab())

        # Guards the per-key checkout counters in `__active_tabs`.
        self.__counter_lock = threading.Lock()
        self.__active_tabs = {}

        # Remembered so `tab()` can refuse to run in a different process.
        self.__started_pid = os.getpid()
        self.alive = True

    def close(self):
        '''
        Close the chromium interface. Calling this more than once is harmless.
        '''
        if self.alive:
            self.chrome_interface.close()
            self.alive = False

    def __del__(self):
        '''
        Best-effort shutdown when the pool object is destroyed.
        '''
        # If __init__ raised before `alive` was assigned there is nothing we can
        # (or need to) close, and touching `self.alive` would raise AttributeError.
        if not getattr(self, "alive", False):
            return
        try:
            # close() already closes the chrome interface; closing it a second
            # time afterwards is redundant.
            self.close()
        except Exception:
            # Errors cannot be reported usefully from a finalizer.
            pass

    @contextlib.contextmanager
    def tab(self, netloc=None, url=None, extra_id=None, use_tid=False):
        '''
        Get a chromium tab from the pool, optionally one that has an association with a specific netloc/URL.

        If `netloc` is not given but `url` is, the netloc is parsed out of `url`.

        If no url, netloc or `extra_id` is specified, the per-thread identifier will be used.
        If `extra_id` is specified, its stringified value will be mixed into the pool key
        (falsy values are ignored).
        If `use_tid` is true, the per-thread identifier will be mixed into the pool key.

        In all cases, the tab pool is a least-recently-used cache, so the tab that has been accessed the
        least recently will be automatically closed if a new tab is requested, and there are already
        `tab_pool_max_size` tabs created.

        The tab's lock is held for the duration of the `with` block.

        Raises RuntimeError when called from a process other than the one that created the pool.
        '''
        assert self.alive, "Chrome has been shut down! Cannot continue!"

        if not netloc and url:
            netloc = urllib.parse.urlparse(url).netloc

        self.log.debug("Getting tab for netloc: %s (url: %s)", netloc, url)

        # Coerce to string type so even if it's none, it doesn't hurt anything.
        key = str(netloc)
        if extra_id:
            key += " " + str(extra_id)

        # `str(None)` is the truthy string "None", so test the inputs rather than
        # the key to detect the "nothing identifies this tab" case.
        if use_tid or not (netloc or extra_id):
            key += " " + str(threading.get_ident())

        if self.__started_pid != os.getpid():
            self.log.error("TabPooledChromium instances are not safe to share across multiple processes.")
            self.log.error("Please create a new in each separate multiprocesssing process.")
            raise RuntimeError("TabPooledChromium instances are not safe to share across multiple processes.")

        # Track how many checkouts of this key are in flight, purely for diagnostics.
        with self.__counter_lock:
            self.__active_tabs.setdefault(key, 0)
            self.__active_tabs[key] += 1
            if self.__active_tabs[key] > 1:
                self.log.warning("Tab with key %s checked out more then once simultaneously", key)

        try:
            lock, tab = self.__tab_cache[key]
            with lock:
                yield tab
        finally:
            with self.__counter_lock:
                self.__active_tabs[key] -= 1
                if self.__active_tabs[key] == 0:
                    self.__active_tabs.pop(key)