/
filesystems.py
259 lines (208 loc) · 8.22 KB
/
filesystems.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
# Copyright (c) 2017 The Regents of the University of Michigan
# All rights reserved.
# This software is licensed under the BSD 3-Clause License.
"""The file system handlers defined in this module
encapsulate the I/O operations required to store
and fetch data from different file systems."""
import os
import errno
import io
import warnings
from ..db import get_database
from ..version import __version__
from collections.abc import Mapping, Iterable
from deprecation import deprecated
try:
import pymongo
import gridfs
except ImportError:
GRIDFS = False
else:
GRIDFS = True
GRIDFS_LARGE_FILE_WARNING_THRSHLD = int(1e9) # 1GB
FILESYSTEM_REGISTRY = dict()
"""
THIS MODULE IS DEPRECATED!
"""
def _register_fs_class(fs):
    """Store the handler class *fs* in ``FILESYSTEM_REGISTRY``.

    The class is filed under its ``name`` attribute, which is the key
    later used to look the handler up from a configuration mapping.
    """
    handler_name = fs.name
    FILESYSTEM_REGISTRY[handler_name] = fs
@deprecated(deprecated_in="1.3", removed_in="2.0", current_version=__version__,
            details="The filesystems module is deprecated.")
class LocalFS(object):
    """A file system handler for the local file system.

    This handler will store all files at the specified
    root path using a file id based naming scheme: the id is
    cut into short chunks which become nested directories
    below the root (see :meth:`_fn`).

    :param root: The path to the root directory.
    :type root: str
    """
    name = 'localfs'
    "General identifier for this file system handler."

    FileExistsError = IOError
    "A file with the specified id already exists."

    FileNotFoundError = IOError
    "A file with the specified id is not found."

    class AutoRetry(RuntimeError):
        """Error type callers may treat as 'retry the operation'.

        Nothing in this handler raises it; it exists so that the
        attribute is available on every file system handler class.
        """

    def __init__(self, root):
        self.root = root

    def config(self):
        "Return the file system configuration for this handler."
        return {'root': self.root}

    def __repr__(self):
        return '{}({})'.format(
            type(self),
            ', '.join('{}={}'.format(k, v) for k, v in self.config().items()))

    def _fn(self, _id, n=2, suffix='.dat'):
        """Return the path of the file that belongs to ``_id``.

        The id is split into consecutive pieces of ``n`` characters,
        each piece becomes one path component below the root and
        ``suffix`` is appended to the result, e.g. ``'abcdef'`` maps
        to ``<root>/ab/cd/ef.dat``.

        :param _id: The file identifier.
        :type _id: str
        :param n: Number of characters per path component.
        :param suffix: String appended to the joined path.
        :returns: The file path as a string.
        """
        chunks = [_id[i:i + n] for i in range(0, len(_id), n)]
        return os.path.join(self.root, *chunks) + suffix

    def new_file(self, _id, mode=None):
        """Create a new file for _id.

        Missing parent directories are created on demand.

        :param _id: The file identifier.
        :type _id: str
        :param mode: The file mode used for opening; it must contain
            ``'x'`` (exclusive creation). Defaults to ``'xb'``.
        :returns: A file-like object to write to.
        :raises ValueError: If ``mode`` does not contain ``'x'``.
        """
        if mode is None:
            mode = 'xb'
        if 'x' not in mode:
            raise ValueError(mode)
        fn = self._fn(_id)
        # exist_ok=True tolerates an already existing directory but still
        # raises when the path exists and is not a directory.
        os.makedirs(os.path.dirname(fn), exist_ok=True)
        return open(fn, mode=mode)

    def get(self, _id, mode='r'):
        """Open the file with the specified id.

        :param _id: The file identifier.
        :type _id: str
        :param mode: The file mode used for opening; it must
            contain ``'r'``.
        :returns: A file-like object to read from.
        :raises ValueError: If ``mode`` does not contain ``'r'``.
        """
        if 'r' not in mode:
            raise ValueError(mode)
        return open(self._fn(_id), mode=mode)
_register_fs_class(LocalFS)
if GRIDFS:
    class GridFS(object):
        """A file system handler for the MongoDB `GridFS`_ file system.

        .. note::

            If the `db` argument is a :class:`str`, no connection is
            made at construction time; the database is looked up by
            that name via ``get_database()`` the first time the
            :attr:`gridfs` property is accessed.

        .. _`GridFS`: http://api.mongodb.org/python/current/api/gridfs/

        :param db: The database used to store the grid.
        :type db: str or :class:`pymongo.database.Database`
        :param collection: Name of the GridFS collection, ``'fs'``
            by default.
        :type collection: str
        """
        name = 'gridfs'
        "General identifier for this file system handler."

        FileExistsError = gridfs.errors.FileExists
        "A file with the specified id already exists."

        FileNotFoundError = gridfs.errors.NoFile
        "A file with the specified id is not found."

        AutoRetry = pymongo.errors.AutoReconnect

        def __init__(self, db, collection='fs'):
            if isinstance(db, str):
                # Only the name is known; the database object is
                # resolved lazily in the `gridfs` property.
                self.db = None
                self.db_name = db
            else:
                self.db = db
                self.db_name = db.name
            self.collection = collection
            self._gridfs = None

        def config(self):
            "Return the file system configuration for this handler."
            return {'db': self.db_name, 'collection': self.collection}

        def __repr__(self):
            return '{}({})'.format(
                type(self),
                ', '.join('{}={}'.format(k, v) for k, v in self.config().items()))

        @property
        def gridfs(self):
            "Instance of :class:`pymongo.gridfs.GridFS`."
            # Created on first access and cached afterwards.
            if self._gridfs is None:
                if self.db is None:
                    self.db = get_database(self.db_name)
                self._gridfs = gridfs.GridFS(
                    self.db, collection=self.collection)
            return self._gridfs

        def new_file(self, _id):
            """Create a new file for _id.

            :param _id: The file identifier.
            :type _id: str
            :returns: A file-like object to write to."""
            return self.gridfs.new_file(_id=_id)

        def get(self, _id, mode='r'):
            """Open the file with the specified id.

            .. warning::

                To avoid compatibility issues, all files are
                opened in text-mode (`r`) by default, however
                for higher efficiency, files should generally
                be opened in binary mode (`rb`) whenever possible.

            In text mode the complete file is read and decoded into an
            in-memory :class:`io.StringIO`; a warning is issued when the
            decoded text is longer than
            ``GRIDFS_LARGE_FILE_WARNING_THRSHLD`` characters.

            :param _id: The file identifier.
            :type _id: str
            :param mode: The file mode used for opening, either
                ``'r'`` or ``'rb'``.
            :returns: A file-like object to read from.
            :raises ValueError: If ``mode`` is neither ``'r'`` nor ``'rb'``.
            """
            if mode == 'r':
                text = self.gridfs.get(_id).read().decode()
                # Measure the decoded text directly instead of calling
                # StringIO.getvalue(), which would build another full
                # copy of a potentially very large string.
                if len(text) > GRIDFS_LARGE_FILE_WARNING_THRSHLD:
                    warnings.warn(
                        "Open large GridFS files more efficiently in 'rb' mode.")
                return io.StringIO(text)
            elif mode == 'rb':
                return self.gridfs.get(file_id=_id)
            else:
                raise ValueError(mode)

    _register_fs_class(GridFS)
@deprecated(deprecated_in="1.3", removed_in="2.0", current_version=__version__,
            details="The filesystems module is deprecated.")
def filesystems_from_config(fs_config):
    """Yield one file system handler per entry of a configuration.

    Each key of ``fs_config`` names a registered file system type and
    the associated value holds the constructor argument(s) for that
    handler class. The value may be a mapping (passed as keyword
    arguments), a non-string iterable (passed as positional arguments)
    or any other single value (passed as the only argument), e.g.:

    .. code-block:: python

        # The following two function calls are equivalent and both
        # generate two file system handler objects:
        filesystems_from_config({
            'localfs': '/path/to/storage',
            'gridfs': ('gridfsdb', 'fs'),
            })
        filesystems_from_config({
            'localfs': {'root': '/path/to/storage'},
            'gridfs': {'db': 'gridfsdb', 'collection': 'fs'}
            })

    See :class:`~.LocalFS` for an example of a file system class.

    :param fs_config: A file system configuration.
    :yields: file system handlers
    :raises KeyError: If a key is not present in the registry.
    """
    for fs_name, ctor_args in fs_config.items():
        handler_cls = FILESYSTEM_REGISTRY[fs_name]
        if isinstance(ctor_args, Mapping):
            handler = handler_cls(**ctor_args)
        elif isinstance(ctor_args, str) or not isinstance(ctor_args, Iterable):
            # Strings are iterable, but are meant as one single argument.
            handler = handler_cls(ctor_args)
        else:
            handler = handler_cls(*ctor_args)
        yield handler
@deprecated(deprecated_in="1.3", removed_in="2.0", current_version=__version__,
            details="The filesystems module is deprecated.")
def filesystems_from_configs(fs_configs):
    """Yield file system handlers from a mixed sequence.

    Every element of ``fs_configs`` is either passed through
    unchanged (anything that is not a mapping is taken to be a ready
    handler) or, if it is a mapping, expanded into handlers by
    :func:`.filesystems_from_config`.

    :param fs_configs: A sequence of file system handlers or
        configurations.
    :yields: file system handlers
    """
    for entry in fs_configs:
        if not isinstance(entry, Mapping):
            yield entry
            continue
        yield from filesystems_from_config(entry)