/
utils.py
322 lines (244 loc) · 10.6 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
import sys
from django.conf import settings
from django.urls import reverse
from django.utils.timezone import now
from django.utils.translation import ugettext_lazy as _
from daiquiri.core.adapter import DatabaseAdapter
from daiquiri.core.utils import human2bytes
from daiquiri.metadata.models import Schema, Table, Column, Function
def get_format_config(format_key):
    """Look up a download format configuration by its key.

    Scans ``settings.QUERY_DOWNLOAD_FORMATS`` in order and returns the first
    entry whose ``'key'`` item equals ``format_key``. Returns ``None`` when
    no entry matches.
    """
    matching_configs = (
        candidate
        for candidate in settings.QUERY_DOWNLOAD_FORMATS
        if candidate['key'] == format_key
    )
    return next(matching_configs, None)
def get_default_table_name():
    """Build a default table name from the current time.

    The name is the value returned by ``now()`` formatted as
    ``year-month-day-hour-minute-second-microsecond``.
    """
    timestamp = now()
    return timestamp.strftime("%Y-%m-%d-%H-%M-%S-%f")
def get_user_schema_name(user):
    """Return the name of the personal schema belonging to ``user``.

    The name is ``settings.QUERY_USER_SCHEMA_PREFIX`` followed by the
    username. A missing (falsy) or anonymous user is mapped to the literal
    username ``'anonymous'``.
    """
    username = user.username if user and not user.is_anonymous else 'anonymous'
    return settings.QUERY_USER_SCHEMA_PREFIX + username
def get_quota(user):
    """Return the quota that applies to ``user``.

    A missing or anonymous user gets the ``'anonymous'`` entry of
    ``settings.QUERY_QUOTA``. Any other user starts from the ``'user'`` entry,
    which is raised (never lowered) by the entry for the username under
    ``'users'`` and by the entries under ``'groups'`` for every group the user
    belongs to. Every configured value is converted with ``human2bytes``.
    """
    if not user or user.is_anonymous:
        return human2bytes(settings.QUERY_QUOTA.get('anonymous'))

    quota = human2bytes(settings.QUERY_QUOTA.get('user'))

    # a per-user entry can only increase the quota
    per_user = settings.QUERY_QUOTA.get('users')
    if per_user:
        quota = max(quota, human2bytes(per_user.get(user.username)))

    # the same holds for each group the user is a member of
    per_group = settings.QUERY_QUOTA.get('groups')
    if per_group:
        for membership in user.groups.all():
            quota = max(quota, human2bytes(per_group.get(membership.name)))

    return quota
def get_max_active_jobs(user):
    """Return the maximum number of active jobs allowed for ``user``.

    A missing or anonymous user gets the ``'anonymous'`` entry of
    ``settings.QUERY_MAX_ACTIVE_JOBS``. Any other user starts from the
    ``'user'`` entry, which is raised (never lowered) by the entry for the
    username under ``'users'`` and by the entries under ``'groups'`` for every
    group the user belongs to. Missing or empty entries count as ``0``.
    """
    limits = settings.QUERY_MAX_ACTIVE_JOBS

    if not user or user.is_anonymous:
        return int(limits.get('anonymous') or 0)

    count = int(limits.get('user') or 0)

    # 'users' maps usernames to limits, so it must stay a mapping; converting
    # it with int() would either raise or discard the per-user overrides
    users = limits.get('users')
    if users:
        # users without an own entry fall back to 0 and leave count untouched
        user_count = int(users.get(user.username) or 0)
        if user_count and user_count > count:
            count = user_count

    # 'groups' maps group names to limits in the same way
    groups = limits.get('groups')
    if groups:
        for group in user.groups.all():
            group_count = int(groups.get(group.name) or 0)
            if group_count and group_count > count:
                count = group_count

    return count
def fetch_user_schema_metadata(user, jobs):
    """Describe the personal schema of ``user`` as a one-element list.

    Each job in ``jobs`` contributes one table entry named after
    ``job.table_name``. When the job has metadata, its ``'columns'`` list is
    attached to the table and every column gets a ``'query_strings'`` item
    holding its own name. The column dicts are the ones stored in
    ``job.metadata``, so they are modified in place.
    """
    schema_name = get_user_schema_name(user)

    tables = []
    for job in jobs:
        entry = {
            'name': job.table_name,
            'query_strings': [schema_name, job.table_name]
        }

        if job.metadata:
            job_columns = job.metadata.get('columns', {})
            entry['columns'] = job_columns
            for job_column in job_columns:
                job_column['query_strings'] = [job_column['name']]

        tables.append(entry)

    user_schema = {
        'order': sys.maxsize,
        'name': schema_name,
        'query_strings': [schema_name],
        'description': _('Your personal schema'),
        'tables': tables
    }
    return [user_schema]
def get_indexed_objects():
    """Collect the indexed columns of all columns that have ``index_for`` set.

    Returns a dict that is empty when no such column exists, and otherwise
    has the single key ``'spoint'`` holding the ``indexed_columns`` value of
    each matching column.
    """
    indexed_objects = {}

    for column in Column.objects.exclude(index_for=''):
        # TODO implement xtype 'spoint' properly: the entries should be keyed
        # by column.datatype, for now everything is filed under 'spoint'
        indexed_objects.setdefault('spoint', []).append(column.indexed_columns)

    return indexed_objects
def check_permissions(user, keywords, tables, columns, functions):
    """Check whether ``user`` may use the given tables, columns and functions.

    ``tables`` is iterated as ``(schema_name, table_name)`` pairs, ``columns``
    as ``(schema_name, table_name, column_name)`` triples and ``functions`` as
    function names. Objects in the user's own schema are always allowed.
    Columns are only checked when the table checks produced no message;
    functions are always checked.

    Returns a list of error messages without duplicates (in no particular
    order). An empty list means that no problem was found.
    """
    messages = []

    # the name of the user's own schema is the same for every check below
    user_schema_name = get_user_schema_name(user)

    # check keywords against whitelist
    # (not implemented: the loop performs no check; it is kept so that the
    # argument is still required to be iterable)
    for keyword in keywords:
        pass

    # loop over tables to check permissions on schemas/tables
    for schema_name, table_name in tables:
        if schema_name is None:
            # schema_name must not be null, move to next table
            messages.append(_('No schema given for table %s.') % table_name)
            continue

        if schema_name == user_schema_name:
            # all tables are allowed, move to next table
            continue

        # check permissions on the schema
        try:
            schema = Schema.objects.filter_by_access_level(user).get(name=schema_name)
        except Schema.DoesNotExist:
            # schema not found or not allowed, move to next table
            messages.append(_('Schema %s not found.') % schema_name)
            continue

        if table_name is None:
            # table_name must not be null, move to next table
            messages.append(_('No table given for schema %s.') % schema_name)
            continue

        # check permissions on the table
        try:
            Table.objects.filter_by_access_level(user).filter(schema=schema).get(name=table_name)
        except Table.DoesNotExist:
            # table not found or not allowed
            messages.append(_('Table %s not found.') % table_name)

    # loop over columns to check permissions or just to see if they are there,
    # but only if no error messages were appended so far
    if not messages:
        for schema_name, table_name, column_name in columns:
            if (schema_name in (None, user_schema_name)
                    or table_name is None
                    or column_name is None):
                # doesn't need to be checked, move to next column
                continue

            if not settings.METADATA_COLUMN_PERMISSIONS:
                # just check if the column exists, the asterisk is always fine
                if column_name != '*':
                    try:
                        Column.objects.filter(table__schema__name=schema_name) \
                                      .filter(table__name=table_name) \
                                      .get(name=column_name)
                    except Column.DoesNotExist:
                        messages.append(_('Column %s not found.') % column_name)
                continue

            # column permissions are enabled: schema and table need to be
            # accessible before the column itself is looked at
            try:
                schema = Schema.objects.filter_by_access_level(user).get(name=schema_name)
            except Schema.DoesNotExist:
                messages.append(_('Schema %s not found.') % schema_name)
                continue

            try:
                table = Table.objects.filter_by_access_level(user).filter(schema=schema).get(name=table_name)
            except Table.DoesNotExist:
                messages.append(_('Table %s not found.') % table_name)
                continue

            if column_name == '*':
                # the asterisk is only allowed if the columns accessible to the
                # user are exactly the columns reported by the database adapter
                accessible_columns = Column.objects.filter_by_access_level(user).filter(table=table)
                actual_columns = DatabaseAdapter().fetch_columns(schema_name, table_name)

                accessible_names = {accessible.name for accessible in accessible_columns}
                actual_names = {actual['name'] for actual in actual_columns}

                if accessible_names != actual_names:
                    messages.append(_('The asterisk (*) is not allowed for this table.'))
            else:
                try:
                    Column.objects.filter_by_access_level(user).filter(table=table).get(name=column_name)
                except Column.DoesNotExist:
                    messages.append(_('Column %s not found.') % column_name)

    # check permissions on functions
    for function_name in functions:
        queryset = Function.objects.filter(name=function_name)

        # forbid the function if it is in metadata.functions, and the user doesn't have access.
        if queryset and not queryset.filter_by_access_level(user):
            messages.append(_('Function %s is not allowed.') % function_name)

    # return the error stack without duplicates
    return list(set(messages))
def get_job_sources(job):
    """Return metadata for the tables listed in ``job.metadata['tables']``.

    Each ``(schema_name, table_name)`` pair is looked up in the metadata
    store. Tables that are found yield a dict with the schema and table name,
    title, description, attribution, license, doi and the url of the table's
    metadata page. Tables that are not found are skipped.

    Returns an empty list when the job has no metadata or no ``'tables'``
    entry.
    """
    sources = []

    # metadata may be empty or None; treat both like a job without tables
    metadata = job.metadata or {}

    for schema_name, table_name in metadata.get('tables') or []:
        # fetch additional metadata from the metadata store
        try:
            original_table = Table.objects.get(
                name=table_name,
                schema__name=schema_name
            )
        except Table.DoesNotExist:
            # unknown tables are left out of the sources
            continue

        sources.append({
            'schema_name': schema_name,
            'table_name': table_name,
            'title': original_table.title,
            'description': original_table.description,
            'attribution': original_table.attribution,
            'license': original_table.license,
            'doi': original_table.doi,
            'url': reverse('metadata:table', args=[schema_name, table_name])
        })

    return sources
def get_job_column(job, display_column_name):
    """Return the stored metadata of one display column of ``job``.

    ``job.metadata['display_columns'][display_column_name]`` is unpacked as
    ``(schema_name, table_name, column_name)`` and used to look up the
    matching ``Column``.

    Returns a dict with the column's name, description, unit, ucd, utype,
    datatype, arraysize, principal and std values, plus ``'indexed'`` which
    is always ``False``. Returns an empty dict when the display column cannot
    be resolved or the ``Column`` does not exist.
    """
    try:
        schema_name, table_name, column_name = \
            job.metadata['display_columns'][display_column_name]
    except (ValueError, KeyError, TypeError):
        # KeyError: no such entry, ValueError: entry is not a triple,
        # TypeError: metadata (or the entry itself) is None / not a container
        return {}

    try:
        column = Column.objects.get(
            name=column_name,
            table__name=table_name,
            table__schema__name=schema_name
        )
    except Column.DoesNotExist:
        return {}

    return {
        'name': column.name,
        'description': column.description,
        'unit': column.unit,
        'ucd': column.ucd,
        'utype': column.utype,
        'datatype': column.datatype,
        'arraysize': column.arraysize,
        'principal': column.principal,
        'indexed': False,
        'std': column.std
    }
def get_job_columns(job):
    """Return the column descriptions for the result table of ``job``.

    For a job in phase ``PHASE_COMPLETED`` the columns are fetched through the
    database adapter and each one is merged over the metadata returned by
    ``get_job_column`` (values from the database take precedence). For any
    other phase the list is built from ``job.metadata['display_columns']``
    alone.
    """
    if job.phase != job.PHASE_COMPLETED:
        return [
            get_job_column(job, display_column)
            for display_column in job.metadata['display_columns']
        ]

    merged_columns = []
    for database_column in DatabaseAdapter().fetch_columns(job.schema_name, job.table_name):
        merged = get_job_column(job, database_column['name'])
        merged.update(database_column)
        merged_columns.append(merged)
    return merged_columns