New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Improvement: make Python datasource to use the RestrictedPython sandbox #404
Changes from 1 commit
acfa55e
8f4285b
cdad4be
6468757
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,9 +1,44 @@ | ||
import sys | ||
import json | ||
import logging | ||
|
||
from redash.query_runner import * | ||
from redash import models | ||
|
||
import importlib | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
try: | ||
from RestrictedPython import compile_restricted | ||
from RestrictedPython.Guards import safe_builtins | ||
|
||
enabled = True | ||
except ImportError: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No need for this - it's in There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. OK. Will remove it - unless you want to remove it from the requirements file instead? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No, let's keep it in the requirements. Feels harmless enough. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. OK. Will remove the try..except |
||
logger.warning("Missing dependencies. Please install RestrictedPython") | ||
logger.warning("You can use pip: pip install RestrictedPython") | ||
|
||
enabled = False | ||
|
||
ALLOWED_MODULES = {} | ||
|
||
# Custom hooks which controls the way objects/lists/tuples/dicts behave in | ||
# RestrictedPython | ||
def custom_write(obj): | ||
return obj | ||
|
||
def custom_import(name, globals=None, locals=None, fromlist=(), level=0): | ||
if name in ALLOWED_MODULES: | ||
m = None | ||
if ALLOWED_MODULES[name] is None: | ||
m = importlib.import_module(name) | ||
ALLOWED_MODULES[name] = m | ||
else: | ||
m = ALLOWED_MODULES[name] | ||
|
||
return m | ||
|
||
raise Exception("'{0}' is not configured as a supported import module".format(name)) | ||
|
||
def get_query_result(query_id): | ||
try: | ||
|
@@ -19,10 +54,12 @@ def get_query_result(query_id): | |
|
||
return json.loads(query.latest_query_data.data) | ||
|
||
|
||
def execute_query(data_source_name, query): | ||
def execute_query(data_source_name_or_id, query): | ||
try: | ||
data_source = models.DataSource.get(models.DataSource.name==data_source_name) | ||
if type(data_source_name) == int: | ||
data_source = models.DataSource.get(models.DataSource.id==data_source_name_or_id) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. you can use There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Will update the code |
||
else: | ||
data_source = models.DataSource.get(models.DataSource.name==data_source_name_or_id) | ||
except models.DataSource.DoesNotExist: | ||
raise Exception("Wrong data source name: %s." % data_source_name) | ||
|
||
|
@@ -35,6 +72,26 @@ def execute_query(data_source_name, query): | |
# TODO: allow avoiding the json.dumps/loads in same process | ||
return json.loads(data) | ||
|
||
def add_result_column(result, column_name, friendly_name, column_type): | ||
""" Helper function to add columns inside a Python script running in re:dash in an easier way """ | ||
if column_type not in SUPPORTED_COLUMN_TYPES: | ||
raise Exception("'{0}' is not a supported column type".format(column_type)) | ||
|
||
if not "columns" in result: | ||
result["columns"] = [] | ||
|
||
result["columns"].append({ | ||
"name" : column_name, | ||
"friendly_name" : friendly_name, | ||
"type" : column_type | ||
}) | ||
|
||
def add_result_row(result, values): | ||
if not "rows" in result: | ||
result["rows"] = [] | ||
|
||
result["rows"].append(values) | ||
|
||
|
||
class Python(BaseQueryRunner): | ||
""" | ||
|
@@ -45,24 +102,63 @@ def configuration_schema(cls): | |
return { | ||
'type': 'object', | ||
'properties': { | ||
} | ||
'allowedImportModules': { | ||
'type': 'string', | ||
'title': 'Modules to import prior to running the script' | ||
} | ||
}, | ||
} | ||
|
||
@classmethod | ||
def enabled(cls): | ||
return enabled | ||
|
||
@classmethod | ||
def annotate_query(cls): | ||
return False | ||
|
||
def __init__(self, configuration_json): | ||
global ALLOWED_MODULES | ||
|
||
super(Python, self).__init__(configuration_json) | ||
|
||
if "allowedImportModules" in self.configuration and self.configuration["allowedImportModules"]: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Will update the code |
||
for item in self.configuration["allowedImportModules"].split(","): | ||
ALLOWED_MODULES[item] = None | ||
|
||
def run_query(self, query): | ||
try: | ||
error = None | ||
|
||
script_globals = {'get_query_result': get_query_result, 'execute_query': execute_query} | ||
script_locals = {'result': None} | ||
# TODO: timeout, sandboxing | ||
exec query in script_globals, script_locals | ||
code = compile_restricted(query, '<string>', 'exec') | ||
|
||
safe_builtins["_write_"] = custom_write | ||
safe_builtins["__import__"] = custom_import | ||
safe_builtins["_getattr_"] = getattr | ||
safe_builtins["getattr"] = getattr | ||
safe_builtins["_setattr_"] = setattr | ||
safe_builtins["setattr"] = setattr | ||
|
||
script_locals = { "result" : { "rows" : [], "columns" : [] } } | ||
|
||
restricted_globals = dict(__builtins__=safe_builtins) | ||
restricted_globals["get_query_result"] = get_query_result | ||
restricted_globals["execute_query"] = execute_query | ||
restricted_globals["add_result_column"] = add_result_column | ||
restricted_globals["add_result_row"] = add_result_row | ||
|
||
restricted_globals["TYPE_DATETIME"] = TYPE_DATETIME | ||
restricted_globals["TYPE_BOOLEAN"] = TYPE_BOOLEAN | ||
restricted_globals["TYPE_INTEGER"] = TYPE_INTEGER | ||
restricted_globals["TYPE_STRING"] = TYPE_STRING | ||
restricted_globals["TYPE_DATE"] = TYPE_DATE | ||
restricted_globals["TYPE_FLOAT"] = TYPE_FLOAT | ||
|
||
# TODO: Figure out the best way to have a timeout on a script | ||
# One option is to use ETA with Celery + timeouts on workers | ||
# And replacement of worker process every X requests handled. | ||
|
||
exec(code) in restricted_globals, script_locals | ||
|
||
if script_locals['result'] is None: | ||
raise Exception("result wasn't set to value.") | ||
|
@@ -76,4 +172,5 @@ def run_query(self, query): | |
|
||
return json_data, error | ||
|
||
|
||
register(Python) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -25,3 +25,4 @@ gunicorn==18.0 | |
celery==3.1.11 | ||
jsonschema==2.4.0 | ||
click==3.3 | ||
RestrictedPython==3.6.0 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why
dict
and notset
?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
these are strings, AFAIK, using a dict means the "in" operator will use a hash search instead of matching the strings. In this case I think a set is the same as a list (maybe a bit better because it might be sorted).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Interesting point, but does it really matter? Considering we're talking about 6 elements and rarely used function? I think the set variant will be easier to understand.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You are the BDFL :-) will change it to a set.