Skip to content

Commit

Permalink
Excel & CSV query runner (#2478)
Browse files Browse the repository at this point in the history
* Excel query runner

* Param handling for read_excel

* CSV query runner

* Fix wrong module name

* Use yaml as query language

* Use yaml as query language for CSV

* Added icon and required modules

* Local address filtering

* Fix syntax error
  • Loading branch information
deecay committed Jul 27, 2021
1 parent ff7c5e8 commit b9cb819
Show file tree
Hide file tree
Showing 5 changed files with 201 additions and 1 deletion.
Binary file added client/app/assets/images/db-logos/excel.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
100 changes: 100 additions & 0 deletions redash/query_runner/csv.py
@@ -0,0 +1,100 @@
import logging
import yaml
import requests
import io

from redash import settings
from redash.query_runner import *
from redash.utils import json_dumps

logger = logging.getLogger(__name__)

try:
import pandas as pd
import numpy as np
enabled = True
except ImportError:
enabled = False


class CSV(BaseQueryRunner):
    """Query runner that downloads a CSV file over HTTP and returns it as a table.

    The query text is a YAML mapping:

    * ``url`` (required): address of the CSV file to download.
    * ``user-agent`` (optional): value sent in the ``User-agent`` header.
    * any other key is forwarded as a keyword argument to ``pandas.read_csv``
      (``sep`` defaults to ``","`` when not given).
    """

    should_annotate_query = False

    @classmethod
    def name(cls):
        """Return the display name of this query runner."""
        return "CSV"

    @classmethod
    def enabled(cls):
        """Return the module-level flag set by the optional pandas/numpy import."""
        return enabled

    @classmethod
    def configuration_schema(cls):
        """Return the configuration schema; this runner has no settings."""
        return {
            'type': 'object',
            'properties': {},
        }

    def __init__(self, configuration):
        super(CSV, self).__init__(configuration)
        # Queries for this runner are YAML documents rather than SQL.
        self.syntax = "yaml"

    def test_connection(self):
        """Do nothing: there is no fixed endpoint, the URL comes with each query."""
        pass

    def run_query(self, query, user):
        """Download the CSV described by ``query`` and convert it to a result.

        :param query: YAML mapping as text (see the class docstring).
        :param user: unused by this runner.
        :return: ``(json_data, error)``; exactly one of the two is ``None``.
        """
        try:
            args = yaml.safe_load(query)
        except yaml.YAMLError as e:
            return None, "Error parsing query as YAML. {0}".format(str(e))

        if not isinstance(args, dict):
            return None, "Query must be a YAML mapping with at least a 'url' key."

        path = args.pop('url', None)
        ua = args.pop('user-agent', None)
        if not path:
            return None, "Query is missing the required 'url' key."

        try:
            # This check used to sit inside a bare ``except: pass`` block, which
            # swallowed the exception and let private addresses through.
            # Any failure of the check itself is reported as a query error.
            # NOTE(review): is_private_address is expected to be provided by
            # the star import from redash.query_runner - confirm it is exported.
            if settings.ENFORCE_PRIVATE_ADDRESS_BLOCK and is_private_address(path):
                raise Exception("Can't query private addresses.")

            # Only send the header when the query supplies a value for it.
            headers = {"User-agent": str(ua)} if ua else {}
            response = requests.get(url=path, headers=headers)
            # Do not try to parse an HTTP error page as CSV data.
            response.raise_for_status()

            # Default the separator instead of hard-coding it, so a query that
            # passes its own ``sep`` no longer fails with a duplicate keyword.
            args.setdefault('sep', ',')
            df = pd.read_csv(io.BytesIO(response.content), **args)
            if not isinstance(df, pd.DataFrame):
                raise Exception("The given options did not produce a single table.")

            json_data = json_dumps(self._dataframe_to_result(df))
            error = None
        except KeyboardInterrupt:
            error = "Query cancelled by user."
            json_data = None
        except Exception as e:
            error = "Error reading {0}. {1}".format(path, str(e))
            json_data = None

        return json_data, error

    @staticmethod
    def _dataframe_to_result(df):
        """Convert a DataFrame into a ``{'columns': [...], 'rows': [...]}`` dict."""

        def format_timestamp(value):
            # Missing timestamps (NaT) do not support strftime.
            if pd.isnull(value):
                return None
            return value.strftime('%Y-%m-%d %H:%M:%S')

        # (numpy scalar base class, redash type, optional per-value converter).
        # Order matters: the first matching entry wins.
        conversions = [
            (np.integer, 'integer', None),
            (np.inexact, 'float', None),
            (np.datetime64, 'datetime', format_timestamp),
            (np.bool_, 'boolean', None),
            # np.object_ replaces the np.object alias removed in NumPy 1.24.
            (np.object_, 'string', None),
        ]

        df = df.copy()
        columns = []
        for dtype, label in zip(df.dtypes, df.columns):
            # Unrecognised dtypes fall back to 'string' so the column is kept
            # instead of being silently dropped.
            redash_type, to_redash = 'string', None
            for pandas_type, candidate_type, candidate_func in conversions:
                if issubclass(dtype.type, pandas_type):
                    redash_type, to_redash = candidate_type, candidate_func
                    break
            columns.append({'name': label, 'friendly_name': label, 'type': redash_type})
            if to_redash:
                df[label] = df[label].apply(to_redash)

        rows = df.replace({np.nan: None}).to_dict(orient='records')
        return {'columns': columns, 'rows': rows}

    def get_schema(self):
        """Schema browsing is not available for this runner."""
        raise NotSupported()


register(CSV)
96 changes: 96 additions & 0 deletions redash/query_runner/excel.py
@@ -0,0 +1,96 @@
import logging
import yaml
import requests

from redash import settings
from redash.query_runner import *
from redash.utils import json_dumps

logger = logging.getLogger(__name__)

try:
import pandas as pd
import xlrd
import openpyxl
import numpy as np
enabled = True
except ImportError:
enabled = False

class Excel(BaseQueryRunner):
    """Query runner that downloads an Excel workbook over HTTP and returns a sheet as a table.

    The query text is a YAML mapping:

    * ``url`` (required): address of the workbook to download.
    * ``user-agent`` (optional): value sent in the ``User-agent`` header.
    * any other key is forwarded as a keyword argument to ``pandas.read_excel``.
    """

    should_annotate_query = False

    @classmethod
    def enabled(cls):
        """Return the module-level flag set by the optional dependency imports."""
        return enabled

    @classmethod
    def configuration_schema(cls):
        """Return the configuration schema; this runner has no settings."""
        return {
            'type': 'object',
            'properties': {},
        }

    def __init__(self, configuration):
        super(Excel, self).__init__(configuration)
        # Queries for this runner are YAML documents rather than SQL.
        self.syntax = "yaml"

    def test_connection(self):
        """Do nothing: there is no fixed endpoint, the URL comes with each query."""
        pass

    def run_query(self, query, user):
        """Download the workbook described by ``query`` and convert it to a result.

        :param query: YAML mapping as text (see the class docstring).
        :param user: unused by this runner.
        :return: ``(json_data, error)``; exactly one of the two is ``None``.
        """
        # Local import: this module's top-level imports do not include io.
        import io

        try:
            args = yaml.safe_load(query)
        except yaml.YAMLError as e:
            return None, "Error parsing query as YAML. {0}".format(str(e))

        if not isinstance(args, dict):
            return None, "Query must be a YAML mapping with at least a 'url' key."

        path = args.pop('url', None)
        ua = args.pop('user-agent', None)
        if not path:
            return None, "Query is missing the required 'url' key."

        try:
            # This check used to sit inside a bare ``except: pass`` block, which
            # swallowed the exception and let private addresses through.
            # Any failure of the check itself is reported as a query error.
            # NOTE(review): is_private_address is expected to be provided by
            # the star import from redash.query_runner - confirm it is exported.
            if settings.ENFORCE_PRIVATE_ADDRESS_BLOCK and is_private_address(path):
                raise Exception("Can't query private addresses.")

            # Only send the header when the query supplies a value for it.
            headers = {"User-agent": str(ua)} if ua else {}
            response = requests.get(url=path, headers=headers)
            # Do not try to parse an HTTP error page as a workbook.
            response.raise_for_status()

            # Wrap the payload in a file-like object rather than passing raw bytes.
            df = pd.read_excel(io.BytesIO(response.content), **args)
            if not isinstance(df, pd.DataFrame):
                # Anything other than a DataFrame cannot be shown as one table.
                raise Exception("The given options did not produce a single table; select one sheet.")

            json_data = json_dumps(self._dataframe_to_result(df))
            error = None
        except KeyboardInterrupt:
            error = "Query cancelled by user."
            json_data = None
        except Exception as e:
            error = "Error reading {0}. {1}".format(path, str(e))
            json_data = None

        return json_data, error

    @staticmethod
    def _dataframe_to_result(df):
        """Convert a DataFrame into a ``{'columns': [...], 'rows': [...]}`` dict."""

        def format_timestamp(value):
            # Missing timestamps (NaT) do not support strftime.
            if pd.isnull(value):
                return None
            return value.strftime('%Y-%m-%d %H:%M:%S')

        # (numpy scalar base class, redash type, optional per-value converter).
        # Order matters: the first matching entry wins.
        conversions = [
            (np.integer, 'integer', None),
            (np.inexact, 'float', None),
            (np.datetime64, 'datetime', format_timestamp),
            (np.bool_, 'boolean', None),
            # np.object_ replaces the np.object alias removed in NumPy 1.24.
            (np.object_, 'string', None),
        ]

        df = df.copy()
        columns = []
        for dtype, label in zip(df.dtypes, df.columns):
            # Unrecognised dtypes fall back to 'string' so the column is kept
            # instead of being silently dropped.
            redash_type, to_redash = 'string', None
            for pandas_type, candidate_type, candidate_func in conversions:
                if issubclass(dtype.type, pandas_type):
                    redash_type, to_redash = candidate_type, candidate_func
                    break
            columns.append({'name': label, 'friendly_name': label, 'type': redash_type})
            if to_redash:
                df[label] = df[label].apply(to_redash)

        rows = df.replace({np.nan: None}).to_dict(orient='records')
        return {'columns': columns, 'rows': rows}

    def get_schema(self):
        """Schema browsing is not available for this runner."""
        raise NotSupported()


register(Excel)
4 changes: 3 additions & 1 deletion redash/settings/__init__.py
Expand Up @@ -380,7 +380,9 @@ def email_server_is_configured():
"redash.query_runner.cloudwatch",
"redash.query_runner.cloudwatch_insights",
"redash.query_runner.corporate_memory",
"redash.query_runner.sparql_endpoint"
"redash.query_runner.sparql_endpoint",
"redash.query_runner.excel",
"redash.query_runner.csv"
]

enabled_query_runners = array_from_string(
Expand Down
2 changes: 2 additions & 0 deletions requirements_all_ds.txt
Expand Up @@ -37,3 +37,5 @@ python-rapidjson==0.8.0
pyodbc==4.0.28
trino~=0.305
cmem-cmempy==21.2.3
xlrd==2.0.1
openpyxl==3.0.7

0 comments on commit b9cb819

Please sign in to comment.