Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Excel & CSV query runner #2478

Merged
Merged 11 commits on Jul 27, 2021
89 changes: 89 additions & 0 deletions redash/query_runner/csv.py
@@ -0,0 +1,89 @@
import json
import logging
import yaml

from redash.query_runner import *
from redash.utils import JSONEncoder

logger = logging.getLogger(__name__)

try:
import pandas as pd
import numpy as np
enabled = True
except ImportError:
enabled = False


class CSV(BaseQueryRunner):
    """Query runner that loads a CSV file with pandas and serializes it as a result set.

    The query text is a YAML mapping. Its ``url`` key names the file to read;
    every other key is forwarded unchanged to ``pandas.read_csv`` as a
    keyword argument, for example::

        url: https://example.com/data.csv
        sep: ";"
    """

    @classmethod
    def annotate_query(cls):
        """Return False.

        NOTE(review): presumably this keeps the framework from adding
        annotations to the query text, which has to stay valid YAML --
        confirm against BaseQueryRunner.
        """
        return False

    @classmethod
    def type(cls):
        """Return the identifier of this runner type (``"csv"``)."""
        return "csv"

    @classmethod
    def enabled(cls):
        """Return True when the optional pandas/numpy imports succeeded."""
        # Inside a method body the bare name resolves to the module-level
        # ``enabled`` flag set by the guarded import, not to this classmethod.
        return enabled

    @classmethod
    def configuration_schema(cls):
        """Return the configuration schema; this runner has no options."""
        return {
            'type': 'object',
            'properties': {},
        }

    def __init__(self, configuration):
        super(CSV, self).__init__(configuration)
        self.syntax = "csv"

    def test_connection(self):
        """Do nothing: the file location is supplied per query, not per data source."""
        pass

    @staticmethod
    def _parse_query(query):
        """Split the YAML query text into ``(path, read_csv keyword arguments)``.

        Raises:
            ValueError: if the text is not valid YAML, is not a mapping, or
                has no non-empty ``url`` entry.
        """
        try:
            parsed = yaml.safe_load(query or "")
        except yaml.YAMLError as e:
            raise ValueError("Query is not valid YAML: {0}".format(e))

        if not isinstance(parsed, dict) or not parsed.get('url'):
            raise ValueError("Query must be a YAML mapping with a 'url' key.")

        args = dict(parsed)
        path = args.pop('url')
        return path, args

    @staticmethod
    def _to_result(df):
        """Convert a DataFrame into a ``{'columns': [...], 'rows': [...]}`` dict.

        Columns whose dtype matches none of the known conversions are left
        out of the result, as before.
        """
        def format_datetime(value):
            # NaT has no usable strftime(); emit None for missing timestamps
            # instead of failing the whole query.
            if pd.isnull(value):
                return None
            return value.strftime('%Y-%m-%d %H:%M:%S')

        # Order matters: the first matching entry wins.
        # ``np.object_`` replaces the ``np.object`` alias that was deprecated
        # in NumPy 1.20 and removed in 1.24.
        conversions = [
            {'pandas_type': np.integer, 'redash_type': 'integer'},
            {'pandas_type': np.inexact, 'redash_type': 'float'},
            {'pandas_type': np.datetime64, 'redash_type': 'datetime', 'to_redash': format_datetime},
            {'pandas_type': np.bool_, 'redash_type': 'boolean'},
            {'pandas_type': np.object_, 'redash_type': 'string'},
        ]

        data = {'columns': [], 'rows': []}
        labels = []
        for dtype, label in zip(df.dtypes, df.columns):
            for conversion in conversions:
                if not issubclass(dtype.type, conversion['pandas_type']):
                    continue
                data['columns'].append({
                    'name': label,
                    'friendly_name': label,
                    'type': conversion['redash_type'],
                })
                labels.append(label)
                func = conversion.get('to_redash')
                if func:
                    df[label] = df[label].apply(func)
                break

        # NaN is replaced with None so that missing values are emitted as null.
        data['rows'] = df[labels].replace({np.nan: None}).to_dict(orient='records')
        return data

    def run_query(self, query, user):
        """Read the CSV file described by ``query`` and return ``(json_data, error)``.

        Args:
            query: YAML text with a ``url`` key plus optional ``read_csv`` arguments.
            user: not used by this runner.

        Returns:
            A ``(json_data, error)`` tuple. On success ``json_data`` is the
            JSON-encoded result and ``error`` is None; on failure ``json_data``
            is None and ``error`` is a message string.
        """
        try:
            path, args = self._parse_query(query)
        except ValueError as e:
            return None, str(e)

        # NOTE(security): ``path`` and ``args`` come straight from the query
        # text and are handed to pandas unchanged, so anyone allowed to run
        # queries can make the server read any local file or URL that pandas
        # can reach. Restrict access to this data source accordingly.
        try:
            df = pd.read_csv(path, **args)
            if not isinstance(df, pd.DataFrame):
                # e.g. ``chunksize``/``iterator`` make read_csv return a reader.
                raise ValueError("read_csv did not return a single table; "
                                 "remove 'chunksize'/'iterator' from the query.")

            json_data = json.dumps(self._to_result(df), cls=JSONEncoder)
            error = None
        except KeyboardInterrupt:
            error = "Query cancelled by user."
            json_data = None
        except Exception as e:
            error = "Error reading {0}. {1}".format(path, str(e))
            json_data = None

        return json_data, error

# Hand the CSV runner class to `register`, which is pulled in by the star
# import from redash.query_runner.
# NOTE(review): presumably this adds the runner to the list of available
# data source types -- confirm in redash.query_runner.
register(CSV)
91 changes: 91 additions & 0 deletions redash/query_runner/excel.py
@@ -0,0 +1,91 @@
import json
import logging
import yaml

from redash.query_runner import *
from redash.utils import JSONEncoder

logger = logging.getLogger(__name__)

try:
import pandas as pd
import xlrd
import numpy as np
enabled = True
except ImportError:
enabled = False


class Excel(BaseQueryRunner):
    """Query runner that loads an Excel sheet with pandas and serializes it as a result set.

    The query text is a YAML mapping. Its ``url`` key names the workbook to
    read; every other key is forwarded unchanged to ``pandas.read_excel`` as
    a keyword argument, for example::

        url: https://example.com/report.xlsx
        sheet_name: Summary
    """

    @classmethod
    def annotate_query(cls):
        """Return False.

        NOTE(review): presumably this keeps the framework from adding
        annotations to the query text, which has to stay valid YAML --
        confirm against BaseQueryRunner.
        """
        return False

    @classmethod
    def type(cls):
        """Return the identifier of this runner type (``"excel"``)."""
        return "excel"

    @classmethod
    def enabled(cls):
        """Return True when the optional pandas/xlrd/numpy imports succeeded."""
        # Inside a method body the bare name resolves to the module-level
        # ``enabled`` flag set by the guarded import, not to this classmethod.
        return enabled

    @classmethod
    def configuration_schema(cls):
        """Return the configuration schema; this runner has no options."""
        return {
            'type': 'object',
            'properties': {},
        }

    def __init__(self, configuration):
        super(Excel, self).__init__(configuration)
        self.syntax = "excel"

    def test_connection(self):
        """Do nothing: the file location is supplied per query, not per data source."""
        pass

    @staticmethod
    def _parse_query(query):
        """Split the YAML query text into ``(path, read_excel keyword arguments)``.

        Raises:
            ValueError: if the text is not valid YAML, is not a mapping, or
                has no non-empty ``url`` entry.
        """
        try:
            parsed = yaml.safe_load(query or "")
        except yaml.YAMLError as e:
            raise ValueError("Query is not valid YAML: {0}".format(e))

        if not isinstance(parsed, dict) or not parsed.get('url'):
            raise ValueError("Query must be a YAML mapping with a 'url' key.")

        args = dict(parsed)
        path = args.pop('url')
        return path, args

    @staticmethod
    def _to_result(df):
        """Convert a DataFrame into a ``{'columns': [...], 'rows': [...]}`` dict.

        Columns whose dtype matches none of the known conversions are left
        out of the result, as before.
        """
        def format_datetime(value):
            # NaT has no usable strftime(); emit None for missing timestamps
            # instead of failing the whole query.
            if pd.isnull(value):
                return None
            return value.strftime('%Y-%m-%d %H:%M:%S')

        # Order matters: the first matching entry wins.
        # ``np.object_`` replaces the ``np.object`` alias that was deprecated
        # in NumPy 1.20 and removed in 1.24.
        conversions = [
            {'pandas_type': np.integer, 'redash_type': 'integer'},
            {'pandas_type': np.inexact, 'redash_type': 'float'},
            {'pandas_type': np.datetime64, 'redash_type': 'datetime', 'to_redash': format_datetime},
            {'pandas_type': np.bool_, 'redash_type': 'boolean'},
            {'pandas_type': np.object_, 'redash_type': 'string'},
        ]

        data = {'columns': [], 'rows': []}
        labels = []
        for dtype, label in zip(df.dtypes, df.columns):
            for conversion in conversions:
                if not issubclass(dtype.type, conversion['pandas_type']):
                    continue
                data['columns'].append({
                    'name': label,
                    'friendly_name': label,
                    'type': conversion['redash_type'],
                })
                labels.append(label)
                func = conversion.get('to_redash')
                if func:
                    df[label] = df[label].apply(func)
                break

        # NaN is replaced with None so that missing values are emitted as null.
        data['rows'] = df[labels].replace({np.nan: None}).to_dict(orient='records')
        return data

    def run_query(self, query, user):
        """Read the Excel sheet described by ``query`` and return ``(json_data, error)``.

        Args:
            query: YAML text with a ``url`` key plus optional ``read_excel`` arguments.
            user: not used by this runner.

        Returns:
            A ``(json_data, error)`` tuple. On success ``json_data`` is the
            JSON-encoded result and ``error`` is None; on failure ``json_data``
            is None and ``error`` is a message string.
        """
        try:
            path, args = self._parse_query(query)
        except ValueError as e:
            return None, str(e)

        # NOTE(security): ``path`` and ``args`` come straight from the query
        # text and are handed to pandas unchanged, so anyone allowed to run
        # queries can make the server read any local file or URL that pandas
        # can reach. Restrict access to this data source accordingly.
        try:
            df = pd.read_excel(path, **args)
            if not isinstance(df, pd.DataFrame):
                # A list or None for ``sheet_name`` makes read_excel return a
                # dict of frames, which cannot be shown as one table.
                raise ValueError("read_excel did not return a single sheet; "
                                 "set 'sheet_name' to one sheet name or index.")

            json_data = json.dumps(self._to_result(df), cls=JSONEncoder)
            error = None
        except KeyboardInterrupt:
            error = "Query cancelled by user."
            json_data = None
        except Exception as e:
            error = "Error reading {0}. {1}".format(path, str(e))
            json_data = None

        return json_data, error

# Hand the Excel runner class to `register`, which is pulled in by the star
# import from redash.query_runner.
# NOTE(review): presumably this adds the runner to the list of available
# data source types -- confirm in redash.query_runner.
register(Excel)