Excel & CSV query runner #2478

Open · wants to merge 7 commits into master
@@ -0,0 +1,89 @@
import json
import logging
import yaml

from redash.query_runner import *
from redash.utils import JSONEncoder

logger = logging.getLogger(__name__)

try:
    import pandas as pd
    import numpy as np
    enabled = True
except ImportError:
    enabled = False


class CSV(BaseQueryRunner):
    @classmethod
    def annotate_query(cls):
        return False

    @classmethod
    def type(cls):
        return "csv"

    @classmethod
    def enabled(cls):
        return enabled

    @classmethod
    def configuration_schema(cls):
        return {
            'type': 'object',
            'properties': {},
        }

    def __init__(self, configuration):
        super(CSV, self).__init__(configuration)
        self.syntax = "csv"

    def test_connection(self):
        # Nothing to test here; the file is only fetched at query time.
        pass

    def run_query(self, query, user):
        path = ""
        args = {}
        try:
            # The "query" is a YAML document: a `url` key pointing at the CSV
            # file, plus any extra keyword arguments for pandas.read_csv.
            args = yaml.safe_load(query)
            path = args['url']
            args.pop('url', None)
        except Exception:
            # Fall back to an empty path/args; read_csv will report the error below.
            pass
        try:
            workbook = pd.read_csv(path, **args)

            df = workbook.copy()
            data = {'columns': [], 'rows': []}
            # Map pandas dtypes to Redash column types.
            conversions = [
                {'pandas_type': np.integer, 'redash_type': 'integer'},
                {'pandas_type': np.inexact, 'redash_type': 'float'},
                {'pandas_type': np.datetime64, 'redash_type': 'datetime',
                 'to_redash': lambda x: x.strftime('%Y-%m-%d %H:%M:%S')},
                {'pandas_type': np.bool_, 'redash_type': 'boolean'},
                {'pandas_type': np.object, 'redash_type': 'string'}
            ]
            labels = []
            for dtype, label in zip(df.dtypes, df.columns):
                for conversion in conversions:
                    if issubclass(dtype.type, conversion['pandas_type']):
                        data['columns'].append({'name': label, 'friendly_name': label, 'type': conversion['redash_type']})
                        labels.append(label)
                        func = conversion.get('to_redash')
                        if func:
                            df[label] = df[label].apply(func)
                        break
            # NaN has no JSON representation; emit None (null) instead.
            data['rows'] = df[labels].replace({np.nan: None}).to_dict(orient='records')

            json_data = json.dumps(data, cls=JSONEncoder)
            error = None
        except KeyboardInterrupt:
            error = "Query cancelled by user."
            json_data = None
        except Exception as e:
            error = "Error reading {0}. {1}".format(path, str(e))
            json_data = None

        return json_data, error


register(CSV)
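
For review context, a minimal usage sketch of this runner: the query text is YAML with a `url` key, and every other key is forwarded to pandas.read_csv. The URL, separator, and column name below are invented for illustration only.

# Hypothetical smoke test for the CSV runner; the URL and the sep/parse_dates
# values are placeholders, not part of this PR.
runner = CSV(configuration={})
query = """
url: https://example.com/data/sales.csv
sep: ","
parse_dates:
  - order_date
"""
json_data, error = runner.run_query(query, user=None)
print(error or json_data)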
@@ -0,0 +1,91 @@
import json
import logging
import yaml

from redash.query_runner import *
from redash.utils import JSONEncoder

logger = logging.getLogger(__name__)

try:
    import pandas as pd
    import xlrd
    import numpy as np
    enabled = True
except ImportError:
    enabled = False


class Excel(BaseQueryRunner):
    @classmethod
    def annotate_query(cls):
        return False

    @classmethod
    def type(cls):
        return "excel"

    @classmethod
    def enabled(cls):
        return enabled

    @classmethod
    def configuration_schema(cls):
        return {
            'type': 'object',
            'properties': {},
        }

    def __init__(self, configuration):
        super(Excel, self).__init__(configuration)
        self.syntax = "excel"

    def test_connection(self):
        # Nothing to test here; the workbook is only fetched at query time.
        pass

    def run_query(self, query, user):
        path = ""
        args = {}
        try:
            # The "query" is a YAML document: a `url` key pointing at the
            # workbook, plus any extra keyword arguments for pandas.read_excel.
            args = yaml.safe_load(query)
            path = args['url']
            args.pop('url', None)
        except Exception:
            # Fall back to an empty path/args; read_excel will report the error below.
            pass

        try:
            workbook = pd.read_excel(path, **args)

            df = workbook.copy()
            data = {'columns': [], 'rows': []}
            # Map pandas dtypes to Redash column types.
            conversions = [
                {'pandas_type': np.integer, 'redash_type': 'integer'},
                {'pandas_type': np.inexact, 'redash_type': 'float'},
                {'pandas_type': np.datetime64, 'redash_type': 'datetime',
                 'to_redash': lambda x: x.strftime('%Y-%m-%d %H:%M:%S')},
                {'pandas_type': np.bool_, 'redash_type': 'boolean'},
                {'pandas_type': np.object, 'redash_type': 'string'}
            ]
            labels = []
            for dtype, label in zip(df.dtypes, df.columns):
                for conversion in conversions:
                    if issubclass(dtype.type, conversion['pandas_type']):
                        data['columns'].append({'name': label, 'friendly_name': label, 'type': conversion['redash_type']})
                        labels.append(label)
                        func = conversion.get('to_redash')
                        if func:
                            df[label] = df[label].apply(func)
                        break
            # NaN has no JSON representation; emit None (null) instead.
            data['rows'] = df[labels].replace({np.nan: None}).to_dict(orient='records')

            json_data = json.dumps(data, cls=JSONEncoder)
            error = None
        except KeyboardInterrupt:
            error = "Query cancelled by user."
            json_data = None
        except Exception as e:
            error = "Error reading {0}. {1}".format(path, str(e))
            json_data = None

        return json_data, error


register(Excel)
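
Likewise, a hedged usage sketch for the Excel runner, assuming a pandas version that accepts the sheet_name keyword; the URL, sheet name, and skiprows value are placeholders.

# Hypothetical usage of the Excel runner; url, sheet_name, and skiprows are
# illustrative only. Keys other than url go straight to pandas.read_excel.
runner = Excel(configuration={})
query = """
url: https://example.com/reports/q1.xlsx
sheet_name: Summary
skiprows: 2
"""
json_data, error = runner.run_query(query, user=None)
print(error or json_data)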