Skip to content

Commit

Permalink
Presto macros
Browse files Browse the repository at this point in the history
  • Loading branch information
mistercrunch committed Oct 25, 2016
1 parent ac7de6d commit 0570149
Show file tree
Hide file tree
Showing 9 changed files with 190 additions and 22 deletions.
1 change: 1 addition & 0 deletions caravel/assets/javascripts/SqlLab/components/SqlEditor.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ class SqlEditor extends React.Component {
sql: this.props.queryEditor.sql,
sqlEditorId: this.props.queryEditor.id,
tab: this.props.queryEditor.title,
schema: this.props.queryEditor.schema,
tempTableName: this.state.ctas,
runAsync,
ctas,
Expand Down
7 changes: 3 additions & 4 deletions caravel/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,6 @@
INFER_COL_TYPES_SAMPLE_SIZE = 100


# http://pandas.pydata.org/pandas-docs/stable/internals.html#
# subclassing-pandas-data-structures
class CaravelDataFrame(object):
def __init__(self, df):
self.__df = df.where((pd.notnull(df)), None)
Expand Down Expand Up @@ -91,13 +89,14 @@ def datetime_conversion_rate(data_series):


def is_date(dtype):
    """Tell whether a dtype is a datetime type, judging by its name.

    :param dtype: an object exposing a ``name`` attribute (for instance
        a numpy dtype)
    :returns: ``True`` when ``dtype.name`` starts with ``'datetime'``,
        ``False`` otherwise, including when the name is empty or ``None``
    :rtype: bool
    """
    # Guard against a missing name so the answer is always a real boolean
    # rather than an AttributeError or an implicit ``None``.
    return bool(dtype.name) and dtype.name.startswith('datetime')


def is_dimension(dtype, column_name):
    """Tell whether a column should be treated as a dimension.

    :param dtype: the column's dtype; only its ``name`` is inspected
    :param column_name: the column's name, passed to ``is_id``
    :returns: ``False`` when ``is_id(column_name)`` is truthy, otherwise
        whether the dtype is named ``'object'`` or ``'bool'``
    :rtype: bool
    """
    if is_id(column_name):
        return False
    # Compare by name: the ``np.object`` / ``np.bool`` aliases are not
    # available in recent NumPy releases.
    return dtype.name in ('object', 'bool')


def is_id(column_name):
Expand Down
135 changes: 132 additions & 3 deletions caravel/jinja_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,16 @@
import inspect
import jinja2

from copy import copy
from datetime import datetime
import logging
import time
import textwrap
import uuid
import random

from caravel import app
from caravel.utils import CaravelTemplateException

config = app.config

Expand Down Expand Up @@ -41,11 +45,136 @@ def __init__(self, database, query):


class PrestoContext(BaseContext):
"""Presto Jinja context
The methods described here are namespaced under ``presto`` in the
jinja context as in ``SELECT '{{ presto.some_macro_call() }}'``
"""
engine = 'presto'

db_contexes = {
o.engine: o for o in globals().values()
if o and inspect.isclass(o) and issubclass(o, BaseContext)}
@staticmethod
def _partition_query(table_name, limit=0, order_by=None, filters=None):
"""Returns a partition query
:param table_name: the name of the table to get partitions from
:type table_name: str
:param limit: the number of partitions to be returned
:type limit: int
:param order_by: a list of tuples of field name and a boolean
that determines if that field should be sorted in descending
order
:type order_by: list of (str, bool) tuples
:param filters: a list of filters to apply
:param filters: dict of field anme and filter value combinations
"""
limit_clause = "LIMIT {}".format(limit) if limit else ''
order_by_clause = ''
if order_by:
l = []
for field, desc in order_by:
l.append(field + ' DESC' if desc else '')
order_by_clause = 'ORDER BY ' + ', '.join(l)

where_clause = ''
if filters:
l = []
for field, value in filters.items():
l.append("{field} = '{value}'".format(**locals()))
where_clause = 'WHERE ' + ' AND '.join(l)

sql = textwrap.dedent("""\
SHOW PARTITIONS FROM {table_name}
{where_clause}
{order_by_clause}
{limit_clause}
""").format(**locals())
return sql

@staticmethod
def _schema_table(table_name, schema):
for i in range(10):
print([table_name, schema])
if '.' in table_name:
schema, table_name = table_name.split('.')
return table_name, schema

def latest_partition(self, table_name):
    """Returns the latest (max) partition value for a table

    :param table_name: the name of the table, can be just the table
        name or a fully qualified table name as ``schema_name.table_name``
    :type table_name: str
    :raises CaravelTemplateException: when the table does not have
        exactly one partitioned field

    >>> latest_partition('foo_table')
    '2018-01-01'
    """
    table_name, schema = self._schema_table(table_name, self.schema)
    indexes = self.database.get_indexes(table_name, schema)
    # An empty index list is reported the same way as an index without
    # columns, instead of surfacing as a bare IndexError.
    part_fields = indexes[0]['column_names'] if indexes else []
    if len(part_fields) < 1:
        raise CaravelTemplateException(
            "The table should have one partitioned field")
    if len(part_fields) > 1:
        raise CaravelTemplateException(
            "The table should have a single partitioned field "
            "to use this function. You may want to use "
            "`presto.latest_sub_partition`")
    part_field = part_fields[0]
    # Sort descending and keep a single row: that row is the max value.
    sql = self._partition_query(table_name, 1, [(part_field, True)])
    df = self.database.get_df(sql, schema)
    return df.to_records(index=False)[0][0]

def latest_sub_partition(self, table_name, **kwargs):
    """Returns the latest (max) partition value for a table

    A filtering criteria should be passed for all fields that are
    partitioned except for the field to be returned. For example,
    if a table is partitioned by (``ds``, ``event_type`` and
    ``event_category``) and you want the latest ``ds``, you'll want
    to provide a filter as keyword arguments for both
    ``event_type`` and ``event_category`` as in
    ``latest_sub_partition('my_table',
    event_category='page', event_type='click')``

    :param table_name: the name of the table, can be just the table
        name or a fully qualified table name as ``schema_name.table_name``
    :type table_name: str
    :param kwargs: keyword arguments define the filtering criteria
        on the partition list. There can be many of these.
    :type kwargs: str
    :returns: the latest value of the one partitioned field that was
        not filtered on, or ``''`` when the query returns no rows
    :raises CaravelTemplateException: when the table has no partition
        index, when a filter names a field that is not part of the
        partitioning key, or when the number of filters is not exactly
        one less than the number of partitioned fields

    >>> latest_sub_partition('sub_partition_table', event_type='click')
    '2018-01-01'
    """
    table_name, schema = self._schema_table(table_name, self.schema)
    indexes = self.database.get_indexes(table_name, schema)
    if not indexes:
        raise CaravelTemplateException(
            "The table should have one partitioned field")
    part_fields = indexes[0]['column_names']

    for k in kwargs:
        if k not in part_fields:
            msg = "Field [{k}] is not part of the partionning key"
            raise CaravelTemplateException(msg.format(k=k))
    if len(kwargs) != len(part_fields) - 1:
        msg = (
            "A filter needs to be specified for {} out of the "
            "{} fields."
        ).format(len(part_fields) - 1, len(part_fields))
        raise CaravelTemplateException(msg)

    # After the checks above, the field that has no filter is the one
    # whose latest value is requested.
    for field in part_fields:
        if field not in kwargs:
            field_to_return = field

    sql = self._partition_query(
        table_name, 1, [(field_to_return, True)], kwargs)
    df = self.database.get_df(sql, schema)
    if df.empty:
        return ''
    return df.to_dict()[field_to_return][0]


# Registry mapping each context class' ``engine`` attribute to the class
# itself, for every ``BaseContext`` subclass (``BaseContext`` included)
# defined in this module so far.
db_contexes = {}
# Iterate over a list snapshot: ``globals()`` grows while the loop runs
# (the loop variable itself is a new global), and on Python 3
# ``globals().keys()`` is a live view that ``copy`` cannot duplicate.
for o in list(globals().values()):
    if o and inspect.isclass(o) and issubclass(o, BaseContext):
        db_contexes[o.engine] = o


def get_context(engine_name=None):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ class Slice(Base):
druid_datasource_id = Column(Integer, ForeignKey('datasources.id'))
table_id = Column(Integer, ForeignKey('tables.id'))
perm = Column(String(2000))

def upgrade():
bind = op.get_bind()
op.add_column('slices', sa.Column('perm', sa.String(length=2000), nullable=True))
Expand Down
4 changes: 4 additions & 0 deletions caravel/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,10 @@ class NoDataException(CaravelException):
pass


class CaravelTemplateException(CaravelException):
    """Error raised while evaluating a Caravel Jinja template macro."""


def can_access(security_manager, permission_name, view_name):
"""Protecting from has_access failing from missing perms/view"""
try:
Expand Down
2 changes: 1 addition & 1 deletion dev-reqs.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,5 @@ mysqlclient
nose
psycopg2
sphinx
sphinx_bootstrap_theme
sphinx-rtd-theme
sphinxcontrib.youtube
26 changes: 17 additions & 9 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@
import sys
import os
import shlex
import sphinx_bootstrap_theme
import sphinx_rtd_theme
# import sphinx_bootstrap_theme

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
Expand Down Expand Up @@ -51,8 +52,8 @@
master_doc = 'index'

# General information about the project.
project = u'caravel'
copyright = u'2015, Maxime Beauchemin, Airbnb'
project = "Caravel's documentation"
copyright = None
author = u'Maxime Beauchemin'

# The version info for the project you're documenting, acts as replacement for
Expand Down Expand Up @@ -113,20 +114,27 @@

# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
html_theme = 'bootstrap'
html_theme_path = sphinx_bootstrap_theme.get_html_theme_path()
# html_theme = 'bootstrap'
# html_theme_path = sphinx_bootstrap_theme.get_html_theme_path()
html_theme = "sphinx_rtd_theme"
html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]

# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.
html_theme_options = {
# 'bootswatch_theme': 'cosmo',
'navbar_title': 'Caravel Documentation',
'navbar_fixed_top': "false",
'navbar_sidebarrel': False,
'navbar_site_name': "Topics",
#'navbar_title': 'Caravel Documentation',
#'navbar_fixed_top': "false",
#'navbar_sidebarrel': False,
#'navbar_site_name': "Topics",
#'navbar_class': "navbar navbar-left",
}
html_theme_options = {
'collapse_navigation': False,
'display_version': False,
#'navigation_depth': 3,
}

# Add any paths that contain custom themes here, relative to this directory.
#html_theme_path = []
Expand Down
25 changes: 24 additions & 1 deletion docs/index.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,14 @@
Caravel's documentation
'''''''''''''''''''''''

Caravel is a data exploration platform designed to be visual, intuitive
and interactive.

.. image:: _static/img/caravel.jpg

.. warning:: This project used to be name Panoramix and has been renamed
----------------

.. warning:: This project used to be named Panoramix and has been renamed
to Caravel in March 2016

Overview
Expand All @@ -24,6 +32,21 @@ Features
- Integration with most RDBMS through SqlAlchemy
- Deep integration with Druid.io

------

.. image:: https://camo.githubusercontent.com/82e264ef777ba06e1858766fe3b8817ee108eb7e/687474703a2f2f672e7265636f726469742e636f2f784658537661475574732e676966

------

.. image:: https://camo.githubusercontent.com/4991ff37a0005ea4e4267919a52786fda82d2d21/687474703a2f2f672e7265636f726469742e636f2f755a6767594f645235672e676966

------

.. image:: https://camo.githubusercontent.com/a389af15ac1e32a3d0fee941b4c62c850b1d583b/687474703a2f2f672e7265636f726469742e636f2f55373046574c704c76682e676966

------


Contents
---------

Expand Down
10 changes: 7 additions & 3 deletions docs/sqllab.rst
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,14 @@ Feature Overview
- supports defining a "results backend" to persist query results
- A search engine to find queries executed in the past
- Supports templating using the
`Jinja templating language <http://jinja.pocoo.org/docs/dev/>`
`Jinja templating language <http://jinja.pocoo.org/docs/dev/>`_
which allows for using macros in your SQL code


Templating with Jinja
---------------------

.code ::
.. code-block:: sql
SELECT *
FROM some_table
Expand All @@ -49,4 +49,8 @@ Caravel's Jinja context:
- ``random``: ``random``
- more to come!

`Jinja's builtin filters <http://jinja.pocoo.org/docs/dev/templates/#builtin-filters>_` can be also be applied where needed.
`Jinja's builtin filters <http://jinja.pocoo.org/docs/dev/templates/>`_ can also be applied where needed.


.. autoclass:: caravel.jinja_context.PrestoContext
:members:

0 comments on commit 0570149

Please sign in to comment.