<!-- WARNING: THIS FILE WAS AUTOGENERATED! DO NOT EDIT! -->

## Utilities

In [1]:
#| echo: false
#| output: asis
show_doc(exec)

---

### exec

>      exec (obj, **kwargs)

In [2]:
#| echo: false
#| output: asis
show_doc(to_sql)

---

### to_sql

>      to_sql (l)

In [3]:
#| echo: false
#| output: asis
show_doc(quote_symbol)

---

### quote_symbol

>      quote_symbol (quote)

generate quote symbol to use for tables and columns

## Delay the Execution of Functions

In [4]:
#| echo: false
#| output: asis
show_doc(delayed_func)

---

### delayed_func

>      delayed_func (func)

return Delayed function

In [5]:
#| echo: false
#| output: asis
show_doc(DelayedFunc)

---

### DelayedFunc

>      DelayedFunc (func, args, kwargs, order=None)

Delay the execution of stored function until exec is run.

## Delay the Execution of Methods

However, this is not enough for our purpose. We also need the ability to delay all instance methods until `.exec` is called. To expand this functionality, we introduce `DelayedPipeline` and the decorator `@delayed_methods`.

In [None]:
@delayed_func
def add_months(column, num, dialect='sql'):
    "Render a 'shift `column` by `num` months' expression; dialects other than 'sql'/'snowflake' yield None."
    if dialect == 'snowflake':
        return f'MONTH_ADD({column}, {num})'
    if dialect == 'sql':
        return f'DATE_ADD(month, {num}, {column})'
    # Any other dialect falls through and returns None, as before.
    return None


@delayed_func
def to_date(date, format=None, dialect='sql'):
    "Render `TO_DATE('<date>'[, '<format>'])` for the 'sql' and 'snowflake' dialects; None otherwise."
    if dialect not in ('sql', 'snowflake'):
        return None
    if format is not None:
        return f"TO_DATE('{date}', '{format}')"
    return f"TO_DATE('{date}')"
    

# The same delayed expression is rendered differently depending on the
# dialect handed to `.exec`; the nested `to_date` call is rendered too.
test_eq(
    add_months(to_date('2020-01-01'), 1).exec(dialect='snowflake'),
    "MONTH_ADD(TO_DATE('2020-01-01'), 1)")
test_eq(
    add_months(to_date('2020-01-01'), 1).exec(dialect='sql'),
    "DATE_ADD(month, 1, TO_DATE('2020-01-01'))")

In [6]:
#| echo: false
#| output: asis
show_doc(patch_method)

---

### patch_method

>      patch_method (func=None, dialect='sql')

In [7]:
#| echo: false
#| output: asis
show_doc(patch_to)

---

### patch_to

>      patch_to (func, dialect='sql')

In [8]:
#| echo: false
#| output: asis
show_doc(union2tuple)

---

### union2tuple

>      union2tuple (t)

In [9]:
#| echo: false
#| output: asis
show_doc(eval_type)

---

### eval_type

>      eval_type (t)

`eval` a type or collection of types, if needed, for annotations in py3.10+

In [10]:
#| echo: false
#| output: asis
show_doc(DelayedPipeline)

---

### DelayedPipeline

>      DelayedPipeline ()

Execute delayed methods in self.dp in order when .exec is called.

In [11]:
#| echo: false
#| output: asis
show_doc(as_)

---

### as_

>      as_ (alias)

In [None]:
# Append a bare column name to `p.ress`, then check that `as_` renders
# "<value> AS <alias>" when executed with the 'sql' dialect.
p = DelayedPipeline()
p.ress.append('field')
test_eq(p.as_('f').exec(dialect='sql'), 'field AS f')

In [None]:
@patch_method(dialect='snowflake')
def as_(self: DelayedPipeline, alias):
    "Snowflake variant of `as_`: remember the alias, then render `<res> AS <alias>2`."
    self.alias = alias
    rendered = f"{self.res} AS {alias}2"
    return rendered

# The snowflake variant is looked up under the method name in the
# class-level `dic`, keyed by dialect.
dic2 = DelayedPipeline.dic
test_eq(dic2['as_']['dialect'].get('snowflake', None), as_)

# Executing with dialect='snowflake' runs the patched variant
# (note the trailing "2" in the expected output).
p = DelayedPipeline()
p.ress.append('field')
test_eq(p.as_('f').exec(dialect='snowflake'), 'field AS f2')

`_order_dp` re-orders `self.dp` by each `DelayedFunc`'s `.order` attribute.
Items whose `.order` is `None` keep their relative order and are placed after all ordered items.

In [None]:
def f(*args, **kwargs):
    "Return the first positional argument; keyword arguments are accepted and ignored."
    first = args[0]
    return first

# Two delayed calls with explicit orders, one with order=None and one
# relying on the default value of `order`.
a = DelayedFunc(f, ('a',0), {}, order=2)
b = DelayedFunc(f, ('b',0), {}, order=1)
c = DelayedFunc(f, ('c',0), {}, order=None)
d = DelayedFunc(f, ('d',0), {})

p = DelayedPipeline()
p.dp = [a, b, c, d]
p._order_dp()

# Ordered items come first (ascending); unordered ones keep their original
# relative order at the end.
test_eq(
    [item.args for item in p.dp],
    [('b', 0), ('a', 0), ('c', 0), ('d', 0)]
)

## Arithmetic Expression

In [12]:
#| echo: false
#| output: asis
show_doc(Following)

---

### Following

>      Following (N=None)

Initialize self.  See help(type(self)) for accurate signature.

In [13]:
#| echo: false
#| output: asis
show_doc(Preceding)

---

### Preceding

>      Preceding (N=None)

Initialize self.  See help(type(self)) for accurate signature.

In [14]:
#| echo: false
#| output: asis
show_doc(OverClause)

---

### OverClause

>      OverClause (expression)

Initialize self.  See help(type(self)) for accurate signature.

In [15]:
#| echo: false
#| output: asis
show_doc(Criteria)

---

### Criteria

>      Criteria (this, op, other)

Execute delayed methods in self.dp in order when .exec is called.

In [16]:
#| echo: false
#| output: asis
show_doc(ArithmeticExpression)

---

### ArithmeticExpression

>      ArithmeticExpression (this, op, other)

Execute delayed methods in self.dp in order when .exec is called.

In [17]:
#| echo: false
#| output: asis
show_doc(Field)

---

### Field

>      Field (name=None, window_func=True)

Execute delayed methods in self.dp in order when .exec is called.

In [None]:
# Arithmetic and comparison operators on `Field` build delayed expressions;
# `&` and `|` combine the resulting criteria. The rendered strings are shown
# as the cell output below.
a = Field('a')
b = Field('b')
aa = (a + 1>2) & ((b-1<=10) | (b>100))
aa.exec()
(a+1<13).exec()

'a + 1 > 2 and (b - 1 <= 10 or b > 100)'

'a + 1 < 13'

In [None]:
# Nested arithmetic operands are parenthesised in the rendered SQL, and
# `&` / `|` render as `and` / `or`.
a = Field('a')
b = Field('b')
test_eq(((a + 1)/3).exec(), '(a + 1) / 3')
test_eq(((a + 1)/(b - 4)).exec(), '(a + 1) / (b - 4)')
test_eq(((a + 1>2) & ((b-1<10) | (b>23)) ).exec(), 
        'a + 1 > 2 and (b - 1 < 10 or b > 23)')

In [None]:
# Window clause built from a raw expression: PARTITION BY, ORDER BY and a
# ROWS frame from 3 PRECEDING to the current row.
test_eq(
    OverClause('SUM(col1)').over('col0').orderby('col2').rows(Preceding(3), CURRENT_ROW).exec(), 
    'SUM(col1) OVER (PARTITION BY col0 ORDER BY col2 ROWS BETWEEN 3 PRECEDING AND CURRENT_ROW)')

# The same chain starting from an arithmetic expression; `Preceding()` with
# no argument renders as UNBOUNDED PRECEDING.
test_eq(
    ((Field('col2')+2)/10).over('col1').orderby('col3').range(Preceding(), Following(2)).exec(),
    '(col2 + 2) / 10 OVER (PARTITION BY col1 ORDER BY col3 RANGE BETWEEN UNBOUNDED PRECEDING AND 2 FOLLOWING)'
)

## Table

In [None]:
class Table:
    """A named SQL table whose unknown attributes resolve to qualified `Field`s.

    `Table('tbl').col` yields `Field('tbl.col')`; after `as_('a')` the same
    access yields `Field('a.col')`. `exec` renders the table reference itself.
    """

    def __init__(self, name) -> None:
        store_attr()
        # Until `as_` is called, columns are qualified with the table name.
        self.alias = name

    def as_(self, alias):
        "Set the alias used to qualify columns (in place) and return `self` for chaining."
        self.alias = alias
        return self

    def __getattr__(self, __name: str):
        """Return `Field('<alias>.<name>')` for any attribute not found normally.

        Raises:
            AttributeError: for dunder names (so protocol probes such as
                `hasattr(obj, '__slots__')` behave normally) and for
                `name`/`alias` themselves.
        """
        # `__getattr__` only runs when normal lookup fails, so reaching it with
        # 'name' or 'alias' means the instance is not initialised yet; building
        # a Field would read `self.alias` and recurse without bound.
        if __name.startswith('__') or __name in ('name', 'alias'):
            raise AttributeError(
                f"{type(self).__name__!r} object has no attribute {__name!r}")
        return Field(f"{self.alias}.{__name}")

    def exec(self, **kwargs):
        "Render `<name> as <alias>` when an alias differs from the name, else just `<name>`; kwargs are ignored."
        if self.alias != self.name:
            return f"{self.name} as {self.alias}"
        return self.name

In [None]:
# `as_` mutates the table in place, so the column accessed afterwards is
# qualified with the new alias (see the cell output below).
vw = Table('vw')
vw.as_('a').exec()
(vw.column + 2 > 1).exec()

'vw as a'

'a.column + 2 > 1'

## Functions

In [None]:
def _kwargs_func(func, *args, **kwargs):
    "Allow arbitrary kwargs. Only pass those kwargs that are specified in func to func."
    sig = inspect.signature(func)
    param = sig.parameters
    func_kwargs = {k:v for k, v in param.items() if v.default!=inspect._empty}
    kwargs = {k:v for k, v in kwargs.items() if k in func_kwargs}
    return func(*args, **kwargs)


def custom_func(func=None, window_func=False, dialect=None):
    """Decorator turning a SQL-rendering function into a factory of delayed `Field`s.

    Calling the decorated function does not render anything: it returns a
    `Field(window_func=window_func)` with one `DelayedFunc` appended to its
    `dp`, holding the call's arguments for later execution.

    Usable bare (`@custom_func`) or with options (`@custom_func(dialect=...)`).

    Args:
        func: the function to wrap; `None` when the decorator is called with
            options, in which case a partially-applied decorator is returned.
        window_func: forwarded to `Field(window_func=...)`.
        dialect: when `None`, `func` is the generic implementation and is
            stored in the `DelayedFunc` as-is. Otherwise `func` is used only
            when the execution-time `dialect` keyword equals this value; any
            other dialect is delegated to the function that was bound to the
            same name in this module when the decorator ran.

    Returns:
        The wrapper function (or a `partial` of `custom_func` when `func` is None).

    Raises:
        NotImplementedError: at execution time, when the dialect does not match
            and there is no earlier `custom_func` definition to fall back to.
    """
    if func is None:
        return partial(custom_func, window_func=window_func, dialect=dialect)

    if dialect is None:
        delayed_target = func

        def resolver(*args, **kwargs):
            # Entry point for later dialect-specific overrides; tolerant of
            # kwargs that `func` does not declare.
            return _kwargs_func(func, *args, **kwargs)
    else:
        # Capture the earlier definition NOW: once decoration finishes, the
        # module-level name is rebound to the wrapper returned below, so a
        # call-time lookup would only find this override again.
        # NOTE(review): `globals()` is this module's namespace, so only
        # definitions living in this module can serve as the fallback.
        previous = getattr(globals().get(func.__name__), '_resolver', None)

        def resolver(*args, **kwargs):
            if kwargs.get('dialect') == dialect:
                return _kwargs_func(func, *args, **kwargs)
            if previous is None:
                raise NotImplementedError(
                    f"{func.__name__!r} is only defined for dialect {dialect!r}; "
                    f"got dialect {kwargs.get('dialect')!r}")
            return previous(*args, **kwargs)

        delayed_target = resolver

    def wrapper(*args, **kwargs):
        f = Field(window_func=window_func)
        f.dp.append(DelayedFunc(delayed_target, args, kwargs))
        return f

    # Exposed so that a later override of the same name can delegate to it.
    wrapper._resolver = resolver
    return wrapper


class CustomFunction(Field):
    """A reusable template for a SQL function call with a fixed number of arguments.

    `CustomFunction('DATE_DIFF', ['interval', 'start_date', 'end_date'])`
    creates the template; calling it with concrete arguments returns a delayed
    expression rendering as `DATE_DIFF(<arg>, <arg>, <arg>)`.
    """

    def __init__(self, func_name, args) -> None:
        super().__init__()
        self.func_name = func_name
        # Names of the expected arguments; only their count is checked.
        self.args = args

    def __call__(self, *args):
        """Bind `args` to the function and return a delayed expression.

        A fresh instance is returned for every call so that two calls of the
        same template (e.g. both sides of a comparison) do not share one `dp`.

        Raises:
            ValueError: if the number of arguments differs from `len(self.args)`.
        """
        if len(args) != len(self.args):
            raise ValueError(f"The number of args provided {len(args)} is not the same as the number of args expected by this function ({len(self.args)})!")

        func_name = self.func_name

        def func(*args, **_exec_kwargs):
            # Arguments may be non-strings (e.g. numbers), so stringify before
            # joining. Execution-time kwargs such as `dialect` are not needed.
            return f"{func_name}({', '.join(map(str, args))})"

        bound = type(self)(self.func_name, self.args)
        bound.dp.append(DelayedFunc(func, args, {}))
        return bound


@custom_func
def add_months(column, num, dialect='sql'):
    "Render a 'shift `column` by `num` months' expression; dialects other than 'sql'/'snowflake' yield None."
    if dialect == 'snowflake':
        return f'MONTH_ADD({column}, {num})'
    if dialect == 'sql':
        return f'DATE_ADD(month, {num}, {column})'
    # Any other dialect falls through and returns None, as before.
    return None

In [None]:
# Declare a three-argument SQL function once, then call it like a Python
# function; `Field` arguments are rendered by name in the output.
date_diff = CustomFunction('DATE_DIFF', ['interval', 'start_date', 'end_date'])
test_eq(date_diff('month', Field('date1'), Field('date2')).exec(), 'DATE_DIFF(month, date1, date2)')

In [None]:
# The result of a `custom_func` takes part in arithmetic/comparison
# expressions, and the dialect chosen at `.exec` time selects the rendering.
test_eq((add_months("col1", 3)-2 > 2).exec(), 'DATE_ADD(month, 3, col1) - 2 > 2')
test_eq((add_months("col1", 3)-2 > 2).exec(dialect='snowflake'), 'MONTH_ADD(col1, 3) - 2 > 2')

In [18]:
# Scratch cell: shows which parameters of a signature carry a default value,
# i.e. the selection performed inside `_kwargs_func`.
def test_func(s, se, ksew=2):
    "Dummy function with two required parameters and one defaulted parameter."
    return 0

import inspect
sig = inspect.signature(test_func)
param = sig.parameters
# Only `ksew` has a default, so it is the only entry kept (see the output below).
{k:v for k, v in param.items() if v.default!=inspect._empty}

{'ksew': <Parameter "ksew=2">}

In [None]:
# Register an Athena-specific `add_months`; it declares no `dialect`
# parameter of its own.
@custom_func(dialect='athena')
def add_months(column, num):
    "Render `DATE_ADD('month', <num>, <column>)` with a quoted unit."
    return f"DATE_ADD('month', {num}, {column})"

# Executed with the matching dialect, the Athena rendering is used.
(add_months("col1", 5)).exec(dialect='athena')

"DATE_ADD('month', 5, col1)"

## Case

In [None]:
class Case:
    """Builder for a SQL `CASE ... END` expression.

    Clauses are collected in `self.dp` as tuples, `('WHEN', condition, result)`
    or `('ELSE', result)`, and rendered one per line by `exec`.
    """

    def __init__(self) -> None:
        self.dp = []
        self.alias = None

    def check_prev(self, statement):
        "Return True if the most recently added clause is of kind `statement` ('WHEN' or 'ELSE')."
        # Look at the LAST clause: the first one tells nothing about what the
        # next clause would follow.
        return bool(self.dp) and self.dp[-1][0] == statement

    def when(self, q, then):
        """Append `WHEN q THEN then` and return `self`.

        Raises:
            ValueError: if the previous clause is an ELSE.
        """
        if self.check_prev('ELSE'):
            raise ValueError("'WHEN' can not follow 'ELSE'!")
        self.dp.append(('WHEN', q, then))
        return self

    def else_(self, q):
        "Append `ELSE q` and return `self`."
        self.dp.append(('ELSE', q))
        return self

    def as_(self, alias):
        "Set the alias rendered as `END AS <alias>` and return `self`."
        self.alias = alias
        return self

    def _as(self, alias):
        "Original spelling of `as_`, kept for existing callers."
        return self.as_(alias)

    def exec(self, **kwargs):
        "Render the expression; conditions and results are resolved with the module-level `exec`, which receives `kwargs`."
        sql = ["CASE"]
        for item in self.dp:
            if item[0] == 'WHEN':
                q_resolved = f"WHEN {exec(item[1], **kwargs)} THEN {exec(item[2], **kwargs)}"
            else:
                q_resolved = f"ELSE {exec(item[1], **kwargs)}"
            sql.append(q_resolved)
        sql.append(f"END AS {self.alias}" if self.alias else "END")
        return '\n'.join(sql)

In [None]:
# Each clause is rendered on its own line; conditions built from table
# columns are rendered with the table-qualified column name.
a = Table('tbl')
test_eq(Case().when(a.column1>3, True).else_(False).exec(), 
        'CASE\nWHEN tbl.column1 > 3 THEN True\nELSE False\nEND')
test_eq(Case().when(a.column1>3, 1).when(a.column1<1, -1).else_(0).exec(), 
        'CASE\nWHEN tbl.column1 > 3 THEN 1\nWHEN tbl.column1 < 1 THEN -1\nELSE 0\nEND')

## Query

In [None]:
class Query:
    "Entry point for building queries: each classmethod starts a new `SelectQuery`."

    @staticmethod
    def _begin(key, value):
        "Create a `SelectQuery` whose `dic` holds the single entry `key -> value`."
        started = SelectQuery()
        started.dic[key] = value
        return started

    @classmethod
    def from_(cls, query):
        "Start a query with `query` as its FROM source."
        return cls._begin('from', query)

    @classmethod
    def with_(cls, query, alias):
        "Start a query with one WITH entry, stored as the pair `(query, alias)`."
        return cls._begin('with', [(query, alias)])


class QueryBase:
    "Marker base class: `SelectQuery` checks `isinstance(q, QueryBase)` to parenthesise sub-queries and to validate union operands."


class Joiner(QueryBase):
    "Half-built join: holds the owning select query, the joined source and the join type until `on` supplies the condition."

    def __init__(self, select_query, query, how=None) -> None:
        store_attr()

    def on(self, condition):
        "Record `(query, how, condition)` under the owner's 'join' entry and return the owning select query."
        owner = self.select_query
        joins = owner.dic.setdefault('join', [])
        joins.append((self.query, self.how, condition))
        return owner


class SelectQuery(QueryBase):
    """Fluent builder for a SELECT statement.

    Clause setters (`from_`, `where`, `having`, `limit`, `groupby`, `orderby`,
    `with_`, `select`) and per-clause renderers (`exec_<clause>`) are not
    defined as methods; they are synthesised by `__getattr__`. Clause contents
    are stored in `self.dic`, keyed by the lower-case clause keyword.
    """

    # Clauses holding a single value, rendered as "<keyword> <value>".
    keys_simple = ['from', 'group by', 'order by', 'where', 'having', 'limit']
    # Order in which clauses are rendered by `exec`.
    keys_parse = ['with', 'from', 'join', 'on', 'where', 'group by', 'having', 'order by', 'select', 'limit']
    # Order in which rendered clauses appear in the final SQL text.
    # NOTE(review): nothing in this class stores an 'on' entry (join conditions
    # live inside the 'join' entries) - confirm whether the key is still needed.
    keys_sql = ['with', 'select', 'from', 'join', 'on', 'where', 'group by', 'having', 'order by', 'limit']
    # Method-name spellings mapped to the clause keyword they stand for.
    key_translate = {
        'groupby': 'group by',
        'orderby': 'order by'
    }

    def __init__(self) -> None:
        super().__init__()
        self.dic = {}

    def join(self, query, how=None):
        "Begin a join with `query`; call `.on(condition)` on the result to complete it."
        return Joiner(self, query, how)

    @staticmethod
    def _resolve(q, **kwargs):
        "Render `q` via its `exec(**kwargs)` when it has one, else via `str(q)`."
        if getattr(q, 'exec', None):
            return q.exec(**kwargs)
        return str(q)

    def __getattr__(self, __name):
        """Synthesise clause setters and `exec_<clause>` renderers.

        Leading/trailing underscores are ignored (so `from_` and `with_` work
        despite being Python keywords), and `groupby`/`orderby` are mapped to
        their clause keywords through `key_translate`.

        Raises:
            AttributeError: if the name is neither a known clause nor `exec_*`.
        """
        attr = __name.strip('_')
        attr = self.key_translate.get(attr, attr)

        if attr.startswith('exec'):
            # exec_<key>: render one stored clause.
            key = attr.split('_')[-1]
            key = self.key_translate.get(key, key)

            def inner(**kwargs):
                if key in self.keys_simple:
                    q = self.dic[key]
                    rendered = self._resolve(q, **kwargs)
                    # Sub-queries must be parenthesised.
                    if isinstance(q, QueryBase):
                        rendered = f"({rendered})"
                    return f"{key} {rendered}"
                elif key == 'with':
                    ctes = [f"{a} as ({self._resolve(q, **kwargs)})" for q, a in self.dic[key]]
                    return f"{key} " + ",\n".join(ctes)
                elif key == 'join':
                    parsed = []
                    for q, how, cond in self.dic[key]:
                        prefix = how + ' ' if how else ''
                        sub_q = self._resolve(q, **kwargs)
                        if isinstance(q, QueryBase):
                            sub_q = f"({sub_q})"
                        parsed.append(f"{prefix}join {sub_q} on {self._resolve(cond, **kwargs)}")
                    return '\n'.join(parsed)
                elif key == 'select':
                    return f"{key} " + ', '.join([self._resolve(arg, **kwargs) for arg in self.dic[key]])
                else:
                    raise AttributeError(
                        f"{type(self).__name__!r} object cannot render clause {key!r}")
        elif attr in self.keys_simple:
            def inner(query):
                self.dic[attr] = query
                return self
        elif attr == 'with':
            def inner(query, alias):
                # setdefault stores the list even when this is the first WITH entry.
                self.dic.setdefault('with', []).append((query, alias))
                return self
        elif attr == 'select':
            def inner(*args):
                self.dic[attr] = list(args)
                return self
        else:
            raise AttributeError(
                f"{type(self).__name__!r} object has no attribute {__name!r}")

        return inner

    def exec(self, **kwargs):
        "Render every stored clause and join them, one per line, in SQL clause order."
        dic_sql = {}

        for key in self.keys_parse:
            if key in self.dic:
                dic_sql[key] = getattr(self, f'exec_{key}')(**kwargs)

        return '\n'.join([dic_sql[key] for key in self.keys_sql if key in dic_sql])

    def union(self, query):
        "Return a `UnionQuery` combining this query and `query` with UNION; raises TypeError for non-`QueryBase` operands."
        if not isinstance(query, QueryBase):
            raise TypeError(f"{query} is not an instance of QueryBase!")
        return UnionQuery(self, query, union_type='UNION')

    def __add__(self, query):
        "`q1 + q2` is shorthand for `q1.union(q2)`."
        return self.union(query)

    def union_all(self, query):
        "Return a `UnionQuery` combining this query and `query` with UNION ALL; raises TypeError for non-`QueryBase` operands."
        if not isinstance(query, QueryBase):
            raise TypeError(f"{query} is not an instance of QueryBase!")
        return UnionQuery(self, query, union_type='UNION ALL')

    def __mul__(self, query):
        "`q1 * q2` is shorthand for `q1.union_all(q2)`."
        return self.union_all(query)


class UnionQuery(QueryBase):
    """Two queries combined with `UNION` or `UNION ALL`.

    Deriving from `QueryBase` lets a union be used wherever a query is
    expected: as an operand of a further union, or as a sub-query that
    `SelectQuery` parenthesises.
    """

    def __init__(self, q1, q2, union_type='UNION') -> None:
        store_attr()

    def exec(self, **kwargs):
        "Render both operands with the module-level `exec` and join them with the union keyword."
        return f"{exec(self.q1, **kwargs)} {self.union_type} {exec(self.q2, **kwargs)}"

    def union(self, query):
        "Return a new `UnionQuery` of this union and `query` (UNION); raises TypeError for non-`QueryBase` operands."
        if not isinstance(query, QueryBase):
            raise TypeError(f"{query} is not an instance of QueryBase!")
        return UnionQuery(self, query, union_type='UNION')

    def __add__(self, query):
        "`u + q` is shorthand for `u.union(q)`."
        return self.union(query)

    def union_all(self, query):
        "Return a new `UnionQuery` of this union and `query` (UNION ALL); raises TypeError for non-`QueryBase` operands."
        if not isinstance(query, QueryBase):
            raise TypeError(f"{query} is not an instance of QueryBase!")
        return UnionQuery(self, query, union_type='UNION ALL')

    def __mul__(self, query):
        "`u * q` is shorthand for `u.union_all(q)`."
        return self.union_all(query)

In [None]:
# Clauses can be chained in any order; the rendered SQL always follows the
# standard clause order (select, from, where, limit).
q0 = (Query
      .from_(Table('tbl').as_('a'))
      .select('col1')
      .where((Field('col2')-100>2) & (Field('col3')/9<=1))
      .limit(100)
)
test_eq(q0.exec(), 
"""select col1
from tbl as a
where col2 - 100 > 2 and col3 / 9 <= 1
limit 100""")

# Two WITH entries (one of them the query built above), a plain-string
# FROM source and a join whose condition is given as a string.
qj = (Query
     .with_(Query.from_(Table('tbl').as_('a')).select('col1'), 's')     
     .with_(q0, 'm')
     .from_('s')
     .join('m').on('s.col1=m.col1')
     .where(Field('col1')>=10)
     .select('s.col1', 'm.col2', 'm.col3')
)
test_eq(qj.exec(), 
"""with s as (select col1
from tbl as a),
m as (select col1
from tbl as a
where col2 - 100 > 2 and col3 / 9 <= 1
limit 100)
select s.col1, m.col2, m.col3
from s
join m on s.col1=m.col1
where col1 >= 10""")