In [1]:
from pprint import pprint

import black
import sqlparse
import yaml

from data_algebra.data_ops import *
import data_algebra.env
import data_algebra.yaml
import data_algebra.PostgreSQL

def pretty_sql(op, db_model):
    """Return the SQL generated for `op`, formatted for reading.

    Args:
        op: object exposing `to_sql(db_model=...)`; its result is the SQL
            text to format.
        db_model: database model forwarded unchanged to `op.to_sql`.

    Returns:
        The result of `sqlparse.format` on that SQL, with re-indentation
        enabled and keywords upper-cased.
    """
    return sqlparse.format(
        op.to_sql(db_model=db_model),
        reindent=True,
        keyword_case="upper",
    )

# Database model handed to `to_sql` whenever SQL is generated below.
db_model = data_algebra.PostgreSQL.PostgreSQLModel()

# Bind the placeholder names so they don't look unbound.
_, _1, _2, _get = None, None, None, (lambda x: x)
data_algebra.env.push_onto_namespace_stack(locals())

# Description of table 't1' with columns 'x' and 'y'.
t1 = TableDescription(table_name='t1', column_names=['x', 'y'])

In [2]:
# Display the printed form of the table description.
print(t1)

TableDescription(table_name='t1', column_names=['x', 'y'])


In [3]:
# Display the formatted SQL generated for the bare table description.
print(pretty_sql(t1, db_model))


SELECT "y",
       "x"
FROM "t1"


In [4]:
# Pipeline that adds a derived column 'v' defined by the expression 'x + 1'.
ops = t1.extend({'v': 'x + 1'})

In [5]:
# Display the printed form of the pipeline.
print(ops)

TableDescription(table_name='t1', column_names=['x', 'y']) .\
   extend({'v': '(x + 1)'})


In [6]:
# Display the formatted SQL generated for the pipeline.
print(pretty_sql(ops, db_model))

SELECT "y",
       "x",
       ("x" + 1) AS "v"
FROM
  (SELECT "y",
          "x"
   FROM "t1") "SQ_0"


In [7]:
# Extend with a partition: 'y' is redefined as 'y.max()' within each 'x' group.
opse = t1.extend(
    {'y': 'y.max()'},
    partition_by=['x'],
)
# Show the pipeline first, then the SQL it generates.
print(opse)
print(pretty_sql(opse, db_model))

TableDescription(table_name='t1', column_names=['x', 'y']) .\
   extend({'y': 'y.max()'}, partition_by=['x'])
SELECT "x",
       MAX("y") OVER (PARTITION BY "x") AS "y"
FROM
  (SELECT "y",
          "x"
   FROM "t1") "SQ_0"


In [8]:
# Second description of table 't1', this time with an extra column, so the
# pipeline below refers to 't1' with two different column sets.
t1b = TableDescription('t1', ['x', 'y', 'one_more'])
t2 = TableDescription('t2', ['x', 'z'])

ops = (
    t1.extend({'x': 'x + 1'})
    .natural_join(b=t1b, by=['x', 'y'])
    .natural_join(b=t2.extend({'f': 'x + 1'}), by='x')
)

# The failure is the point of this cell: catch it and show its message.
try:
    print(ops.get_tables())
except Exception as err:
    print(err)

Two tables with key t1 have different column sets.


In [9]:
# Both descriptions of 't1' now list the same columns.
t1b = TableDescription('t1', ['x', 'y'])
t2 = TableDescription('t2', ['x', 'z'])

ops = (
    t1.extend({'x': 'x + 1'})
    .natural_join(b=t1b, by=['x', 'y'])
    .natural_join(b=t2.extend({'f': 'x + 1'}), by='x')
)
print(ops)

TableDescription(table_name='t1', column_names=['x', 'y']) .\
   extend({'x': '(x + 1)'}) .\
   natural_join(b=(
      TableDescription(table_name='t1', column_names=['x', 'y'])),
      by=['x', 'y'], jointype='INNER') .\
   natural_join(b=(
      TableDescription(table_name='t2', column_names=['x', 'z']) .\
         extend({'f': '(x + 1)'})),
      by=['x'], jointype='INNER')


In [10]:
# Display the mapping returned by get_tables() for this pipeline.
print(ops.get_tables())

{'t1': TableDescription(table_name='t1', column_names=['x', 'y']), 't2': TableDescription(table_name='t2', column_names=['x', 'z'])}


In [11]:
# Display what columns_used() reports for this pipeline.
print(ops.columns_used())

{'t1': {'y', 'x'}, 't2': {'x', 'z'}}


In [12]:
# Display the formatted SQL generated for the joined pipeline.
print(pretty_sql(ops, db_model))

SELECT COALESCE("LQ_4"."x", "RQ_5"."x") AS "x",
       "y",
       "f",
       "z"
FROM
  (SELECT COALESCE("LQ_1"."y", "RQ_2"."y") AS "y",
          COALESCE("LQ_1"."x", "RQ_2"."x") AS "x"
   FROM
     (SELECT "y",
             ("x" + 1) AS "x"
      FROM
        (SELECT "y",
                "x"
         FROM "t1") "SQ_0") "LQ_1"
   INNER JOIN
     (SELECT "y",
             "x"
      FROM "t1") "RQ_2") "LQ_4"
INNER JOIN
  (SELECT "x",
          "z",
          ("x" + 1) AS "f"
   FROM
     (SELECT "x",
             "z"
      FROM "t2") "SQ_3") "RQ_5"


In [13]:
# Collect the pipeline's representation and pretty-print it; `p` is reused
# by the YAML cell that follows.
p = ops.collect_representation()
pprint(p)

[OrderedDict([('op', 'TableDescription'),
              ('table_name', 't1'),
              ('qualifiers', {}),
              ('column_names', ['x', 'y']),
              ('key', 't1')]),
 OrderedDict([('op', 'Extend'),
              ('ops', {'x': '(x + 1)'}),
              ('partition_by', []),
              ('order_by', []),
              ('reverse', [])]),
 OrderedDict([('op', 'NaturalJoin'),
              ('by', ['x', 'y']),
              ('jointype', 'INNER'),
              ('b',
               [OrderedDict([('op', 'TableDescription'),
                             ('table_name', 't1'),
                             ('qualifiers', {}),
                             ('column_names', ['x', 'y']),
                             ('key', 't1')])])]),
 OrderedDict([('op', 'NaturalJoin'),
              ('by', ['x']),
              ('jointype', 'INNER'),
              ('b',
               [OrderedDict([('op', 'TableDescription'),
                             ('table_name', 't2'),
              

In [14]:
# Serialize the collected representation to YAML text and display it.
# (`yaml` is imported with the other dependencies in the first cell.)
dmp = yaml.dump(p)
print(dmp)

- op: TableDescription
  table_name: t1
  qualifiers: {}
  column_names:
  - x
  - y
  key: t1
- op: Extend
  ops:
    x: (x + 1)
  partition_by: []
  order_by: []
  reverse: []
- op: NaturalJoin
  by:
  - x
  - y
  jointype: INNER
  b:
  - op: TableDescription
    table_name: t1
    qualifiers: {}
    column_names:
    - x
    - y
    key: t1
- op: NaturalJoin
  by:
  - x
  jointype: INNER
  b:
  - op: TableDescription
    table_name: t2
    qualifiers: {}
    column_names:
    - x
    - z
    key: t2
  - op: Extend
    ops:
      f: (x + 1)
    partition_by: []
    order_by: []
    reverse: []



In [15]:
# Parse the YAML text back into plain data and rebuild a pipeline from it,
# then display the rebuilt pipeline.
ops_back = data_algebra.yaml.to_pipeline(yaml.safe_load(dmp))
print(ops_back)

TableDescription(table_name='t1', column_names=['x', 'y']) .\
   extend({'x': '(x + 1)'}) .\
   natural_join(b=(
      TableDescription(table_name='t1', column_names=['x', 'y'])),
      by=['x', 'y'], jointype='INNER') .\
   natural_join(b=(
      TableDescription(table_name='t2', column_names=['x', 'z']) .\
         extend({'f': '(x + 1)'})),
      by=['x'], jointype='INNER')


In [16]:
str = ops_back.to_python()
print(str)

TableDescription(table_name='t1', column_names=['x', 'y']) .\
   extend({'x': '(x + 1)'}) .\
   natural_join(b=(
      TableDescription(table_name='t1', column_names=['x', 'y'])),
      by=['x', 'y'], jointype='INNER') .\
   natural_join(b=(
      TableDescription(table_name='t2', column_names=['x', 'z']) .\
         extend({'f': '(x + 1)'})),
      by=['x'], jointype='INNER')


In [17]:

eval(str)

TableDescription(table_name='t1', column_names=['x', 'y']) .\
   extend({'x': '(x + 1)'}) .\
   natural_join(b=(
      TableDescription(table_name='t1', column_names=['x', 'y'])),
      by=['x', 'y'], jointype='INNER') .\
   natural_join(b=(
      TableDescription(table_name='t2', column_names=['x', 'z']) .\
         extend({'f': '(x + 1)'})),
      by=['x'], jointype='INNER')

In [18]:
s2 = black.format_str(str, mode=black.FileMode())
print(s2)

TableDescription(table_name="t1", column_names=["x", "y"]).extend(
    {"x": "(x + 1)"}
).natural_join(
    b=(TableDescription(table_name="t1", column_names=["x", "y"])),
    by=["x", "y"],
    jointype="INNER",
).natural_join(
    b=(
        TableDescription(table_name="t2", column_names=["x", "z"]).extend(
            {"f": "(x + 1)"}
        )
    ),
    by=["x"],
    jointype="INNER",
)



In [19]:
# Evaluate the black-formatted source to rebuild the pipeline.
# NOTE(review): eval() executes arbitrary code; only use it on text generated
# within this notebook, never on untrusted input.
eval(s2)

TableDescription(table_name='t1', column_names=['x', 'y']) .\
   extend({'x': '(x + 1)'}) .\
   natural_join(b=(
      TableDescription(table_name='t1', column_names=['x', 'y'])),
      by=['x', 'y'], jointype='INNER') .\
   natural_join(b=(
      TableDescription(table_name='t2', column_names=['x', 'z']) .\
         extend({'f': '(x + 1)'})),
      by=['x'], jointype='INNER')