In [1]:
from pprint import pprint

import sqlparse

from data_algebra.data_ops import *
import data_algebra.env
import data_algebra.yaml
import data_algebra.PostgreSQL

def pretty_sql(op, db_model):
    """Return the SQL generated for ``op``, pretty-printed for display.

    Args:
        op: an object exposing ``to_sql(db_model=...)`` (the operator
            pipelines built in this notebook).
        db_model: the database model handed through to ``op.to_sql``.

    Returns:
        The SQL text after ``sqlparse.format`` has re-indented it and
        upper-cased its keywords.
    """
    return sqlparse.format(
        op.to_sql(db_model=db_model),
        reindent=True,
        keyword_case="upper",
    )

# Database model passed to pretty_sql() in the cells below.
db_model = data_algebra.PostgreSQL.PostgreSQLModel()

# Bind the placeholder names (three None values and an identity lambda)
# before the namespace is captured on the next line.
_, _1, _2, _get = [None, None, None, lambda x: x] # don't look unbound
# Hand the current namespace (as seen by locals()) to data_algebra.env.
# NOTE(review): presumably this is how expression strings such as 'x + 1'
# get their names resolved — confirm against data_algebra.env.
data_algebra.env.push_onto_namespace_stack(locals())

# Describe table 't1' by name and column list only.
t1 = TableDescription('t1', ['x', 'y'])

In [2]:
# Show the text form of the table description.
print(t1)

Table(t1; x, y)


In [3]:
# Render the bare table description as formatted SQL via db_model.
print(pretty_sql(t1, db_model))


SELECT "y",
       "x"
FROM "t1"


In [4]:
# Add a derived column 'v' defined by the expression string 'x + 1'.
ops = t1 . extend({'v':'x + 1'})

In [5]:
# Show the text form of the pipeline (table description followed by the extend step).
print(ops)

Table(t1; x, y) .
   extend({'v': (x + 1)})


In [6]:
# Render the extend pipeline as formatted SQL via db_model.
print(pretty_sql(ops, db_model))

SELECT "y",
       "x",
       ("x" + 1) AS "v"
FROM
  (SELECT "y",
          "x"
   FROM "t1") "SQ_0"


In [7]:
# Replace 'y' with y.max() computed within each 'x' partition; the recorded
# SQL output renders this as MAX("y") OVER (PARTITION BY "x").
opse = t1 . extend({'y':'y.max()'}, partition_by = ['x'])
# Show the pipeline text first, then its SQL rendering.
print(opse)
print(pretty_sql(opse, db_model))

Table(t1; x, y) .
   extend({'y': y.max()}, partition_by:['x'])
SELECT "x",
       MAX("y") OVER (PARTITION BY "x") AS "y"
FROM
  (SELECT "y",
          "x"
   FROM "t1") "SQ_0"


In [8]:
# Describe table 't1' a second time, now with an extra column, so the pipeline
# below carries two different column sets under the same table name.
t1b = TableDescription('t1', ['x', 'y', 'one_more'])
t2 = TableDescription('t2', ['x', 'z'])

ops = (
    t1
    .extend({'x': 'x + 1'})
    .natural_join(b=t1b, by=['x', 'y'])
    .natural_join(b=t2.extend({'f': 'x + 1'}), by='x')
)

# The failure is the point of this cell: print the error message instead of
# letting the exception abort the notebook run.
try:
    print(ops.get_tables())
except Exception as err:
    print(err)

Two tables with key t1 have different column sets.


In [9]:
# Same pipeline as the previous cell, but the second description of 't1' now
# lists the same columns as t1.
t1b = TableDescription('t1', ['x', 'y'])
t2 = TableDescription('t2', ['x', 'z'])

ops = (
    t1
    .extend({'x': 'x + 1'})
    .natural_join(b=t1b, by=['x', 'y'])
    .natural_join(b=t2.extend({'f': 'x + 1'}), by='x')
)
print(ops)

Table(t1; x, y) .
   extend({'x': (x + 1)}) .
   natural_join(b=(
      Table(t1; x, y)),
      by=['x', 'y'], jointype=INNER) .
   natural_join(b=(
      Table(t2; x, z) .
         extend({'f': (x + 1)})),
      by=['x'], jointype=INNER)


In [10]:
# List the table descriptions the pipeline refers to; the recorded output is a
# dict keyed by table name ('t1', 't2').
print(ops.get_tables())

{'t1': Table(t1; x, y), 't2': Table(t2; x, z)}


In [11]:
# Render the two-join pipeline as formatted SQL via db_model.
# NOTE(review): the recorded output never references "t2" and shows no join
# conditions — re-run against the current data_algebra version and verify.
print(pretty_sql(ops, db_model))

SELECT COALESCE("LQ_8"."y", "RQ_9"."y") AS "y",
       COALESCE("LQ_8"."x", "RQ_9"."x") AS "x"
FROM
  (SELECT COALESCE("LQ_2"."y", "RQ_3"."y") AS "y",
          COALESCE("LQ_2"."x", "RQ_3"."x") AS "x"
   FROM
     (SELECT "y",
             ("x" + 1) AS "x"
      FROM
        (SELECT "y",
                "x"
         FROM "t1") "SQ_0") "LQ_2"
   INNER JOIN
     (SELECT "y",
             ("x" + 1) AS "x"
      FROM
        (SELECT "y",
                "x"
         FROM "t1") "SQ_1") "RQ_3") "LQ_8"
INNER JOIN
  (SELECT COALESCE("LQ_6"."y", "RQ_7"."y") AS "y",
          COALESCE("LQ_6"."x", "RQ_7"."x") AS "x"
   FROM
     (SELECT "y",
             ("x" + 1) AS "x"
      FROM
        (SELECT "y",
                "x"
         FROM "t1") "SQ_4") "LQ_6"
   INNER JOIN
     (SELECT "y",
             ("x" + 1) AS "x"
      FROM
        (SELECT "y",
                "x"
         FROM "t1") "SQ_5") "RQ_7") "RQ_9"


In [12]:
# Collect the pipeline as a list of per-step records (the recorded output
# shows one OrderedDict per step, each tagged with an 'op' entry).
p = ops.collect_representation()
pprint(p)

[OrderedDict([('op', 'TableDescription'),
              ('table_name', 't1'),
              ('qualifiers', {}),
              ('column_names', ['x', 'y']),
              ('key', 't1')]),
 OrderedDict([('op', 'Extend'),
              ('ops', {'x': '(x + 1)'}),
              ('partition_by', []),
              ('order_by', []),
              ('reverse', [])]),
 OrderedDict([('op', 'NaturalJoin'),
              ('by', ['x', 'y']),
              ('jointype', 'INNER'),
              ('b',
               [OrderedDict([('op', 'TableDescription'),
                             ('table_name', 't1'),
                             ('qualifiers', {}),
                             ('column_names', ['x', 'y']),
                             ('key', 't1')])])]),
 OrderedDict([('op', 'NaturalJoin'),
              ('by', ['x']),
              ('jointype', 'INNER'),
              ('b',
               [OrderedDict([('op', 'TableDescription'),
                             ('table_name', 't2'),
              

In [13]:
import yaml
# Serialize the collected representation to YAML text and show it.
dmp = yaml.dump(p)
print(dmp)

- op: TableDescription
  table_name: t1
  qualifiers: {}
  column_names:
  - x
  - y
  key: t1
- op: Extend
  ops:
    x: (x + 1)
  partition_by: []
  order_by: []
  reverse: []
- op: NaturalJoin
  by:
  - x
  - y
  jointype: INNER
  b:
  - op: TableDescription
    table_name: t1
    qualifiers: {}
    column_names:
    - x
    - y
    key: t1
- op: NaturalJoin
  by:
  - x
  jointype: INNER
  b:
  - op: TableDescription
    table_name: t2
    qualifiers: {}
    column_names:
    - x
    - z
    key: t2
  - op: Extend
    ops:
      f: (x + 1)
    partition_by: []
    order_by: []
    reverse: []



In [14]:
# Round trip: parse the YAML text with safe_load and rebuild a pipeline from
# it; the recorded output matches the earlier print(ops) text.
ops_back = data_algebra.yaml.to_pipeline(yaml.safe_load(dmp))
print(ops_back)


Table(t1; x, y) .
   extend({'x': (x + 1)}) .
   natural_join(b=(
      Table(t1; x, y)),
      by=['x', 'y'], jointype=INNER) .
   natural_join(b=(
      Table(t2; x, z) .
         extend({'f': (x + 1)})),
      by=['x'], jointype=INNER)
