In [1]:
from pprint import pprint
import yaml

from data_algebra.data_ops import *
import data_algebra.env
import data_algebra.yaml
import data_algebra.PostgreSQL


# SQL dialect model handed to every to_sql() call in the cells below.
db_model = data_algebra.PostgreSQL.PostgreSQLModel()

# NOTE(review): presumably adjusts how OrderedDict values are emitted by
# yaml.dump() (used in a later cell) -- confirm against data_algebra.yaml.
data_algebra.yaml.fix_ordered_dict_yaml_rep()
# Hands a snapshot of this namespace to data_algebra.
# NOTE(review): presumably used to resolve names inside expression strings
# such as 'x + 1' -- confirm against data_algebra.env.
data_algebra.env.push_onto_namespace_stack(locals())

# Description of a table named 't1' with columns 'x' and 'y'.
t1 = TableDescription('t1', ['x', 'y'])

In [2]:
# Show the printed form of the table description.
print(t1)

TableDescription(
 table_name='t1',
 column_names=[
   'x', 'y'])


In [3]:
# Render the bare table description as formatted SQL using db_model.
print(t1.to_sql(db_model, pretty=True))


SELECT "x",
       "y"
FROM "t1"


In [4]:
# Derive a new column 'v' from the expression 'x + 1' on top of t1.
ops = t1.extend({"v": "x + 1"})

In [5]:
# Show the pipeline as a chain of operations rooted at the table description.
print(ops)

TableDescription(
 table_name='t1',
 column_names=[
   'x', 'y']) .\
   extend({
    'v': 'x + 1'})


In [6]:
# Render the extend pipeline as formatted SQL using db_model.
print(ops.to_sql(db_model, pretty=True))

SELECT "x" + 1 AS "v",
       "x",
       "y"
FROM "t1"


In [7]:
# Partitioned extend: overwrite 'y' with 'y.max()' computed per value of 'x'.
opse = t1.extend({"y": "y.max()"}, partition_by=["x"])
print(opse)
# The SQL form expresses the partition as a window (OVER ... PARTITION BY).
print(opse.to_sql(db_model, pretty=True))

TableDescription(
 table_name='t1',
 column_names=[
   'x', 'y']) .\
   extend({
    'y': 'y.max()'},
   partition_by=['x'])
SELECT MAX("y") OVER (PARTITION BY "x") AS "y",
                     "x"
FROM "t1"


In [8]:
# A description of table 't1' that carries an additional column, plus a second table.
t1b = TableDescription("t1", ["x", "y", "one_more"])
t2 = TableDescription("t2", ["x", "z"])

# Shift x, join back onto t1b by (x, y), then join an extended t2 by x.
ops = (
    t1b.extend({"x": "x + 1"})
    .natural_join(b=t1b, by=["x", "y"])
    .natural_join(b=t2.extend({"f": "x + 1"}), by="x")
)

# List the tables the pipeline refers to; if get_tables() raises, show the
# message instead of stopping the notebook.
try:
    print(ops.get_tables())
except Exception as ex:
    print(ex)

{'t1': TableDescription(
 table_name='t1',
 column_names=[
   'x', 'y', 'one_more']), 't2': TableDescription(
 table_name='t2',
 column_names=[
   'x', 'z'])}


In [9]:
# t1b describes table 't1' with the same columns as t1 from the first cell.
t1b = TableDescription("t1", ["x", "y"])
t2 = TableDescription("t2", ["x", "z"])

# Pipeline inspected by the remaining cells: shift x on t1, join t1b by
# (x, y), then join an extended t2 by x.
ops = (
    t1.extend({"x": "x + 1"})
    .natural_join(b=t1b, by=["x", "y"])
    .natural_join(b=t2.extend({"f": "x + 1"}), by="x")
)
print(ops)

TableDescription(
 table_name='t1',
 column_names=[
   'x', 'y']) .\
   extend({
    'x': 'x + 1'}) .\
   natural_join(b=
      TableDescription(
       table_name='t1',
       column_names=[
         'x', 'y']),
      by=['x', 'y'], jointype='INNER') .\
   natural_join(b=
      TableDescription(
       table_name='t2',
       column_names=[
         'x', 'z']) .\
         extend({
          'f': 'x + 1'}),
      by=['x'], jointype='INNER')


In [10]:
# Mapping from table name to the table description used by the pipeline.
print(ops.get_tables())

{'t1': TableDescription(
 table_name='t1',
 column_names=[
   'x', 'y']), 't2': TableDescription(
 table_name='t2',
 column_names=[
   'x', 'z'])}


In [11]:
# Mapping from table name to the set of its columns the pipeline uses.
print(ops.columns_used())

{'t1': {'x', 'y'}, 't2': {'z', 'x'}}


In [12]:
# Render the full join pipeline as formatted SQL (nested sub-selects) using db_model.
print(ops.to_sql(db_model, pretty=True))

SELECT COALESCE("natural_join_3"."x", "extend_5"."x") AS "x",
       "y",
       "z",
       "f"
FROM
  (SELECT COALESCE("extend_1"."x", "table_reference_2"."x") AS "x",
          COALESCE("extend_1"."y", "table_reference_2"."y") AS "y"
   FROM
     (SELECT "x" + 1 AS "x",
             "y"
      FROM "t1") "extend_1"
   INNER JOIN "t1" "table_reference_2" ON "extend_1"."x" = "table_reference_2"."x",
                                          "extend_1"."y" = "table_reference_2"."y") "natural_join_3"
INNER JOIN
  (SELECT "z",
          "x",
          "x" + 1 AS "f"
   FROM "t2") "extend_5" ON "natural_join_3"."x" = "extend_5"."x"


In [13]:
# Convert the pipeline to plain data: a list of OrderedDicts, one per step,
# with joined sub-pipelines nested under the 'b' key.
p = ops.collect_representation()
pprint(p)

[OrderedDict([('op', 'TableDescription'),
              ('table_name', 't1'),
              ('qualifiers', {}),
              ('column_names', ['x', 'y']),
              ('key', 't1')]),
 OrderedDict([('op', 'Extend'),
              ('ops', {'x': 'x + 1'}),
              ('partition_by', []),
              ('order_by', []),
              ('reverse', [])]),
 OrderedDict([('op', 'NaturalJoin'),
              ('by', ['x', 'y']),
              ('jointype', 'INNER'),
              ('b',
               [OrderedDict([('op', 'TableDescription'),
                             ('table_name', 't1'),
                             ('qualifiers', {}),
                             ('column_names', ['x', 'y']),
                             ('key', 't1')])])]),
 OrderedDict([('op', 'NaturalJoin'),
              ('by', ['x']),
              ('jointype', 'INNER'),
              ('b',
               [OrderedDict([('op', 'TableDescription'),
                             ('table_name', 't2'),
                

In [14]:

# Serialize the plain-data representation to YAML text.
dmp = yaml.dump(p)
print(dmp)

- op: TableDescription
  table_name: t1
  qualifiers: {}
  column_names:
  - x
  - y
  key: t1
- op: Extend
  ops:
    x: x + 1
  partition_by: []
  order_by: []
  reverse: []
- op: NaturalJoin
  by:
  - x
  - y
  jointype: INNER
  b:
  - op: TableDescription
    table_name: t1
    qualifiers: {}
    column_names:
    - x
    - y
    key: t1
- op: NaturalJoin
  by:
  - x
  jointype: INNER
  b:
  - op: TableDescription
    table_name: t2
    qualifiers: {}
    column_names:
    - x
    - z
    key: t2
  - op: Extend
    ops:
      f: x + 1
    partition_by: []
    order_by: []
    reverse: []



In [15]:
# Round trip: parse the YAML text with the safe loader and rebuild a pipeline
# from it; the printed form should match the earlier print(ops) output.
ops_back = data_algebra.yaml.to_pipeline(yaml.safe_load(dmp))
print(ops_back)



TableDescription(
 table_name='t1',
 column_names=[
   'x', 'y']) .\
   extend({
    'x': 'x + 1'}) .\
   natural_join(b=
      TableDescription(
       table_name='t1',
       column_names=[
         'x', 'y']),
      by=['x', 'y'], jointype='INNER') .\
   natural_join(b=
      TableDescription(
       table_name='t2',
       column_names=[
         'x', 'z']) .\
         extend({
          'f': 'x + 1'}),
      by=['x'], jointype='INNER')


In [16]:
# Emit the rebuilt pipeline as formatted Python source.
print(ops_back.to_python(pretty=True))


TableDescription(table_name="t1", column_names=["x", "y"]).extend(
    {"x": "x + 1"}
).natural_join(
    b=TableDescription(table_name="t1", column_names=["x", "y"]),
    by=["x", "y"],
    jointype="INNER",
).natural_join(
    b=TableDescription(table_name="t2", column_names=["x", "z"]).extend({"f": "x + 1"}),
    by=["x"],
    jointype="INNER",
)

