This is a note on how to start a [data algebra](https://github.com/WinVector/data_algebra) pipeline with user-supplied SQL.

In [1]:
import pandas

from data_algebra.data_ops import *
import data_algebra.SQLite
import data_algebra.test_util

In [2]:
# Example input table: a grouping key "g" and two integer value columns.
d1 = pandas.DataFrame(
    {
        "g": ["a", "a", "b", "b"],
        "v1": [1, 2, 3, 4],
        "v2": [5, 6, 7, 8],
    }
)

In [3]:
# Get an example SQLite database handle from data_algebra.
sqlite_handle = data_algebra.SQLite.example_handle()
# Load d1 into the database as table "d1". This is the cell's last expression,
# so its return value (a TableDescription, per the output below) is displayed.
sqlite_handle.insert_table(d1, table_name='d1')

(TableDescription(table_name="d1", column_names=["g", "v1", "v2"]))

In [4]:
# Start the pipeline from hand-written SQL instead of a table description.
# column_names lists the columns the query produces (the table's three columns
# plus the computed v3); view_name is the name later steps use to refer to the
# query's result. A regular data algebra step (.extend) is then chained on.
ops = SQLNode(
    sql="""
          SELECT
            *,
            v1 * v2 AS v3
          FROM
            d1
        """,
    column_names=["g", "v1", "v2", "v3"],
    view_name="derived_results",
).extend({"v4": "v3 + v1"})

In [5]:
# Translate the whole pipeline to SQL for this handle's dialect and show it;
# the user-supplied query appears as the "derived_results" common table expression.
generated_sql = sqlite_handle.to_sql(ops)
print(generated_sql)

-- data_algebra SQL https://github.com/WinVector/data_algebra
--  dialect: SQLiteModel
--       string quote: '
--   identifier quote: "
WITH
 "derived_results" AS (
  -- user supplied SQL
             SELECT
               *,
               v1 * v2 AS v3
             FROM
               d1
 )
SELECT  -- .extend({ 'v4': 'v3 + v1'})
 "g" ,
 "v1" ,
 "v2" ,
 "v3" ,
 "v3" + "v1" AS "v4"
FROM
 "derived_results"



In [6]:
# Expected result, computed directly in pandas: v3 mirrors the SQL step and
# v4 mirrors the .extend step. assign() returns a new frame, so d1 is untouched,
# and later keyword arguments may refer to columns created by earlier ones.
expect = d1.assign(
    v3=lambda frame: frame["v1"] * frame["v2"],
    v4=lambda frame: frame["v3"] + frame["v1"],
)

In [7]:
# Execute the pipeline through the SQLite handle and keep the returned frame.
res_sqllite = sqlite_handle.read_query(ops)

# Trailing expression: display the result in the notebook.
res_sqllite

Unnamed: 0,g,v1,v2,v3,v4
0,a,1,5,5,6
1,a,2,6,12,14
2,b,3,7,21,24
3,b,4,8,32,36


In [8]:
# Confirm the database result agrees with the pandas-computed expectation.
assert data_algebra.test_util.equivalent_frames(res_sqllite, expect)

In [9]:
# To evaluate the same pipeline on in-memory frames, supply a frame under the
# SQL node's view name ("derived_results"). It is built here in pandas with the
# same v3 column the user SQL would have produced.
dr = d1.assign(v3=d1["v1"] * d1["v2"])

tables_by_name = {"derived_results": dr}
res_pandas = ops.eval(tables_by_name)

# Trailing expression: display the result in the notebook.
res_pandas

Unnamed: 0,g,v1,v2,v3,v4
0,a,1,5,5,6
1,a,2,6,12,14
2,b,3,7,21,24
3,b,4,8,32,36


In [10]:
# Confirm the in-memory evaluation agrees with the same expectation as the SQL run.
assert data_algebra.test_util.equivalent_frames(res_pandas, expect)

In [11]:
# Done with the example: close the database handle.
sqlite_handle.close()
