In [1]:
import pandas as pd
from data_algebra.data_ops import descr
import data_algebra.test_util
import data_algebra.BigQuery

In [2]:
# Small example table used throughout: `id` identifies each row and `x` is
# the column the window calculations below are ordered by.
d = pd.DataFrame(
    data={
        'id': list(range(5)),
        'x': [4, 50, 1, 3, 2.2],
    }
)

d

Unnamed: 0,id,x
0,0,4.0
1,1,50.0
2,2,1.0
3,3,3.0
4,4,2.2


In [3]:
# Pipeline under test: starting from a description of `d`, add a window column
# `o` = `(1).cumcount()` evaluated over the rows ordered by `x`, then sort the
# result by `id`. With pandas this gives each row's zero-based position in
# increasing-`x` order.
ops = (
    descr(d=d)
        .extend(
            {'o': '(1).cumcount()'},
            order_by=['x'])
        .order_rows(['id'])
)

In [4]:
# Run the cumcount pipeline locally on the pandas frame; this is the result the
# hand-written expectation is checked against.
pandas_res = ops.transform(d)

pandas_res

Unnamed: 0,id,x,o
0,0,4.0,3
1,1,50.0,4
2,2,1.0,0
3,3,3.0,2
4,4,2.2,1


In [5]:
# Hand-written expected output of the cumcount pipeline: `o` is the
# zero-based position of each row when the rows are taken in increasing-`x`
# order (x=1 -> 0, x=2.2 -> 1, x=3 -> 2, x=4 -> 3, x=50 -> 4).
expect = pd.DataFrame(
    data={
        'id': list(range(5)),
        'x': [4, 50, 1, 3, 2.2],
        'o': [3, 4, 0, 2, 1],
    }
)

# The pandas implementation must reproduce the expectation.
assert data_algebra.test_util.equivalent_frames(pandas_res, expect)

In [6]:
# Obtain a BigQuery handle and upload `d` as table `d`; allow_overwrite=True
# lets a table left over from an earlier run be replaced. The value returned
# by insert_table (a table description) is displayed as the cell output.
# NOTE(review): example_handle() presumably connects to the project's test
# dataset (the generated SQL refers to `data-algebra-test.test_1.d`) - confirm
# credentials are available before running.
bigquery_handle = data_algebra.BigQuery.example_handle()
bigquery_handle.insert_table(d, table_name='d', allow_overwrite=True)

(TableDescription(table_name="d", column_names=["id", "x"]))

In [7]:
# Translate the cumcount pipeline to BigQuery SQL and print it so the generated
# window expression can be inspected.
bigquery_sql = bigquery_handle.to_sql(ops)

print(bigquery_sql)

-- data_algebra SQL https://github.com/WinVector/data_algebra
--  dialect: BigQueryModel
--       string quote: "
--   identifier quote: `
WITH
 `extend_0` AS (
  SELECT  -- .extend({ 'o': '(1).cumcount()'}, partition_by=1, order_by=['x'])
   `id` ,
   `x` ,
   CUMCOUNT(1) OVER ( ORDER BY `x`  )  AS `o`
  FROM
   `data-algebra-test.test_1.d`
 )
SELECT  -- .order_rows(['id'])
 *
FROM
 `extend_0`
ORDER BY
 `id`





In [8]:
# Execute the cumcount SQL on BigQuery. The generated query calls CUMCOUNT(...),
# which BigQuery rejects with "Function not found", so the error is caught and
# printed: the failure itself is what this cell demonstrates.
try:
    bigquery_handle.read_query(bigquery_sql)
except Exception as ex:
    print(f'caught: {ex}')

caught: 400 Function not found: CUMCOUNT at [10:4]

(job ID: feb74d09-b744-4c83-8435-b6faa990b713)

                          -----Query Job SQL Follows-----                          

    |    .    |    .    |    .    |    .    |    .    |    .    |    .    |
   1:-- data_algebra SQL https://github.com/WinVector/data_algebra
   2:--  dialect: BigQueryModel
   3:--       string quote: "
   4:--   identifier quote: `
   5:WITH
   6: `extend_0` AS (
   7:  SELECT  -- .extend({ 'o': '(1).cumcount()'}, partition_by=1, order_by=['x'])
   8:   `id` ,
   9:   `x` ,
  10:   CUMCOUNT(1) OVER ( ORDER BY `x`  )  AS `o`
  11:  FROM
  12:   `data-algebra-test.test_1.d`
  13: )
  14:SELECT  -- .order_rows(['id'])
  15: *
  16:FROM
  17: `extend_0`
  18:ORDER BY
  19: `id`
    |    .    |    .    |    .    |    .    |    .    |    .    |    .    |


In [9]:
# Variant of the pipeline that uses `(1).cumsum()` instead of
# `(1).cumcount()`: a running sum of the constant 1 over the rows ordered by
# `x`, followed by a sort on `id`. With pandas this gives each row's one-based
# position in increasing-`x` order.
ops_2 = (
    descr(d=d)
        .extend(
            {'o': '(1).cumsum()'},
            order_by=['x'])
        .order_rows(['id'])
)


In [10]:
# Run the cumsum pipeline locally on the pandas frame.
pandas_res_2 = ops_2.transform(d)

pandas_res_2

Unnamed: 0,id,x,o
0,0,4.0,4
1,1,50.0,5
2,2,1.0,1
3,3,3.0,3
4,4,2.2,2


In [11]:
# Hand-written expected output of the cumsum pipeline: `o` is the one-based
# position of each row when the rows are taken in increasing-`x` order
# (x=1 -> 1, x=2.2 -> 2, x=3 -> 3, x=4 -> 4, x=50 -> 5).
expect_2 = pd.DataFrame(
    data={
        'id': list(range(5)),
        'x': [4, 50, 1, 3, 2.2],
        'o': [4, 5, 1, 3, 2],
    }
)

# The pandas implementation must reproduce the expectation.
assert data_algebra.test_util.equivalent_frames(pandas_res_2, expect_2)

In [12]:
# Translate the cumsum pipeline to BigQuery SQL and print it for inspection.
bigquery_sql_2 = bigquery_handle.to_sql(ops_2)

print(bigquery_sql_2)

-- data_algebra SQL https://github.com/WinVector/data_algebra
--  dialect: BigQueryModel
--       string quote: "
--   identifier quote: `
WITH
 `extend_0` AS (
  SELECT  -- .extend({ 'o': '(1).cumsum()'}, partition_by=1, order_by=['x'])
   `id` ,
   `x` ,
   SUM(1) OVER ( ORDER BY `x`  )  AS `o`
  FROM
   `data-algebra-test.test_1.d`
 )
SELECT  -- .order_rows(['id'])
 *
FROM
 `extend_0`
ORDER BY
 `id`



In [13]:
# Execute the cumsum SQL on BigQuery and keep the returned table for comparison
# with the pandas result.
bigquery_res_2 = bigquery_handle.read_query(bigquery_sql_2)

bigquery_res_2

Unnamed: 0,id,x,o
0,0,4.0,4
1,1,50.0,5
2,2,1.0,1
3,3,3.0,3
4,4,2.2,2


In [14]:
# The BigQuery execution of the cumsum pipeline must agree with the pandas
# execution.
assert data_algebra.test_util.equivalent_frames(
    pandas_res_2,
    bigquery_res_2)

In [15]:
# Clean up: remove the table uploaded for this example, then close the handle.
bigquery_handle.drop_table('d')
bigquery_handle.close()