In [None]:
import pyarrow.dataset as ds
from graphique import GraphQL


def execute(query):
    """Run `query` synchronously against the app's schema and return the result data.

    The query is executed with `app.root_value` as the root value and an empty
    context. If the execution result reports any errors, the first one is
    raised wrapped in a `ValueError`; otherwise `result.data` is returned.
    """
    result = app.schema.execute_sync(
        query,
        root_value=app.root_value,
        context_value={},
    )
    # Only the first reported error is surfaced; `errors` may be None.
    first_error = next(iter(result.errors or []), None)
    if first_error is not None:
        raise ValueError(first_error)
    return result.data


# Parquet format whose read options list `state` as a dictionary column.
format = ds.ParquetFileFormat(
    read_options={'dictionary_columns': ['state']},
)
# Dataset over the zipcodes fixture file, opened with the format above.
dataset = ds.dataset(
    '../tests/fixtures/zipcodes.parquet',
    format=format,
)
# GraphQL app over the dataset; `execute` reads its `schema` and `root_value`.
app = GraphQL(dataset)

### Introspect the dataset.

In [None]:
# Request `length` plus the schema's `names`, `types`, `partitioning`, and `index` fields.
execute("""{
  length
  schema {
    names
    types
    partitioning
    index
  }
}""")

### Loading options
* Ibis table with camel-cased fields (not relevant in this dataset)
* Table that has already been read from the dataset (`dataset.to_table()`)

In [None]:
import ibis
from strawberry.utils.str_converters import to_camel_case

# Map the camel-cased form of each dataset field name to an `ibis._[name]` reference to the original column.
columns = {to_camel_case(name): ibis._[name] for name in dataset.schema.names}
# Build a GraphQL app from the same parquet file read through ibis, selecting the mapped columns.
GraphQL(ibis.read_parquet('../tests/fixtures/zipcodes.parquet').select(columns))

In [None]:
# Build a GraphQL app from the result of `dataset.to_table()` instead of from the dataset itself.
GraphQL(dataset.to_table())

### Find California counties with the most cities.
* `filter` state by "CA"
* `group` by county
  * aggregate distinct count of cities
* `sort` by city counts descending
* access `columns`
  * `county` is still known in the schema
  * cities is a new `column` accessed through an inline fragment

In [None]:
# Filter `state` equal to "CA", group by `county` with a distinct count of `city` aliased as `cities`,
# sort by `-cities` with `length: 5`, then read the `county` values and the `cities` column values.
execute("""{
  filter(state: {eq: "CA"}) {
    group(by: "county", aggregate: {countDistinct: {name: "city", alias: "cities"}}) {
      sort(by: "-cities", length: 5) {
        columns {
          county {
            values
          }
        }
        cities: column(name: "cities") {
          ... on LongColumn {
            values
          }
        }
      }
    }
  }
}""")

### Find states with cities which match the name of their county.
* `scan` instead of `filter`, because comparing two columns is not a "simple" query
* `Column.unique` instead of `group`, because no other aggregates are needed

In [None]:
# Scan with a filter that compares the `county` and `city` columns for equality,
# then read the unique `state` values along with their `length`.
execute("""{
  scan(filter: {eq: [{name: "county"}, {name: "city"}]}) {
    columns {
      state {
        unique {
          length
          values
        }
      }
    }
  }
}""")

### Find states which have no cities that match the name of their county.
The opposite of the previous example. Filtering rows would drop needed data; the "zeros" have to be counted.
* `scan` with projected column matching names instead of filtering
* `group` by state
  * aggregate whether there are `any` matches
* `scan` for no matches
* access column


In [None]:
# Project a `match` column comparing `county` and `city` for equality, group by `state`
# aggregating `any` over `match`, scan for rows where `match` is inverted (`inv`),
# then read the remaining `state` values.
execute("""{
  scan(columns: {alias: "match", eq: [{name: "county"}, {name: "city"}]}) {
    group(by: "state", aggregate: {any: {name: "match"}}) {
      scan(filter: {inv: {name: "match"}}) {
        columns {
          state {
            values
          }
        }
      }
    }
  }
}""")