In [39]:
import ibis
from boring_semantic_layer.semantic_api.api import to_semantic_table

# ibis DuckDB connection backed by the `test.duckdb` database file.
con = ibis.duckdb.connect("test.duckdb")

# Base URL the sample parquet files are read from.
BASE_URL = "https://pub-a45a6a332b4646f2a6f44775695c64df.r2.dev"

# Register each remote parquet file through the connection, keyed by the
# name the later cells use to look the table up.
tables = {}
tables["flights_tbl"] = con.read_parquet(f"{BASE_URL}/flights.parquet")
tables["carriers_tbl"] = con.read_parquet(f"{BASE_URL}/carriers.parquet")

In [119]:
# Semantic model used by the reproduction cells below.
#
# Dimensions are row-level expressions to group/filter by:
#   * `month_of_year` - despite its name this is `arr_time` truncated to the
#     first day of its month (e.g. 2003-07-01), not a 1-12 month number.
#   * `flight_year`   - calendar year of `arr_time`.
#
# `flight_count` is an aggregate (`t.count()`), so it is declared with
# `with_measures` rather than `with_dimensions`, matching how the
# `flights_st` model later in this notebook declares the same expression.
flight_st = (
    to_semantic_table(tables["flights_tbl"])
    .with_dimensions(
        month_of_year=lambda t: t.arr_time.truncate("month"),
        flight_year=lambda t: t.arr_time.year(),
    )
    .with_measures(
        flight_count=lambda t: t.count(),
    )
)

In [120]:
# Baseline that works: group by the `month_of_year` dimension and count the
# rows in each group.
query_1 = (
    flight_st
    .group_by("month_of_year")
    .aggregate(flight_count=lambda t: t.count())
)

# Kept as the cell's last expression so the executed result is displayed.
query_1.execute()

Unnamed: 0,month_of_year,flight_count
0,2003-07-01,5113
1,2004-02-01,5217
2,2004-04-01,5379
3,2004-07-01,5766
4,2004-09-01,5683
...,...,...
68,2003-04-01,4868
69,2005-08-01,6402
70,2002-11-01,3914
71,2002-03-01,4101


In [None]:
# Not working: the filter on the `flight_year` dimension has no visible
# effect - the 1900 group is still present in the result below.
query_1 = (
    flight_st
    .filter(lambda t: t.flight_year > 1900)
    .group_by("flight_year")
    .aggregate(flight_count=lambda t: t.count())
    .execute()
)
# Display the executed result.
query_1

Unnamed: 0,flight_year,flight_count
0,1900,261
1,2000,47124
2,2001,49164
3,2002,49644
4,2003,58635
5,2004,68303
6,2005,71696


In [122]:
# Not working: applying the `flight_year` filter *after* the aggregation
# raises AttributeError ("'Table' object has no attribute 'arr_time'").
#
# The exception is what this cell demonstrates, so it is caught and printed
# instead of propagating; that way "Restart Kernel -> Run All" still reaches
# the cells below. `query_1` is only rebound (and shown) if the query succeeds.
try:
    query_1 = (
        flight_st
        .group_by("flight_year")
        .aggregate(flight_count=lambda t: t.count())
        .filter(lambda t: t.flight_year > 1900)
        .execute()
    )
except AttributeError as exc:
    print(f"{type(exc).__name__}: {exc}")
else:
    # `display` is provided by IPython; a bare expression inside `else:`
    # would not be rendered as the cell output.
    display(query_1)

AttributeError: 'Table' object has no attribute 'arr_time'

In [None]:
# Not working: grouping by a dimension defined inline in `group_by` (day of
# `arr_time`) instead of one declared on the model.
# NOTE(review): no output was captured for this cell, so the exact failure
# is not recorded here - confirm when re-running.
query_1 = (
    flight_st
    .group_by(day=lambda t: t.arr_time.day())
    .aggregate(flight_count=lambda t: t.count())
    .execute()
)

In [101]:
# Not working: adding `order_by` after the aggregation fails with
# "OperationNotDefinedError: Compilation rule for 'SemanticTable' operation
# is not defined".
#
# The exception is what this cell demonstrates, so it is caught and printed
# instead of propagating; that way "Restart Kernel -> Run All" still reaches
# the cells below. `query_1` is only rebound if the query succeeds.
from ibis.common.exceptions import OperationNotDefinedError

try:
    query_1 = (
        flight_st
        .group_by("month_of_year")
        .aggregate(flight_count=lambda t: t.count())
        .order_by("month_of_year")
        .execute()
    )
except OperationNotDefinedError as exc:
    print(f"{type(exc).__name__}: {exc}")

OperationNotDefinedError: Compilation rule for 'SemanticTable' operation is not defined

Unnamed: 0,month_of_year,flight_count
0,2004-10-01,6218
1,2003-01-01,4891
2,2005-02-01,5699
3,2005-05-01,6059
4,2000-05-01,4046
...,...,...
68,2000-01-01,4018
69,2003-05-01,5109
70,2003-06-01,4999
71,2002-07-01,4346


In [6]:
# Dimensions of the flights model: two plain columns plus `arr_time`
# truncated to the month.
flight_dimensions = {
    "origin": lambda t: t.origin,
    "carrier": lambda t: t.carrier,
    "month": lambda t: t.arr_time.truncate("month"),
}

# Measures of the flights model: row count and mean departure delay.
flight_measures = {
    "flight_count": lambda t: t.count(),
    "avg_dep_delay": lambda t: t.dep_delay.mean(),
}

# Semantic table over the flights data with the dimensions and measures above.
flights_st = (
    to_semantic_table(tables["flights_tbl"])
    .with_dimensions(**flight_dimensions)
    .with_measures(**flight_measures)
)

In [89]:
# Group by the `month` and `carrier` dimensions and aggregate by referencing
# `flight_count`, the measure name declared on `flights_st`.
(
    flights_st
    .group_by("month", "carrier")
    .aggregate(lambda t: t.flight_count)
    .execute()
)

Unnamed: 0,month,carrier,flight_count
0,2004-12-01,US,632
1,2000-08-01,DL,567
2,2004-02-01,RU,408
3,2004-04-01,RU,439
4,2000-01-01,US,554
...,...,...,...
866,2003-01-01,TZ,137
867,2005-03-01,TZ,98
868,2004-08-01,B6,130
869,1900-01-01,UA,18


In [85]:
import malloy
from malloy.data.duckdb import DuckDbConnection

with malloy.Runtime() as runtime:
    runtime.add_connection(DuckDbConnection(home_dir="."))

    data = await runtime.load_file(
        "/Users/julien/Documents/Sumeo/projects/boringdata/boring-semantic-layer/tests/malloy_benchmark/comparing_timeframe.malloy"
    ).run(named_query="query_1")

    df = data.to_dataframe()

    print(df)

I0000 00:00:1756298672.341104 65473345 fork_posix.cc:71] Other threads are currently calling into gRPC, skipping fork() handlers


    month_of_year  flight_count flight_year
0               8          6415  2005-01-01
1              10          6228  2004-01-01
2               7          6227  2005-01-01
3              12          6202  2004-01-01
4               6          6118  2005-01-01
..            ...           ...         ...
67             12          3744  2000-01-01
68             11          3718  2000-01-01
69              2          3672  2001-01-01
70              9          3616  2001-01-01
71              2          3583  2002-01-01

[72 rows x 3 columns]


In [73]:
import malloy
from malloy.data.duckdb import DuckDbConnection

BASE_URL = "https://pub-a45a6a332b4646f2a6f44775695c64df.r2.dev"
tables = {
    "flights_tbl": con.read_parquet(f"{BASE_URL}/flights.parquet"),
    "carriers_tbl": con.read_parquet(f"{BASE_URL}/carriers.parquet"),
    "airports_tbl": con.read_parquet(f"{BASE_URL}/airports.parquet"),
}

with malloy.Runtime() as runtime:
    runtime.add_connection(DuckDbConnection(home_dir="."))

    data = await runtime.load_source(
        """
        source:airports is duckdb.table('%s/airports.parquet') extend{
            primary_key: code

            measure: airport_count is count()

            dimension: name is concat(code, ' - ', full_name)
            dimension: faa_region_name is faa_region ?
                pick 'Southwest' when 'ASW'
                pick 'Northwest Mountain' when 'ANM'
                pick 'Eastern' when 'AEA'
                pick 'Southern' when 'ASO'
                pick 'Great Lakes' when 'AGL'
                pick 'Central' when 'ACE'
                pick 'New England' when 'ANE'
                pick 'Western Pacific' when 'AWP'
                pick 'Alaska' when 'AAL'
            
            view: by_region is {
                group_by: faa_region
                group_by: faa_region_name
                aggregate: airport_count
            }

            view: by_state is {
                where: state is not null
                group_by: state
                aggregate: airport_count
            }

            source: carriers is duckdb.table('../data/carriers.parquet') extend {
                primary_key: code
                measure: carrier_count is count()
            }
            
        }
        """
        % BASE_URL
    ).run(
        query="""
        run: airports -> {
          group_by: code
          aggregate: code_count is count()
          limit: 5
        }
    """
    )
    df = data.to_dataframe()
    print(df)

I0000 00:00:1756297577.284866 65473345 fork_posix.cc:71] Other threads are currently calling into gRPC, skipping fork() handlers


   code  code_count
0   ROI           1
1   N18           1
2   BCV           1
3  11AK           1
4   UBW           1
