Skip to content

Commit

Permalink
Moving away from Integer | Nothing to Rows_To_Read for limiting number of rows. (#9925)
Browse files Browse the repository at this point in the history

- Added a new `Rows_To_Read` type with conversions from `Nothing` and integers.
- Updated `read` on `Table`, `Column`, `DB_Table` and `DB_Column`.
- Updated `Delimited_Format.Delimited` to use `Rows_To_Read` for `row_limit`.
- Updated `Excel_Format.Sheet` and `Excel_Format.Range` to use `Rows_To_Read` for `row_limit`.
- Updated `Excel_Workbook.read` to use `Rows_To_Read`.
- Updated `Connection.read` (in all connection types) to use `Rows_To_Read`.

![image](https://github.com/enso-org/enso/assets/4699705/553c027f-f4c3-4855-9f51-2c4bcaec48a0)

![image](https://github.com/enso-org/enso/assets/4699705/a06c3912-77e0-4c10-abb8-73aed667458d)
  • Loading branch information
jdunkerley committed May 14, 2024
1 parent 557d585 commit b2aeb9f
Show file tree
Hide file tree
Showing 20 changed files with 206 additions and 157 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ from Standard.Base.Metadata.Choice import Option
from Standard.Base.Metadata.Widget import Single_Choice, Vector_Editor

import Standard.Table.Internal.Column_Naming_Helper.Column_Naming_Helper
import Standard.Table.Rows_To_Read.Rows_To_Read
from Standard.Table import Table, Value_Type

import project.Column_Description.Column_Description
Expand Down Expand Up @@ -251,9 +252,6 @@ type Connection
If supplied as `Text`, the name is checked against the `tables` list to
determine if it is a table or a query.
- limit: the maximum number of rows to read.
If set to `Nothing`, all rows will be returned.
- warn_if_more_rows: if set to `True`, a warning is attached to the
result if the number of rows returned by the query exceeds `limit`.

? Side Effects

Expand All @@ -263,9 +261,10 @@ type Connection
`execute_update` for DML queries, or if they are supposed to return
results, the `read` should be wrapped in an execution context check.
@query make_table_name_selector
read : Text | SQL_Query -> Integer | Nothing -> Boolean -> Table ! Table_Not_Found
read self query (limit : Integer | Nothing = 1000) (warn_if_more_rows : Boolean = True) =
self.query query . read max_rows=limit warn_if_more_rows=warn_if_more_rows
@limit Rows_To_Read.default_widget
read : Text | SQL_Query -> Rows_To_Read -> Table ! Table_Not_Found
read self query (limit : Rows_To_Read = ..First_With_Warning 1000) =
self.query query . read max_rows=limit

## PRIVATE
Creates a new empty table in the database and returns a query referencing
Expand Down
15 changes: 7 additions & 8 deletions distribution/lib/Standard/Database/0.0.0-dev/src/DB_Column.enso
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import Standard.Table.Internal.Java_Problems
import Standard.Table.Internal.Problem_Builder.Problem_Builder
import Standard.Table.Internal.Value_Type_Helpers
import Standard.Table.Internal.Widget_Helpers
import Standard.Table.Rows_To_Read.Rows_To_Read
from Standard.Table import Auto, Column, Data_Formatter, Previous_Value, Sort_Column, Table, Value_Type
from Standard.Table.Column import default_date_period
from Standard.Table.Errors import Conversion_Failure, Floating_Point_Equality, Inexact_Type_Coercion, Invalid_Value_Type
Expand Down Expand Up @@ -105,18 +106,16 @@ type DB_Column

Arguments:
- max_rows: specifies the maximum number of rows to read.
If `Nothing`, all available rows are returned.
- warn_if_more_rows: if set to `True`, a warning is attached to the
result if the number of rows returned by the query exceeds `max_rows`.
read : (Nothing | Integer) -> Boolean -> Column
read self (max_rows : Nothing | Integer = 1000) (warn_if_more_rows:Boolean = True) =
self.to_table.read max_rows warn_if_more_rows . at 0
@max_rows Rows_To_Read.default_widget
read : Rows_To_Read -> Column
read self (max_rows : Rows_To_Read = ..First_With_Warning 1000) =
self.to_table.read max_rows . at 0

## GROUP Standard.Base.Conversions
ICON convert
Returns a vector containing all the elements in this column.
to_vector : Vector Any
to_vector self = self.read max_rows=Nothing . to_vector
to_vector self = self.read (..All_Rows) . to_vector

## GROUP Standard.Base.Selections
ICON select_row
Expand Down Expand Up @@ -154,7 +153,7 @@ type DB_Column
example_at = Examples.integer_column.get 0 -1
get : Integer -> Any -> Any | Nothing
get self (index : Integer) (~default=Nothing) =
self.read index+1 . get index default
self.read (..First index+1) . get index default

## GROUP Standard.Base.Metadata
Returns the `Value_Type` associated with that column.
Expand Down
81 changes: 38 additions & 43 deletions distribution/lib/Standard/Database/0.0.0-dev/src/DB_Table.enso
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ import Standard.Table.Internal.Value_Type_Helpers
import Standard.Table.Internal.Widget_Helpers
import Standard.Table.Match_Columns as Match_Columns_Helpers
import Standard.Table.Row.Row
import Standard.Table.Rows_To_Read.Rows_To_Read
from Standard.Table import Aggregate_Column, Auto, Blank_Selector, Column_Ref, Data_Formatter, Join_Condition, Join_Kind, Match_Columns, Position, Previous_Value, Report_Unmatched, Set_Mode, Simple_Expression, Sort_Column, Table, Value_Type
from Standard.Table.Errors import all
from Standard.Table.Internal.Filter_Condition_Helpers import make_filter_column
Expand Down Expand Up @@ -96,7 +97,7 @@ type DB_Table
- format_terminal: whether ANSI-terminal formatting should be used
display : Integer -> Boolean -> Text
display self show_rows=10 format_terminal=False =
data_fragment_with_warning = self.read max_rows=show_rows warn_if_more_rows=True
data_fragment_with_warning = self.read (..First_With_Warning show_rows)
has_more_rows = data_fragment_with_warning.has_warnings warning_type=Not_All_Rows_Downloaded
data_fragment_cleared = data_fragment_with_warning.remove_warnings Not_All_Rows_Downloaded
# `row_count` means another Database query is performed, so we only do it if we need to.
Expand Down Expand Up @@ -1130,26 +1131,24 @@ type DB_Table

Arguments:
- max_rows: specifies the maximum number of rows to read.
If `Nothing`, all available rows are returned.
- warn_if_more_rows: if set to `True`, a warning is attached to the
result if the number of rows returned by the query exceeds `max_rows`.
rows : Integer | Nothing -> Boolean -> Vector Row
rows self (max_rows : Integer | Nothing = 1000) (warn_if_more_rows : Boolean = True) =
self.read max_rows=max_rows warn_if_more_rows=warn_if_more_rows . rows
@max_rows Rows_To_Read.default_widget
rows : Rows_To_Read -> Vector Row
rows self (max_rows : Rows_To_Read = (..First_With_Warning 1000)) =
self.read max_rows . rows

## GROUP Standard.Base.Selections
ICON select_row
Returns the first row of the table.
first_row : Row ! Index_Out_Of_Bounds
first_row self =
self.read max_rows=1 warn_if_more_rows=False . rows . first
self.read (..First 1) . rows . first

## GROUP Standard.Base.Selections
ICON select_row
Returns the second row of the table.
second_row : Row ! Index_Out_Of_Bounds
second_row self =
self.read max_rows=2 warn_if_more_rows=False . rows . second
self.read (..First 2) . rows . second

## GROUP Standard.Base.Selections
ICON select_row
Expand Down Expand Up @@ -2547,40 +2546,36 @@ type DB_Table

Arguments:
- max_rows: specifies the maximum number of rows to read.
If `Nothing`, all available rows are returned.
- warn_if_more_rows: if set to `True`, a warning is attached to the
result if the number of rows returned by the query exceeds `max_rows`.
read : (Integer | Nothing) -> Boolean -> Table
read self (max_rows : Integer | Nothing = 1000) (warn_if_more_rows:Boolean = True) =
preprocessed = if max_rows.is_nothing then self else
if warn_if_more_rows then self.limit max_rows+1 else self.limit max_rows
case preprocessed.internal_columns.is_empty of
True ->
Error.throw (Illegal_Argument.Error "Cannot create a table with no columns.")
False ->
sql = preprocessed.to_sql
column_type_suggestions = preprocessed.internal_columns.map .sql_type_reference
materialized_table = self.connection.read_statement sql column_type_suggestions . catch SQL_Error sql_error->
Error.throw (self.connection.dialect.get_error_mapper.transform_custom_errors sql_error)

warnings_builder = Vector.new_builder
expected_types = self.columns.map .value_type
actual_types = materialized_table.columns.map .value_type
expected_types.zip actual_types expected_type-> actual_type->
if expected_type == actual_type then Nothing else
expected_type_kind = Meta.meta expected_type . constructor
actual_type_kind = Meta.meta actual_type . constructor
## We ignore simple approximations that our in-memory backend does - things like adding default
timezone (because we do not have Date_Time without timezone in-memory),
or changing Float32 to Float64 are silently ignored.
However, bigger changes, like a Binary type column getting coerced to Mixed - _will_ still be reported.
if expected_type_kind == actual_type_kind then Nothing else
warnings_builder.append (Inexact_Type_Coercion.Warning expected_type actual_type)
result = if max_rows.is_nothing || materialized_table.row_count <= max_rows then materialized_table else
assert warn_if_more_rows "We may get more rows than we asked for _only_ if warn_if_more_rows=True"
warnings_builder.append (Not_All_Rows_Downloaded.Warning max_rows)
materialized_table.take max_rows
Problem_Behavior.Report_Warning.attach_problems_before warnings_builder.to_vector result
@max_rows Rows_To_Read.default_widget
read : Rows_To_Read -> Table
read self (max_rows : Rows_To_Read = ..First_With_Warning 1000) =
if self.internal_columns.is_empty then Error.throw (Illegal_Argument.Error "Cannot create a table with no columns.") else
preprocessed = case max_rows of
Rows_To_Read.All_Rows -> self
Rows_To_Read.First n -> self.limit n
Rows_To_Read.First_With_Warning n -> self.limit n+1

sql = preprocessed.to_sql
column_type_suggestions = preprocessed.internal_columns.map .sql_type_reference
materialized_table = self.connection.read_statement sql column_type_suggestions . catch SQL_Error sql_error->
Error.throw (self.connection.dialect.get_error_mapper.transform_custom_errors sql_error)

warnings_builder = Vector.new_builder
expected_types = self.columns.map .value_type
actual_types = materialized_table.columns.map .value_type
expected_types.zip actual_types expected_type-> actual_type->
if expected_type == actual_type then Nothing else
expected_type_kind = Meta.meta expected_type . constructor
actual_type_kind = Meta.meta actual_type . constructor
## We ignore simple approximations that our in-memory backend does - things like adding default
timezone (because we do not have Date_Time without timezone in-memory),
or changing Float32 to Float64 are silently ignored.
However, bigger changes, like a Binary type column getting coerced to Mixed - _will_ still be reported.
if expected_type_kind == actual_type_kind then Nothing else
warnings_builder.append (Inexact_Type_Coercion.Warning expected_type actual_type)

result = max_rows.attach_warning materialized_table
Problem_Behavior.Report_Warning.attach_problems_before warnings_builder.to_vector result

## PRIVATE
Creates a query corresponding to this table.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ check_initial_invariants base_table lookup_table lookup_columns allow_unmatched_
check_for_null_keys lookup_table key_column_names <|
if allow_unmatched_rows then continuation else
unmatched_rows = base_table.join lookup_table on=key_column_names join_kind=Join_Kind.Left_Exclusive . select_columns key_column_names
unmatched_example = unmatched_rows.read max_rows=1 warn_if_more_rows=False
unmatched_example = unmatched_rows.read (..First 1)
if unmatched_example.row_count == 0 then continuation else
first_row = unmatched_example.rows.first
Error.throw (Unmatched_Rows_In_Lookup.Error first_row.to_vector)
Expand Down Expand Up @@ -197,7 +197,7 @@ precheck_for_duplicate_matches lookup_columns subquery_setup connection new_ctx
Lookup_Column.Key_Column _ _ -> [subquery_setup.get_self_column ix]
_ -> []
table_for_duplicate_check = DB_Table.Value subquery_setup.new_table_name connection [subquery_setup.lookup_counter]+key_columns_for_duplicate_check new_ctx
duplicate_lookup_matches = table_for_duplicate_check.filter 0 (Filter_Condition.Greater than=1) . read max_rows=1 warn_if_more_rows=False
duplicate_lookup_matches = table_for_duplicate_check.filter 0 (Filter_Condition.Greater than=1) . read (..First 1)
case duplicate_lookup_matches.row_count > 0 of
True ->
first_example_row = duplicate_lookup_matches.read.rows.first.to_vector
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ from Standard.Base.Metadata.Choice import Option
from Standard.Base.Metadata.Widget import Single_Choice

from Standard.Table import Table, Value_Type
import Standard.Table.Rows_To_Read.Rows_To_Read

import project.Column_Description.Column_Description
import project.Connection.Connection.Connection
Expand Down Expand Up @@ -167,9 +168,6 @@ type Postgres_Connection
If supplied as `Text`, the name is checked against the `tables` list to
determine if it is a table or a query.
- limit: the maximum number of rows to read.
If set to `Nothing`, all rows will be returned.
- warn_if_more_rows: if set to `True`, a warning is attached to the
result if the number of rows returned by the query exceeds `limit`.

? Side Effects

Expand All @@ -179,9 +177,10 @@ type Postgres_Connection
`execute_update` for DML queries, or if they are supposed to return
results, the `read` should be wrapped in an execution context check.
@query make_table_name_selector
read : Text | SQL_Query -> Integer | Nothing -> Boolean -> Table ! Table_Not_Found
read self query (limit : Integer | Nothing = 1000) (warn_if_more_rows : Boolean = True) =
self.connection.read query limit warn_if_more_rows
@limit Rows_To_Read.default_widget
read : Text | SQL_Query -> Rows_To_Read -> Table ! Table_Not_Found
read self query (limit : Rows_To_Read = ..First_With_Warning 1000) =
self.connection.read query limit

## GROUP Standard.Base.Output
ICON data_output
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ from Standard.Base.Metadata.Choice import Option
from Standard.Base.Metadata.Widget import Single_Choice

from Standard.Table import Table, Value_Type
import Standard.Table.Rows_To_Read.Rows_To_Read

import project.Column_Description.Column_Description
import project.Connection.Connection.Connection
Expand Down Expand Up @@ -154,9 +155,6 @@ type SQLite_Connection
If supplied as `Text`, the name is checked against the `tables` list to
determine if it is a table or a query.
- limit: the maximum number of rows to read.
If set to `Nothing`, all rows will be returned.
- warn_if_more_rows: if set to `True`, a warning is attached to the
result if the number of rows returned by the query exceeds `limit`.

? Side Effects

Expand All @@ -166,9 +164,10 @@ type SQLite_Connection
`execute_update` for DML queries, or if they are supposed to return
results, the `read` should be wrapped in an execution context check.
@query make_table_name_selector
read : Text | SQL_Query -> Integer | Nothing -> Boolean -> Table ! Table_Not_Found
read self query (limit : Integer | Nothing = 1000) (warn_if_more_rows : Boolean = True) =
self.connection.read query limit warn_if_more_rows
@limit Rows_To_Read.default_widget
read : Text | SQL_Query -> Rows_To_Read -> Table ! Table_Not_Found
read self query (limit : Rows_To_Read = ..First_With_Warning 1000) =
self.connection.read query limit

## GROUP Standard.Base.Output
ICON data_output
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ type Non_Unique_Key_Recipe
raise_duplicated_primary_key_error source_table primary_key original_panic =
agg = source_table.aggregate primary_key [Aggregate_Column.Count]
filtered = agg.filter column=-1 (Filter_Condition.Greater than=1)
materialized = filtered.read max_rows=1 warn_if_more_rows=False
materialized = filtered.read (..First 1)
case materialized.row_count == 0 of
## If we couldn't find a duplicated key, we give up the translation and
rethrow the original panic containing the SQL error. This could
Expand Down Expand Up @@ -619,7 +619,7 @@ check_multiple_rows_match left_table right_table key_columns ~continuation =
joined = left_table.join right_table on=key_columns join_kind=Join_Kind.Inner
counted = joined.aggregate key_columns [Aggregate_Column.Count]
duplicates = counted.filter -1 (Filter_Condition.Greater than=1)
example = duplicates.read max_rows=1 warn_if_more_rows=False
example = duplicates.read (..First 1)
case example.row_count == 0 of
True -> continuation
False ->
Expand All @@ -633,7 +633,7 @@ check_for_null_keys table key_columns ~continuation =
keys = table.select_columns key_columns
is_any_key_blank = keys.columns.map (_.is_nothing) . reduce (||)
null_keys = table.filter is_any_key_blank Filter_Condition.Is_True
example = null_keys.read max_rows=1 warn_if_more_rows=False
example = null_keys.read (..First 1)
case example.row_count == 0 of
True -> continuation
False ->
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ from Standard.Base.Metadata.Choice import Option
from Standard.Base.Metadata.Widget import Single_Choice

from Standard.Table import Table
import Standard.Table.Rows_To_Read.Rows_To_Read

import Standard.Database.Column_Description.Column_Description
import Standard.Database.Connection.Connection.Connection
Expand Down Expand Up @@ -185,9 +186,6 @@ type Snowflake_Connection
If supplied as `Text`, the name is checked against the `tables` list to
determine if it is a table or a query.
- limit: the maximum number of rows to read.
If set to `Nothing`, all rows will be returned.
- warn_if_more_rows: if set to `True`, a warning is attached to the
result if the number of rows returned by the query exceeds `limit`.

? Side Effects

Expand All @@ -197,9 +195,10 @@ type Snowflake_Connection
`execute_update` for DML queries, or if they are supposed to return
results, the `read` should be wrapped in an execution context check.
@query make_table_name_selector
read : Text | SQL_Query -> Integer | Nothing -> Boolean -> Table ! Table_Not_Found
read self query (limit : Integer | Nothing = 1000) (warn_if_more_rows : Boolean = True) =
self.connection.read query limit warn_if_more_rows
@limit Rows_To_Read.default_widget
read : Text | SQL_Query -> Rows_To_Read -> Table ! Table_Not_Found
read self query (limit : Rows_To_Read = ..First_With_Warning 1000) =
self.connection.read query limit

## GROUP Standard.Base.Output
ICON data_output
Expand Down
11 changes: 5 additions & 6 deletions distribution/lib/Standard/Table/0.0.0-dev/src/Column.enso
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import project.Internal.Parse_Values_Helper
import project.Internal.Storage
import project.Internal.Value_Type_Helpers
import project.Internal.Widget_Helpers
import project.Rows_To_Read.Rows_To_Read
import project.Table.Table
import project.Value_Type.Auto
import project.Value_Type.Value_Type
Expand Down Expand Up @@ -2140,13 +2141,11 @@ type Column

Arguments:
- max_rows: specifies the maximum number of rows to read.
If `Nothing`, all available rows are returned.
- warn_if_more_rows: if set to `True`, a warning is attached to the
result if the number of rows returned by the query exceeds `max_rows`.
read : (Nothing | Integer) -> Boolean -> Column
read self (max_rows : Nothing | Integer = Nothing) (warn_if_more_rows:Boolean = True) =
@max_rows Rows_To_Read.default_widget
read : Rows_To_Read -> Column
read self (max_rows : Rows_To_Read = ..All_Rows) =
if max_rows.is_nothing then self else
self.to_table.read max_rows warn_if_more_rows . at 0
self.to_table.read max_rows . at 0

## GROUP Standard.Base.Conversions
ICON convert
Expand Down

0 comments on commit b2aeb9f

Please sign in to comment.