Skip to content

Commit

Permalink
Improved struct method - closes #73
Browse files Browse the repository at this point in the history
  • Loading branch information
ankane committed Jun 19, 2024
1 parent 88790dd commit 77dcc6b
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 25 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
## 0.11.1 (unreleased)

- Improved `struct` method

## 0.11.0 (2024-06-02)

- Updated Polars to 0.40.0
Expand Down
75 changes: 50 additions & 25 deletions lib/polars/functions/as_datatype.rb
Original file line number Diff line number Diff line change
Expand Up @@ -93,22 +93,32 @@ def concat_list(exprs)

# Collect several columns into a Series of dtype Struct.
#
# @param exprs [Object]
# Columns/Expressions to collect into a Struct
# @param exprs [Array]
# Column(s) to collect into a struct column, specified as positional arguments.
# Accepts expression input. Strings are parsed as column names,
# other non-expression inputs are parsed as literals.
# @param schema [Hash]
# Optional schema that explicitly defines the struct field dtypes. If no columns
# or expressions are provided, schema keys are used to define columns.
# @param eager [Boolean]
# Evaluate immediately
# Evaluate immediately and return a `Series`. If set to `false` (default),
# return an expression instead.
# @param named_exprs [Hash]
# Additional columns to collect into the struct column, specified as keyword
# arguments. The columns will be renamed to the keyword used.
#
# @return [Object]
#
# @example
# Polars::DataFrame.new(
# df = Polars::DataFrame.new(
# {
# "int" => [1, 2],
# "str" => ["a", "b"],
# "bool" => [true, nil],
# "list" => [[1, 2], [3]],
# }
# ).select([Polars.struct(Polars.all).alias("my_struct")])
# )
# df.select([Polars.struct(Polars.all).alias("my_struct")])
# # =>
# # shape: (2, 1)
# # ┌─────────────────────┐
Expand All @@ -120,29 +130,44 @@ def concat_list(exprs)
# # │ {2,"b",null,[3]} │
# # └─────────────────────┘
#
# @example Only collect specific columns as a struct:
# df = Polars::DataFrame.new(
# {"a" => [1, 2, 3, 4], "b" => ["one", "two", "three", "four"], "c" => [9, 8, 7, 6]}
# )
# df.with_column(Polars.struct(Polars.col(["a", "b"])).alias("a_and_b"))
# @example Collect selected columns into a struct by either passing a list of columns, or by specifying each column as a positional argument.
# df.select(Polars.struct("int", false).alias("my_struct"))
# # =>
# # shape: (4, 4)
# # ┌─────┬───────┬─────┬─────────────┐
# # │ a ┆ b ┆ c ┆ a_and_b │
# # │ --- ┆ --- ┆ --- ┆ --- │
# # │ i64 ┆ str ┆ i64 ┆ struct[2] │
# # ╞═════╪═══════╪═════╪═════════════╡
# # │ 1 ┆ one ┆ 9 ┆ {1,"one"} │
# # │ 2 ┆ two ┆ 8 ┆ {2,"two"} │
# # │ 3 ┆ three ┆ 7 ┆ {3,"three"} │
# # │ 4 ┆ four ┆ 6 ┆ {4,"four"} │
# # └─────┴───────┴─────┴─────────────┘
def struct(exprs, eager: false)
# # shape: (2, 1)
# # ┌───────────┐
# # │ my_struct │
# # │ --- │
# # │ struct[2] │
# # ╞═══════════╡
# # │ {1,false} │
# # │ {2,false} │
# # └───────────┘
#
# @example Use keyword arguments to easily name each struct field.
# df.select(Polars.struct(p: "int", q: "bool").alias("my_struct")).schema
# # =>
# # {"my_struct"=>Polars::Struct([Polars::Field("p", Polars::Int64),
# # Polars::Field("q", Polars::Boolean)])}
def struct(*exprs, schema: nil, eager: false, **named_exprs)
  # Build the struct expression from the positional and keyword inputs.
  rbexprs = Utils.parse_as_list_of_expressions(*exprs, **named_exprs)
  expr = Utils.wrap_expr(Plr.as_struct(rbexprs))

  # Fall back to the schema keys only when no inputs were given at all.
  # `empty?` is used rather than `any?`: `any?` tests truthiness, so literal
  # inputs such as `false` or `nil` would be mistaken for "nothing provided".
  # Keyword inputs count as provided columns too, so they are not discarded.
  no_inputs = exprs.empty? && named_exprs.empty?
  if schema && !schema.empty? && no_inputs
    schema_exprs = Utils.parse_as_list_of_expressions(schema.keys)
    expr =
      Utils.wrap_expr(Plr.as_struct(schema_exprs))
        .cast(Struct.new(schema), strict: false)
    # NOTE(review): the cast is applied only on this fallback path, so `schema`
    # has no effect on explicitly supplied inputs -- confirm this is intended.
  end

  # Lazy by default: hand back the expression unless eager evaluation was requested.
  return expr unless eager

  Polars.select(expr).to_series
end

# Horizontally concat Utf8 Series in linear time. Non-Utf8 columns are cast to Utf8.
Expand Down

0 comments on commit 77dcc6b

Please sign in to comment.