Skip to content

Commit

Permalink
Fix most of expand code.
Browse files Browse the repository at this point in the history
  • Loading branch information
jdunkerley committed Jan 31, 2024
1 parent 5bca64c commit a4f3335
Show file tree
Hide file tree
Showing 3 changed files with 117 additions and 75 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ type File_By_Line
create_row_map =
row_map = LongArrayList.new
row_map.add offset
File_By_Line.log "Created row map"
File_By_Line.log_message "Created row map"
row_map
File_By_Line.Reader file encoding Nothing Nothing create_row_map

Expand Down Expand Up @@ -134,7 +134,7 @@ type File_By_Line
position = parent.row_map.getOrLast line
row_map = LongArrayList.new
row_map.add position
parent.log "Created Skipped Row Map ("+line.to_text+")"
parent.log_message "Created Skipped Row Map ("+line.to_text+")"
row_map

new_limit = if self.limit_lines.is_nothing then lines else lines.min self.limit_lines
Expand Down Expand Up @@ -165,7 +165,7 @@ type File_By_Line
first_index = FileLineReader.findFirstNewFilter parent.java_file parent.file_end parent.row_map end_at parent.charset parent.filter_func new_filter
new_row_map = LongArrayList.new
new_row_map.add first_index
parent.log "Found Filter Start - "+first_index.to_text
parent.log_message "Found Filter Start - "+first_index.to_text
new_row_map

## Merge the two predicates together.
Expand All @@ -179,7 +179,7 @@ type File_By_Line
file_len = if parent.row_map.getSize > line then parent.row_map.get line else
for_each_lines self 0 line Nothing
parent.row_map.get parent.row_map.getSize-1
parent.log "Created File End ("+line.to_text+") - "+file_len.to_text
parent.log_message "Created File End ("+line.to_text+") - "+file_len.to_text
file_len
File_By_Line.Reader self.file self.encoding Nothing new_predicate (make_filter_map self new_filter) (index_of self self.limit_lines)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ type Convertible_To_Rows
Arguments:
- length: The number of rows in the table.
- getter: Get the value for a specified row.
- columns: The suffix names for the columns when object is expanded.
- columns: The names for the columns when object is expanded.
These will be added to the name of the input column.
Value length:Integer (getter : Integer->Any) (columns:Vector=["Value"])

## PRIVATE
Expand Down Expand Up @@ -45,7 +46,7 @@ Convertible_To_Rows.from that:Date_Range = Convertible_To_Rows.Value that.length

## PRIVATE
Convertible_To_Rows.from that:Map =
vals = that.map_with_key k->v-> Key_Value.Pair k v
vals = that.to_vector.map p-> Key_Value.Pair p.first p.second
Convertible_To_Rows.Value vals.length vals.get ["Key", "Value"]

## PRIVATE
Expand All @@ -70,13 +71,24 @@ type Key_Value
- value: The value of the pair.
Pair key:Any value:Any

## PRIVATE
at self idx = self.get idx

## PRIVATE
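Return the key or the value of the pair: index 0 or "Key" gives the key,
index 1 or "Value" gives the value, and anything else gives Nothing.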
get self idx:Integer = case idx of
get self idx = case idx of
0 -> self.key
1 -> self.value
"Key" -> self.key
"Value" -> self.value
_ -> Nothing

## PRIVATE
is_empty self = False

## PRIVATE
length self = 2

## PRIVATE
Convertible_To_Columns.from (that:Key_Value) =
    field_names = ["Key", "Value"]
    # "Key" selects the key; every other requested name falls through to the value.
    field_getter = name-> if name == "Key" then that.key else that.value
    Convertible_To_Columns.Value field_names field_getter
166 changes: 98 additions & 68 deletions distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Fan_Out.enso
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ from Standard.Base import all
import Standard.Base.Runtime.Ref.Ref

import project.Data.Column.Column
import project.Data.Conversions.Convertible_To_Rows.Key_Value
import project.Data.Table.Table
import project.Data.Type.Value_Type.Value_Type
import project.Internal.Problem_Builder.Problem_Builder
Expand Down Expand Up @@ -51,7 +52,10 @@ fan_out_to_rows table input_column_id:Text function column_names=[input_column_i
## Treat this as a special case of fan_out_to_rows_and_columns, with one
column. Wrap the provided function so that each returned value which is not
already a `Vector` or a `Key_Value` is converted to a singleton `Vector`.
wrapped_function x = function x . map y-> if y.is_a Vector then y else [y]
wrapped_function x = function x . map y-> case y of
_:Vector -> y
_:Key_Value -> y
_ -> [y]
fan_out_to_rows_and_columns table input_column_id wrapped_function column_names at_least_one_row=at_least_one_row column_builder=column_builder on_problems=on_problems

## PRIVATE
Expand Down Expand Up @@ -101,99 +105,125 @@ fan_out_to_rows_and_columns table input_column_id function column_names at_least

input_column = table.at input_column_id
input_storage = input_column.java_column.getStorage

# Create the columns and a mask.
pair = if column_names.is_a Vector then fan_out_to_rows_and_columns_fixed input_storage function at_least_one_row column_names column_builder problem_builder else
fan_out_to_rows_and_columns_dynamic input_storage function at_least_one_row column_names column_builder problem_builder
raw_output_columns = pair.first
order_mask_positions = pair.second

# Reserve the non-input column names that will not be changing.
non_input_columns = table.columns.filter c-> c.name != input_column.name
unique.mark_used <| non_input_columns.map .name

# Make output columns unique.
output_columns = raw_output_columns.map column->
column_name = unique.make_unique column.name
column.rename column_name

# Build the order mask.
order_mask = OrderMask.fromArray (order_mask_positions.to_vector)

## Build the new table, replacing the input column with the new output columns.
new_columns_unflattened = table.columns.map column->
# Replace the input column with the output columns.
if column.name == input_column_id then output_columns else
# Build a new column from the old one with the mask
old_storage = column.java_column.getStorage
new_storage = old_storage.applyMask order_mask
[Column.from_storage column.name new_storage]
new_table = Table.new new_columns_unflattened.flatten
problem_builder.attach_problems_after on_problems new_table

## PRIVATE
Inner method for fan_out_to_rows_and_columns where the column names are fixed.
fan_out_to_rows_and_columns_fixed : Any -> (Any -> Vector (Vector Any)) -> Boolean -> Vector Text -> (Integer -> Any) -> Problem_Builder -> Vector
fan_out_to_rows_and_columns_fixed input_storage function at_least_one_row:Boolean column_names:Vector column_builder problem_builder =
num_output_columns = column_names.length
num_input_rows = input_storage.size

# Create a column name map.
dynamic_columns = column_names.is_a Vector . not
num_output_columns = if dynamic_columns then 0 else column_names.length
column_map = Ref.new <|
if dynamic_columns then Map.empty else Map.from_vector (column_names.map_with_index i->n->[n,i])
# Accumulates the outputs of the function.
output_column_builders = Vector.new num_output_columns _-> column_builder num_input_rows

# Guess that most of the time, we'll get at least one value for each input.
initial_size = input_column.length
# Accumulates repeated position indices for the order mask.
order_mask_positions = Vector.new_builder num_input_rows

empty_row = [Vector.fill num_output_columns Nothing]
maybe_add_empty_row vecs = if vecs.is_empty && at_least_one_row then empty_row else vecs

0.up_to num_input_rows . each i->
input_value = input_storage.getItemBoxed i
output_values = maybe_add_empty_row (function input_value)

output_values.each row_unchecked->
row = uniform_length num_output_columns row_unchecked problem_builder
row.each_with_index i-> v-> output_column_builders.at i . append v

# Append n copies of the input row position, n = # of output values.
repeat_each output_values.length <| order_mask_positions.append i

output_columns = column_names.map_with_index i->n->
Column.from_storage n (output_column_builders.at i . seal)

[output_columns, order_mask_positions]

## PRIVATE
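Inner method for fan_out_to_rows_and_columns where the column names are
determined for each input value by `column_names_for_row`; an output column
is created the first time its name is encountered.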
fan_out_to_rows_and_columns_dynamic : Any -> (Any -> Vector (Vector Any)) -> Boolean -> (Any -> Text) -> (Integer -> Any) -> Problem_Builder -> Vector
fan_out_to_rows_and_columns_dynamic input_storage function at_least_one_row column_names_for_row column_builder problem_builder =
# Accumulates the outputs of the function.
output_column_builders = if dynamic_columns then Vector.new_builder else
Vector.new num_output_columns _-> column_builder initial_size
column_map = Ref.new Map.empty
output_column_builders = Vector.new_builder

# Guess that most of the time, we'll get at least one value for each input.
num_input_rows = input_storage.size

# Column Builder add function
add_column n current_length =
column_map.put (column_map.get.insert n output_column_builders.length)
builder = column_builder initial_size
builder = column_builder num_input_rows
builder.appendNulls current_length
output_column_builders.append builder

# Accumulates repeated position indices for the order mask.
order_mask_positions = Vector.new_builder initial_size
order_mask_positions = Vector.new_builder num_input_rows

maybe_add_empty_row vecs =
should_add_empty_row = vecs.is_empty && at_least_one_row
if should_add_empty_row.not then vecs else
empty_row = Vector.fill num_output_columns Nothing
[empty_row]
maybe_add_empty_row vecs = if (vecs.is_empty && at_least_one_row).not then vecs else
[Vector.fill output_column_builders.length Nothing]

0.up_to num_input_rows . each i->
input_value = input_storage.getItemBoxed i
output_values = maybe_add_empty_row (function input_value)

case dynamic_columns of
False ->
# Append each group of values to the builder.
output_values.each row_unchecked->
row = uniform_length num_output_columns row_unchecked problem_builder
row.each_with_index i-> v-> output_column_builders.at i . append v
True ->
# get the column names for the row.
row_column_names = column_names input_value

# Add any missing columns.
row_column_names.each n->
if column_map.get.contains_key n . not then add_column n order_mask_positions.length

# Append each group of values to the builder.
current_columns = column_map.get
output_values.each row_unchecked->
row = uniform_length row_column_names.length row_unchecked problem_builder
row_column_names.each_with_index i->n->
output_column_builders.at (current_columns.at n) . append (row.at i)

# Fill in values for any column not present
if row_column_names.length != output_column_builders.length then
current_columns.each_with_key k->i->
if row_column_names.contains k . not then
output_column_builders.at i . appendNulls output_values.length
# get the column names for the row.
row_column_names = column_names_for_row input_value

# Add any missing columns.
row_column_names.each n->
if column_map.get.contains_key n . not then
add_column n order_mask_positions.length

# Append each group of values to the builder.
current_columns = column_map.get
output_values.each row_unchecked->
row = uniform_length row_column_names.length row_unchecked problem_builder
row_column_names.each_with_index i->n->
output_column_builders.at (current_columns.at n) . append (row.at i)

# Fill in values for any column not present
if row_column_names.length != output_column_builders.length then
current_columns.each_with_key k->i->
if row_column_names.contains k . not then
output_column_builders.at i . appendNulls output_values.length

# Append n copies of the input row position, n = # of output values.
repeat_each output_values.length <| order_mask_positions.append i

# Reserve the non-input column names that will not be changing.
non_input_columns = table.columns.filter c-> c.name != input_column.name
unique.mark_used <| non_input_columns.map .name

# Build the output columns, ordered by their position in the column map.
output_columns = column_map.get.to_vector.sort on=_.second . map pair->
column_name = unique.make_unique pair.first
Column.from_storage column_name (output_column_builders.at pair.second . seal)
Column.from_storage pair.first (output_column_builders.at pair.second . seal)

# Build the order mask.
order_mask = OrderMask.fromArray (order_mask_positions.to_vector)

## Build the new table, replacing the input column with the new output
columns.
new_columns_unflattened = table.columns.map column->
case column.name == input_column_id of
True ->
# Replace the input column with the output columns.
output_columns
False ->
# Build a new column from the old one with the mask
old_storage = column.java_column.getStorage
new_storage = old_storage.applyMask order_mask
[Column.from_storage column.name new_storage]
new_columns = new_columns_unflattened.flatten

new_table = Table.new new_columns
problem_builder.attach_problems_after on_problems new_table
[output_columns, order_mask_positions]

## PRIVATE

Expand Down

0 comments on commit a4f3335

Please sign in to comment.