enso-org · mergify · Jun 6, 2023 · May 23, 2023 · May 23, 2023 · May 23, 2023
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -476,6 +476,7 @@
 - [Added `.round`, `.truncate`, `.ceil`, and `.floor` to `Column`.][6817]
 - [Added execution control to `Table.write` and various bug fixes.][6835]
 - [Implemented `Table.add_row_number`.][6890]
+- [Speed improvements to `Column` `.truncate`, `.ceil`, and `.floor`.][6941]
 
 [debug-shortcuts]:
   https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug
@@ -691,6 +692,7 @@
 [6817]: https://github.com/enso-org/enso/pull/6817
 [6835]: https://github.com/enso-org/enso/pull/6835
 [6890]: https://github.com/enso-org/enso/pull/6890
+[6941]: https://github.com/enso-org/enso/pull/6941
 
 #### Enso Compiler
 

@@ -731,13 +731,12 @@ type Column
     truncate self =
         case self.value_type.is_numeric of
             True ->
-                fun = _.truncate
-                Column_Ops.map_over_storage self fun make_long_builder skip_nothing=True
+                simple_unary_op self "truncate"
             False -> case self.value_type == Value_Type.Date_Time of
                 True ->
                     fun = _.date
                     Column_Ops.map_over_storage self fun make_date_builder_adapter skip_nothing=True
-                False -> Error.throw <| Invalid_Value_Type.Column "Numeric or Date_Time" self.value_type
+                False -> Error.throw <| Invalid_Value_Type.Column "Numeric or Date_Time" self.value_type self.name
 
     ## Computes the nearest integer above this number for values in a numeric
        column.
@@ -750,8 +749,7 @@ type Column
              Column.from_vector "foo" [1.25, 2.33, 3.57] . ceil == (Column.from_vector "foo" [2, 3, 4])
     ceil : Column ! Invalid_Value_Type
     ceil self = Value_Type.expect_numeric self <|
-        fun = _.ceil
-        Column_Ops.map_over_storage self fun make_long_builder skip_nothing=True
+        simple_unary_op self "ceil"
 
     ## Computes the nearest integer below this number for values in a numeric
        column.
@@ -764,8 +762,7 @@ type Column
              Column.from_vector "foo" [1.25, 2.33, 3.57] . floor == (Column.from_vector "foo" [1, 2, 3])
     floor : Column ! Invalid_Value_Type
     floor self = Value_Type.expect_numeric self <|
-        fun = _.floor
-        Column_Ops.map_over_storage self fun make_long_builder skip_nothing=True
+        simple_unary_op self "floor"
 
     ## Returns a column of first non-`Nothing` value on each row of `self` and
        `values` list.

@@ -32,7 +32,7 @@ type Bench
             act_it_num = num_iters - it_num
             res = times it_size single_call
             avg = avg_list res
-            fmt = (avg / 1000000).format "%.2f"
+            fmt = (avg / 1000000).format "#.##"
             result.put (result.get + avg)
             case dry_run of
                 False ->
@@ -41,7 +41,7 @@ type Bench
                     IO.println (label + "/dry-run: " + fmt)
         if dry_run then times 1 (iteration 1) else
             times num_iters (iteration iter_size)
-            fmt_avg = (result.get / (1000000*num_iters)).format "%.2f"
+            fmt_avg = (result.get / (1000000*num_iters)).format "#.##"
             IO.println (label + " average: " + fmt_avg + "ms")
 
 ## PRIVATE

@@ -0,0 +1,54 @@
+package org.enso.table.data.column.operation.map;
+
+import org.enso.table.data.column.storage.numeric.DoubleStorage;
+import org.enso.table.data.column.storage.numeric.LongStorage;
+
+import java.util.BitSet;
+
+/**
+ * An operation that takes a single double argument and returns a long.
+ *
+ * <p>Subclasses supply the per-element conversion via {@link #doOperation(double)}; entries that
+ * are missing in the input are reported as missing in the result.
+ */
+public abstract class UnaryDoubleToLongOp extends UnaryMapOperation<Double, DoubleStorage> {
+
+  /**
+   * @param name the operation name, forwarded to the {@link UnaryMapOperation} constructor
+   */
+  public UnaryDoubleToLongOp(String name) {
+    super(name);
+  }
+
+  /**
+   * Converts one non-missing element.
+   *
+   * @param value the double read from the input storage
+   * @return the long to store at the same index of the result
+   */
+  protected abstract long doOperation(double value);
+
+  /**
+   * Applies {@link #doOperation(double)} to every non-missing element of {@code storage}.
+   *
+   * @param storage the input storage
+   * @return a new {@link LongStorage} with as many entries as the input; indices that are missing
+   *     in the input are set in the {@link BitSet} handed to the result
+   */
+  @Override
+  protected LongStorage run(DoubleStorage storage) {
+    // Read the size once instead of re-querying it on every loop iteration.
+    int size = storage.size();
+    BitSet newMissing = new BitSet();
+    long[] newVals = new long[size];
+    for (int i = 0; i < size; i++) {
+      if (storage.isNa(i)) {
+        newMissing.set(i);
+      } else {
+        newVals[i] = doOperation(storage.getItem(i));
+      }
+    }
+
+    return new LongStorage(newVals, size, newMissing);
+  }
+}
@@ -0,0 +1,54 @@
+package org.enso.table.data.column.operation.map;
+
+import org.enso.table.data.column.storage.numeric.AbstractLongStorage;
+import org.enso.table.data.column.storage.numeric.LongStorage;
+
+import java.util.BitSet;
+
+/**
+ * An operation that takes a single long argument and returns a long.
+ *
+ * <p>Subclasses supply the per-element conversion via {@link #doOperation(long)}; entries that are
+ * missing in the input are reported as missing in the result.
+ */
+public abstract class UnaryLongToLongOp extends UnaryMapOperation<Long, AbstractLongStorage> {
+
+  /**
+   * @param name the operation name, forwarded to the {@link UnaryMapOperation} constructor
+   */
+  public UnaryLongToLongOp(String name) {
+    super(name);
+  }
+
+  /**
+   * Converts one non-missing element.
+   *
+   * @param value the long read from the input storage
+   * @return the long to store at the same index of the result
+   */
+  protected abstract long doOperation(long value);
+
+  /**
+   * Applies {@link #doOperation(long)} to every non-missing element of {@code storage}.
+   *
+   * @param storage the input storage
+   * @return a new {@link LongStorage} with as many entries as the input; indices that are missing
+   *     in the input are set in the {@link BitSet} handed to the result
+   */
+  @Override
+  protected LongStorage run(AbstractLongStorage storage) {
+    // Read the size once instead of re-querying it on every loop iteration.
+    int size = storage.size();
+    BitSet newMissing = new BitSet();
+    long[] newVals = new long[size];
+    for (int i = 0; i < size; i++) {
+      if (storage.isNa(i)) {
+        newMissing.set(i);
+      } else {
+        newVals[i] = doOperation(storage.getItem(i));
+      }
+    }
+
+    return new LongStorage(newVals, size, newMissing);
+  }
+}
@@ -59,6 +59,9 @@ public static final class Maps {
     public static final String DIV = "/";
     public static final String MOD = "%";
     public static final String POWER = "^";
+    public static final String TRUNCATE = "truncate";
+    public static final String CEIL = "ceil";
+    public static final String FLOOR = "floor";
     public static final String NOT = "not";
     public static final String AND = "&&";
     public static final String OR = "||";

@@ -8,6 +8,7 @@
 import org.enso.table.data.column.operation.map.numeric.LongBooleanOp;
 import org.enso.table.data.column.operation.map.numeric.LongIsInOp;
 import org.enso.table.data.column.operation.map.numeric.LongNumericOp;
+import org.enso.table.data.column.operation.map.UnaryLongToLongOp;
 import org.enso.table.data.column.storage.BoolStorage;
 import org.enso.table.data.column.storage.Storage;
 
@@ -128,6 +129,27 @@ public Long doLong(
                     "Internal error: Power operation should cast to double.");
               }
             })
+        .add(
+            new UnaryLongToLongOp(Maps.TRUNCATE) {
+              @Override
+              protected long doOperation(long a) {
+                return a;
+              }
+            })
+        .add(
+            new UnaryLongToLongOp(Maps.CEIL) {
+              @Override
+              protected long doOperation(long a) {
+                return a;
+              }
+            })
+        .add(
+            new UnaryLongToLongOp(Maps.FLOOR) {
+              @Override
+              protected long doOperation(long a) {
+                return a;
+              }
+            })
         .add(
             new LongNumericOp(Storage.Maps.DIV, true) {
               @Override

@@ -8,6 +8,7 @@
 import org.enso.table.data.column.operation.map.numeric.DoubleBooleanOp;
 import org.enso.table.data.column.operation.map.numeric.DoubleIsInOp;
 import org.enso.table.data.column.operation.map.numeric.DoubleNumericOp;
+import org.enso.table.data.column.operation.map.UnaryDoubleToLongOp;
 import org.enso.table.data.column.storage.BoolStorage;
 import org.enso.table.data.column.storage.Storage;
 import org.enso.table.data.column.storage.type.FloatType;
@@ -244,6 +245,27 @@ protected double doDouble(
                 return Math.pow(a, b);
               }
             })
+        .add(
+            new UnaryDoubleToLongOp(Maps.TRUNCATE) {
+              @Override
+              protected long doOperation(double a) {
+                return (long) a;
+              }
+            })
+        .add(
+            new UnaryDoubleToLongOp(Maps.CEIL) {
+              @Override
+              protected long doOperation(double a) {
+                return (long) Math.ceil(a);
+              }
+            })
+        .add(
+            new UnaryDoubleToLongOp(Maps.FLOOR) {
+              @Override
+              protected long doOperation(double a) {
+                return (long) Math.floor(a);
+              }
+            })
         .add(
             new DoubleBooleanOp(Maps.LT) {
               @Override

@@ -0,0 +1,48 @@
+from Standard.Base import all
+
+from Standard.Table import Column
+from Standard.Test import Bench, Faker
+
+## Bench Utilities ============================================================
+
+vector_size = 1000000
+iter_size = 100
+num_iterations = 10
+
+# The Benchmarks ==============================================================
+
+bench =
+    # The seed value itself is arbitrary; it is pinned so the generated data is deterministic.
+    seed = 1644575867
+    generator = Faker.new seed
+
+    # Build the two input columns, one of floats and one of integers.
+    IO.println "Creating floats"
+    float_values = Vector.new vector_size _->(generator.decimal -1000000000 1000000000)
+    floats = Column.from_vector "floats" float_values
+
+    IO.println "Creating integers"
+    int_values = Vector.new vector_size _->(generator.integer -1000000000 1000000000)
+    ints = Column.from_vector "ints" int_values
+
+    # Measure each operation on the float column.
+    IO.println ".round floats"
+    Bench.measure floats.round "Column.round floats" iter_size num_iterations
+    IO.println ".truncate floats"
+    Bench.measure floats.truncate "Column.truncate floats" iter_size num_iterations
+    IO.println ".ceil floats"
+    Bench.measure floats.ceil "Column.ceil floats" iter_size num_iterations
+    IO.println ".floor floats"
+    Bench.measure floats.floor "Column.floor floats" iter_size num_iterations
+
+    # Measure the same operations on the integer column.
+    IO.println ".round ints"
+    Bench.measure ints.round "Column.round ints" iter_size num_iterations
+    IO.println ".truncate ints"
+    Bench.measure ints.truncate "Column.truncate ints" iter_size num_iterations
+    IO.println ".ceil ints"
+    Bench.measure ints.ceil "Column.ceil ints" iter_size num_iterations
+    IO.println ".floor ints"
+    Bench.measure ints.floor "Column.floor ints" iter_size num_iterations
+
+main = bench
@@ -1,4 +1,5 @@
 import project.Collections
+import project.Column_Numeric
 import project.Equality
 import project.Json_Bench
 import project.Natural_Order_Sort
@@ -16,6 +17,7 @@ import project.Vector
    not breaking them.
 main =
     Collections.bench
+    Column_Numeric.bench
     Equality.bench
     Json_Bench.bench
     Natural_Order_Sort.bench

@@ -8,6 +8,7 @@ import Standard.Examples
 import Standard.Test.Extensions
 
 from Standard.Table import Column, Value_Type
+from Standard.Table.Errors import Invalid_Value_Type
 from Standard.Test import Test, Test_Suite, Problems
 
 main = Test_Suite.run_main spec
@@ -131,20 +132,41 @@ spec =
             Problems.test_problem_handling action problems tester
 
     Test.group "truncate" <|
-        Test.specify "should be able to truncate a column of decimals" <|
-            Column.from_vector "foo" [1.25, 2.33, 3.57] . truncate . should_equal <| Column.from_vector "foo" [1, 2, 3]
+        Test.specify "should be able to truncate a column of floats" <|
+            Column.from_vector "foo" [1.25, 2.33, 3.57] . truncate . should_equal <| Column.from_vector "truncate([foo])" [1, 2, 3]
             Column.from_vector "foo" [1.25, 2.33, 3.57] . truncate . value_type . should_equal Value_Type.Integer
 
+        Test.specify "should also work on ints" <|
+            Column.from_vector "foo" [1, 2, 3] . truncate . should_equal <| Column.from_vector "truncate([foo])" [1, 2, 3]
+            Column.from_vector "foo" [1, 2, 3] . truncate . value_type . should_equal Value_Type.Integer
+
+        Test.specify "Should error on input of the wrong type" <|
+            Column.from_vector "foo" ["asdf", "zxcv", "qwer"] . truncate . should_fail_with Invalid_Value_Type
+
     Test.group "ceil" <|
-        Test.specify "should be able to take the ceil of a column of decimals" <|
-            Column.from_vector "foo" [1.25, 2.33, 3.57] . ceil . should_equal <| Column.from_vector "foo" [2, 3, 4]
+        Test.specify "should be able to take the ceil of a column of floats" <|
+            Column.from_vector "foo" [1.25, 2.33, 3.57] . ceil . should_equal <| Column.from_vector "ceil([foo])" [2, 3, 4]
             Column.from_vector "foo" [1.25, 2.33, 3.57] . ceil . value_type . should_equal Value_Type.Integer
 
+        Test.specify "should also work on ints" <|
+            Column.from_vector "foo" [1, 2, 3] . ceil . should_equal <| Column.from_vector "ceil([foo])" [1, 2, 3]
+            Column.from_vector "foo" [1, 2, 3] . ceil . value_type . should_equal Value_Type.Integer
+
+        Test.specify "Should error on input of the wrong type" <|
+            Column.from_vector "foo" ["asdf", "zxcv", "qwer"] . ceil . should_fail_with Invalid_Value_Type
+
     Test.group "floor" <|
-        Test.specify "should be able to take the floor of a column of decimals" <|
-            Column.from_vector "foo" [1.25, 2.33, 3.57] . floor . should_equal <| Column.from_vector "foo" [1, 2, 3]
+        Test.specify "should be able to take the floor of a column of floats" <|
+            Column.from_vector "foo" [1.25, 2.33, 3.57] . floor . should_equal <| Column.from_vector "floor([foo])" [1, 2, 3]
             Column.from_vector "foo" [1.25, 2.33, 3.57] . floor . value_type . should_equal Value_Type.Integer
 
+        Test.specify "should also work on ints" <|
+            Column.from_vector "foo" [1, 2, 3] . floor . should_equal <| Column.from_vector "floor([foo])" [1, 2, 3]
+            Column.from_vector "foo" [1, 2, 3] . floor . value_type . should_equal Value_Type.Integer
+
+        Test.specify "Should error on input of the wrong type" <|
+            Column.from_vector "foo" ["asdf", "zxcv", "qwer"] . floor . should_fail_with Invalid_Value_Type
+
     Test.group "Date_Time truncate" <|
         Test.specify "should be able to truncate a column of Date_Times" <|
             Column.from_vector "foo" [Date_Time.new 2020 10 24 1 2 3, Date_Time.new 2020 10 24 1 2 3] . truncate . should_equal <| Column.from_vector "foo" [Date.new 2020 10 24, Date.new 2020 10 24]