Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Arrow language #8512

Merged
merged 35 commits into from
Jan 12, 2024
Merged
Show file tree
Hide file tree
Changes from 32 commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
6755164
First take at implementing Arrow
hubertp Dec 11, 2023
29b7ce6
Fix implementation for Date64
hubertp Dec 11, 2023
2c80897
Fix native image build
hubertp Dec 12, 2023
29ed745
Add IntX Arrow type
hubertp Dec 13, 2023
3a66ce1
missing file
hubertp Dec 13, 2023
fc68ca0
Addressing PR comments
hubertp Dec 13, 2023
fd138b2
Accept any number for new instance size
hubertp Dec 13, 2023
f01ea3a
Support casting of pointers
hubertp Dec 14, 2023
6446bd1
Fix native image build
hubertp Dec 15, 2023
e451806
Support nullable vectors
hubertp Dec 15, 2023
67ba40d
Drop arrow-memory-core dependency
hubertp Dec 15, 2023
5147628
fmt
hubertp Dec 15, 2023
c13efb0
Refactored tests and added Int64 tests
hubertp Dec 15, 2023
f52cbd3
Memory-mapped non-null bitmaps
hubertp Dec 15, 2023
7381959
changelog
hubertp Dec 15, 2023
88c80ab
Remove (currently) unsupported types
hubertp Dec 15, 2023
ab3cee8
nits
hubertp Dec 18, 2023
e9c1d0b
post rebase changes
hubertp Jan 9, 2024
6411d60
one more tweak
hubertp Jan 9, 2024
1ac4557
PR review
hubertp Jan 9, 2024
393b465
s/build/create
hubertp Jan 9, 2024
9dbe881
PR review
hubertp Jan 9, 2024
caefb7a
Move arrow language project module-compatible
hubertp Jan 10, 2024
9462066
fix native build
hubertp Jan 10, 2024
f5855e7
nits
hubertp Jan 10, 2024
2e8d7cb
revert, don't want to deal with license update
hubertp Jan 10, 2024
fbb0ee4
PR review
hubertp Jan 11, 2024
431bd5c
nit
hubertp Jan 11, 2024
81de77f
get rid of proxy
hubertp Jan 11, 2024
52bae38
s/instantiate/execute
hubertp Jan 11, 2024
f5a161d
nits
hubertp Jan 11, 2024
a01b9c0
Merge branch 'develop' into wip/hubert/7755-arrow
mergify[bot] Jan 12, 2024
2cb5cf4
Apply suggestions from code review
hubertp Jan 12, 2024
368d5b0
Merge branch 'develop' into wip/hubert/7755-arrow
hubertp Jan 12, 2024
7abe3da
PR review
hubertp Jan 12, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -1014,6 +1014,7 @@
- [Export of non-existing symbols results in error][7960]
- [Upgrade GraalVM to 23.1.0 JDK21][7991]
- [Added opt-in type checks of return type][8502]
- [Introduce Arrow language][8512]
- [DataflowError.withoutTrace doesn't store stacktrace][8608]

[3227]: https://github.com/enso-org/enso/pull/3227
Expand Down Expand Up @@ -1166,6 +1167,7 @@
[7960]: https://github.com/enso-org/enso/pull/7960
[7991]: https://github.com/enso-org/enso/pull/7991
[8502]: https://github.com/enso-org/enso/pull/8502
[8512]: https://github.com/enso-org/enso/pull/8512
[8608]: https://github.com/enso-org/enso/pull/8608

# Enso 2.0.0-alpha.18 (2021-10-12)
Expand Down
48 changes: 47 additions & 1 deletion build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,7 @@ lazy val enso = (project in file("."))
`runtime-parser`,
`runtime-compiler`,
`runtime-language-epb`,
`runtime-language-arrow`,
`runtime-instrument-common`,
`runtime-instrument-id-execution`,
`runtime-instrument-repl-debugger`,
Expand Down Expand Up @@ -496,6 +497,7 @@ val hamcrestVersion = "1.3"
val netbeansApiVersion = "RELEASE180"
val fansiVersion = "0.4.0"
val httpComponentsVersion = "4.4.1"
val apacheArrowVersion = "14.0.1"

// ============================================================================
// === Utility methods =====================================================
Expand Down Expand Up @@ -1487,6 +1489,49 @@ lazy val `runtime-language-epb` =
)
)

lazy val `runtime-language-arrow` =
(project in file("engine/runtime-language-arrow"))
.enablePlugins(JPMSPlugin)
.settings(
crossPaths := false,
autoScalaLibrary := false,
inConfig(Compile)(truffleRunOptionsSettings),
instrumentationSettings,
libraryDependencies ++= GraalVM.modules ++ Seq(
hubertp marked this conversation as resolved.
Show resolved Hide resolved
"junit" % "junit" % junitVersion % Test,
"com.github.sbt" % "junit-interface" % junitIfVersion % Test,
"org.slf4j" % "slf4j-nop" % slf4jVersion % Test,
"org.slf4j" % "slf4j-api" % slf4jVersion % Test,
"org.apache.arrow" % "arrow-vector" % apacheArrowVersion % Test,
"org.apache.arrow" % "arrow-memory-netty" % apacheArrowVersion % Test
),
hubertp marked this conversation as resolved.
Show resolved Hide resolved
// Module path for this project: the result of `JPMSUtils.filterModulesFromUpdate`
// applied to the `Test / update` report for `GraalVM.modules`, followed by this
// project's own compiled classes directory.
// NOTE(review): `shouldContainAll = true` presumably makes a missing GraalVM module
// an error — confirm in JPMSUtils.
modulePath := {
val updateReport = (Test / update).value
JPMSUtils.filterModulesFromUpdate(
updateReport,
GraalVM.modules,
streams.value.log,
shouldContainAll = true
) ++ Seq(
// First `Compile / productDirectories` entry of `runtime-language-arrow` itself.
(LocalProject(
"runtime-language-arrow"
) / Compile / productDirectories).value.head
)
},
Test / patchModules := {
val testClassesDir = (Test / productDirectories).value.head
Map("org.enso.interpreter.arrow" -> Seq(testClassesDir))
hubertp marked this conversation as resolved.
Show resolved Hide resolved
},
Test / addModules := Seq("org.enso.interpreter.arrow"),
// Extra JVM flags for tests: open `java.base/java.nio` both to the Arrow language
// module and to the unnamed module.
Test / javaOptions ++= Seq(
"--add-opens=java.base/java.nio=org.enso.interpreter.arrow", // DirectByteBuffer in MemoryUtil init is inaccessible
"--add-opens=java.base/java.nio=ALL-UNNAMED" // Tests use Apache Arrow
),
hubertp marked this conversation as resolved.
Show resolved Hide resolved
Test / addReads := {
Map("org.enso.interpreter.arrow" -> Seq("ALL-UNNAMED"))
hubertp marked this conversation as resolved.
Show resolved Hide resolved
}
)

/** `runtime-test-instruments` project contains Truffle instruments that are used solely for testing.
* It is compiled into an explicit Java module. Note that this project cannot have compile-time dependency on `runtime`
* project, so if you need access to classes from `runtime`, you need to use reflection.
Expand Down Expand Up @@ -2075,7 +2120,8 @@ lazy val `engine-runner` = project
"com.sun.imageio",
"com.sun.jna.internal.Cleaner",
"com.sun.jna.Structure$FFIType",
"akka.http"
"akka.http",
"org.enso.interpreter.arrow.util.MemoryUtil"
)
)
.dependsOn(assembly)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
open module org.enso.interpreter.arrow {
hubertp marked this conversation as resolved.
Show resolved Hide resolved
// Dependency on the Truffle API module.
requires org.graalvm.truffle;

// Registers the Arrow language provider as a TruffleLanguageProvider service.
// NOTE(review): ArrowLanguageProvider is not defined in the visible sources —
// presumably generated from @TruffleLanguage.Registration; confirm.
provides com.oracle.truffle.api.provider.TruffleLanguageProvider with
org.enso.interpreter.arrow.ArrowLanguageProvider;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
package org.enso.interpreter.arrow;

import com.oracle.truffle.api.TruffleLanguage;

/**
 * Context object of the Arrow language. It only keeps a reference to the Truffle
 * environment it was created with; nothing in this class reads that reference yet.
 */
final class ArrowContext {
// Environment passed in at construction; currently stored only.
private final TruffleLanguage.Env env;

/**
 * Creates a context bound to the given environment.
 *
 * @param env the Truffle environment to remember
 */
public ArrowContext(TruffleLanguage.Env env) {
this.env = env;
}

/** Initialization hook; intentionally a no-op at the moment. */
public void initialize() {
// Nothing to initialize yet.
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
package org.enso.interpreter.arrow;

import com.oracle.truffle.api.CallTarget;
import com.oracle.truffle.api.TruffleLanguage;
import org.enso.interpreter.arrow.node.ArrowEvalNode;

/** An internal language that implements Arrow specification. */
@TruffleLanguage.Registration(
id = ArrowLanguage.ID,
name = "Truffle implementation of Arrow",
characterMimeTypes = {ArrowLanguage.MIME},
defaultMimeType = ArrowLanguage.MIME,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You have to explicitly specify internal = true. By default internal = false.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The ArrowLanguage shall be an experimental feature accessible via the foreign arrow xyz = """ syntax. I am afraid the language needs to be non-internal to be exposed in the set of foreign languages (as of #7882).

In any case, it'd be good if ArrowLanguage wasn't accessible by default (throw a parsing error for example). Possibly shield it with

var canArrow = false;
assert canArrow = true;
if (!canArrow) throw ...

contextPolicy = TruffleLanguage.ContextPolicy.SHARED)
public class ArrowLanguage extends TruffleLanguage<ArrowContext> {

  /** Identifier under which the language is registered. */
  public static final String ID = "arrow";

  /** MIME type used both as the character MIME type and as the default one. */
  public static final String MIME = "application/vnd.apache.arrow.file";

  public ArrowLanguage() {}

  /** Creates a fresh {@link ArrowContext} wrapping the supplied environment. */
  @Override
  protected ArrowContext createContext(TruffleLanguage.Env env) {
    return new ArrowContext(env);
  }

  /** Delegates to {@link ArrowContext#initialize()}. */
  @Override
  protected void initializeContext(ArrowContext context) {
    context.initialize();
  }

  /**
   * Parses the requested source with {@link ArrowParser} and wraps the outcome in an {@link
   * ArrowEvalNode}.
   *
   * @param request the parsing request carrying the source text
   * @return the call target of the evaluation root node
   * @throws IllegalArgumentException when the parser does not recognise the source
   */
  @Override
  protected CallTarget parse(ParsingRequest request) {
    var source = request.getSource();
    ArrowParser.Result parsed = ArrowParser.parse(source);
    if (parsed == null) {
      // The parser signals "not recognised" with null; surface it together with the offending text.
      throw new IllegalArgumentException(
          "unable to parse the code: " + source.getCharacters().toString());
    }
    return ArrowEvalNode.create(this, parsed).getCallTarget();
  }

  /** Every thread is allowed to access the language, single-threaded or not. */
  @Override
  protected boolean isThreadAccessAllowed(Thread thread, boolean singleThreaded) {
    return true;
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
package org.enso.interpreter.arrow;

import com.oracle.truffle.api.source.Source;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public final class ArrowParser {

// Not instantiable: the class only exposes static members.
private ArrowParser() {}

public static class Result {
hubertp marked this conversation as resolved.
Show resolved Hide resolved
hubertp marked this conversation as resolved.
Show resolved Hide resolved

private final PhysicalLayout physicalLayout;
private final LogicalLayout logicalLayout;
private final Mode mode;

/**
 * Stores the three components of a parse result. Private, so instances are only
 * created inside {@code ArrowParser}.
 *
 * @param physicalLayout the physical layout of the requested structure
 * @param logicalLayout the logical (element) layout named in the source
 * @param mode whether the source asked for an allocation or a cast
 */
private Result(PhysicalLayout physicalLayout, LogicalLayout logicalLayout, Mode mode) {
this.physicalLayout = physicalLayout;
this.logicalLayout = logicalLayout;
this.mode = mode;
}

/** Returns the physical layout stored at construction. */
public PhysicalLayout getPhysicalLayout() {
return physicalLayout;
}

/** Returns the logical layout stored at construction. */
public LogicalLayout getLogicalLayout() {
return logicalLayout;
}

/** Returns the mode (allocate or cast) stored at construction. */
public Mode getMode() {
return mode;
}
}

/**
 * Recognises the two supported source forms, {@code new[<layout>]} and {@code cast[<layout>]}.
 *
 * <p>The allocation form is tried first. Once a form matches, the outcome depends only on whether
 * the captured name is a {@link LogicalLayout} constant; the other form is not tried afterwards.
 *
 * @param source the source whose characters are inspected
 * @return the parse result, or {@code null} when neither form matches or the captured layout name
 *     is not a known {@link LogicalLayout}
 */
public static Result parse(Source source) {
  String text = source.getCharacters().toString();

  Matcher allocation = ARRAY_PATTERN.matcher(text);
  if (allocation.find()) {
    return primitiveResult(allocation.group(1), Mode.Allocate);
  }

  Matcher cast = CAST_PATTERN.matcher(text);
  return cast.find() ? primitiveResult(cast.group(1), Mode.Cast) : null;
}

/**
 * Builds a primitive-layout result for the given layout name, or returns {@code null} when the
 * name does not denote a {@link LogicalLayout} constant.
 */
private static Result primitiveResult(String layoutName, Mode mode) {
  try {
    return new Result(PhysicalLayout.Primitive, LogicalLayout.valueOf(layoutName), mode);
  } catch (IllegalArgumentException unknownLayout) {
    // propagate warning
    return null;
  }
}

// Allocation form `new[...]`. Used with Matcher.find(), so it may match anywhere in the
// source; group 1 is the text between the brackets (greedy, up to the last `]`).
private static final Pattern ARRAY_PATTERN = Pattern.compile("new\\[(.+)\\]");
// Cast form `cast[...]`; same matching and capture rules as ARRAY_PATTERN.
private static final Pattern CAST_PATTERN = Pattern.compile("cast\\[(.+)\\]");

/** Operation requested by the parsed source. */
public enum Mode {
// Produced for sources matching the `new[...]` form.
Allocate,
// Produced for sources matching the `cast[...]` form.
Cast
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
package org.enso.interpreter.arrow;

import org.enso.interpreter.arrow.runtime.SizeInBytes;

/**
 * Logical element layouts understood by the Arrow language, each tagged with the width of a single
 * element in bits.
 */
public enum LogicalLayout implements SizeInBytes {
  Date32(32),
  Date64(64),
  Int8(8),
  Int16(16),
  Int32(32),
  Int64(64);

  /** Width of one element, in bits. */
  private final int bitWidth;

  LogicalLayout(int bitWidth) {
    this.bitWidth = bitWidth;
  }

  /** Returns the element width converted from bits to whole bytes. */
  @Override
  public int sizeInBytes() {
    return bitWidth / Byte.SIZE;
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
package org.enso.interpreter.arrow;

/**
 * Physical layouts known to the Arrow language. In the visible sources only {@code Primitive} is
 * produced by the parser and handled by the evaluation node.
 */
public enum PhysicalLayout {
Primitive,
// NOTE(review): declared but not produced or handled anywhere in the visible sources.
VariableSizeBinary
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
package org.enso.interpreter.arrow.node;

import com.oracle.truffle.api.nodes.Node;
import org.enso.interpreter.arrow.LogicalLayout;
import org.enso.interpreter.arrow.runtime.ArrowCastToFixedSizeArrayFactory;

/** Node that produces a cast factory for a given logical layout. */
public class ArrowCastFixedSizeNode extends Node {

/** Package-private factory method; returns a new node instance. */
static ArrowCastFixedSizeNode create() {
return new ArrowCastFixedSizeNode();
}

/**
 * Wraps the layout in a new {@code ArrowCastToFixedSizeArrayFactory}.
 *
 * @param layoutType the logical layout handed to the factory
 * @return a newly created factory object
 */
public Object execute(LogicalLayout layoutType) {
return new ArrowCastToFixedSizeArrayFactory(layoutType);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
package org.enso.interpreter.arrow.node;

import com.oracle.truffle.api.CompilerDirectives;
import com.oracle.truffle.api.frame.FrameDescriptor;
import com.oracle.truffle.api.frame.VirtualFrame;
import com.oracle.truffle.api.nodes.RootNode;
import org.enso.interpreter.arrow.ArrowLanguage;
import org.enso.interpreter.arrow.ArrowParser;

public class ArrowEvalNode extends RootNode {
private final ArrowParser.Result code;

@Child private ArrowFixedSizeNode fixedPhysicalLayout = ArrowFixedSizeNode.create();
@Child private ArrowCastFixedSizeNode castToFixedPhysicalLayout = ArrowCastFixedSizeNode.create();

public static ArrowEvalNode create(ArrowLanguage language, ArrowParser.Result code) {
return new ArrowEvalNode(language, code);
}

private ArrowEvalNode(ArrowLanguage language, ArrowParser.Result code) {
super(language, new FrameDescriptor());
this.code = code;
}

public Object execute(VirtualFrame frame) {
return switch (code.getPhysicalLayout()) {
case Primitive -> switch (code.getMode()) {
case Allocate -> fixedPhysicalLayout.execute(code.getLogicalLayout());
case Cast -> castToFixedPhysicalLayout.execute(code.getLogicalLayout());
};
default -> throw CompilerDirectives.shouldNotReachHere("unsupported physical layout");
};
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
package org.enso.interpreter.arrow.node;

import com.oracle.truffle.api.nodes.Node;
import org.enso.interpreter.arrow.LogicalLayout;
import org.enso.interpreter.arrow.runtime.ArrowFixedSizeArrayFactory;

/** Node that produces an array factory for a given logical layout. */
public class ArrowFixedSizeNode extends Node {

/** Package-private factory method; returns a new node instance. */
static ArrowFixedSizeNode create() {
return new ArrowFixedSizeNode();
}

/**
 * Wraps the layout in a new {@code ArrowFixedSizeArrayFactory}.
 *
 * @param layoutType the logical layout handed to the factory
 * @return a newly created factory object
 */
public Object execute(LogicalLayout layoutType) {
return new ArrowFixedSizeArrayFactory(layoutType);
}
}
Loading
Loading