# cuDF/Java with the [IJava kernel](https://github.com/SpencerPark/IJava)

Install spark-rapids-jni with Maven from a local build, or resolve a released artifact from a remote repository.

In [1]:
// Alternative to the local build used below: to run against a published release, uncomment
// %maven "com.nvidia:spark-rapids-jni:23.08.0"

In [2]:
/* Run compile before 
./build/build-in-docker compile -DCPP_PARALLEL_LEVEL=6 -DGPU_ARCHS=NATIVE -DskipTests=true
*/ 
String repoDir = System.getProperty("user.home") + "/gits/NVIDIA/spark-rapids-jni"

In [3]:
// Invoke the kernel magics programmatically so that repoDir can be used to build the arguments.
// First magic: point loadFromPOM at the checkout's pom.xml.
lineMagic(
    "loadFromPOM",
    Arrays.asList(repoDir + "/pom.xml"));
// Second magic: add the compiled classes directory to the classpath.
// Kept last so that its result is what the cell displays.
lineMagic(
    "classpath",
    Arrays.asList(repoDir + "/target/classes"));

[/home/gshegalov/gits/NVIDIA/spark-rapids-jni/target/classes]

In [4]:
import ai.rapids.cudf.*;
import com.nvidia.spark.rapids.jni.*;
// Display the URL the cuDF Table class was loaded from, to see which build is on the classpath.
Table.class.getClassLoader().getResource("ai/rapids/cudf/Table.class").toExternalForm()

file:/home/gshegalov/gits/NVIDIA/spark-rapids-jni/target/classes/ai/rapids/cudf/Table.class

In [5]:
TableDebug printer = TableDebug.builder().withOutput(TableDebug.Output.STDOUT).build()

SLF4J: Failed to load class "org.slf4j.impl.StaticLoggerBinder".
SLF4J: Defaulting to no-operation (NOP) logger implementation
SLF4J: See http://www.slf4j.org/codes.html#StaticLoggerBinder for further details.


In [6]:
// One-column table of sample strings used as input by the regex and base-conversion cells.
Table table0 = new Table.TestBuilder()
    .column(
        // "NaN" on its own and mixed with digits
        "NaN", "9NaN", "9NaN6", "NaN6",
        // the digit 9 with surrounding spaces
        " 9 ", "9 ", "  9",
        // plain digit strings
        "170", "100", "224", "324", "476")
    .build();

In [7]:
// Dump table0 through the debug printer under the label "table0".
printer.debug("table0", table0);

DEBUG table0 Table{columns=[ColumnVector{rows=12, type=STRING, nullCount=Optional[0], offHeap=(ID: 6 7ff1613f3000)}], cudfTable=140674695378400, rows=12}
GPU COLUMN 0 - NC: 0 DATA: DeviceMemoryBufferView{address=0x7ff0cec00000, length=39, id=-1} VAL: null
COLUMN 0 - STRING
0 "NaN" 4e614e
1 "9NaN" 394e614e
2 "9NaN6" 394e614e36
3 "NaN6" 4e614e36
4 " 9 " 203920
5 "9 " 3920
6 "  9" 202039
7 "170" 313730
8 "100" 313030
9 "224" 323234
10 "324" 333234
11 "476" 343736


In [8]:
printer.debug("regexed", table0.getColumn(0).replaceRegex(new RegexProgram("^[0-9]"), Scalar.fromString("")))

GPU COLUMN regexed - NC: 0 DATA: DeviceMemoryBufferView{address=0x7ff0cec01000, length=31, id=-1} VAL: null
COLUMN regexed - STRING
0 "NaN" 4e614e
1 "NaN" 4e614e
2 "NaN6" 4e614e36
3 "NaN6" 4e614e36
4 " 9 " 203920
5 " " 20
6 "  9" 202039
7 "70" 3730
8 "00" 3030
9 "24" 3234
10 "24" 3234
11 "76" 3736


In [9]:
// Convert the strings of column 0 to integers using base 16 and print the result.
// The converted column is an off-heap ColumnVector, so it is closed right after printing
// rather than being leaked.
try (ColumnVector intsUsingBase16 = CastStrings.toIntegersWithBase(table0.getColumn(0), 16)) {
    printer.debug("intsUsingBase16", intsUsingBase16);
}

GPU COLUMN intsUsingBase16 - NC: 2 DATA: DeviceMemoryBufferView{address=0x7ff0cec00e00, length=96, id=-1} VAL: DeviceMemoryBufferView{address=0x7ff0cec00c00, length=64, id=-1}
COLUMN intsUsingBase16 - UINT64
0 NULL
1 9
2 9
3 NULL
4 9
5 9
6 9
7 368
8 256
9 548
10 804
11 1142


In [10]:
printer.debug("hexsRoundTrip16", CastStrings.fromIntegersWithBase(CastStrings.toIntegersWithBase(table0.getColumn(0), 16), 16))

GPU COLUMN hexsRoundTrip16 - NC: 2 DATA: DeviceMemoryBufferView{address=0x7ff0cec02200, length=20, id=-1} VAL: DeviceMemoryBufferView{address=0x7ff0cec02000, length=64, id=-1}
COLUMN hexsRoundTrip16 - STRING
0 NULL
1 "9" 39
2 "9" 39
3 NULL
4 "9" 39
5 "9" 39
6 "9" 39
7 "170" 313730
8 "100" 313030
9 "224" 323234
10 "324" 333234
11 "476" 343736


In [11]:
// Convert the strings of column 0 to integers using base 10 and print the result.
// The converted column is an off-heap ColumnVector, so it is closed right after printing
// rather than being leaked.
try (ColumnVector intsUsingBase10 = CastStrings.toIntegersWithBase(table0.getColumn(0), 10)) {
    printer.debug("intsUsingBase10", intsUsingBase10);
}

GPU COLUMN intsUsingBase10 - NC: 2 DATA: DeviceMemoryBufferView{address=0x7ff0cec01400, length=96, id=-1} VAL: DeviceMemoryBufferView{address=0x7ff0cec01200, length=64, id=-1}
COLUMN intsUsingBase10 - UINT64
0 NULL
1 9
2 9
3 NULL
4 9
5 9
6 9
7 170
8 100
9 224
10 324
11 476


In [12]:
printer.debug("intsRoundTrip10", CastStrings.fromIntegersWithBase(CastStrings.toIntegersWithBase(table0.getColumn(0), 10), 10))

GPU COLUMN intsRoundTrip10 - NC: 2 DATA: DeviceMemoryBufferView{address=0x7ff0cec02400, length=20, id=-1} VAL: DeviceMemoryBufferView{address=0x7ff0cec01a00, length=64, id=-1}
COLUMN intsRoundTrip10 - STRING
0 NULL
1 "9" 39
2 "9" 39
3 NULL
4 "9" 39
5 "9" 39
6 "9" 39
7 "170" 313730
8 "100" 313030
9 "224" 323234
10 "324" 333234
11 "476" 343736


In [7]:
ColumnView cv = table0.getColumn(1).findAndReplaceAll(
    ColumnVector.fromStrings("string 1", "string 1000"),
    ColumnVector.fromStrings("Something", "Else"))

In [8]:
// Dump the replaced column through the debug printer under the label "cv".
printer.debug("cv", cv);

GPU COLUMN cv - NC: 0 DATA: DeviceMemoryBufferView{address=0x7fe75b202200, length=13, id=-1} VAL: DeviceMemoryBufferView{address=0x7fe75b201e00, length=64, id=-1}
COLUMN cv - STRING
0 "Something" 536f6d657468696e67
1 "Else" 456c7365
