Start jupyter-lab

```bash
jupyter-lab --notebook-dir=$HOME/gits/gerashegalov/rapids-shell/src/jupyter
```
Alternatively, simply open the notebook in VS Code with the Jupyter extension.

In [None]:
# Easy imports
import findspark
import glob
import os
import shutil
import sys
import tempfile

In [None]:
# Environment: locations, versions and process-wide settings shared by the
# cells below.
home_dir = os.environ['HOME']
work_dir = f"{home_dir}/jupyter_run_dir"

# Which RAPIDS Accelerator build to use; `locally_built` picks between the jar
# from the local spark-rapids checkout and the published Maven package.
rapids_version = '22.12.0-SNAPSHOT'
locally_built = True

# Export the settings in one call; child processes started from this kernel
# inherit them.
tz = 'UTC'
os.environ.update({
    'TZ': tz,
    'SPARK_HOME': f"{home_dir}/dist/spark-3.1.1-bin-hadoop3.2",
    'PARALLEL_LEVEL': "2",
})

# Put the spark-rapids integration-test sources on the import path
# (presumably for the data_gen helpers imported further down).
sys.path.append(f"{home_dir}/gits/NVIDIA/spark-rapids/integration_tests/src/main/python")

In [None]:
# Spark Master
# The local-cluster master URL has the form
# local-cluster[<workers>,<cores per worker>,<memory per worker in MB>];
# each part is named here so it can be tuned without editing the URL.
num_workers = 1
cores_per_exec = 1
worker_mem_mb = 4096
spark_master = f"local-cluster[{num_workers},{cores_per_exec},{worker_mem_mb}]"
# Alternative: run everything inside the driver JVM instead of a mini cluster.
# spark_master = f"local[{cores_per_exec}]"

# debugger string
# Swap the two assignments below to add a JDWP debug agent (listening on
# localhost:5005) wherever `jdwp` is interpolated into JVM options.
# jdwp = '-agentlib:jdwp=transport=dt_socket,server=y,address=localhost:5005'
jdwp = ''

In [None]:
# Log4J debug
# Fully-qualified class names whose loggers are switched to level ALL.
debugLogClasses = [
    'com.nvidia.spark.rapids.DeviceMemoryEventHandler',
    'com.nvidia.spark.rapids.RapidsBufferStore',
]

# Create the properties file atomically. tempfile.mktemp() only returns a
# name, which is deprecated and race-prone: another process can create the
# path before it is opened. delete=False keeps the file on disk after the
# `with` block so the JVMs started later can read it.
with tempfile.NamedTemporaryFile(
        mode='w+t', suffix=".properties", prefix="log4j", delete=False) as log4j_fobj:
    log4j_fname = log4j_fobj.name
    print(f"Writing log4j conf to {log4j_fname}")
    # Base configuration: INFO on the root logger, console appender on stderr.
    log4j_fobj.write('''
log4j.rootCategory=INFO, console
log4j.appender.console=org.apache.log4j.ConsoleAppender
log4j.appender.console.target=System.err
log4j.appender.console.layout=org.apache.log4j.PatternLayout
log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n
''')
    # One logger override per class of interest.
    log4j_fobj.writelines(f"log4j.logger.{d}=ALL\n" for d in debugLogClasses)

# File name without its directory; left as the last expression so the
# notebook displays it.
log4j_name_comp = os.path.basename(log4j_fname)
log4j_name_comp

In [None]:
# Locate the Spark installation, then register the RAPIDS Accelerator either
# as a published package or as the jar from the local spark-rapids checkout.
findspark.init()
if not locally_built:
    findspark.add_packages(f"com.nvidia:rapids-4-spark_2.12:{rapids_version}")
else:
    findspark.add_jars(f"{home_dir}/gits/NVIDIA/spark-rapids/dist/target/rapids-4-spark_2.12-{rapids_version}-cuda11.jar")

In [None]:
import pyspark
from pyspark.sql.functions import *
# Build the configuration from scratch (loadDefaults=False): both driver and
# executor JVMs get the refcount-debug flag and the generated log4j properties
# file; only the executor options carry the optional `jdwp` debugger string.
spark_rapids_conf = pyspark.SparkConf(loadDefaults=False).setAll([
    ('spark.driver.extraJavaOptions', f"-Dai.rapids.refcount.debug=true -Dlog4j.debug=true -Dlog4j.configuration=file://{log4j_fname}"),
    ('spark.executor.extraJavaOptions', f"-Dai.rapids.refcount.debug=true {jdwp} -Dlog4j.debug=true -Dlog4j.configuration=file://{log4j_fname}"),
    ('spark.executorEnv.TZ', tz),
    ('spark.plugins', 'com.nvidia.spark.SQLPlugin'),
    ('spark.rapids.sql.explain', 'ALL'),
])

# Create the session against the configured master, or reuse an existing one.
spark = (
    pyspark.sql.SparkSession.builder
    .config(conf=spark_rapids_conf)
    .appName('RAPIDS PySpark Notebook')
    .master(spark_master)
    .getOrCreate()
)

In [None]:
# Bare expression: as the last statement of the cell, the notebook displays
# the session object.
spark

In [None]:
# Call RapidsPluginUtils.loadProps in the session's JVM for the
# "cudf-java-version-info.properties" resource; the result is the cell output.
spark._jvm.com.nvidia.spark.rapids.RapidsPluginUtils.loadProps("cudf-java-version-info.properties")

In [None]:
# Call RapidsPluginUtils.loadProps in the session's JVM for the
# "spark-rapids-jni-version-info.properties" resource; the result is the cell
# output.
spark._jvm.com.nvidia.spark.rapids.RapidsPluginUtils.loadProps("spark-rapids-jni-version-info.properties")

In [None]:
# Call RapidsPluginUtils.loadProps in the session's JVM for the
# "rapids4spark-version-info.properties" resource; the result is the cell
# output.
spark._jvm.com.nvidia.spark.rapids.RapidsPluginUtils.loadProps("rapids4spark-version-info.properties")

In [None]:
# Enable debug logging for the RAPIDS stores. The log4j properties file alone
# is not enough here because the REPL resets the log level of the root logger,
# so each logger of interest is set to ALL directly in the JVM.
log4j = spark._jvm.org.apache.log4j
for class_name in debugLogClasses:
    log4j.Logger.getLogger(class_name).setLevel(log4j.Level.ALL)

## Datagen from integration tests

In [None]:
# Hand this notebook's session to the integration-test module before data_gen
# is imported (presumably so its helpers reuse this session - verify against
# spark_init_internal).
import spark_init_internal
spark_init_internal._spark = spark
from data_gen import *

In [None]:
# Smoke test for datagen: build a 3-row, single-slice DataFrame from a
# non-nullable timestamp generator and print it untruncated.
unary_op_df(
    spark=spark,
    gen=TimestampGen(nullable=False),
    length=3,
    num_slices=1,
).show(truncate=False)