Start jupyter-lab

```bash
jupyter-lab --notebook-dir=$HOME/gits/gerashegalov/rapids-shell/src/jupyter
```
Alternatively, open this notebook in VS Code with the Jupyter extension installed.

# Repro for [TIMESTAMP_MICROS missing logicalType](https://github.com/NVIDIA/spark-rapids/issues/8778) 

In [1]:
import datetime
import numpy
import fastparquet
import findspark
import glob
import os
import pandas
import sys
import tempfile

### Versions

In [2]:
# Spark distribution: Spark release, the Hadoop line it was built for, and its Scala binary version.
spark_version = '3.3.2'
hadoop_version = '3'
scala_version = '2.12'

# RAPIDS Accelerator release and the CUDA classifier that is part of the jar file name.
rapids_version = '23.06.0'
cuda_version = 'cuda11'

### Environment Variables

In [3]:
# Process environment, set before Spark is started in the later cells.
os.environ.update({
    'JAVA_HOME': '/usr/lib/jvm/java-11-openjdk-amd64',
    'SPARK_LOCAL_IP': '127.0.0.1',
    'TZ': 'UTC',
})

In [4]:
# Filesystem locations, all rooted at the current user's $HOME.
home_dir = os.environ['HOME']
work_dir = home_dir + "/jupyter_run_dir"
m2_local_repo = home_dir + "/.m2/repository"
spark_home = home_dir + f"/dist/spark-{spark_version}-bin-hadoop{hadoop_version}"

# Maven coordinates of the RAPIDS Accelerator jar; the file name follows
# <artifactId>-<version>-<classifier>.jar.
groupId = "com.nvidia"
artifactId = "rapids-4-spark_" + scala_version
dist_jar = "-".join([artifactId, rapids_version, cuda_version]) + ".jar"

### Find Spark 

In [5]:
# Point findspark at the Spark distribution selected above.
findspark.init(spark_home = spark_home)

# Build the local Maven repository path from the declared coordinates
# ("com.nvidia" -> "com/nvidia") instead of repeating the group as a literal,
# so changing groupId in the paths cell is enough.
group_dir = groupId.replace('.', '/')
findspark.add_jars(f"{m2_local_repo}/{group_dir}/{artifactId}/{rapids_version}/{dist_jar}")

# NOTE(review): this import is kept after findspark.init() rather than with the other
# imports, presumably because pyspark only becomes importable once findspark has
# located spark_home — confirm before moving it.
import pyspark

### Configure Spark

In [6]:
# JDWP agent options appended to the driver JVM flags (socket transport, port 5005, no suspend).
jdwp = '-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=5005'

# Local-mode master; the bracketed number comes from cores_per_exec.
cores_per_exec = 1
spark_master = f"local[{cores_per_exec}]"

# Session settings, applied to the builder in the order listed.
spark_conf = {
    'spark.app.name': 'RAPIDS PySpark Notebook',
    'spark.driver.extraJavaOptions': f"-Dai.rapids.cudf.preserve-dependencies=true {jdwp}",
    'spark.master': spark_master,
    'spark.plugins': 'com.nvidia.spark.SQLPlugin',
    'spark.rapids.sql.enabled': True,
    'spark.rapids.sql.explain': 'ALL',
}

spark_builder = pyspark.sql.SparkSession.builder
for conf_key, conf_value in spark_conf.items():
    spark_builder.config(conf_key, conf_value)

spark = spark_builder.getOrCreate()

Listening for transport dt_socket at address: 5005
23/07/25 22:43:08 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable


Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).


23/07/25 22:43:09 WARN RapidsPluginUtils: RAPIDS Accelerator 23.06.0 using cudf 23.06.0.
23/07/25 22:43:09 WARN RapidsPluginUtils: RAPIDS Accelerator is enabled, to disable GPU support set `spark.rapids.sql.enabled` to false.
23/07/25 22:43:09 WARN RapidsPluginUtils: spark.rapids.sql.explain is set to `ALL`. Set it to 'NONE' to suppress the diagnostics logging about the query placement on the GPU.


# Test Scenario

In [7]:
# Single-row DataFrame holding one microsecond-precision timestamp in the year 3023.
# NOTE(review): the far-future year is presumably what triggers the linked issue — confirm.
sample_ts = datetime.datetime(3023, 7, 14, 7, 38, 45, 418688)
df = spark.createDataFrame([(sample_ts,)], 'ts timestamp')

# Separate scratch directories for the CPU and GPU writes; the label is the directory-name suffix.
cpu_path = tempfile.mkdtemp(suffix = "cpu_ts")
gpu_path = tempfile.mkdtemp(suffix = "gpu_ts")

# Ask Spark to write Parquet timestamps as TIMESTAMP_MICROS.
spark.conf.set('spark.sql.parquet.outputTimestampType', 'TIMESTAMP_MICROS')

In [None]:
def spark_to_fastparquet(on_gpu = False):
    """Write `df` to Parquet with Spark, then inspect the written file with fastparquet.

    Sets `spark.rapids.sql.enabled` to `on_gpu`, overwrites the matching scratch
    directory (`gpu_path` or `cpu_path`) with `df`, shows the data as read back by
    Spark, and prints the first row and the file metadata (`fmd`) as seen by
    fastparquet.

    Args:
        on_gpu: value assigned to `spark.rapids.sql.enabled` for the write and the
            Spark read-back. The setting is not restored afterwards.

    Raises:
        ValueError: if the output directory does not contain exactly one
            `*.parquet` file.
    """
    spark.conf.set('spark.rapids.sql.enabled', on_gpu)
    path = gpu_path if on_gpu else cpu_path
    df.write.mode('overwrite').parquet(path)
    spark.read.parquet(path).show(truncate = False)

    # The inspection below looks at a single data file; fail with a message that
    # names the directory instead of an opaque tuple-unpacking error.
    parquet_files = sorted(glob.glob(f"{path}/*.parquet"))
    if len(parquet_files) != 1:
        raise ValueError(
            f"expected exactly one parquet file in {path}, "
            f"found {len(parquet_files)}: {parquet_files}")
    file_path = parquet_files[0]

    fastparquet_file = fastparquet.ParquetFile(file_path)
    print(fastparquet_file.head(1))
    print(fastparquet_file.fmd)

Write with CPU Spark, then read the file back with Spark and fastparquet

In [None]:
# CPU baseline: runs the round trip with spark.rapids.sql.enabled set to False.
spark_to_fastparquet(on_gpu=False)

Write with GPU Spark, then read the file back with Spark and fastparquet

In [None]:
# GPU run: same round trip with spark.rapids.sql.enabled set to True.
spark_to_fastparquet(on_gpu=True)

# Python Package Versions

In [None]:
# Print the name and version of each Python package involved in reading the files.
for module in (fastparquet, numpy, pandas):
    print(f"name={module.__name__} version={module.__version__}\n")

In [None]:
# Use the explicit %pip magic so the cell does not depend on IPython's automagic setting.
%pip list