# The Spark Session 

## Automatic initialization

In [None]:
# Import the integration package and show its version string as the cell output.
import databrickslabs_jupyterlab
databrickslabs_jupyterlab.__version__

In [None]:
# Display the `spark` object. No cell in this notebook assigns it, so it has to be
# present in the kernel namespace already (see the "Automatic initialization" heading).
spark

## The *is_remote* helper

In [None]:
import socket
from databrickslabs_jupyterlab import is_remote

# Report the result of is_remote() together with the host name the kernel runs on.
report_template = "Remote kernel: {}\nHost name:     {}"
print(report_template.format(is_remote(), socket.gethostname()))

## Spark progress bar integration

In [None]:
# Build an RDD of squares spread over 100 partitions, then sum it.
# The sum is the action that actually launches a Spark job.
squares = sc.range(10000).repartition(100).map(lambda value: value * value)
a = squares.sum()
print(a)

In [None]:
# Read the hourly bike-sharing CSV; the first row is the header and column types are inferred.
bike_hours_csv = "/databricks-datasets/bikeSharing/data-001/hour.csv"
df = spark.read.csv(bike_hours_csv, header=True, inferSchema=True)

In [None]:
# Keep only the rows whose `holiday` column is greater than 0 (`where` is an alias of `filter`).
df.where(df["holiday"] > 0)

In [None]:
# Register `df` as the temporary view "bikes" (replacing any view of that name)
# so that it can be referenced by name from SQL.
df.createOrReplaceTempView("bikes")

# dbutils modules *fs* and *secrets* are enabled

In [None]:
# Show the built-in help of the `dbutils` object.
dbutils.help()

## Module `secrets`

In [None]:
# List the available secret scopes.
dbutils.secrets.listScopes()

In [None]:
# List the entries of the "dbjl-pytest" secret scope.
dbutils.secrets.list("dbjl-pytest")

In [None]:
# = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
# Note: Secrets are not redacted in Jupyterlab Integration!
# = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
# Because nothing redacts the value, do not leave the result of `secrets.get`
# as the last expression of a cell: it would be rendered (and saved with the
# notebook) in clear text. Bind it to a name and only report that the lookup worked.

secret_value = dbutils.secrets.get("dbjl-pytest", "pytest-key")
print("Secret retrieved: {}".format(secret_value is not None))

## Module `fs`

In [None]:
# List the entries under "/data" through the dbutils file-system module.
dbutils.fs.ls("/data")

## Module `notebook`

### Run external notebooks

**NOTE**: 

- Notebooks to be run need to be copied to the remote filesystem, e.g. `/dbfs/home/<username>/...`, with `/dbfs/` being the POSIX mount point of the DBFS filesystem.
Notebooks can be copied to remote dbfs with the `databricks` utility, e.g.

    `databricks --profile demo fs cp initialize.ipynb /dbfs/home/bernhard/`

- Since this notebook runs on the remote machine, this cannot be done in this notebook

In [None]:
# Reset the sentinel before the external notebook is executed.
answer = 0

# NOTE(review): presumably the "initialize" notebook assigns `answer` - confirm against that notebook.
initialize_notebook = "/dbfs/home/bernhard/initialize"
dbutils.notebook.run(initialize_notebook)

# Show the value of `answer` after the run.
answer

In [None]:
# Reset the sentinel before the external notebook is executed.
answer = 0

# Execute the notebook through the %run line magic (note the explicit .ipynb suffix).
# NOTE(review): presumably initialize.ipynb assigns `answer` - confirm against that notebook.
%run /dbfs/home/bernhard/initialize.ipynb

# Show the value of `answer` after the run.
answer

### Stop "run all" execution at defined locations in the notebook

In [None]:
# Call dbutils.notebook.exit with the message "test"; per the section heading this is
# the point where a "run all" execution stops, so cells below must be run explicitly.
dbutils.notebook.exit("test")

## An additional DBFS browser

In [None]:
# Open the DBFS browser, starting at the root path.
dbbrowser.dbfs(path="/")

In [None]:
# Open the database browser (presumably rendered as a widget - confirm in the integration docs).
dbbrowser.databases()

# Multi Language support

## A simplified SQL integration

Uses a `spark.sql` wrapper:
- no support of autocompletion

In [None]:
%%sql 
-- List the tables; the cell magic has to stay on the first line of the cell.
show tables

In [None]:
# Query the "bikes" view from Python; requires that the cell calling
# df.createOrReplaceTempView("bikes") has been executed in this session.
spark.sql("select * from bikes")

## A simplified Scala integration

Uses REST API 1.2:
- no Spark progress bar
- no support of autocompletion
- no incremental outputs (cell will be executed as block and all output appears at the end of execution)

In [None]:
# Application id as seen from the Python side; compare with the Scala cell that follows.
spark.sparkContext.applicationId

In [None]:
%%scala
// Same lookup as the preceding Python cell, executed through the Scala cell magic.
spark.sparkContext.applicationId

In [None]:
%%scala
// Define a Scala value and reference it as the last expression of the cell.
val a = 42
a

In [None]:
%%scala
// Print a marker, the sum of squares over an RDD with 100 partitions, then a second marker.
// Per the section notes, all output of a Scala cell appears only once the cell has finished.
println("start")
println(sc.range(0, 100000).repartition(100).map(x => x*x).sum())
println("end")

In [None]:
%%scala
// Read the hourly bike-sharing CSV with a header row; no schema inference is requested here.
spark.read.option("header", "true").csv("/databricks-datasets/bikeSharing/data-001/hour.csv")

In [None]:
%%scala 
// Same read as in the previous Scala cell, with the result passed to display(...).
display(spark.read.option("header", "true").csv("/databricks-datasets/bikeSharing/data-001/hour.csv"))