# 1 The Spark Session 

## 1.1 Automatic initialization

In [1]:
spark

## 1.2 Spark progress bar integration

In [2]:
df = spark.read.csv("/databricks-datasets/bikeSharing/data-001/hour.csv", inferSchema=True, header=True)

HBox(children=(Button(icon='arrow-circle-right', style=ButtonStyle(), tooltip='Toggle progress bar', _dom_clas…

In [3]:
df.filter(df.holiday > 0)

HBox(children=(Button(icon='arrow-circle-right', style=ButtonStyle(), tooltip='Toggle progress bar', _dom_clas…

instant,dteday,season,yr,mnth,hr,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed,casual,registered,cnt
373,2011-01-17 00:00:00,1,0,1,0,1,1,0,2,0.2,0.197,0.47,0.2239,1,16,17
374,2011-01-17 00:00:00,1,0,1,1,1,1,0,2,0.2,0.197,0.44,0.194,1,15,16
375,2011-01-17 00:00:00,1,0,1,2,1,1,0,2,0.18,0.1667,0.43,0.2537,0,8,8
376,2011-01-17 00:00:00,1,0,1,3,1,1,0,2,0.18,0.1818,0.43,0.194,0,2,2
377,2011-01-17 00:00:00,1,0,1,4,1,1,0,2,0.18,0.197,0.43,0.1343,1,2,3
378,2011-01-17 00:00:00,1,0,1,5,1,1,0,2,0.18,0.197,0.43,0.1642,0,1,1
379,2011-01-17 00:00:00,1,0,1,6,1,1,0,2,0.18,0.1818,0.43,0.194,0,5,5
380,2011-01-17 00:00:00,1,0,1,7,1,1,0,2,0.16,0.1818,0.5,0.1343,4,9,13
381,2011-01-17 00:00:00,1,0,1,8,1,1,0,2,0.16,0.1515,0.47,0.2239,3,30,33
382,2011-01-17 00:00:00,1,0,1,9,1,1,0,2,0.16,0.1515,0.47,0.2239,8,39,47


# 2 Browsers

## 2.1 DBFS Browser

In [4]:
dbbrowser.dbfs(path= "/")

VBox(children=(HBox(children=(Button(icon='refresh', layout=Layout(width='40px'), style=ButtonStyle()), Button…

## 2.2 Database Browser

In [5]:
dbbrowser.databases()

VBox(children=(HBox(children=(VBox(children=(Button(icon='refresh', layout=Layout(width='40px'), style=ButtonS…

# 3 dbutils support

In [6]:
dbutils.help()

## 3.1 Module `secrets`

In [7]:
dbutils.secrets.listScopes()

[SecretScope(name='dbjl-pytest'),
 SecretScope(name='dbjl-pytest2'),
 SecretScope(name='scala-creds')]

In [8]:
dbutils.secrets.list("dbjl-pytest")

[SecretMetadata(key='pytest-key')]

### Note: Secrets are not redacted in Jupyterlab Integration!

In [9]:
dbutils.secrets.get("dbjl-pytest", "pytest-key")

'databrickslabs-jupyterlab'

## 3.2 Module `fs`

In [10]:
dbutils.fs.ls("/data")

[FileInfo(path='dbfs:/data/airbnb/', name='airbnb/', size=0),
 FileInfo(path='dbfs:/data/bank.csv', name='bank.csv', size=461474),
 FileInfo(path='dbfs:/data/covtype.csv', name='covtype.csv', size=141875378),
 FileInfo(path='dbfs:/data/covtype.parquet/', name='covtype.parquet/', size=0),
 FileInfo(path='dbfs:/data/digits/', name='digits/', size=0),
 FileInfo(path='dbfs:/data/mnist/', name='mnist/', size=0)]

In [11]:
%fs ls /FileStore

List directory '/FileStore':

FileInfo(path='dbfs:/FileStore/derby.log', name='derby.log', size=732)
FileInfo(path='dbfs:/FileStore/import-stage/', name='import-stage/', size=0)
FileInfo(path='dbfs:/FileStore/initialization.ipynb', name='initialization.ipynb', size=915)
FileInfo(path='dbfs:/FileStore/jars/', name='jars/', size=0)
FileInfo(path='dbfs:/FileStore/jupyter_debug/', name='jupyter_debug/', size=0)
FileInfo(path='dbfs:/FileStore/plots/', name='plots/', size=0)
FileInfo(path='dbfs:/FileStore/scripts/', name='scripts/', size=0)


## 3.3 Module `notebook`

### 3.3.1 Run external notebooks

- Notebooks to be run need to be copied to the remote filesystem, e.g. `/dbfs/home/<username>/...`, with `/dbfs/` being the POSIX mount point of the DBFS filesystem.
Notebooks can be copied to the remote DBFS with the `databricks` command-line utility, e.g.

    `databricks --profile demo fs cp initialize.ipynb /dbfs/home/bernhard/`

- Since this notebook runs on the remote machine, the copy cannot be done from within this notebook

In [12]:
test = {"a": 42, "l": [0, 1, 1, 2, 3, 5, 8]}

In [13]:
to_json(test)

NameError: name 'to_json' is not defined

In [17]:
dbutils.notebook.run("/FileStore/initialization")

Running '/dbfs/FileStore/initialization.ipynb'
Successfully initialized


In [18]:
to_json(test)

'{"a": 42, "l": [0, 1, 1, 2, 3, 5, 8]}'

In [19]:
%run /dbfs/FileStore/initialization.ipynb

Successfully initialized


### 3.3.2 Stop "run all" execution at defined locations in the notebook

In [20]:
dbutils.notebook.exit("test")

Notebook exited: test


# 4 Multi Language support

## 4.1 SQL integration

Uses a `spark.sql` wrapper:
- no support of autocompletion

In [21]:
df.createOrReplaceTempView("bikes")

In [22]:
%%sql 
show tables

database,tableName,isTemporary
default,exampletable,False
,bikes,True


In [23]:
%%sql
select * from bikes

HBox(children=(Button(icon='arrow-circle-right', style=ButtonStyle(), tooltip='Toggle progress bar', _dom_clas…

instant,dteday,season,yr,mnth,hr,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed,casual,registered,cnt
1,2011-01-01 00:00:00,1,0,1,0,0,6,0,1,0.24,0.2879,0.81,0.0,3,13,16
2,2011-01-01 00:00:00,1,0,1,1,0,6,0,1,0.22,0.2727,0.8,0.0,8,32,40
3,2011-01-01 00:00:00,1,0,1,2,0,6,0,1,0.22,0.2727,0.8,0.0,5,27,32
4,2011-01-01 00:00:00,1,0,1,3,0,6,0,1,0.24,0.2879,0.75,0.0,3,10,13
5,2011-01-01 00:00:00,1,0,1,4,0,6,0,1,0.24,0.2879,0.75,0.0,0,1,1
6,2011-01-01 00:00:00,1,0,1,5,0,6,0,2,0.24,0.2576,0.75,0.0896,0,1,1
7,2011-01-01 00:00:00,1,0,1,6,0,6,0,1,0.22,0.2727,0.8,0.0,2,0,2
8,2011-01-01 00:00:00,1,0,1,7,0,6,0,1,0.2,0.2576,0.86,0.0,1,2,3
9,2011-01-01 00:00:00,1,0,1,8,0,6,0,1,0.24,0.2879,0.75,0.0,1,7,8
10,2011-01-01 00:00:00,1,0,1,9,0,6,0,1,0.32,0.3485,0.76,0.0,8,6,14


## 4.2 Scala integration

Uses REST API 1.2:
- no Spark progress bar
- no support of autocompletion
- no incremental outputs (the cell is executed as a block and all output appears at the end of execution)

In [24]:
spark.sparkContext.applicationId

'app-20200527070031-0000'

In [25]:
%%scala
spark.sparkContext.applicationId

        
res0: String = app-20200527070031-0000


In [26]:
%%scala
val a = 42
a

   
a: Int = 42
res1: Int = 42


In [27]:
%%scala
spark.read.option("header", "true").csv("/databricks-datasets/bikeSharing/data-001/hour.csv")

    
res2: org.apache.spark.sql.DataFrame = [instant: string, dteday: string ... 15 more fields]


In [28]:
%%scala 
display(spark.read.option("header", "true").csv("/databricks-datasets/bikeSharing/data-001/hour.csv"))

    


Unnamed: 0,instant,dteday,season,yr,mnth,hr,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed,casual,registered,cnt
0,1,2011-01-01,1,0,1,0,0,6,0,1,0.24,0.2879,0.81,0,3,13,16
1,2,2011-01-01,1,0,1,1,0,6,0,1,0.22,0.2727,0.8,0,8,32,40
2,3,2011-01-01,1,0,1,2,0,6,0,1,0.22,0.2727,0.8,0,5,27,32
3,4,2011-01-01,1,0,1,3,0,6,0,1,0.24,0.2879,0.75,0,3,10,13
4,5,2011-01-01,1,0,1,4,0,6,0,1,0.24,0.2879,0.75,0,0,1,1
5,6,2011-01-01,1,0,1,5,0,6,0,2,0.24,0.2576,0.75,0.0896,0,1,1
6,7,2011-01-01,1,0,1,6,0,6,0,1,0.22,0.2727,0.8,0,2,0,2
7,8,2011-01-01,1,0,1,7,0,6,0,1,0.2,0.2576,0.86,0,1,2,3
8,9,2011-01-01,1,0,1,8,0,6,0,1,0.24,0.2879,0.75,0,1,7,8
9,10,2011-01-01,1,0,1,9,0,6,0,1,0.32,0.3485,0.76,0,8,6,14


Data truncated, more than 1000 rows
