In [0]:
files = dbutils.fs.ls('mnt/demo-datasets/bookstore') # finding all the files in the directory 

In [0]:
display(files)

path,name,size,modificationTime
dbfs:/mnt/demo-datasets/bookstore/books-cdc/,books-cdc/,0,0
dbfs:/mnt/demo-datasets/bookstore/books-csv/,books-csv/,0,0
dbfs:/mnt/demo-datasets/bookstore/books-csv-new/,books-csv-new/,0,0
dbfs:/mnt/demo-datasets/bookstore/books-streaming/,books-streaming/,0,0
dbfs:/mnt/demo-datasets/bookstore/customers-json/,customers-json/,0,0
dbfs:/mnt/demo-datasets/bookstore/customers-json-new/,customers-json-new/,0,0
dbfs:/mnt/demo-datasets/bookstore/orders/,orders/,0,0
dbfs:/mnt/demo-datasets/bookstore/orders-json-raw/,orders-json-raw/,0,0
dbfs:/mnt/demo-datasets/bookstore/orders-json-streaming/,orders-json-streaming/,0,0
dbfs:/mnt/demo-datasets/bookstore/orders-new/,orders-new/,0,0


In [0]:
%sql
CREATE TABLE books
  (book_id STRING, title STRING, author STRING, category STRING, price DOUBLE)
USING CSV
OPTIONS (
  header = "true",
  delimiter = ";",
  path = 'dbfs:/mnt/demo-datasets/bookstore/books-csv/export_*.csv'
);
-- LOCATION 'dbfs:/mnt/demo-datasets/bookstore/books/'

--dbfs:/mnt/demo-datasets/bookstore/books-csv/export_001.csv

In [0]:
%sql
SELECT * FROM books;

book_id,title,author,category,price
B07,The Hundred-Page Machine Learning,Andriy Burkov,Computer Science,33.0
B08,Quantum Computing for Everyone,Chris Bernhardt,Computer Science,41.0
B09,Advanced Data Structures,Peter Brass,Computer Science,24.0
B10,Beginning Database Design Solutions,Rod Stephens,Computer Science,44.0
B11,Business Intelligence for Dummies,Swain Scheps,Computer Science,38.0
B12,Big Data in Practice,Bernard Marr,Computer Science,30.0
B01,The Soul of a New Machine,Tracy Kidder,Computer Science,49.0
B02,Learning JavaScript Design Patterns,Addy Osmani,Computer Science,28.0
B03,Make Your Own Neural Network,Tariq Rashid,Computer Science,35.0
B04,Robot Dynamics and Control,Mark W. Spong,Computer Science,20.0


READING DELTA TABLE AS STREAM OF DATA

In [0]:
(spark.readStream
 .table("books")
 .createOrReplaceTempView("books_streaming_temp_vw"))

Querying the temp view 

In [0]:
%sql
SELECT * FROM books_streaming_temp_vw;

book_id,title,author,category,price
B07,The Hundred-Page Machine Learning,Andriy Burkov,Computer Science,33.0
B08,Quantum Computing for Everyone,Chris Bernhardt,Computer Science,41.0
B09,Advanced Data Structures,Peter Brass,Computer Science,24.0
B10,Beginning Database Design Solutions,Rod Stephens,Computer Science,44.0
B11,Business Intelligence for Dummies,Swain Scheps,Computer Science,38.0
B12,Big Data in Practice,Bernard Marr,Computer Science,30.0
B01,The Soul of a New Machine,Tracy Kidder,Computer Science,49.0
B02,Learning JavaScript Design Patterns,Addy Osmani,Computer Science,28.0
B03,Make Your Own Neural Network,Tariq Rashid,Computer Science,35.0
B04,Robot Dynamics and Control,Mark W. Spong,Computer Science,20.0


Some Aggregation on streaming temp view

In [0]:
%sql
SELECT author, COUNT(book_id) total_books FROM books_streaming_temp_vw 
GROUP BY author;

author,total_books
Mark W. Spong,1
Chris Bernhardt,1
Tariq Rashid,1
Peter Brass,1
Luciano Ramalho,1
Addy Osmani,1
Andriy Burkov,1
Tracy Kidder,1
Swain Scheps,1
François Chollet,1


In [0]:
%sql
SELECT * FROM books_streaming_temp_vw 
ORDER BY author; -- sorting is not supported by streaming temp view

[0;31m---------------------------------------------------------------------------[0m
[0;31mAnalysisException[0m                         Traceback (most recent call last)
File [0;32m<command-693258673127663>:7[0m
[1;32m      5[0m     display(df)
[1;32m      6[0m     [38;5;28;01mreturn[39;00m df
[0;32m----> 7[0m   _sqldf [38;5;241m=[39m [43m____databricks_percent_sql[49m[43m([49m[43m)[49m
[1;32m      8[0m [38;5;28;01mfinally[39;00m:
[1;32m      9[0m   [38;5;28;01mdel[39;00m ____databricks_percent_sql

File [0;32m<command-693258673127663>:5[0m, in [0;36m____databricks_percent_sql[0;34m()[0m
[1;32m      3[0m [38;5;28;01mimport[39;00m [38;5;21;01mbase64[39;00m
[1;32m      4[0m df [38;5;241m=[39m spark[38;5;241m.[39msql(base64[38;5;241m.[39mstandard_b64decode([38;5;124m"[39m[38;5;124mU0VMRUNUICogRlJPTSBib29rc19zdHJlYW1pbmdfdGVtcF92dyAKT1JERVIgQlkgYXV0aG9y[39m[38;5;124m"[39m)[38;5;241m.[39mdecode())
[0;32m----> 5[0m [43mdisplay[49m

In [0]:
%sql
CREATE OR REPLACE TEMP VIEW author_count_temp_view -- author_count_temp_view is also streaming temporary view
AS SELECT author,COUNT(book_id) total_books FROM books_streaming_temp_vw 
GROUP BY author;

In [0]:
(spark.table("author_count_temp_view")                               
      .writeStream  
      .trigger(processingTime='4 seconds')
      .outputMode("complete") 
      .option("checkpointLocation", "dbfs:/mnt/demo-datasets/author_counts_checkpoint")
      .table("author_counts")
)


Out[120]: <pyspark.sql.streaming.query.StreamingQuery at 0x7fc1671dccd0>

In [0]:
%sql
SELECT * FROM author_counts; 
-- Note: This is not a streaming query

author,total_books
François Chollet,1
Chris Bernhardt,1
Luciano Ramalho,1
Mark W. Spong,1
Andriy Burkov,1
Tariq Rashid,1
Tracy Kidder,1
Swain Scheps,1
Rod Stephens,1
Bernard Marr,1


In [0]:
spark.sql("SET spark.sql.hive.convertMetastoreParquet=false")

Out[131]: DataFrame[key: string, value: string]

In [0]:
%sql
INSERT INTO books VALUES ("B16", "Hands-On Deep Learning Algorithms with Python", "Sudharsan Ravichandiran", "Computer Science", 25);

[0;31m---------------------------------------------------------------------------[0m
[0;31mAnalysisException[0m                         Traceback (most recent call last)
File [0;32m<command-693258673127670>:7[0m
[1;32m      5[0m     display(df)
[1;32m      6[0m     [38;5;28;01mreturn[39;00m df
[0;32m----> 7[0m   _sqldf [38;5;241m=[39m [43m____databricks_percent_sql[49m[43m([49m[43m)[49m
[1;32m      8[0m [38;5;28;01mfinally[39;00m:
[1;32m      9[0m   [38;5;28;01mdel[39;00m ____databricks_percent_sql

File [0;32m<command-693258673127670>:4[0m, in [0;36m____databricks_percent_sql[0;34m()[0m
[1;32m      2[0m [38;5;28;01mdef[39;00m [38;5;21m____databricks_percent_sql[39m():
[1;32m      3[0m   [38;5;28;01mimport[39;00m [38;5;21;01mbase64[39;00m
[0;32m----> 4[0m   df [38;5;241m=[39m [43mspark[49m[38;5;241;43m.[39;49m[43msql[49m[43m([49m[43mbase64[49m[38;5;241;43m.[39;49m[43mstandard_b64decode[49m[43m([49m[38;5;124;43m"[39;49

In [0]:
%sql
-- The graph of streaming query keeps changing as the new data arrives 
-- lets insert new data in our source table(books)

INSERT INTO books
 values ("B16", "Hands-On Deep Learning Algorithms with Python", "Sudharsan Ravichandiran", "Computer Science", 25),
        ("B17", "Neural Network Methods in Natural Language Processing", "Yoav Goldberg", "Computer Science", 30),
        ("B18", "Understanding digital signal processing", "Richard Lyons", "Computer Science", 35);

[0;31m---------------------------------------------------------------------------[0m
[0;31mAnalysisException[0m                         Traceback (most recent call last)
File [0;32m<command-693258673127667>:7[0m
[1;32m      5[0m     display(df)
[1;32m      6[0m     [38;5;28;01mreturn[39;00m df
[0;32m----> 7[0m   _sqldf [38;5;241m=[39m [43m____databricks_percent_sql[49m[43m([49m[43m)[49m
[1;32m      8[0m [38;5;28;01mfinally[39;00m:
[1;32m      9[0m   [38;5;28;01mdel[39;00m ____databricks_percent_sql

File [0;32m<command-693258673127667>:4[0m, in [0;36m____databricks_percent_sql[0;34m()[0m
[1;32m      2[0m [38;5;28;01mdef[39;00m [38;5;21m____databricks_percent_sql[39m():
[1;32m      3[0m   [38;5;28;01mimport[39;00m [38;5;21;01mbase64[39;00m
[0;32m----> 4[0m   df [38;5;241m=[39m [43mspark[49m[38;5;241;43m.[39;49m[43msql[49m[43m([49m[43mbase64[49m[38;5;241;43m.[39;49m[43mstandard_b64decode[49m[43m([49m[38;5;124;43m"[39;49