
## Overview

This notebook shows you how to create and query tables or DataFrames from files that you have uploaded to DBFS. [DBFS](https://docs.databricks.com/user-guide/dbfs-databricks-file-system.html), the Databricks File System, lets you store data for querying inside of Databricks. This notebook assumes that the three CSV files it reads (`members.csv`, `bookings.csv`, and `facilities.csv`) are already in DBFS under `/FileStore/tables/`.

This notebook is written in **Python** so the default cell type is Python. However, you can use different languages by using the `%LANGUAGE` syntax. Python, Scala, SQL, and R are all supported.

In [0]:
# File location and type
f1_location = "/FileStore/tables/members.csv"
f2_location = "/FileStore/tables/bookings.csv"
f3_location = "/FileStore/tables/facilities.csv"
file_type = "csv"

# CSV options
infer_schema = "true"
first_row_is_header = "true"
delimiter = ","


def read_table(location):
    """Read the file at `location` into a DataFrame using the shared reader settings.

    The format and options come from the variables defined above in this cell
    (`file_type`, `infer_schema`, `first_row_is_header`, `delimiter`), so all
    three source files are guaranteed to be read the same way.

    Args:
        location: Path of the file to load.

    Returns:
        The DataFrame produced by `spark.read ... .load(location)`.
    """
    # The applied options are for CSV files. For other file types, these will be ignored.
    return (
        spark.read.format(file_type)
        .option("inferSchema", infer_schema)
        .option("header", first_row_is_header)
        .option("sep", delimiter)
        .load(location)
    )


members_df = read_table(f1_location)
bookings_df = read_table(f2_location)
facilities_df = read_table(f3_location)

In [0]:
# Register each DataFrame as a temporary view so the %sql cells can query it by name.
temp_members = "members"
temp_bookings = "bookings"
temp_facilities = "facilities"

for view_name, frame in (
    (temp_members, members_df),
    (temp_bookings, bookings_df),
    (temp_facilities, facilities_df),
):
    # createOrReplaceTempView overwrites an existing view of the same name,
    # so re-running this cell is safe.
    frame.createOrReplaceTempView(view_name)

In [0]:
%sql
-- Preview five rows of the facilities view.
SELECT *
FROM facilities
LIMIT 5

facid,name,membercost,guestcost,initialoutlay,monthlymaintenance
0,Tennis Court 1,5.0,25.0,10000,200
1,Tennis Court 2,5.0,25.0,8000,200
2,Badminton Court,0.0,15.5,4000,50
3,Table Tennis,0.0,5.0,320,10
4,Massage Room 1,35.0,80.0,4000,3000



## Join Exercises

In [0]:
%sql
-- Start times of every booking made by the member named David Farrell.
-- Both name parts are exact matches with no wildcards, so "=" is used rather than LIKE.
SELECT b.starttime
FROM members m
JOIN bookings b ON m.memid = b.memid
WHERE m.firstname = 'David'
  AND m.surname = 'Farrell'

starttime
2012-09-18T09:00:00.000+0000
2012-09-18T17:30:00.000+0000
2012-09-18T13:30:00.000+0000
2012-09-18T20:00:00.000+0000
2012-09-19T09:30:00.000+0000
2012-09-19T15:00:00.000+0000
2012-09-19T12:00:00.000+0000
2012-09-20T15:30:00.000+0000
2012-09-20T11:30:00.000+0000
2012-09-20T14:00:00.000+0000


In [0]:
%sql
-- Tennis-court bookings that start on 2012-09-21, earliest first.
-- The day is expressed as a half-open range: BETWEEN is inclusive at both ends
-- and would also return a booking starting at exactly 2012-09-22 00:00:00.
SELECT b.starttime, f.name
FROM facilities f
INNER JOIN bookings b ON f.facid = b.facid
WHERE
    f.name LIKE 'Tennis Court%'
    AND b.starttime >= '2012-09-21'
    AND b.starttime < '2012-09-22'
ORDER BY
    b.starttime;

starttime,name
2012-09-21T08:00:00.000+0000,Tennis Court 1
2012-09-21T08:00:00.000+0000,Tennis Court 2
2012-09-21T09:30:00.000+0000,Tennis Court 1
2012-09-21T10:00:00.000+0000,Tennis Court 2
2012-09-21T11:30:00.000+0000,Tennis Court 2
2012-09-21T12:00:00.000+0000,Tennis Court 1
2012-09-21T13:30:00.000+0000,Tennis Court 1
2012-09-21T14:00:00.000+0000,Tennis Court 2
2012-09-21T15:30:00.000+0000,Tennis Court 1
2012-09-21T16:00:00.000+0000,Tennis Court 2


In [0]:
%sql
-- Every member paired with the member who recommended them (self-join on members).
-- LEFT OUTER JOIN keeps members without a recommender; their rfname/rsname are NULL.
SELECT
    m.firstname AS mfname,
    m.surname   AS msname,
    r.firstname AS rfname,
    r.surname   AS rsname
FROM members AS m
LEFT OUTER JOIN members AS r
    ON m.recommendedby = r.memid
ORDER BY msname, mfname;


mfname,msname,rfname,rsname
Florence,Bader,Ponder,Stibbons
Anne,Baker,Ponder,Stibbons
Timothy,Baker,Jemima,Farrell
Tim,Boothe,Tim,Rownam
Gerald,Butters,Darren,Smith
Joan,Coplin,Timothy,Baker
Erica,Crumpet,Tracy,Smith
Nancy,Dare,Janice,Joplette
David,Farrell,,
Jemima,Farrell,,


In [0]:
%sql
-- Distinct (member, facility) pairs for bookings of any facility whose name
-- starts with 'Tennis Court'. DISTINCT collapses repeat bookings into one row.
SELECT DISTINCT
    m.firstname || ' ' || m.surname AS member,
    f.name AS facility
FROM bookings AS b
INNER JOIN members AS m
    ON m.memid = b.memid
INNER JOIN facilities AS f
    ON f.facid = b.facid
WHERE f.name LIKE 'Tennis Court%'
ORDER BY member, facility;

member,facility
Anne Baker,Tennis Court 1
Anne Baker,Tennis Court 2
Burton Tracy,Tennis Court 1
Burton Tracy,Tennis Court 2
Charles Owen,Tennis Court 1
Charles Owen,Tennis Court 2
Darren Smith,Tennis Court 2
David Farrell,Tennis Court 1
David Farrell,Tennis Court 2
David Jones,Tennis Court 1


In [0]:
%sql
-- Each member's full name next to their recommender's full name.
-- recommender is NULL when the LEFT JOIN finds no matching recommender row,
-- because concatenating NULL name parts yields NULL.
SELECT DISTINCT
    CONCAT(m.firstname, ' ', m.surname) AS member,
    CONCAT(r.firstname, ' ', r.surname) AS recommender
FROM members AS m
LEFT JOIN members AS r
    ON m.recommendedby = r.memid
ORDER BY member;

member,recommender
Anna Mackenzie,Darren Smith
Anne Baker,Ponder Stibbons
Burton Tracy,
Charles Owen,Darren Smith
Darren Smith,
David Farrell,
David Jones,Janice Joplette
David Pinker,Jemima Farrell
Douglas Jones,David Jones
Erica Crumpet,Tracy Smith



## Aggregation

In [0]:
%sql
-- Number of members recommended by each recommender id.
-- Members with no recommender are filtered out before grouping.
SELECT
    m.recommendedby,
    COUNT(*)
FROM members AS m
WHERE m.recommendedby IS NOT NULL
GROUP BY m.recommendedby
ORDER BY m.recommendedby;

recommendedby,count(1)
1,5
2,3
3,1
4,2
5,1
6,1
9,2
11,1
13,2
15,1


In [0]:
%sql
-- Total slots booked per facility across all bookings, ordered by facility id.
SELECT
    b.facid,
    SUM(b.slots) AS total_slots
FROM bookings AS b
GROUP BY b.facid
ORDER BY b.facid;

facid,total_slots
0,1320
1,1278
2,1209
3,830
4,1404
5,228
6,1104
7,908
8,911


In [0]:
%sql
-- Slots booked per facility during September 2012, smallest total first.
-- The month is a half-open range: from 2012-09-01 up to, but excluding, 2012-10-01.
SELECT
    b.facid,
    SUM(b.slots) AS total_slots
FROM bookings AS b
WHERE b.starttime >= '2012-09-01'
  AND b.starttime < '2012-10-01'
GROUP BY b.facid
ORDER BY total_slots;

facid,total_slots
5,122
3,422
7,426
8,471
6,540
2,570
1,588
0,591
4,648


In [0]:
%sql
-- Slots booked per facility for each calendar month of 2012.
-- "month" in GROUP BY / ORDER BY refers to the alias defined in the SELECT list.
SELECT
    b.facid,
    EXTRACT(MONTH FROM b.starttime) AS month,
    SUM(b.slots) AS total_slots
FROM bookings AS b
WHERE EXTRACT(YEAR FROM b.starttime) = 2012
GROUP BY b.facid, month
ORDER BY b.facid, month;

facid,month,total_slots
0,7,270
0,8,459
0,9,591
1,7,207
1,8,483
1,9,588
2,7,180
2,8,459
2,9,570
3,7,104


In [0]:
%sql
-- How many distinct member ids appear in the bookings view.
SELECT
    COUNT(DISTINCT b.memid) AS count
FROM bookings AS b

count
30


In [0]:
%sql
-- Earliest booking on or after 2012-09-01 for each member, ordered by member id.
-- The WHERE filter applies to each booking's own start time, before MIN() is taken.
SELECT
    m.surname,
    m.firstname,
    m.memid,
    MIN(b.starttime) AS starttime
FROM bookings AS b
INNER JOIN members AS m
    ON b.memid = m.memid
WHERE b.starttime >= '2012-09-01'
GROUP BY m.surname, m.firstname, m.memid
ORDER BY m.memid;

surname,firstname,memid,starttime
GUEST,GUEST,0,2012-09-01T08:00:00.000+0000
Smith,Darren,1,2012-09-01T09:00:00.000+0000
Smith,Tracy,2,2012-09-01T11:30:00.000+0000
Rownam,Tim,3,2012-09-01T16:00:00.000+0000
Joplette,Janice,4,2012-09-01T15:00:00.000+0000
Butters,Gerald,5,2012-09-02T12:30:00.000+0000
Tracy,Burton,6,2012-09-01T15:00:00.000+0000
Dare,Nancy,7,2012-09-01T12:30:00.000+0000
Boothe,Tim,8,2012-09-01T08:30:00.000+0000
Stibbons,Ponder,9,2012-09-01T11:00:00.000+0000



## String & Date

In [0]:
%sql
-- Member names formatted as "surname, firstname".
SELECT
    CONCAT(m.surname, ', ', m.firstname) AS name
FROM members AS m

name
"GUEST, GUEST"
"Smith, Darren"
"Smith, Tracy"
"Rownam, Tim"
"Joplette, Janice"
"Butters, Gerald"
"Tracy, Burton"
"Dare, Nancy"
"Boothe, Tim"
"Stibbons, Ponder"


In [0]:
%sql
-- Case-insensitive prefix search: the name is upper-cased before being
-- compared with the upper-case pattern 'TENNIS%'.
SELECT *
FROM facilities AS f
WHERE UPPER(f.name) LIKE 'TENNIS%';

facid,name,membercost,guestcost,initialoutlay,monthlymaintenance
0,Tennis Court 1,5.0,25.0,10000,200
1,Tennis Court 2,5.0,25.0,8000,200


In [0]:
%sql
-- Members whose telephone contains a '(' followed somewhere later by a ')'.
-- NOTE(review): the doubled backslashes assume the default string-literal
-- escaping, where '\\(' reaches the regex engine as \( — confirm if
-- spark.sql.parser.escapedStringLiterals is enabled.
SELECT
    m.memid,
    m.telephone
FROM members AS m
WHERE m.telephone RLIKE '\\(.*\\)';

memid,telephone
0,(000) 000-0000
3,(844) 693-0723
4,(833) 942-4710
5,(844) 078-4130
6,(822) 354-9973
7,(833) 776-4001
8,(811) 433-2547
9,(833) 160-3900
10,(855) 542-5251
11,(844) 536-8036


In [0]:
%sql
-- Number of members per first character of surname, sorted by that character.
SELECT
    SUBSTR(m.surname, 1, 1) AS letter,
    COUNT(*) AS count
FROM members AS m
GROUP BY letter
ORDER BY letter

letter,count
B,5
C,2
D,1
F,2
G,2
H,1
J,3
M,1
O,1
P,2


In [0]:
%sql
-- One row per calendar day of October 2012 (2012-10-01 through 2012-10-31).
-- SEQUENCE builds the inclusive date range directly from DATE literals, so the
-- month's length is not hard-coded as an offset count and no string-to-date
-- cast is involved. EXPLODE turns the resulting array into one row per date.
SELECT
    EXPLODE(SEQUENCE(DATE '2012-10-01', DATE '2012-10-31', INTERVAL 1 DAY)) AS ts;



ts
2012-10-01
2012-10-02
2012-10-03
2012-10-04
2012-10-05
2012-10-06
2012-10-07
2012-10-08
2012-10-09
2012-10-10


In [0]:
%sql
-- Number of bookings per calendar month; each start time is truncated to the
-- first instant of its month and used as the grouping key.
SELECT
    DATE_TRUNC('month', b.starttime) AS month,
    COUNT(*)
FROM bookings AS b
GROUP BY month
ORDER BY month

month,count(1)
2012-07-01T00:00:00.000+0000,658
2012-08-01T00:00:00.000+0000,1472
2012-09-01T00:00:00.000+0000,1913
2013-01-01T00:00:00.000+0000,1
