
## Overview

This notebook will show you how to create and query a table or DataFrame that you uploaded to DBFS. [DBFS](https://docs.databricks.com/user-guide/dbfs-databricks-file-system.html) is a Databricks File System that allows you to store data for querying inside of Databricks. This notebook assumes that you have a file already inside of DBFS that you would like to read from.

This notebook is written in **Python** so the default cell type is Python. However, you can use different languages by using the `%LANGUAGE` syntax. Python, Scala, SQL, and R are all supported.

In [0]:
# File location and type
file_location = "/FileStore/tables/cleaned_all_phones.csv"
file_type = "csv"

# CSV options
infer_schema = "true"
first_row_is_header = "true"
delimiter = ","

# The applied options are for CSV files. For other file types, these will be ignored.
df = spark.read.format(file_type) \
  .option("inferSchema", infer_schema) \
  .option("header", first_row_is_header) \
  .option("sep", delimiter) \
  .load(file_location)

display(df)

phone_name,brand,os,inches,resolution,battery,battery_type,ram(GB),announcement_date,weight(g),storage(GB),video_720p,video_1080p,video_4K,video_8K,video_30fps,video_60fps,video_120fps,video_240fps,video_480fps,video_960fps,price(USD)
Y6II Compact,Huawei,Android 5.1,5.0,720x1280,2200,Li-Po,2,2016-09-01,140.0,16,True,False,False,False,True,False,False,False,False,False,120.0
K20 plus,LG,Android 7.0,5.3,720x1280,2700,Li-Ion,2,2016-12-01,140.0,16,False,True,False,False,True,False,False,False,False,False,100.0
P8 Lite (2017),Huawei,Android 7.0,5.2,1080x1920,3000,Li-Ion,4,2017-01-01,147.0,16,False,True,False,False,True,False,False,False,False,False,420.0
Redmi Note 4,Xiaomi,Android 6.0,5.5,1080x1920,4100,Li-Po,4,2017-01-01,165.0,32,True,True,False,False,True,False,True,False,False,False,150.0
P10,Huawei,Android 7.0,5.1,1080x1920,3200,Li-Ion,4,2017-02-01,145.0,32,True,True,True,False,True,True,False,False,False,False,420.0
Xperia XA1,Sony,Android 7.0,5.0,720x1280,2300,Li-Ion,3,2017-02-01,143.0,32,False,True,False,False,True,False,False,False,False,False,140.0
P10 Lite,Huawei,Android 7.0,5.2,1080x1920,3000,Li-Po,4,2017-02-01,146.0,32,False,True,False,False,True,False,False,False,False,False,420.0
P10 Plus,Huawei,Android 7.0,5.5,1440x2560,3750,Li-Ion,6,2017-02-01,165.0,64,True,True,True,False,True,True,False,False,False,False,170.0
Xperia XA1 Ultra,Sony,Android 7.0,6.0,1080x1920,2700,Li-Ion,4,2017-02-01,188.0,32,False,True,False,False,True,False,False,False,False,False,250.0
X power2,LG,Android 7.0,5.5,720x1280,4500,Li-Ion,2,2017-02-01,164.0,16,True,True,False,False,True,False,False,False,False,False,170.0


In [0]:
# Create a view or table

temp_table_name = "cleaned_all_phones_csv"

df.createOrReplaceTempView(temp_table_name)

In [0]:
%sql

/* Query the created temp table in a SQL cell */

select * from `cleaned_all_phones_csv`

phone_name,brand,os,inches,resolution,battery,battery_type,ram(GB),announcement_date,weight(g),storage(GB),video_720p,video_1080p,video_4K,video_8K,video_30fps,video_60fps,video_120fps,video_240fps,video_480fps,video_960fps,price(USD)
Y6II Compact,Huawei,Android 5.1,5.0,720x1280,2200,Li-Po,2,2016-09-01,140.0,16,True,False,False,False,True,False,False,False,False,False,120.0
K20 plus,LG,Android 7.0,5.3,720x1280,2700,Li-Ion,2,2016-12-01,140.0,16,False,True,False,False,True,False,False,False,False,False,100.0
P8 Lite (2017),Huawei,Android 7.0,5.2,1080x1920,3000,Li-Ion,4,2017-01-01,147.0,16,False,True,False,False,True,False,False,False,False,False,420.0
Redmi Note 4,Xiaomi,Android 6.0,5.5,1080x1920,4100,Li-Po,4,2017-01-01,165.0,32,True,True,False,False,True,False,True,False,False,False,150.0
P10,Huawei,Android 7.0,5.1,1080x1920,3200,Li-Ion,4,2017-02-01,145.0,32,True,True,True,False,True,True,False,False,False,False,420.0
Xperia XA1,Sony,Android 7.0,5.0,720x1280,2300,Li-Ion,3,2017-02-01,143.0,32,False,True,False,False,True,False,False,False,False,False,140.0
P10 Lite,Huawei,Android 7.0,5.2,1080x1920,3000,Li-Po,4,2017-02-01,146.0,32,False,True,False,False,True,False,False,False,False,False,420.0
P10 Plus,Huawei,Android 7.0,5.5,1440x2560,3750,Li-Ion,6,2017-02-01,165.0,64,True,True,True,False,True,True,False,False,False,False,170.0
Xperia XA1 Ultra,Sony,Android 7.0,6.0,1080x1920,2700,Li-Ion,4,2017-02-01,188.0,32,False,True,False,False,True,False,False,False,False,False,250.0
X power2,LG,Android 7.0,5.5,720x1280,4500,Li-Ion,2,2017-02-01,164.0,16,True,True,False,False,True,False,False,False,False,False,170.0


In [0]:
# With this registered as a temp view, it will only be available to this particular notebook. If you'd like other users to be able to query this table, you can also create a table from the DataFrame.
# Once saved, this table will persist across cluster restarts as well as allow various users across different notebooks to query this data.
# To do so, choose your table name and uncomment the bottom line.

permanent_table_name = "cleaned_all_phones_csv"

# df.write.format("parquet").saveAsTable(permanent_table_name)

In [0]:
%sql
/*Find all phones by Brand*/

SELECT phone_name from cleaned_all_phones_csv where brand like'%Samsung%'

phone_name
Galaxy Xcover 4
Galaxy J7 V
Galaxy S8+
Z4
Galaxy J7 Max
Galaxy J7 (2017)
Galaxy J7 Pro
Galaxy J5 (2017)
Galaxy Folder2
Galaxy Note FE


In [0]:
%sql
/*- Filter Phones by Screen Size
List all phones with a screen size greater than 6 inches.*/

SELECT PHONE_NAME from cleaned_all_phones_csv where CAST(inches as float) > 6 

PHONE_NAME
Galaxy S8+
Mi Max 2
Galaxy Note8
R11s
R11s Plus
moto tab
Galaxy S9+
R15
R15 Pro
F7


In [0]:
%sql
/*- Count Phones per OS
Count how many phones use each operating system (Android, iOS, etc.).*/

SELECT os, count(*) as count from cleaned_all_phones_csv where os is not null group by os order by count desc

os,count
Android 11,322
Android 10,317
Android 12,233
Android 9.0,210
Android 13,154
Android 8.1,83
Android 8.0,55
Android 7.0,33
Android 7.1.1,17
Android 7.1,14


In [0]:
%sql
/*Intermediate Level
- Average Battery Capacity Per Brand
*/

SELECT BRAND, ROUND(AVG(BATTERY),2) AS BATTERY from CLEANED_ALL_PHONES_CSV 
WHERE BATTERY IS NOT NULL GROUP BY BRAND

BRAND,BATTERY
Sony,3626.22
Realme,4808.28
Huawei,3932.63
Xiaomi,4647.88
Lenovo,4357.21
Oppo,4378.73
Samsung,4352.5
Google,3902.0
LG,3586.93
Honor,4361.13


In [0]:
%sql
/*Select phones that have higher-than-average RAM compared to all phones.*/
SELECT brand, AVG("RAM(GB)") AS avg_ram 
FROM cleaned_all_phones_csv  
GROUP BY brand  
HAVING AVG("RAM(GB)") IS NOT NULL  
ORDER BY avg_ram DESC;



brand,avg_ram
