
## Overview

This notebook shows you how to create and query a table or DataFrame from a file that you uploaded to DBFS. [DBFS](https://docs.databricks.com/user-guide/dbfs-databricks-file-system.html) is the Databricks File System, which lets you store data for querying inside Databricks. This notebook assumes that the file you want to read is already in DBFS.

This notebook is written in **Python** so the default cell type is Python. However, you can use different languages by using the `%LANGUAGE` syntax. Python, Scala, SQL, and R are all supported.

In [0]:
# Where the uploaded file lives in DBFS, and the reader format to use for it
file_location = "/FileStore/tables/Covid_Data_03_09_2023-4.csv"
file_type = "csv"

# CSV reader settings
infer_schema = "true"
first_row_is_header = "true"
delimiter = ","

# The options below are CSV options. For other file types, these will be ignored.
df = (
    spark.read.format(file_type)
    .options(inferSchema=infer_schema, header=first_row_is_header, sep=delimiter)
    .load(file_location)
)

# Render the loaded DataFrame in the notebook output
display(df)

FIPS,Admin2,Province_State,Country_Region,Last_Update,Lat,Long_,Confirmed,Deaths,Recovered,Active,Combined_Key,Incident_Rate,Case_Fatality_Ratio
,,,Afghanistan,2023-03-10T04:21:03.000+0000,33.93911,67.709953,209451,7896,,,Afghanistan,538.0424508714615,3.76985547932452
,,,Albania,2023-03-10T04:21:03.000+0000,41.1533,20.1683,334457,3598,,,Albania,11621.96817012996,1.075773567304616
,,,Algeria,2023-03-10T04:21:03.000+0000,28.0339,1.6596,271496,6881,,,Algeria,619.132365905185,2.534475646050034
,,,Andorra,2023-03-10T04:21:03.000+0000,42.5063,1.5218,47890,165,,,Andorra,61981.49226687375,0.3445395698475673
,,,Angola,2023-03-10T04:21:03.000+0000,-11.2027,17.8739,105288,1933,,,Angola,320.35277020195906,1.835916723653218
,,,Antarctica,2023-03-10T04:21:03.000+0000,-71.9499,23.347,11,0,,,Antarctica,,0.0
,,,Antigua and Barbuda,2023-03-10T04:21:03.000+0000,17.0608,-61.7964,9106,146,,,Antigua and Barbuda,9298.668409443671,1.6033384581594552
,,,Argentina,2023-03-10T04:21:03.000+0000,-38.4161,-63.6167,10044957,130472,,,Argentina,22225.43269916568,1.2988806223859397
,,,Armenia,2023-03-10T04:21:03.000+0000,40.0691,45.0382,447308,8727,,,Armenia,15095.264160710898,1.9510046768669465
,,Australian Capital Territory,Australia,2023-03-10T04:21:03.000+0000,-35.4735,149.0124,232974,228,,,"Australian Capital Territory, Australia",54420.46250875964,0.0978649978109145


In [0]:
# Save df in Delta format at the target path. Mode "overwrite" is used for the
# write, and the "mergeSchema" option is passed through as "true".
(
    df.write
    .format("delta")
    .mode("overwrite")
    .option("mergeSchema", "true")
    .save("/mnt/dbfs/Covid_Data_03_09_2023")
)


In [0]:
# Load the Delta data back from the path it was written to
df_check = (
    spark.read
    .format("delta")
    .load("/mnt/dbfs/Covid_Data_03_09_2023")
)

# Show the column names and types so the stored schema can be verified
df_check.printSchema()


root
 |-- FIPS: integer (nullable = true)
 |-- Admin2: string (nullable = true)
 |-- Province_State: string (nullable = true)
 |-- Country_Region: string (nullable = true)
 |-- Last_Update: timestamp (nullable = true)
 |-- Lat: double (nullable = true)
 |-- Long_: double (nullable = true)
 |-- Confirmed: integer (nullable = true)
 |-- Deaths: integer (nullable = true)
 |-- Recovered: string (nullable = true)
 |-- Active: string (nullable = true)
 |-- Combined_Key: string (nullable = true)
 |-- Incident_Rate: double (nullable = true)
 |-- Case_Fatality_Ratio: double (nullable = true)

