In [0]:
%sql
-- Make `workspace` the current catalog so the two-part names (schema.table)
-- used by later cells resolve against it.
USE CATALOG workspace;

In [0]:
# List the files uploaded to the `bwm_sales` volume (catalog `workspace`, schema `bmw`).
# Plain string literal: there is nothing to interpolate, so no f-prefix is needed.
spark.sql("LIST '/Volumes/workspace/bmw/bwm_sales/'").display()

path,name,size,modification_time
/Volumes/workspace/bmw/bwm_sales/bwm_sales.csv,bwm_sales.csv,3387065,1761404794000


In [0]:
# Preview the files through the CSV reader with default options.
# No header handling is applied here, so the columns come back as _c0.._c10
# and the header line shows up as the first data row.
# Plain (non-f) string: the query has no placeholders.
spark.sql(
    '''
    SELECT * FROM 
    CSV.`/Volumes/workspace/bmw/bwm_sales/`
    ''').display()


_c0,_c1,_c2,_c3,_c4,_c5,_c6,_c7,_c8,_c9,_c10
MODEL,YEAR,REGION,COLOR,FUEL_TYPE,TRANSMISSION,ENGINE_SIZE_L,MILEAGE_KM,PRICE_USD,SALES_VOLUME,SALES_CLASSIFICATION
i8,2010,South America,Black,Hybrid,Automatic,4.7,16020,102215,5006,Low
i8,2010,Middle East,Grey,Petrol,Automatic,4.0,98514,37949,2278,Low
X6,2010,Europe,Red,Hybrid,Manual,3.8,128477,68106,1048,Low
i8,2010,North America,White,Electric,Automatic,2.5,75457,101282,8396,High
X6,2010,Africa,Silver,Petrol,Manual,1.7,176650,33796,5935,Low
M5,2010,South America,White,Diesel,Manual,2.8,121393,108152,7139,High
3 Series,2010,Asia,Black,Petrol,Manual,2.1,107572,8666,8650,High
5 Series,2010,Europe,Red,Petrol,Manual,1.8,194101,3412,1344,Low
i3,2010,Africa,Blue,Petrol,Manual,3.6,91061,45834,7607,High


In [0]:
# Read the same files as plain text: every line of the file (header included)
# becomes one row in a single `value` column, which shows the raw CSV layout.
# Plain (non-f) string: the query has no placeholders.
spark.sql(
    '''
    SELECT * FROM 
    text.`/Volumes/workspace/bmw/bwm_sales/`
    ''').display()
    

value
"MODEL,YEAR,REGION,COLOR,FUEL_TYPE,TRANSMISSION,ENGINE_SIZE_L,MILEAGE_KM,PRICE_USD,SALES_VOLUME,SALES_CLASSIFICATION"
"i8,2010,South America,Black,Hybrid,Automatic,4.7,16020,102215,5006,Low"
"i8,2010,Middle East,Grey,Petrol,Automatic,4.0,98514,37949,2278,Low"
"X6,2010,Europe,Red,Hybrid,Manual,3.8,128477,68106,1048,Low"
"i8,2010,North America,White,Electric,Automatic,2.5,75457,101282,8396,High"
"X6,2010,Africa,Silver,Petrol,Manual,1.7,176650,33796,5935,Low"
"M5,2010,South America,White,Diesel,Manual,2.8,121393,108152,7139,High"
"3 Series,2010,Asia,Black,Petrol,Manual,2.1,107572,8666,8650,High"
"5 Series,2010,Europe,Red,Petrol,Manual,1.8,194101,3412,1344,Low"
"i3,2010,Africa,Blue,Petrol,Manual,3.6,91061,45834,7607,High"


In [0]:
%sql
-- Remove any earlier copy of the table so this demo cell can be re-run.
DROP TABLE IF EXISTS bmw.bmw_sales;

-- Create a Delta table from the CSV file(s) in the volume directory (CTAS over read_files).
-- The columns are selected by name rather than with SELECT *.
CREATE TABLE bmw.bmw_sales AS
SELECT
  MODEL,
  YEAR,
  REGION,
  COLOR,
  FUEL_TYPE,
  TRANSMISSION,
  ENGINE_SIZE_L,
  MILEAGE_KM,
  PRICE_USD,
  SALES_VOLUME,
  SALES_CLASSIFICATION
FROM read_files(
  '/Volumes/workspace/bmw/bwm_sales/',
  format => 'csv',
  inferSchema => 'true',
  header => 'true'
);

num_affected_rows,num_inserted_rows


In [0]:
%sql
-- Count the rows loaded into the table.
-- A multi-statement %sql cell renders only the result of its last statement,
-- so a row preview (`SELECT * FROM bmw.bmw_sales`) belongs in a cell of its own
-- rather than in front of this query, where it would scan the table and show nothing.
select count(*) from bmw.bmw_sales

count(*)
50000


In [0]:
# using python
# Load the CSV file(s) from the volume directory into a Spark DataFrame:
# the first line provides the column names and column types are inferred.
csv_reader = spark.read.format("csv")
csv_reader = csv_reader.option("header", "true")
csv_reader = csv_reader.option("inferSchema", "true")
sdf = csv_reader.load('/Volumes/workspace/bmw/bwm_sales/')

# Save the DataFrame as a Delta table, overwriting the table if it already exists.
delta_writer = sdf.write.mode("overwrite").format("delta")
delta_writer.saveAsTable("workspace.bmw.bmw_sales_py")

In [0]:
# Read the Delta table written by the Python cell and render it.
# Plain string literal: the table name has no placeholders, so no f-prefix is needed.
spark.read.table("workspace.bmw.bmw_sales_py").display()

# second option
# spark.sql("SELECT * FROM bmw.bmw_sales_py").display()

MODEL,YEAR,REGION,COLOR,FUEL_TYPE,TRANSMISSION,ENGINE_SIZE_L,MILEAGE_KM,PRICE_USD,SALES_VOLUME,SALES_CLASSIFICATION
i8,2010,South America,Black,Hybrid,Automatic,4.7,16020,102215,5006,Low
i8,2010,Middle East,Grey,Petrol,Automatic,4.0,98514,37949,2278,Low
X6,2010,Europe,Red,Hybrid,Manual,3.8,128477,68106,1048,Low
i8,2010,North America,White,Electric,Automatic,2.5,75457,101282,8396,High
X6,2010,Africa,Silver,Petrol,Manual,1.7,176650,33796,5935,Low
M5,2010,South America,White,Diesel,Manual,2.8,121393,108152,7139,High
3 Series,2010,Asia,Black,Petrol,Manual,2.1,107572,8666,8650,High
5 Series,2010,Europe,Red,Petrol,Manual,1.8,194101,3412,1344,Low
i3,2010,Africa,Blue,Petrol,Manual,3.6,91061,45834,7607,High
i3,2010,Africa,Blue,Diesel,Manual,1.8,120482,101137,968,Low


In [0]:
# List the tables registered in the `workspace.bmw` schema via the Spark catalog API.
bmw_schema_tables = spark.catalog.listTables("workspace.bmw")
bmw_schema_tables

[Table(name='bmw_sales', catalog='workspace', namespace=['bmw'], description=None, tableType='MANAGED', isTemporary=False),
 Table(name='bmw_sales_py', catalog='workspace', namespace=['bmw'], description=None, tableType='MANAGED', isTemporary=False)]

In [0]:
%sql
-- DESCRIBE DETAIL returns one row of table-level metadata
-- (format, name, location, number of files, size in bytes, properties, ...).
DESCRIBE DETAIL bmw.bmw_sales;

format,id,name,description,location,createdAt,lastModified,partitionColumns,clusteringColumns,numFiles,sizeInBytes,properties,minReaderVersion,minWriterVersion,tableFeatures,statistics,clusterByAuto
delta,ce30bd20-6139-4550-a42f-eb6061e5e985,workspace.bmw.bmw_sales,,,2025-10-25T15:29:31.025Z,2025-10-25T15:29:33.000Z,List(),List(),1,509588,"Map(delta.parquet.compression.codec -> zstd, delta.enableDeletionVectors -> true)",3,7,"List(appendOnly, deletionVectors, invariants)","Map(numRowsDeletedByDeletionVectors -> 0, numDeletionVectors -> 0)",False


In [0]:
%sql
-- DESCRIBE EXTENDED lists the table's columns with their data types and comments.
DESCRIBE EXTENDED bmw.bmw_sales;

col_name,data_type,comment
MODEL,string,
YEAR,int,
REGION,string,
COLOR,string,
FUEL_TYPE,string,
TRANSMISSION,string,
ENGINE_SIZE_L,double,
MILEAGE_KM,int,
PRICE_USD,int,
SALES_VOLUME,int,


In [0]:
%sql
-- Time travel fundamentals: the table history has one row per table version,
-- with the operation that produced it and when it was committed.
DESCRIBE HISTORY
  bmw.bmw_sales;

version,timestamp,userId,userName,operation,operationParameters,job,notebook,clusterId,readVersion,isolationLevel,isBlindAppend,operationMetrics,userMetadata,engineInfo
0,2025-10-25T15:29:33.000Z,4355037857660891,etunon@gmail.com,CREATE TABLE AS SELECT,"Map(partitionBy -> [], clusterBy -> [], description -> null, isManaged -> true, properties -> {""delta.parquet.compression.codec"":""zstd"",""delta.enableDeletionVectors"":""true""}, statsOnLoad -> true)",,List(1084526006227216),1025-130409-mbllimed-v2n,,WriteSerializable,True,"Map(numFiles -> 1, numOutputRows -> 50000, numOutputBytes -> 509588)",,Databricks-Runtime/17.2.x-aarch64-photon-scala2.13


In [0]:
%sql
-- Three DML statements against the same demo row; each one is committed
-- as its own version in the table history.

-- 1. Insert one record into the table.
--    The target columns are listed explicitly so the insert does not depend on
--    the table's column order, and every string literal uses single quotes
--    (double quotes denote identifiers when ANSI double-quoted identifiers are enabled).
INSERT INTO workspace.bmw.bmw_sales
  (MODEL, YEAR, REGION, COLOR, FUEL_TYPE, TRANSMISSION,
   ENGINE_SIZE_L, MILEAGE_KM, PRICE_USD, SALES_VOLUME, SALES_CLASSIFICATION)
VALUES
  ('i99', 2025, 'South America', 'Pink', 'Hybrid', 'Automatic', 4.7, 11020, 10301, 2011, 'Low');

-- 2. Update the record that was just inserted.
UPDATE workspace.bmw.bmw_sales
SET YEAR = 2026
WHERE MODEL = 'i99';

-- 3. Delete the record again.
DELETE FROM workspace.bmw.bmw_sales
WHERE MODEL = 'i99';



num_affected_rows
1


In [0]:
%sql
-- Check the table history again: the insert, update and delete each appear as a new version.
DESCRIBE HISTORY bmw.bmw_sales;

version,timestamp,userId,userName,operation,operationParameters,job,notebook,clusterId,readVersion,isolationLevel,isBlindAppend,operationMetrics,userMetadata,engineInfo
3,2025-10-25T17:18:16.000Z,4355037857660891,etunon@gmail.com,DELETE,"Map(predicate -> [""(MODEL#11885 = i99)""])",,List(1084526006227216),1025-171639-wl9xvpgi-v2n,2.0,WriteSerializable,False,"Map(numRemovedFiles -> 1, numRemovedBytes -> 2773, numCopiedRows -> 0, numDeletionVectorsAdded -> 0, numDeletionVectorsRemoved -> 0, numAddedChangeFiles -> 0, executionTimeMs -> 1199, numDeletionVectorsUpdated -> 0, numDeletedRows -> 1, scanTimeMs -> 1064, numAddedFiles -> 0, numAddedBytes -> 0, rewriteTimeMs -> 134)",,Databricks-Runtime/17.2.x-aarch64-photon-scala2.13
2,2025-10-25T17:18:14.000Z,4355037857660891,etunon@gmail.com,UPDATE,"Map(predicate -> [""(MODEL#11336 = i99)""])",,List(1084526006227216),1025-171639-wl9xvpgi-v2n,1.0,WriteSerializable,False,"Map(numRemovedFiles -> 1, numRemovedBytes -> 2704, numCopiedRows -> 0, numDeletionVectorsAdded -> 0, numDeletionVectorsRemoved -> 0, numAddedChangeFiles -> 0, executionTimeMs -> 4837, numDeletionVectorsUpdated -> 0, scanTimeMs -> 2803, numAddedFiles -> 1, numUpdatedRows -> 1, numAddedBytes -> 2773, rewriteTimeMs -> 2006)",,Databricks-Runtime/17.2.x-aarch64-photon-scala2.13
1,2025-10-25T17:18:07.000Z,4355037857660891,etunon@gmail.com,WRITE,"Map(mode -> Append, statsOnLoad -> true, partitionBy -> [])",,List(1084526006227216),1025-171639-wl9xvpgi-v2n,0.0,WriteSerializable,True,"Map(numFiles -> 1, numOutputRows -> 1, numOutputBytes -> 2704)",,Databricks-Runtime/17.2.x-aarch64-photon-scala2.13
0,2025-10-25T15:29:33.000Z,4355037857660891,etunon@gmail.com,CREATE TABLE AS SELECT,"Map(partitionBy -> [], clusterBy -> [], description -> null, isManaged -> true, properties -> {""delta.parquet.compression.codec"":""zstd"",""delta.enableDeletionVectors"":""true""}, statsOnLoad -> true)",,List(1084526006227216),1025-130409-mbllimed-v2n,,WriteSerializable,True,"Map(numFiles -> 1, numOutputRows -> 50000, numOutputBytes -> 509588)",,Databricks-Runtime/17.2.x-aarch64-photon-scala2.13


In [0]:
%sql
-- Time travel: query the table as it was at version 2, i.e. prior to the delete operation,
-- so the demo row is still visible.
SELECT *
FROM bmw.bmw_sales VERSION AS OF 2
WHERE MODEL = 'i99';

-- Alternate syntax
-- SELECT * FROM bmw.bmw_sales@v2

MODEL,YEAR,REGION,COLOR,FUEL_TYPE,TRANSMISSION,ENGINE_SIZE_L,MILEAGE_KM,PRICE_USD,SALES_VOLUME,SALES_CLASSIFICATION
i99,2026,South America,Pink,Hybrid,Automatic,4.7,11020,10301,2011,Low
