In [None]:

# Where the source file lives on DBFS and which reader format to use for it.
file_location = "/FileStore/tables/salary-2.csv"
file_type = "csv"

# Reader settings for CSV input: schema inference, header row, field separator.
infer_schema = "true"
first_row_is_header = "true"
delimiter = ","

# These options only matter for CSV; other file types ignore them.
df = (
    spark.read.format(file_type)
    .options(inferSchema=infer_schema, header=first_row_is_header, sep=delimiter)
    .load(file_location)
)

display(df)

name,id,age,department,salary
user1,1,25,Jr manager,98000
user2,2,30,sr manager,100000
user3,6,35,sr manager,100000
user4,4,32,head,70000
user5,1,45,Jr manager,60000
user6,6,47,head2,45000
user7,5,21,worker,25000
user8,1,22,Jr manager,50000
user9,10,54,lead,45000
user10,59,52,lead2,50000


In [None]:
# Register the DataFrame as a temporary view so it can be queried by name from SQL cells.
temp_table_name = "salary2"
df.createOrReplaceTempView(temp_table_name)

In [None]:
%sql
-- Return every row of the salary2 temp view.
SELECT * FROM salary2

name,id,age,department,salary
user1,1,25,Jr manager,98000
user2,2,30,sr manager,100000
user3,6,35,sr manager,100000
user4,4,32,head,70000
user5,1,45,Jr manager,60000
user6,6,47,head2,45000
user7,5,21,worker,25000
user8,1,22,Jr manager,50000
user9,10,54,lead,45000
user10,59,52,lead2,50000


In [None]:
# A temp view is only available to this particular notebook. Saving the DataFrame as a table
# instead lets it persist across cluster restarts and allows other users and notebooks to query it.

permanent_table_name = "salary_2_csv"

# Overwrite mode keeps this cell re-runnable: with the default save mode the write
# raises an error as soon as the table already exists (i.e. on the second run).
df.write.format("parquet").mode("overwrite").saveAsTable(permanent_table_name)

In [None]:
# Import only the name this notebook uses instead of a wildcard import.
from delta.tables import DeltaTable

# Define the "salary" Delta table with the same five columns as the CSV.
# createIfNotExists (rather than create) lets the cell be re-run without
# failing because the table is already there.
(
    DeltaTable.createIfNotExists(spark)
    .tableName("salary")
    .addColumn("name", "STRING")
    .addColumn("id", "INT")
    .addColumn("age", "INT")
    .addColumn("department", "STRING")
    .addColumn("salary", "INT")
    .execute()
)
    

<delta.tables.DeltaTable at 0x7fd510272b00>

In [None]:
# Append the DataFrame's rows to the existing "salary" table (existing rows are kept).
df.write.mode("append").insertInto("salary")

In [None]:
# Show the current contents of the "salary" table.
display(spark.read.table("salary"))

name,id,age,department,salary
user1,1,25,Jr manager,98000
user2,2,30,sr manager,100000
user3,6,35,sr manager,100000
user4,4,32,head,70000
user5,1,45,Jr manager,60000
user6,6,47,head2,45000
user7,5,21,worker,25000
user8,1,22,Jr manager,50000
user9,10,54,lead,45000
user10,59,52,lead2,50000


In [None]:
# Append the DataFrame as Parquet files under the target directory.
df.write.mode("append").parquet("/FileStore/tables/parquet/")

In [None]:
# List the files written to the Parquet output directory.
display(dbutils.fs.ls("/FileStore/tables/parquet"))

path,name,size,modificationTime
dbfs:/FileStore/tables/parquet/_SUCCESS,_SUCCESS,0,1704474097000
dbfs:/FileStore/tables/parquet/_committed_1919700065082306584,_committed_1919700065082306584,123,1704474097000
dbfs:/FileStore/tables/parquet/_started_1919700065082306584,_started_1919700065082306584,0,1704474097000
dbfs:/FileStore/tables/parquet/part-00000-tid-1919700065082306584-8b847268-343e-4867-9dcf-6066ff0640c0-65-1-c000.snappy.parquet,part-00000-tid-1919700065082306584-8b847268-343e-4867-9dcf-6066ff0640c0-65-1-c000.snappy.parquet,1972,1704474097000


In [None]:
# Parquet is a binary format, so it has to be read with the Parquet reader; the text
# reader only renders undecodable bytes. Reading the output directory instead of one
# hard-coded part file also avoids depending on a file name generated by a single run.
display(spark.read.parquet("/FileStore/tables/parquet/"))

value
PAR1����슖 (����8���(���user1	�2	�3	�4	�5	�6	�7	�8	9Q�0
�1
�2
�3
�4
�5
�6
�7
�8
(9���user20PT����<��(����������������


In [None]:
# Save the DataFrame in Delta format at the target path, replacing whatever is there.
(
    df.write
    .mode("overwrite")
    .format("delta")
    .save("/FileStore/tables/target/delta")
)


In [None]:
# List the transaction log files of the Delta table written above.
display(dbutils.fs.ls("dbfs:/FileStore/tables/target/delta/_delta_log"))

path,name,size,modificationTime
dbfs:/FileStore/tables/target/delta/_delta_log/.s3-optimization-0,.s3-optimization-0,0,1704474642000
dbfs:/FileStore/tables/target/delta/_delta_log/.s3-optimization-1,.s3-optimization-1,0,1704474642000
dbfs:/FileStore/tables/target/delta/_delta_log/.s3-optimization-2,.s3-optimization-2,0,1704474642000
dbfs:/FileStore/tables/target/delta/_delta_log/00000000000000000000.crc,00000000000000000000.crc,2840,1704474647000
dbfs:/FileStore/tables/target/delta/_delta_log/00000000000000000000.json,00000000000000000000.json,1864,1704474642000


In [None]:
# Show the first Delta commit file as raw text.
display(
    spark.read.format("text").load(
        "dbfs:/FileStore/tables/target/delta/_delta_log/00000000000000000000.json"
    )
)

value
"{""commitInfo"":{""timestamp"":1704474641891,""userId"":""5220436575086802"",""userName"":""abhirambasa@gmail.com"",""operation"":""WRITE"",""operationParameters"":{""mode"":""Overwrite"",""statsOnLoad"":false,""partitionBy"":""[]""},""notebook"":{""notebookId"":""3346343113817821""},""clusterId"":""0105-162149-saxc1bh3"",""isolationLevel"":""WriteSerializable"",""isBlindAppend"":false,""operationMetrics"":{""numFiles"":""1"",""numOutputRows"":""20"",""numOutputBytes"":""1972""},""tags"":{""restoresDeletedRows"":""false""},""engineInfo"":""Databricks-Runtime/14.2.x-scala2.12"",""txnId"":""4859f97f-cdd4-4f8b-a07f-4d24222b7ad6""}}"
"{""metaData"":{""id"":""7e5abd0b-9389-4828-8031-5a88a4d71c0b"",""format"":{""provider"":""parquet"",""options"":{}},""schemaString"":""{\""type\"":\""struct\"",\""fields\"":[{\""name\"":\""name\"",\""type\"":\""string\"",\""nullable\"":true,\""metadata\"":{}},{\""name\"":\""id\"",\""type\"":\""integer\"",\""nullable\"":true,\""metadata\"":{}},{\""name\"":\""age\"",\""type\"":\""integer\"",\""nullable\"":true,\""metadata\"":{}},{\""name\"":\""department\"",\""type\"":\""string\"",\""nullable\"":true,\""metadata\"":{}},{\""name\"":\""salary\"",\""type\"":\""integer\"",\""nullable\"":true,\""metadata\"":{}}]}"",""partitionColumns"":[],""configuration"":{},""createdTime"":1704474640015}}"
"{""protocol"":{""minReaderVersion"":1,""minWriterVersion"":2}}"
"{""add"":{""path"":""part-00000-29ba884d-1afc-40ad-98a5-03eca0a92eed-c000.snappy.parquet"",""partitionValues"":{},""size"":1972,""modificationTime"":1704474641000,""dataChange"":true,""stats"":""{\""numRecords\"":20,\""minValues\"":{\""name\"":\""user1\"",\""id\"":1,\""age\"":21,\""department\"":\""Jr manager\"",\""salary\"":25000},\""maxValues\"":{\""name\"":\""user9\"",\""id\"":74,\""age\"":63,\""department\"":\""worker\"",\""salary\"":100000},\""nullCount\"":{\""name\"":0,\""id\"":0,\""age\"":0,\""department\"":0,\""salary\"":0}}"",""tags"":{""INSERTION_TIME"":""1704474641000000"",""MIN_INSERTION_TIME"":""1704474641000000"",""MAX_INSERTION_TIME"":""1704474641000000"",""OPTIMIZE_TARGET_SIZE"":""268435456""}}}"


In [None]:
%sql
-- Query the Delta data directly by its storage path.
SELECT * FROM delta.`/FileStore/tables/target/delta/`

name,id,age,department,salary
user1,1,25,Jr manager,98000
user2,2,30,sr manager,100000
user3,6,35,sr manager,100000
user4,4,32,head,70000
user5,1,45,Jr manager,60000
user6,6,47,head2,45000
user7,5,21,worker,25000
user8,1,22,Jr manager,50000
user9,10,54,lead,45000
user10,59,52,lead2,50000


In [None]:
%sql
-- IF NOT EXISTS keeps this cell re-runnable; a plain CREATE DATABASE fails once emp exists.
CREATE DATABASE IF NOT EXISTS emp


In [None]:
%sql
-- Make emp the current database for the unqualified table names used below.
USE emp

In [None]:
# Define the single-column Delta table "emp10".
# createIfNotExists (rather than create) lets the cell be re-run without
# failing because the table is already there.
(
    DeltaTable.createIfNotExists(spark)
    .tableName("emp10")
    .addColumn("name", "STRING")
    .execute()
)

<delta.tables.DeltaTable at 0x7fd4fe185bd0>

In [None]:
# Show the current contents of emp10.
display(spark.read.table("emp10"))

name
abhiram
ram


In [None]:
%sql
-- Add two rows to emp10.
INSERT INTO emp10 VALUES ('abhi'), ('ram')

num_affected_rows,num_inserted_rows
2,2


In [None]:
# Define the single-column Delta table "emp11".
# createIfNotExists (rather than create) lets the cell be re-run without
# failing because the table is already there.
(
    DeltaTable.createIfNotExists(spark)
    .tableName("emp11")
    .addColumn("name", "STRING")
    .execute()
)

<delta.tables.DeltaTable at 0x7fd4fd7220b0>

In [None]:
%sql
-- Add two rows to emp11.
INSERT INTO emp11 VALUES ('abhi1'), ('ram1')

num_affected_rows,num_inserted_rows
2,2


In [None]:
# Show the current contents of emp11.
display(spark.read.table("emp11"))

name
abhi1
ram1


In [None]:
%sql
-- Change the name 'abhi' to 'abhiram' in emp10.
UPDATE emp10 SET name = 'abhiram' WHERE name = 'abhi'

num_affected_rows
1
