In [1]:
from deltalake import DeltaTable
from deltalake.writer import write_deltalake
import os
import json
import pandas as pd


In [2]:
# Location of the Delta table, given as a path relative to the working directory.
delta_table_path = "deltaTable/"

# Table handle reused by the later cells for reading, writing and inspection.
dt = DeltaTable(delta_table_path)

In [3]:
# Read the whole Delta table into a pandas DataFrame and display it.
# The bare last expression is what the notebook renders as the cell output.
dt.to_pandas()

Unnamed: 0,Name,Age,Education
0,Tom,20,Teacher
1,Joseph,21,Engineer
2,Krish,19,Nurse
3,John,18,Student
4,Kalpan,30,Engineering


In [4]:
# Read only the Name and Education columns from the Delta table;
# the Age column is left out of the resulting DataFrame.
dt.to_pandas(columns=["Name", "Education"])

Unnamed: 0,Name,Education
0,Tom,Teacher
1,Joseph,Engineer
2,Krish,Nurse
3,John,Student
4,Kalpan,Engineering


In [5]:
# Append one new row to the Delta table.
# Every column is given as a one-element list so the frame has exactly one row.
# NOTE: re-running this cell appends the same row again (the write is not idempotent).
df = pd.DataFrame(
    {
        "Name": ["Nik"],
        "Age": [31],
        "Education": ["Engineering"],
    }
)
write_deltalake(dt, df, mode="append")

In [6]:
# Re-read the table through the same `dt` handle to confirm that the
# appended row (Nik) is now part of the data.
dt.to_pandas()

Unnamed: 0,Name,Age,Education
0,Tom,20,Teacher
1,Joseph,21,Engineer
2,Krish,19,Nurse
3,John,18,Student
4,Kalpan,30,Engineering
5,Nik,31,Engineering


In [7]:
# Show the table's commit history: one entry per write, including the
# operation, its parameters and the client/engine that produced it.
dt.history()

[{'timestamp': 1684636757418,
  'operation': 'WRITE',
  'operationParameters': {'mode': 'Append', 'partitionBy': '[]'},
  'isolationLevel': 'Serializable',
  'isBlindAppend': True,
  'engineInfo': 'Apache-Spark/3.2.1 Delta-Lake/2.0.0',
  'operationMetrics': {'numFiles': '2',
   'numOutputBytes': '1964',
   'numOutputRows': '4'},
  'txnId': 'b0ba13a1-5f81-41a1-8690-fdf649209f5c'},
 {'timestamp': 1684645048439,
  'operation': 'WRITE',
  'operationParameters': {'partitionBy': '[]', 'mode': 'Append'},
  'clientVersion': 'delta-rs.0.10.0'},
 {'timestamp': 1684645413576,
  'operation': 'WRITE',
  'operationParameters': {'mode': 'Append', 'partitionBy': '[]'},
  'clientVersion': 'delta-rs.0.10.0'}]

In [8]:
# Show the Delta table schema (field names, types, nullability) in its JSON form.
dt.schema().json()

{'type': 'struct',
 'fields': [{'name': 'Name',
   'type': 'string',
   'nullable': True,
   'metadata': {}},
  {'name': 'Age', 'type': 'long', 'nullable': True, 'metadata': {}},
  {'name': 'Education', 'type': 'string', 'nullable': True, 'metadata': {}}]}

In [9]:
# List the Parquet data files that make up the currently loaded table version.
dt.files()

['part-00000-19db2b24-c6ed-4057-bf33-c83e973d9e4e-c000.snappy.parquet',
 'part-00001-58501cbb-c436-4b8a-bb36-caaa117e0f65-c000.snappy.parquet',
 '1-2354e3ea-0b5b-4112-811d-4ad37b622074-0.parquet',
 '2-898b32ca-c8da-4e23-b30d-1666b72f71c9-0.parquet']

In [10]:
# Show the version number that `dt` currently points at
# (versions are numbered from 0, one per commit).
dt.version()

2

In [11]:
# Time travel: point `dt` at version 0 of the table.
# This repoints the existing `dt` object, so every later read uses
# version 0 until another version is loaded.
dt.load_version(0)

In [12]:
# Read data from the specific version currently loaded into `dt`
# (version 0 here: the table as it was before any appends).
dt.to_pandas()

Unnamed: 0,Name,Age,Education
0,Tom,20,Teacher
1,Joseph,21,Engineer
2,Krish,19,Nurse
3,John,18,Student


In [14]:
# Time travel: point `dt` at version 1 of the table (state after the first append).
# This repoints the existing `dt` object for all later reads.
dt.load_version(1)

In [15]:
# Read data from the specific version currently loaded into `dt`
# (version 1 here: the original rows plus the first appended row).
dt.to_pandas()

Unnamed: 0,Name,Age,Education
0,Tom,20,Teacher
1,Joseph,21,Engineer
2,Krish,19,Nurse
3,John,18,Student
4,Kalpan,30,Engineering


In [16]:
# Time travel: point `dt` at version 2, the newest version reported by
# dt.version() earlier in this notebook.
dt.load_version(2)
# Read data from the specific version just loaded; it includes both appended rows.
dt.to_pandas()

Unnamed: 0,Name,Age,Education
0,Tom,20,Teacher
1,Joseph,21,Engineer
2,Krish,19,Nurse
3,John,18,Student
4,Kalpan,30,Engineering
5,Nik,31,Engineering


In [17]:
# Vacuum the table in dry-run mode.
# DeltaTable.vacuum() defaults to dry_run=True, so this call deletes nothing:
# it only returns the list of unreferenced files that a real vacuum would remove
# (an empty list means there is nothing to clean up).
# To actually delete the files, call dt.vacuum(dry_run=False); files newer than
# the retention period are kept unless the retention check is relaxed.
dt.vacuum(dry_run=True)

[]