In [1]:
import pandas
import deltaflow

In [2]:
# Build the demo dataset: five market leaders, one record per symbol.
# Each record is (symbol, last price, change, rank).
leader_columns = ['symbol', 'last', 'change', 'rank']
leader_rows = [
    ('REGN', 496.5, 8.34, 1),
    ('INCY', 74.18, 0.95, 4),
    ('TMUS', 85.13, 1.23, 3),
    ('VRSK', 140.78, 1.4, 2),
    ('JD', 40.91, 0.41, 5),
]
market_leaders = pandas.DataFrame(leader_rows, columns=leader_columns)
market_leaders

Unnamed: 0,symbol,last,change,rank
0,REGN,496.5,8.34,1
1,INCY,74.18,0.95,4
2,TMUS,85.13,1.23,3
3,VRSK,140.78,1.4,2
4,JD,40.91,0.41,5


In [3]:
# Create a new DeltaFlow field in the current working directory
deltaflow.touch()
# Load the field as a Field object
field = deltaflow.Field()
# Display the field; its repr shows the directory it is bound to
field

deltaflow.Field('C:\Users\Someone\Documents\SomeFolder')

In [4]:
# Add the dataset to the field as an origin named 'leaders'
# (this saves the data to the field directory)
field.add_origin(market_leaders, name='leaders')
# The field tree now shows a node ID referencing the origin
field.tree

leaders
|- 7edea99c4bea7975b65ceec129d397278f0965cf

In [5]:
# List the field's arrows: each arrow name is shown with the node ID it points to
field.tree.arrows

ARROWS: {
  .leaders -> 7edea99c4bea7975b65ceec129d397278f0965cf
}

In [6]:
# Load the master arrow (its name is always the origin name prefixed with '.')
arrow = field.arrow('.leaders')
# Display the arrow; its repr shows the node ID it currently points to
arrow

.leaders -> 7edea99c4bea7975b65ceec129d397278f0965cf

In [7]:
# Load a proxy pandas DataFrame for making changes
proxy = arrow.proxy()
proxy

Unnamed: 0,symbol,last,change,rank
0,REGN,496.5,8.34,1
1,INCY,74.18,0.95,4
2,TMUS,85.13,1.23,3
3,VRSK,140.78,1.4,2
4,JD,40.91,0.41,5


In [8]:
# Using standard pandas methods in a single chain: make the 'rank' column
# the index, then replace the 'symbol' column with its lowercase form
proxy = (
    proxy
    .set_index('rank')
    .assign(symbol=lambda frame: frame['symbol'].str.lower())
)
proxy

Unnamed: 0_level_0,symbol,last,change
rank,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,regn,496.5,8.34
4,incy,74.18,0.95
3,tmus,85.13,1.23
2,vrsk,140.78,1.4
5,jd,40.91,0.41


In [9]:
# Use the proxy to set the index (axis=0, i.e. the row labels) of the arrow's
# stage dataset. In the result displayed below, the values are untouched: the
# symbols are still uppercase and the 'rank' column is still present.
arrow.relabel(proxy, axis=0)

Unnamed: 0_level_0,symbol,last,change,rank
rank,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,REGN,496.5,8.34,1
4,INCY,74.18,0.95,4
3,TMUS,85.13,1.23,3
2,VRSK,140.78,1.4,2
5,JD,40.91,0.41,5


In [10]:
# Inspect the arrow's stage: the list of operations staged so far
arrow.stage

[
  RELABEL ROWS,
]

In [11]:
# Drop the 'rank' column (axis=1) from the arrow's stage dataset
arrow.drop('rank', axis=1)
# Update the arrow's stage dataset with the proxy (changed values will be detected)
arrow.put(proxy)

Unnamed: 0_level_0,symbol,last,change
rank,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,regn,496.5,8.34
4,incy,74.18,0.95
3,tmus,85.13,1.23
2,vrsk,140.78,1.4
5,jd,40.91,0.41


In [12]:
# The stage now lists the relabel, drop and put operations, in the order they were staged
arrow.stage

[
  RELABEL ROWS,
  DROP 1 COLUMN(S),
  PUT 5 VALUES,
]

In [13]:
# Staged operations can also be undone; here the most recent one (the PUT) is
# reverted, so the displayed symbols are uppercase again
arrow.undo()

Unnamed: 0_level_0,symbol,last,change
rank,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,REGN,496.5,8.34
4,INCY,74.18,0.95
3,TMUS,85.13,1.23
2,VRSK,140.78,1.4
5,JD,40.91,0.41


In [14]:
# Confirm that the PUT operation is no longer staged
arrow.stage

[
  RELABEL ROWS,
  DROP 1 COLUMN(S),
]

In [15]:
# Commit the staged changes (writes a delta object to the field).
#     Notice that the '.leaders' arrow now points to a new node ID.
arrow.commit()

.leaders -> ed0e6e5ec5567f1d31b7928082bb5a3b6cf7c33b


In [16]:
# Now the tree shows two node IDs (one a child of the other)
field.tree

leaders
|- 7edea99c4bea7975b65ceec129d397278f0965cf
|  |- ed0e6e5ec5567f1d31b7928082bb5a3b6cf7c33b

In [17]:
# Create a new arrow named 'myarrow' that references the origin node
arrow = field.add_arrow('7edea99c4bea7975b65ceec129d397278f0965cf', name='myarrow')
# print() is used so that both the arrows and the tree are shown from this one cell
print(field.tree.arrows)
print(field.tree)

ARROWS: {
  .leaders -> ed0e6e5ec5567f1d31b7928082bb5a3b6cf7c33b
  myarrow -> 7edea99c4bea7975b65ceec129d397278f0965cf
}
leaders
|- 7edea99c4bea7975b65ceec129d397278f0965cf
|  |- ed0e6e5ec5567f1d31b7928082bb5a3b6cf7c33b



In [18]:
# Loading 'myarrow' shows the original dataset, unaltered by the commit made
# through the '.leaders' arrow
arrow = field.arrow('myarrow')
proxy = arrow.proxy()
proxy

Unnamed: 0,symbol,last,change,rank
0,REGN,496.5,8.34,1
1,INCY,74.18,0.95,4
2,TMUS,85.13,1.23,3
3,VRSK,140.78,1.4,2
4,JD,40.91,0.41,5


In [19]:
# Stage some new changes: build a 'buy/sell' column and add it to the proxy
buy_sell = pandas.Series(['BUY', 'SELL', 'SELL', 'BUY', 'SELL'])
# Position 4 places the new column after the four existing columns.
# NOTE: DataFrame.insert mutates the proxy in place, so re-running this cell
# on the same proxy raises because 'buy/sell' already exists.
proxy.insert(4, 'buy/sell', buy_sell)
# Extend the arrow's stage dataset along the columns (axis=1) using the proxy
arrow.extend(proxy, axis=1)
# Display the arrow's dataset, which now includes the new column
arrow.proxy()

Unnamed: 0,symbol,last,change,rank,buy/sell
0,REGN,496.5,8.34,1,BUY
1,INCY,74.18,0.95,4,SELL
2,TMUS,85.13,1.23,3,SELL
3,VRSK,140.78,1.4,2,BUY
4,JD,40.91,0.41,5,SELL


In [20]:
# The stage lists a single operation: the one-column extension
arrow.stage

[
  EXTEND COLUMNS BY 1,
]

In [21]:
# Commit the staged extension, then check the resulting tree structure:
# the new node appears as a second child of the origin node
arrow.commit()
field.tree

myarrow -> ef364edc0b4094e11483a7939b747c0291233405


leaders
|- 7edea99c4bea7975b65ceec129d397278f0965cf
|  |- ed0e6e5ec5567f1d31b7928082bb5a3b6cf7c33b
|  |- ef364edc0b4094e11483a7939b747c0291233405

In [22]:
# Each arrow now points to the node created by its own commit
field.tree.arrows

ARROWS: {
  .leaders -> ed0e6e5ec5567f1d31b7928082bb5a3b6cf7c33b
  myarrow -> ef364edc0b4094e11483a7939b747c0291233405
}

In [23]:
# Load a node by its ID for inspection (this is the node 'myarrow' points to)
node = field.tree.node('ef364edc0b4094e11483a7939b747c0291233405')
node

NODE[ef364edc0b4094e11483a7939b747c0291233405]: {
  type: delta
  origin: 3ebca8d88a8673a1e3d01f583743bbb54aeaa617
  lineage: [7edea99c4bea7975b65ceec129d397278f0965cf, ...] (1)
}

In [24]:
# Inspect the block content of the node's delta file
node.delta

BLOCKS: {
  [1] EXTEND: 1 columns(s)
}

In [25]:
# Individual blocks can be loaded by indexing the delta; here the result is
# the data of the added 'buy/sell' column.
# NOTE(review): presumably [1] selects block 1 and [0] its first item — confirm
# against the DeltaFlow documentation.
node.delta[1][0]

Unnamed: 0,buy/sell
0,BUY
1,SELL
2,SELL
3,BUY
4,SELL


In [26]:
# The '.leaders' (master) arrow is quickly reconstructed; it reflects only the
# committed relabel and drop (the undone PUT is absent, so symbols stay uppercase)
arrow = field.arrow('.leaders')
arrow.proxy()

Unnamed: 0_level_0,symbol,last,change
rank,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,REGN,496.5,8.34
4,INCY,74.18,0.95
3,TMUS,85.13,1.23
2,VRSK,140.78,1.4
5,JD,40.91,0.41


In [27]:
# 'myarrow' no longer points to the origin: since its commit it points to the
# new delta node
arrow = field.arrow('myarrow')
proxy = arrow.proxy()
# The proxy therefore includes the committed 'buy/sell' column; the four
# original columns are shown with their original values
proxy

Unnamed: 0,symbol,last,change,rank,buy/sell
0,REGN,496.5,8.34,1,BUY
1,INCY,74.18,0.95,4,SELL
2,TMUS,85.13,1.23,3,SELL
3,VRSK,140.78,1.4,2,BUY
4,JD,40.91,0.41,5,SELL
