In [1]:
import pandas

import deltaflow
from deltaflow.delta import Delta
from deltaflow.node import translate_make

In [2]:
# Load dataset of 5 market leaders, written out one row per company
leader_columns = ['symbol', 'last', 'change', 'rank']
leader_rows = [
    ('REGN', 496.5, 8.34, 1),
    ('INCY', 74.18, 0.95, 4),
    ('TMUS', 85.13, 1.23, 3),
    ('VRSK', 140.78, 1.4, 2),
    ('JD', 40.91, 0.41, 5),
]
market_leaders = pandas.DataFrame(leader_rows, columns=leader_columns)
market_leaders

Unnamed: 0,symbol,last,change,rank
0,REGN,496.5,8.34,1
1,INCY,74.18,0.95,4
2,TMUS,85.13,1.23,3
3,VRSK,140.78,1.4,2
4,JD,40.91,0.41,5


In [3]:
# Create a new DeltaFlow field in the current working directory
deltaflow.touch()
# Load the field as a Field object (constructed with no arguments here);
# its repr in the output shows the directory the field is bound to
field = deltaflow.Field()
field

deltaflow.Field('C:\Users\DeltaFlow\Documents\MyFolder')

In [4]:
# Add the dataset to the field as an origin named 'leaders'
# (saves the data to the field directory)
field.add_origin(market_leaders, name='leaders')
# The field tree now shows a node ID listed under the origin name
field.tree

leaders
|- 21e694b44afa05a89922be03fa3291ab24153d1d

In [5]:
# List the field's arrows; each entry maps an arrow name to the node ID it
# points at (so far only '.leaders', pointing at the origin node)
field.tree.arrows

ARROWS: {
  .leaders -> 21e694b44afa05a89922be03fa3291ab24153d1d
}

In [6]:
# Load the master arrow (always the origin name prefixed with '.')
arrow = field.arrow('.leaders')
# Displaying the arrow shows its name and the node ID it currently references
arrow

.leaders -> 21e694b44afa05a89922be03fa3291ab24153d1d

In [7]:
# Load a proxy pandas DataFrame for making changes
# (at this point it shows the same rows as the original dataset)
proxy = arrow.proxy()
proxy

Unnamed: 0,symbol,last,change,rank
0,REGN,496.5,8.34,1
1,INCY,74.18,0.95,4
2,TMUS,85.13,1.23,3
3,VRSK,140.78,1.4,2
4,JD,40.91,0.41,5


In [8]:
# Re-index the proxy by its 'rank' column and lowercase the 'symbol' column,
# all with standard pandas methods chained into a single expression
proxy = (
    proxy
    .set_index('rank')
    .assign(symbol=lambda frame: frame['symbol'].str.lower())
)
proxy

Unnamed: 0_level_0,symbol,last,change
rank,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,regn,496.5,8.34
4,incy,74.18,0.95
3,tmus,85.13,1.23
2,vrsk,140.78,1.4
5,jd,40.91,0.41


In [9]:
# Use the proxy to set the index of the arrow's stage dataset.
# As the output shows, only the index changes at this step: the 'rank'
# column is still present and the symbols are still uppercase.
arrow.set_index(proxy)

Unnamed: 0_level_0,symbol,last,change,rank
rank,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,REGN,496.5,8.34,1
4,INCY,74.18,0.95,4
3,TMUS,85.13,1.23,3
2,VRSK,140.78,1.4,2
5,JD,40.91,0.41,5


In [10]:
# Drop the 'rank' column from the arrow's stage dataset
arrow.drop('rank', axis=1)
# Update the arrow dataset with the proxy (changes will be detected);
# the output now shows the lowercase symbols taken from the proxy
arrow.update(proxy)

Unnamed: 0_level_0,symbol,last,change
rank,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,regn,496.5,8.34
4,incy,74.18,0.95
3,tmus,85.13,1.23
2,vrsk,140.78,1.4
5,jd,40.91,0.41


In [11]:
# Print the operation batch held by stack layers 1 and 2, in that order
for layer_index in (1, 2):
    print(arrow.stack.layers[layer_index].batch)

[<deltaflow.operation.DropColumns object at 0x16D3F7F0>]
[<deltaflow.operation.Update object at 0x142D0910>]


In [12]:
# Stage operations can also be undone. After undo() the output shows the
# symbols back in uppercase, while 'rank' is still the index and the
# 'rank' column is still dropped.
arrow.undo()

Unnamed: 0_level_0,symbol,last,change
rank,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,REGN,496.5,8.34
4,INCY,74.18,0.95
3,TMUS,85.13,1.23
2,VRSK,140.78,1.4
5,JD,40.91,0.41


In [13]:
# Inspect the arrow's stack of layers (the output lists two Layer objects)
arrow.stack.layers

[<deltaflow.arrow.Layer at 0x16c6a5d0>, <deltaflow.arrow.Layer at 0x16d3fed0>]

In [14]:
# Build a Delta from the arrow and print the translated form of its make()
# result (here a single dropped-column entry). Delta and translate_make are
# imported in the notebook's first cell together with the other imports.
delta = Delta(arrow)
print(translate_make(delta.make()))

['DROP 1 COLUMNS(S)']


In [15]:
# Commit changes (writes a delta object to the field)
#     notice that the '.leaders' arrow now points to a new node ID
arrow.commit()

.leaders -> 9858915fdec4c1dff6d092c57d7b70db0980dcca


In [16]:
# Now the tree will show two node IDs (one a child of the other)
field.tree

leaders
|- 21e694b44afa05a89922be03fa3291ab24153d1d
|  |- 9858915fdec4c1dff6d092c57d7b70db0980dcca

In [17]:
# Let's make a new arrow, 'myarrow', referencing the origin node by its ID
# NOTE(review): the ID is copied by hand from the earlier tree output --
# confirm it is stable across runs before relying on a full re-run
arrow = field.add_arrow('21e694b44afa05a89922be03fa3291ab24153d1d', name='myarrow')
# The arrow listing gains 'myarrow'; the tree itself is unchanged
print(field.tree.arrows)
print(field.tree)

ARROWS: {
  .leaders -> 9858915fdec4c1dff6d092c57d7b70db0980dcca
  myarrow -> 21e694b44afa05a89922be03fa3291ab24153d1d
}
leaders
|- 21e694b44afa05a89922be03fa3291ab24153d1d
|  |- 9858915fdec4c1dff6d092c57d7b70db0980dcca



In [18]:
# By loading 'myarrow', we see the original dataset, unaltered
# (note that `arrow` and `proxy` are rebound here, replacing the
# '.leaders' objects used in the cells above)
arrow = field.arrow('myarrow')
proxy = arrow.proxy()
proxy

Unnamed: 0,symbol,last,change,rank
0,REGN,496.5,8.34,1
1,INCY,74.18,0.95,4
2,TMUS,85.13,1.23,3
3,VRSK,140.78,1.4,2
4,JD,40.91,0.41,5


In [19]:
# Stage some new changes: add a 'buy/sell' column to the proxy
buy_sell = pandas.Series(['BUY', 'SELL', 'SELL', 'BUY', 'SELL'])
# Plain column assignment appends the new column after the existing ones
# without a hard-coded position and, unlike DataFrame.insert, does not raise
# ValueError when the column already exists (e.g. if this cell is re-run)
proxy['buy/sell'] = buy_sell
# Hand the widened proxy to the arrow, then display a fresh proxy to confirm
# the new column is part of the arrow's dataset
arrow.extend(proxy)
arrow.proxy()

Unnamed: 0,symbol,last,change,rank,buy/sell
0,REGN,496.5,8.34,1,BUY
1,INCY,74.18,0.95,4,SELL
2,TMUS,85.13,1.23,3,SELL
3,VRSK,140.78,1.4,2,BUY
4,JD,40.91,0.41,5,SELL


In [20]:
# Commit and check the resultant tree structure
arrow.commit()
# The output shows the arrow line twice -- presumably once emitted by
# commit() itself and once by this print (TODO confirm)
print(arrow)
# The new node appears as a second child of the origin node
field.tree

myarrow -> 0317403a6b408037a36935e2d048adf978a7d2a9
myarrow -> 0317403a6b408037a36935e2d048adf978a7d2a9


leaders
|- 21e694b44afa05a89922be03fa3291ab24153d1d
|  |- 9858915fdec4c1dff6d092c57d7b70db0980dcca
|  |- 0317403a6b408037a36935e2d048adf978a7d2a9