In [1]:
# Create a new, empty archive. Every snapshot in this notebook is committed to
# (and later checked out from) this one object.
# NOTE(review): the constructor is called without arguments; presumably this
# yields a non-persistent, in-memory archive -- confirm against the histore docs.
import histore as hs

archive = hs.Archive()

In [2]:
import pandas as pd

# Version 0: the initial data set.
# No explicit index is given, so pandas assigns the default labels 0..3.
# The last row is (deliberately) a second 'Alice'.
df = pd.DataFrame(
    {
        'Name': ['Alice', 'Bob', 'Claire', 'Alice'],
        'Age': [32, 45, 27, 23],
    }
)

# Store the frame as the first snapshot and show the snapshot descriptor.
s = archive.commit(df, description='First snapshot')
print(s)

<Snapshot (version=0 description='First snapshot' at=2020-05-03 13:46:57.677924-04:00)>


In [3]:
# Read the snapshot with version number 0 back from the archive and print it,
# to confirm it matches the data frame that was committed.
df = archive.checkout(0)
print(df)

     Name  Age
0   Alice   32
1     Bob   45
2  Claire   27
3   Alice   23


In [4]:
# Version 1: the second 'Alice' (last row) becomes 'Dave', and Bob is now 44.
# The index labels 0..3 are spelled out so that every row carries the same
# label it had in the previous snapshot.
df = pd.DataFrame(
    {
        'Name': ['Alice', 'Bob', 'Claire', 'Dave'],
        'Age': [32, 44, 27, 23],
    },
    index=[0, 1, 2, 3],
)

# Commit the modified frame as a new snapshot and show its descriptor.
s = archive.commit(df, description='Now with Dave and Bob is 44')
print(s)

<Snapshot (version=1 description='Now with Dave and Bob is 44' at=2020-05-03 13:46:57.741522-04:00)>


In [5]:
# Read the snapshot with version number 1 back from the archive and print it,
# to confirm that the rename to 'Dave' and Bob's new age were recorded.
df = archive.checkout(1)
print(df)

     Name  Age
0   Alice   32
1     Bob   44
2  Claire   27
3    Dave   23


In [6]:
# Version 2: same rows in reverse order, and Dave's age changes to 33.
#
# Important: the index labels must be reversed together with the rows, so that
# each row keeps the label it had in the earlier snapshots.
df = pd.DataFrame(
    {
        'Name': ['Dave', 'Claire', 'Bob', 'Alice'],
        'Age': [33, 27, 44, 32],
    },
    index=[3, 2, 1, 0],
)

# Commit the reordered frame as a new snapshot and show its descriptor.
s = archive.commit(df, description='Reverse order and Dave is 33')
print(s)

<Snapshot (version=2 description='Reverse order and Dave is 33' at=2020-05-03 13:46:57.783390-04:00)>


In [7]:
# Version 3: Dave is dropped, Eve is added, and Alice moves back to the front.
#
# Existing rows keep their labels (Alice=0, Claire=2, Bob=1). Eve is new and
# has no label yet, so her index entry is None; the checkouts of this version
# show her with label 4.
df = pd.DataFrame(
    {
        'Name': ['Alice', 'Eve', 'Claire', 'Bob'],
        'Age': [32, 25, 27, 44],
    },
    index=[0, None, 2, 1],
)

# Commit the frame as a new snapshot and show its descriptor.
s = archive.commit(df, description='Now with Eve and Alice is first')
print(s)

<Snapshot (version=3 description='Now with Eve and Alice is first' at=2020-05-03 13:46:57.805425-04:00)>


In [8]:
# Version 4: partial update -- only Alice changes (she is now 31).
#
# The frame below holds just Alice's row, labelled with her row id 0.
df = pd.DataFrame(
    {
        'Name': ['Alice'],
        'Age': [31],
    },
    index=[0],
)

# Alternatively, the merged frame could carry only Alice's age (plus her row id):
# pd.DataFrame({'Age': [31]}, index=[0])

# partial=True merges the given rows into the latest snapshot; rows that are
# not part of the frame are carried over unchanged (see the version 4 checkout).
s = archive.commit(df, description='Alice is 31', partial=True)
print(s)

<Snapshot (version=4 description='Alice is 31' at=2020-05-03 13:46:57.829442-04:00)>


In [9]:
# Version 5: Dave returns, now aged 34.
#
# Dave is appended under his original label 3, and Eve is listed under
# label 4, the label she carries in the checkouts of versions 3 and 4.
df = pd.DataFrame(
    {
        'Name': ['Alice', 'Eve', 'Claire', 'Bob', 'Dave'],
        'Age': [31, 25, 27, 44, 34],
    },
    index=[0, 4, 2, 1, 3],
)

# Commit the full frame as a new snapshot and show its descriptor.
s = archive.commit(df, description='Dave is back at 34')
print(s)

<Snapshot (version=5 description='Dave is back at 34' at=2020-05-03 13:46:57.849368-04:00)>


In [10]:
# Walk over every snapshot recorded in the archive. For each one, check out
# its data frame and print a "(version) description" header followed by the
# frame and an empty separator line.
for s in archive.snapshots():
    df = archive.checkout(s.version)
    print('({}) {}\n'.format(s.version, s.description))
    # end='\n\n' terminates the frame and adds the blank separator line.
    print(df, end='\n\n')

(0) First snapshot

     Name  Age
0   Alice   32
1     Bob   45
2  Claire   27
3   Alice   23

(1) Now with Dave and Bob is 44

     Name  Age
0   Alice   32
1     Bob   44
2  Claire   27
3    Dave   23

(2) Reverse order and Dave is 33

     Name  Age
3    Dave   33
2  Claire   27
1     Bob   44
0   Alice   32

(3) Now with Eve and Alice is first

     Name  Age
0   Alice   32
4     Eve   25
2  Claire   27
1     Bob   44

(4) Alice is 31

     Name  Age
0   Alice   31
4     Eve   25
2  Claire   27
1     Bob   44

(5) Dave is back at 34

     Name  Age
0   Alice   31
4     Eve   25
2  Claire   27
1     Bob   44
3    Dave   34



In [11]:
# Scan the archive itself rather than a single snapshot. Each row that is
# printed shows the row id/key, the versions in which the row exists
# (timestamp), and the per-version history of its position and cell values.
reader = archive.reader()
# Cursor-style iteration: ask has_next() before every call to next().
while reader.has_next():
    row = reader.next()
    print(row)

<ArchiveRow (
	id=0
	key=0
	timestamp=[0-5]
	pos=(0 [0-1,3-5], 3 [2])
	values={0: (Alice [0-5]), 1: (32 [0-3], 31 [4-5])})>
<ArchiveRow (
	id=1
	key=1
	timestamp=[0-5]
	pos=(1 [0-1], 2 [2], 3 [3-5])
	values={0: (Bob [0-5]), 1: (45 [0], 44 [1-5])})>
<ArchiveRow (
	id=2
	key=2
	timestamp=[0-5]
	pos=(2 [0-1,3-5], 1 [2])
	values={0: (Claire [0-5]), 1: (27 [0-5])})>
<ArchiveRow (
	id=3
	key=3
	timestamp=[0-2,5]
	pos=(3 [0-1], 0 [2], 4 [5])
	values={0: (Alice [0], Dave [1-2,5]), 1: (23 [0-1], 33 [2], 34 [5])})>
<ArchiveRow (
	id=4
	key=4
	timestamp=[3-5]
	pos=(1 [3-5])
	values={0: (Eve [3-5]), 1: (25 [3-5])})>
