In [1]:
import pandas as pd
import pandas.util.testing as tu

## Setup

A `MultiIndex` is used to highlight behavior that depends on the index.

In [2]:
# Reference frame: columns a, b, c become a three-level MultiIndex,
# leaving d as the only data column.
d1 = pd.DataFrame(
    {
        'a': [1, 1, 3],
        'b': [11, 22, 22],
        'c': [4, 9, 9],
        'd': [5, 4, 3],
    }
).set_index(keys=['a', 'b', 'c'])
d1

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,d
a,b,c,Unnamed: 3_level_1
1,11,4,5
1,22,9,4
3,22,9,3


In [3]:
# Work on an independent copy so d1 stays untouched.
d2 = d1.copy()
# Assigning to an index label that does not exist yet appends a new row.
d2.loc[(1, 22, 8), :] = 4
# Remove the row the new one stands in for, then restore index order.
# Net effect: d2 differs from d1 only in the last index level of one row
# (the displayed values of d are rendered as floats after the enlargement).
d2 = d2.drop((1, 22, 9))
d2 = d2.sort_index()
d2

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,d
a,b,c,Unnamed: 3_level_1
1,11,4,5.0
1,22,8,4.0
3,22,9,3.0


## Quickly show distinct items

Remove every row that is found in both dataframes, leaving only the rows unique to one of them. This is the symmetric difference, the set equivalent of `(a | b) - (a & b)`.

In [4]:
# Pitfall: drop_duplicates() does not take the index into account!!!
# Only the column values are compared, so rows that differ solely in their
# index labels count as duplicates and keep=False removes all of them.
(
    pd.concat([d1, d2])
    .drop_duplicates(keep=False)
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,d
a,b,c,Unnamed: 3_level_1


In [5]:
# Move the index levels into ordinary columns first so that they take part
# in the duplicate comparison; keep=False then leaves only unmatched rows.
pd.concat(
    [frame.reset_index() for frame in (d1, d2)]
).drop_duplicates(keep=False)

Unnamed: 0,a,b,c,d
1,1,22,9,4.0
1,1,22,8,4.0


## Set difference on index

In [6]:
# Index entries present in d1 but missing from d2, shown as an empty-column frame.
pd.DataFrame(index=d1.index.difference(d2.index))

a,b,c
1,22,9


In [7]:
# Index entries present in d2 but missing from d1, shown as an empty-column frame.
pd.DataFrame(index=d2.index.difference(d1.index))

a,b,c
1,22,8


## Test for equality

In [8]:
# Compare d1 against d2. pd.testing is the public home of assert_frame_equal;
# the pandas.util.testing alias is deprecated and absent from pandas >= 2.0.
try:
    pd.testing.assert_frame_equal(d1, d2)
except AssertionError as err:
    # The exception message describes where the frames differ.
    print(err)
else:
    # Only reached when the comparison raised nothing.
    print('Data frames are the same.')

MultiIndex level [2] are different

MultiIndex level [2] values are different (33.33333 %)
[left]:  Int64Index([4, 9, 9], dtype='int64', name='c')
[right]: Int64Index([4, 8, 9], dtype='int64', name='c')


In [9]:
# Sanity check: a frame compared against its own copy must pass.
# pd.testing is the public home of assert_frame_equal; the
# pandas.util.testing alias is deprecated and absent from pandas >= 2.0.
try:
    pd.testing.assert_frame_equal(d1, d1.copy())
except AssertionError as err:
    # The exception message describes where the frames differ.
    print(err)
else:
    # Only reached when the comparison raised nothing.
    print('Data frames are the same.')

Data frames are the same.
