In [2]:
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

In [5]:
# Build a 2x4 frame holding the integers 0..7, with a named row index
# ('city') and a named column index ('letter') so that stack()/unstack()
# can later refer to the levels by name.
city_index = pd.Index(['LA', 'SF'], name='city')
letter_columns = pd.Index(['A', 'B', 'C', 'D'], name='letter')
df1 = DataFrame(
    np.arange(8).reshape(2, 4),
    index=city_index,
    columns=letter_columns,
)
df1

letter,A,B,C,D
city,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
LA,0,1,2,3
SF,4,5,6,7


In [6]:
# stack() moves the column level ('letter') into the row index, turning the
# 2x4 frame into a Series indexed by (city, letter) pairs.
df_st = df1.stack()
df_st

city  letter
LA    A         0
      B         1
      C         2
      D         3
SF    A         4
      B         5
      C         6
      D         7
dtype: int32

In [8]:
# With no argument, unstack() pivots the innermost index level ('letter')
# back into columns, which reproduces the layout of df1.
df_st.unstack()

letter,A,B,C,D
city,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
LA,0,1,2,3
SF,4,5,6,7


In [9]:
# Name the level to pivot explicitly: 'letter' becomes the columns,
# giving the same result as the default unstack().
df_st.unstack(level='letter')

letter,A,B,C,D
city,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
LA,0,1,2,3
SF,4,5,6,7


In [10]:
# Pivot the outer level instead: 'city' becomes the columns and
# 'letter' stays as the row index.
df_st.unstack(level='city')

city,LA,SF
letter,Unnamed: 1_level_1,Unnamed: 2_level_1
A,0,4
B,1,5
C,2,6
D,3,7


In [11]:
# How stack() and unstack() handle null values.
# First of two Series whose index labels only partly overlap (Q, X, Y).
ser1 = Series(data=[0, 1, 2], index=list('QXY'))
ser1

Q    0
X    1
Y    2
dtype: int64

In [12]:
# Second Series: labelled X, Y, Z with the values 4, 5, 6.
ser2 = Series(data=[4, 5, 6], index=list('XYZ'))
ser2

X    4
Y    5
Z    6
dtype: int64

In [22]:
# Concatenate the two Series end to end, tagging each piece with an outer
# key so the result carries a two-level index (key, original label).
# Note: despite its name, `df` is a Series at this point.
pieces = [ser1, ser2]
outer_keys = ['Alpha', 'Beta']
df = pd.concat(pieces, keys=outer_keys)
df

Alpha  Q    0
       X    1
       Y    2
Beta   X    4
       Y    5
       Z    6
dtype: int64

In [23]:
# Pivot the inner labels (Q, X, Y, Z) into columns. Combinations that do not
# exist in the input (Alpha/Z and Beta/Q) show up as NaN, and the values are
# displayed as floats.
df.unstack()

Unnamed: 0,Q,X,Y,Z
Alpha,0.0,1.0,2.0,
Beta,,4.0,5.0,6.0


In [24]:
# Round trip: unstack to wide form, then stack back again. As the output
# shows, the NaN cells introduced by unstack() are gone after stack(),
# but the values remain float64.
unstacked = df.unstack()
unstacked.stack()

Alpha  Q    0.0
       X    1.0
       Y    2.0
Beta   X    4.0
       Y    5.0
       Z    6.0
dtype: float64

In [25]:
# To keep the null values, start from the wide (unstacked) DataFrame.
# The frame is rebuilt from ser1/ser2 rather than written as
# `df = df.unstack()`: that self-referential form is not idempotent, because
# running the cell a second time would unstack the already-wide DataFrame
# into a Series and the following stack(dropna=False) cell would then fail.
df = pd.concat([ser1, ser2], keys=['Alpha', 'Beta']).unstack()
df

Unnamed: 0,Q,X,Y,Z
Alpha,0.0,1.0,2.0,
Beta,,4.0,5.0,6.0


In [26]:
# dropna=False keeps the NaN entries (Alpha/Z and Beta/Q) that a plain
# stack() dropped in the earlier round-trip cell.
# NOTE(review): newer pandas releases appear to deprecate `dropna` in favour
# of `future_stack=True` -- confirm against the installed pandas version.
df.stack(dropna=False)

Alpha  Q    0.0
       X    1.0
       Y    2.0
       Z    NaN
Beta   Q    NaN
       X    4.0
       Y    5.0
       Z    6.0
dtype: float64

# Duplicates in DataFrame

In [27]:
# Small frame with repeated rows: (A, 2) occurs twice and (B, 3) occurs twice.
key1_values = ['A', 'A', 'B', 'B', 'B']
key2_values = [2, 2, 2, 3, 3]
df = DataFrame({'key1': key1_values, 'key2': key2_values})
df

Unnamed: 0,key1,key2
0,A,2
1,A,2
2,B,2
3,B,3
4,B,3


In [28]:
# Boolean mask: True for every row that repeats an earlier row; the first
# occurrence of each row stays False.
df.duplicated()

0    False
1     True
2    False
3    False
4     True
dtype: bool

In [29]:
# Returns a new frame that keeps only the first occurrence of each repeated
# row (original index labels preserved); df itself is not modified.
df.drop_duplicates()

Unnamed: 0,key1,key2
0,A,2
2,B,2
3,B,3


In [30]:
# Judge duplicates by 'key1' alone: the first row for each key1 value is kept.
df.drop_duplicates(subset=['key1'])

Unnamed: 0,key1,key2
0,A,2
2,B,2


In [31]:
# Show the frame again: the drop_duplicates() calls above returned new
# frames and left df unchanged.
df

Unnamed: 0,key1,key2
0,A,2
1,A,2
2,B,2
3,B,3
4,B,3


In [34]:
# Keep the last row of each 'key1' group instead of the first.
df.drop_duplicates(subset=['key1'], keep='last')

Unnamed: 0,key1,key2
1,A,2
4,B,3
