Index objects are immutable: once an Index has been created, it cannot be changed in place.

In [1]:
import pandas as pd
import numpy as np

# Five uniform random values labelled 'a'..'e'; the Series name is set at construction
s1 = pd.Series(np.random.rand(5), index=list('abcde'), name='my data')
s1

a    0.272108
b    0.541988
c    0.511401
d    0.014888
e    0.042703
Name: my data, dtype: float64

In [3]:
# 4x3 frame of uniform random floats with columns A, B, C
df = pd.DataFrame(data=np.random.rand(4, 3), columns=list('ABC'))
df

Unnamed: 0,A,B,C
0,0.879044,0.634116,0.764333
1,0.251501,0.649424,0.665734
2,0.123237,0.613228,0.132912
3,0.541626,0.745228,0.826012


In [6]:
# The column labels of a DataFrame are stored as an Index object
df.columns

Index(['A', 'B', 'C'], dtype='object')

In [7]:
# An Index may contain duplicate labels ('a' appears twice here)
s = pd.Series(data=[1, 2, 3, 4, 5], index=list('abacd'))
s

a    1
b    2
a    3
c    4
d    5
dtype: int64

In [8]:
# Selecting a duplicated label returns every matching entry.
# .loc is used instead of attribute access (s.a): attribute access breaks for
# labels that shadow Series attributes or are not valid Python identifiers.
s.loc['a']

a    1
a    3
dtype: int64

# Changing the index

In [10]:
# Build a Series with unique labels and keep a reference to its Index
s = pd.Series(data=[1, 2, 3, 4, 5], index=list('abcde'))
idx = s.index
idx

Index(['a', 'b', 'c', 'd', 'e'], dtype='object')

In [11]:
# An Index can also be constructed directly from a list of labels
idx2 = pd.Index(list('cdefg'))
idx2

Index(['c', 'd', 'e', 'f', 'g'], dtype='object')

## drop a label
Call `.drop()`; it returns a new Index, so assign the result to another object.

In [13]:
# drop returns a new Index with the label 'c' removed
idx2.drop('c')

Index(['d', 'e', 'f', 'g'], dtype='object')

In [14]:
# drop() does not modify idx2 in place:
# idx2 still contains 'c' after the drop call.

idx2

Index(['c', 'd', 'e', 'f', 'g'], dtype='object')

In [15]:
# Assign the result of drop to a new name to keep the shortened Index
idx2_new = idx2.drop('c')
idx2_new

Index(['d', 'e', 'f', 'g'], dtype='object')

## insert a label
Call the member function `.insert()` and assign the output to another object.

In [17]:
# insert returns a new Index with 'a' placed at position 0
idx2.insert(0,'a')

Index(['a', 'c', 'd', 'e', 'f', 'g'], dtype='object')

In [26]:
# Bind the result of insert to a name to keep it; idx2 itself is not modified
idx2_new = idx2.insert(0,'a')
idx2_new

Index(['a', 'c', 'd', 'e', 'f', 'g'], dtype='object')

## reindexing
Call the member function `.reindex()` and assign the output to a new object.

In [27]:
# Series to be reindexed below: integer values labelled 'a'..'e'
s = pd.Series(data=[1, 2, 3, 4, 5], index=list('abcde'))
s

a    1
b    2
c    3
d    4
e    5
dtype: int64

In [28]:
# Labels that do not exist in s ('x', 'y') appear as NaN in the result
s.reindex(['a','c','e','x','y'])

a    1.0
c    3.0
e    5.0
x    NaN
y    NaN
dtype: float64

In [30]:
# s is unchanged: reindex returned a new Series
s

a    1
b    2
c    3
d    4
e    5
dtype: int64

In [33]:
# Bind the reindexed result to a new name to keep it
s2 = s.reindex(['a','c','e','x','y'])
s2

a    1.0
c    3.0
e    5.0
x    NaN
y    NaN
dtype: float64

### fill a value if you don't want to have NaN
Pass the `fill_value` argument to `.reindex()`.

In [34]:
# fill_value supplies the value used for labels that are new to s, instead of NaN
s2 = s.reindex(['a','c','e','x','y'], fill_value=100)
s2

a      1
c      3
e      5
x    100
y    100
dtype: int64

### Forward fill (ffill), backward fill (bfill)
Pass `method='ffill'` or `method='bfill'` to `.reindex()`.

In [35]:
# Sparse integer labels (0, 5, 10) leave gaps for the fill methods to cover
s1 = pd.Series(data=[0.0, 0.1, 0.2], index=[0, 5, 10])
s1

0     0.0
5     0.1
10    0.2
dtype: float64

In [38]:
# method='ffill' carries the last preceding value forward into each new label
s2 = s1.reindex(range(0,11), method='ffill')
s2

0     0.0
1     0.0
2     0.0
3     0.0
4     0.0
5     0.1
6     0.1
7     0.1
8     0.1
9     0.1
10    0.2
dtype: float64

## Reindexing a dataframe

In [40]:
# 4x3 frame of random integers in [0, 100) with columns CA, OR, WA
df = pd.DataFrame(
    np.random.randint(0, 100, size=(4, 3)),
    columns=['CA', 'OR', 'WA'],
)
df

Unnamed: 0,CA,OR,WA
0,88,20,90
1,3,14,72
2,48,57,40
3,47,1,9


In [44]:
# Reindex the columns: 'WA' and 'CA' are kept in the new order, 'OR' is left out,
# and 'NY' (not present in df) comes back with missing values
col2 = ['WA','CA','NY']
df2 = df.reindex(columns=col2)
df2

Unnamed: 0,WA,CA,NY
0,90,88,
1,72,3,
2,40,48,
3,9,47,


In [46]:
# fill_value=0 populates the new 'NY' column with 0 instead of missing values
df2 = df.reindex(columns=col2, fill_value=0)
df2

Unnamed: 0,WA,CA,NY
0,90,88,0
1,72,3,0
2,40,48,0
3,9,47,0
