# Index Objects

pandas’s Index objects are responsible for holding the axis labels and other metadata
(like the axis name or names).

In [2]:
# Import Pandas
import pandas as pd
# pandas is now imported and ready to use.
from pandas import Series, DataFrame

In [3]:
# Build a Series whose labels 'a', 'b', 'c' map to the values 0, 1, 2.
series = pd.Series(data=range(3), index=list('abc'))
print(series)

a    0
b    1
c    2
dtype: int64


In [4]:
ind = series.index
print(ind)

Index(['a', 'b', 'c'], dtype='object')


In [5]:
print(ind[1:])

Index(['b', 'c'], dtype='object')


Index objects are immutable and thus can’t be modified by the user; trying to assign to an element raises a TypeError.

In [7]:
# Index objects are immutable, so item assignment raises TypeError.
# The error is caught and printed so the demonstration does not stop a
# top-to-bottom run of the notebook.
try:
    ind[1] = 'd'
except TypeError as err:
    print(f"TypeError: {err}")

TypeError: Index does not support mutable operations

# Reindexing

In [8]:
obj3 = pd.Series(['blue', 'purple', 'yellow'], index=[1, 2, 5])
obj3

1      blue
2    purple
5    yellow
dtype: object

In [9]:
# Conform obj3 to the labels 0-5; labels absent from its index (0, 3, 4) get NaN.
obj3.reindex(range(6))

0       NaN
1      blue
2    purple
3       NaN
4       NaN
5    yellow
dtype: object

In [10]:
# method='bfill' back-fills: each new label takes the value of the next existing label.
obj3.reindex(range(6),method='bfill')

0      blue
1      blue
2    purple
3    yellow
4    yellow
5    yellow
dtype: object

# Dropping Entries from an Axis

In [11]:
import numpy as np
# Float Series holding 0.0 through 4.0, labelled 'a' through 'e'.
obj = pd.Series(np.arange(5, dtype=float), index=list('abcde'))
obj

a    0.0
b    1.0
c    2.0
d    3.0
e    4.0
dtype: float64

In [12]:
new_obj = obj.drop('c')
new_obj

a    0.0
b    1.0
d    3.0
e    4.0
dtype: float64

In [13]:
# 4x4 frame holding 0..15 row by row, with state names as row labels.
data = pd.DataFrame(
    np.arange(16).reshape(4, 4),
    index=['Ohio', 'Colorado', 'Utah', 'New York'],
    columns=['one', 'two', 'three', 'four'],
)
data

Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
Colorado,4,5,6,7
Utah,8,9,10,11
New York,12,13,14,15


In [14]:
data.drop(['Ohio','Colorado'])

Unnamed: 0,one,two,three,four
Utah,8,9,10,11
New York,12,13,14,15


In [15]:
# Many functions that modify the size or shape of a Series or DataFrame,
# such as drop, can manipulate an object in place without returning a new object:
obj.drop('c', inplace=True)
obj  # Be careful with inplace=True: the dropped data is removed from obj itself.

a    0.0
b    1.0
d    3.0
e    4.0
dtype: float64

# Indexing, Selection, and Filtering

In [16]:
obj = pd.Series(np.arange(4.), index=['a', 'b', 'c', 'd'])
obj

a    0.0
b    1.0
c    2.0
d    3.0
dtype: float64

In [17]:
obj['b']

1.0

In [18]:
# Select the third element by position. Plain obj[2] on a label-indexed Series
# relies on a deprecated integer-as-position fallback; iloc states the intent.
obj.iloc[2]

2.0

In [19]:
obj[2:4]

c    2.0
d    3.0
dtype: float64

In [20]:
obj[['c','d']]

c    2.0
d    3.0
dtype: float64

In [21]:
obj[obj<2]

a    0.0
b    1.0
dtype: float64

In [22]:
# Indexing a DataFrame with a single label or a sequence of labels
# retrieves one or more columns. Re-create the 4x4 example frame first.
data = pd.DataFrame(np.arange(16).reshape((4, 4)),
                    index=['Ohio', 'Colorado', 'Utah', 'New York'],
                    columns=['one', 'two', 'three', 'four'])
data

Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
Colorado,4,5,6,7
Utah,8,9,10,11
New York,12,13,14,15


In [23]:
data['two']

Ohio         1
Colorado     5
Utah         9
New York    13
Name: two, dtype: int64

In [24]:
data[['two','three']]

Unnamed: 0,two,three
Ohio,1,2
Colorado,5,6
Utah,9,10
New York,13,14


In [25]:
data[:3]

Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
Colorado,4,5,6,7
Utah,8,9,10,11


In [26]:
data[data<4]

Unnamed: 0,one,two,three,four
Ohio,0.0,1.0,2.0,3.0
Colorado,,,,
Utah,,,,
New York,,,,


In [27]:
data[data>4]

Unnamed: 0,one,two,three,four
Ohio,,,,
Colorado,,5.0,6.0,7.0
Utah,8.0,9.0,10.0,11.0
New York,12.0,13.0,14.0,15.0


In [28]:
data[data < 5] = 0
data

Unnamed: 0,one,two,three,four
Ohio,0,0,0,0
Colorado,0,5,6,7
Utah,8,9,10,11
New York,12,13,14,15


# Selection with loc and iloc

In [29]:
# The main distinction between loc and iloc is:
# - loc is label-based, which means that you have to specify rows and columns
#   based on their row and column labels.
# - iloc is integer position-based, so you have to specify rows and columns by
#   their integer position values (0-based integer position).

In [30]:
data.loc['Colorado', ['two', 'three']]

two      5
three    6
Name: Colorado, dtype: int64

In [31]:
data.iloc[2, [3, 0, 1]]

four    11
one      8
two      9
Name: Utah, dtype: int64

In [32]:
data.iloc[2]

one       8
two       9
three    10
four     11
Name: Utah, dtype: int64

In [33]:
data.iloc[[1, 2], [3, 0, 1]]

Unnamed: 0,four,one,two
Colorado,7,0,5
Utah,11,8,9


In [34]:
# Both indexing functions work with slices in addition to single 
# labels or lists of labels:
data.loc[:'Utah', 'two']

Ohio        0
Colorado    5
Utah        9
Name: two, dtype: int64

In [35]:
data.iloc[:, :3]

Unnamed: 0,one,two,three
Ohio,0,0,0
Colorado,0,5,6
Utah,8,9,10
New York,12,13,14


In [36]:
# Take the first three columns by position, then keep only the rows
# where column 'three' is greater than 5 (boolean mask on the rows).
data.iloc[:, :3][data.three > 5]

Unnamed: 0,one,two,three
Colorado,0,5,6
Utah,8,9,10
New York,12,13,14
