# Filtering & Selecting

In [1]:
import numpy as np 
import pandas as pd 

from pandas import Series, DataFrame

### Selecting and retrieving data 

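You can refer to an element of a Series or DataFrame in two ways:
- Label index: the name assigned to the row (or column)
- Integer index: the zero-based position of the element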


In [2]:
# Eight consecutive integers (0..7), labelled 'row 1' through 'row 8'.
series_obj = Series(
    np.arange(8),
    index=['row {}'.format(n) for n in range(1, 9)],
)
series_obj

row 1    0
row 2    1
row 3    2
row 4    3
row 5    4
row 6    5
row 7    6
row 8    7
dtype: int64

In [3]:
# Look up a single element by its index label.
series_obj.loc['row 7']

6

In [4]:
# Select by integer position (zero-based).
# .iloc is always positional. A bare series_obj[0] on a label-indexed Series
# only works through a positional fallback that newer pandas releases deprecate.
series_obj.iloc[0]

0

In [5]:
# Select several elements by integer position (here the first and the last).
# .iloc keeps the lookup positional regardless of what the index labels are.
series_obj.iloc[[0, 7]]

row 1    0
row 8    7
dtype: int64

In [6]:
# Seed NumPy's global generator so the random table is the same on every run,
# then build a 6x6 DataFrame of uniform [0, 1) samples with labelled axes.
np.random.seed(25)
df_obj = DataFrame(
    np.random.rand(6, 6),
    index=['row {}'.format(r) for r in range(1, 7)],
    columns=['column {}'.format(c) for c in range(1, 7)],
)
df_obj

Unnamed: 0,column 1,column 2,column 3,column 4,column 5,column 6
row 1,0.638481,0.605738,0.046909,0.125227,0.627795,0.514878
row 2,0.52271,0.827939,0.620885,0.574423,0.43106,0.91162
row 3,0.845388,0.449311,0.758815,0.89324,0.762819,0.240679
row 4,0.117376,0.848415,0.255336,0.792824,0.069339,0.676753
row 5,0.450905,0.058101,0.974556,0.915639,0.025716,0.788688
row 6,0.032145,0.608906,0.320885,0.210881,0.012956,0.249239


In [7]:
# Pick rows and columns by label in a single .loc call; the result keeps the
# order in which the labels are listed.
df_obj.loc[
    ['row 2', 'row 5'],       # row labels
    ['column 5', 'column 2']  # column labels
]

Unnamed: 0,column 5,column 2
row 2,0.43106,0.827939
row 5,0.025716,0.058101


### Data Slicing


In [9]:
# Label-based slice: both the start and the end label are included.
series_obj.loc['row 3':'row 7']

row 3    2
row 4    3
row 5    4
row 6    5
row 7    6
dtype: int64

In [10]:
# Slice rows by label; the end label 'row 5' is part of the result.
df_obj.loc['row 3':'row 5']

Unnamed: 0,column 1,column 2,column 3,column 4,column 5,column 6
row 3,0.845388,0.449311,0.758815,0.89324,0.762819,0.240679
row 4,0.117376,0.848415,0.255336,0.792824,0.069339,0.676753
row 5,0.450905,0.058101,0.974556,0.915639,0.025716,0.788688


### Comparing with scalars

In [12]:
# Element-wise "less than" against a scalar; yields a boolean Series.
series_obj.lt(0.2)

row 1     True
row 2    False
row 3    False
row 4    False
row 5    False
row 6    False
row 7    False
row 8    False
dtype: bool

In [11]:
# Element-wise "less than" against a scalar; yields a boolean DataFrame.
df_obj.lt(0.2)

Unnamed: 0,column 1,column 2,column 3,column 4,column 5,column 6
row 1,False,False,True,True,False,False
row 2,False,False,False,False,False,False
row 3,False,False,False,False,False,False
row 4,True,False,False,False,True,False
row 5,False,True,False,False,True,False
row 6,True,False,False,False,True,False


### Filtering with scalars

In [13]:
# Keep only the elements whose value is greater than 5.
series_obj.loc[series_obj.gt(5)]

row 7    6
row 8    7
dtype: int64

In [14]:
# Keep values greater than 0.2; every other cell becomes NaN while the shape
# of the frame is preserved.
df_obj.where(df_obj > 0.2)

Unnamed: 0,column 1,column 2,column 3,column 4,column 5,column 6
row 1,0.638481,0.605738,,,0.627795,0.514878
row 2,0.52271,0.827939,0.620885,0.574423,0.43106,0.91162
row 3,0.845388,0.449311,0.758815,0.89324,0.762819,0.240679
row 4,,0.848415,0.255336,0.792824,,0.676753
row 5,0.450905,,0.974556,0.915639,,0.788688
row 6,,0.608906,0.320885,0.210881,,0.249239


### Setting values with scalars

In [15]:
# Work on a copy: plain assignment would only bind a second name to the same
# object, so the update below would also overwrite series_obj.
series_obj_changed = series_obj.copy()
# Pass the labels as a list to .loc to set several rows at once; a
# comma-separated (tuple) key is not reliably interpreted as a list of labels.
series_obj_changed.loc[['row 1', 'row 5', 'row 8']] = 9
series_obj_changed

row 1    9
row 2    1
row 3    2
row 4    3
row 5    9
row 6    5
row 7    6
row 8    9
dtype: int64

In [16]:
# Work on a copy so df_obj keeps its original values: plain assignment would
# alias the same frame and the increment below would change df_obj as well.
df_obj_changed = df_obj.copy()
# Add 1 to the four cells at the intersection of the listed rows and columns.
df_obj_changed.loc[['row 2', 'row 5'], ['column 5', 'column 2']] += 1
df_obj_changed

Unnamed: 0,column 1,column 2,column 3,column 4,column 5,column 6
row 1,0.638481,0.605738,0.046909,0.125227,0.627795,0.514878
row 2,0.52271,1.827939,0.620885,0.574423,1.43106,0.91162
row 3,0.845388,0.449311,0.758815,0.89324,0.762819,0.240679
row 4,0.117376,0.848415,0.255336,0.792824,0.069339,0.676753
row 5,0.450905,1.058101,0.974556,0.915639,1.025716,0.788688
row 6,0.032145,0.608906,0.320885,0.210881,0.012956,0.249239


In [18]:
# Add 1 to every column of 'row 6'.
# NOTE: this updates df_obj_changed in place, so re-running the cell adds 1 again.
df_obj_changed.loc['row 6', :] = df_obj_changed.loc['row 6', :] + 1
df_obj_changed

Unnamed: 0,column 1,column 2,column 3,column 4,column 5,column 6
row 1,0.638481,0.605738,0.046909,0.125227,0.627795,0.514878
row 2,0.52271,1.827939,0.620885,0.574423,1.43106,0.91162
row 3,0.845388,0.449311,0.758815,0.89324,0.762819,0.240679
row 4,0.117376,0.848415,0.255336,0.792824,0.069339,0.676753
row 5,0.450905,1.058101,0.974556,0.915639,1.025716,0.788688
row 6,1.032145,1.608906,1.320885,1.210881,1.012956,1.249239
