# Filtering & Selecting

In [1]:
import numpy as np 
import pandas as pd 

from pandas import Series, DataFrame

### Selecting and retrieving data 

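You can refer to an element of a Series or DataFrame in two ways:
- Label index: the name attached to the row or column (for example `'row 7'`)
- Integer index: the zero-based position of the element (for example `0`)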


In [2]:
# Eight consecutive integers (0-7), labelled 'row 1' through 'row 8'.
series_row_labels = [f'row {n}' for n in range(1, 9)]
series_obj = Series(np.arange(8), index=series_row_labels)
series_obj

row 1    0
row 2    1
row 3    2
row 4    3
row 5    4
row 6    5
row 7    6
row 8    7
dtype: int64

In [3]:
# Look up a single element by its index label.
series_obj.loc['row 7']

6

In [4]:
# Select by integer position.
# The index labels are strings, so use .iloc for positional access: plain
# series_obj[0] relies on a deprecated fallback that treats an integer key
# as a position when it is not a label.
series_obj.iloc[0]

0

In [5]:
# Select several elements by integer position (first and last here).
# .iloc makes the positional intent explicit; a list of integers passed to
# plain [] on a string-labelled Series depends on a deprecated fallback.
series_obj.iloc[[0, 7]]

row 1    0
row 8    7
dtype: int64

In [6]:
# Build a 6x6 DataFrame of uniform random numbers.
# The seed is fixed so the values shown below are reproducible.
np.random.seed(25)
df_row_labels = [f'row {n}' for n in range(1, 7)]
df_col_labels = [f'column {n}' for n in range(1, 7)]
df_obj = DataFrame(np.random.rand(6, 6), index=df_row_labels, columns=df_col_labels)
df_obj

Unnamed: 0,column 1,column 2,column 3,column 4,column 5,column 6
row 1,0.870124,0.582277,0.278839,0.185911,0.4111,0.117376
row 2,0.684969,0.437611,0.556229,0.36708,0.402366,0.113041
row 3,0.447031,0.585445,0.161985,0.520719,0.326051,0.699186
row 4,0.366395,0.836375,0.481343,0.516502,0.383048,0.997541
row 5,0.514244,0.559053,0.03445,0.71993,0.421004,0.436935
row 6,0.281701,0.900274,0.669612,0.456069,0.289804,0.525819


In [7]:
# Select specific rows and columns by label with .loc[rows, columns].
# The result keeps the order in which the labels are requested.
wanted_rows = ['row 2', 'row 5']
wanted_columns = ['column 5', 'column 2']
df_obj.loc[wanted_rows, wanted_columns]

Unnamed: 0,column 5,column 2
row 2,0.402366,0.437611
row 5,0.421004,0.559053


### Data Slicing


In [8]:
# Slice by label: unlike positional slices, both endpoints are included.
series_obj.loc['row 3':'row 7']

row 3    2
row 4    3
row 5    4
row 6    5
row 7    6
dtype: int64

In [9]:
# Slice rows by label (both endpoints included); all columns are kept.
df_obj.loc['row 3':'row 5', :]

Unnamed: 0,column 1,column 2,column 3,column 4,column 5,column 6
row 3,0.447031,0.585445,0.161985,0.520719,0.326051,0.699186
row 4,0.366395,0.836375,0.481343,0.516502,0.383048,0.997541
row 5,0.514244,0.559053,0.03445,0.71993,0.421004,0.436935


### Comparing with scalars

In [10]:
# Element-wise "less than" against a scalar; yields a boolean Series.
series_obj.lt(0.2)

row 1     True
row 2    False
row 3    False
row 4    False
row 5    False
row 6    False
row 7    False
row 8    False
dtype: bool

In [11]:
# Element-wise "less than" against a scalar; yields a boolean DataFrame.
df_obj.lt(0.2)

Unnamed: 0,column 1,column 2,column 3,column 4,column 5,column 6
row 1,False,False,False,True,False,True
row 2,False,False,False,False,False,True
row 3,False,False,True,False,False,False
row 4,False,False,False,False,False,False
row 5,False,False,True,False,False,False
row 6,False,False,False,False,False,False


### Filtering with scalars

In [12]:
# Keep only the elements whose value is greater than 5.
greater_than_five = series_obj > 5
series_obj[greater_than_five]

row 7    6
row 8    7
dtype: int64

In [13]:
# Keep values greater than 0.2; cells that fail the condition become NaN,
# so the frame keeps its shape.
df_obj.where(df_obj > 0.2)

Unnamed: 0,column 1,column 2,column 3,column 4,column 5,column 6
row 1,0.870124,0.582277,0.278839,,0.4111,
row 2,0.684969,0.437611,0.556229,0.36708,0.402366,
row 3,0.447031,0.585445,,0.520719,0.326051,0.699186
row 4,0.366395,0.836375,0.481343,0.516502,0.383048,0.997541
row 5,0.514244,0.559053,,0.71993,0.421004,0.436935
row 6,0.281701,0.900274,0.669612,0.456069,0.289804,0.525819


### Setting values with scalars

In [14]:
# Work on a copy: plain assignment would only create a second name for the
# same object, so the update below would also overwrite series_obj.
series_obj_changed = series_obj.copy()
# Pass the labels as a list to .loc; a bare tuple key is not treated as a
# list of labels on current pandas.
series_obj_changed.loc[['row 1', 'row 5', 'row 8']] = 9
series_obj_changed

row 1    9
row 2    1
row 3    2
row 4    3
row 5    9
row 6    5
row 7    6
row 8    9
dtype: int64

In [15]:
# Work on a copy so that df_obj keeps the values displayed earlier; plain
# assignment would make df_obj_changed another name for the same DataFrame.
df_obj_changed = df_obj.copy()
# Add 1 to the four cells at the intersection of the chosen rows and columns.
df_obj_changed.loc[['row 2', 'row 5'], ['column 5', 'column 2']] += 1
df_obj_changed

Unnamed: 0,column 1,column 2,column 3,column 4,column 5,column 6
row 1,0.870124,0.582277,0.278839,0.185911,0.4111,0.117376
row 2,0.684969,1.437611,0.556229,0.36708,1.402366,0.113041
row 3,0.447031,0.585445,0.161985,0.520719,0.326051,0.699186
row 4,0.366395,0.836375,0.481343,0.516502,0.383048,0.997541
row 5,0.514244,1.559053,0.03445,0.71993,1.421004,0.436935
row 6,0.281701,0.900274,0.669612,0.456069,0.289804,0.525819


In [16]:
# Add 1 to every column of the row labelled 'row 6'.
# Note: this updates df_obj_changed in place, so re-running the cell adds 1 again.
df_obj_changed.loc['row 6', :] += 1
df_obj_changed

Unnamed: 0,column 1,column 2,column 3,column 4,column 5,column 6
row 1,0.870124,0.582277,0.278839,0.185911,0.4111,0.117376
row 2,0.684969,1.437611,0.556229,0.36708,1.402366,0.113041
row 3,0.447031,0.585445,0.161985,0.520719,0.326051,0.699186
row 4,0.366395,0.836375,0.481343,0.516502,0.383048,0.997541
row 5,0.514244,1.559053,0.03445,0.71993,1.421004,0.436935
row 6,1.281701,1.900274,1.669612,1.456069,1.289804,1.525819
