# Indexing and Selecting data

## Select Row

### by index

Let's import pandas and create a DataFrame first.

In [1]:
import pandas as pd

In [8]:
# One record (dict) per employee; the dict keys become the column names.
# NOTE: despite its name, `employee_dict` is a *list* of dicts. The name is
# kept unchanged so that any other cell referring to it keeps working.
employee_dict = [
    {'name': 'John', 'age': 23, 'job': 'developer'},
    {'name': 'James', 'age': 33, 'job': 'data analyst'},
    {'name': 'Raymond', 'age': 35, 'job': 'accountant'},
    {'name': 'Edward', 'age': 56, 'job': 'mechanical engineer'},
]

# Use the plain constructor: it is the documented way to turn a list of
# records into a DataFrame. `DataFrame.from_dict` is documented for dict
# input (a dict of columns or of rows), not for a list.
df = pd.DataFrame(employee_dict)
df

Unnamed: 0,name,age,job
0,John,23,developer
1,James,33,data analyst
2,Raymond,35,accountant
3,Edward,56,mechanical engineer


Select the rows at index 1 through 2 (the slice `1:3` stops before position 3)

In [9]:
# Positional row slice: start at position 1, stop before position 3.
df.iloc[1:3]

Unnamed: 0,name,age,job
1,James,33,data analyst
2,Raymond,35,accountant


Select only the rows at index 0 and 3

In [12]:
# DataFrame.loc selects rows (and optionally columns) by label.
# Here: the rows labelled 0 and 3, with every column (`:`).
df.loc[[0, 3], :]

Unnamed: 0,name,age,job
0,John,23,developer
3,Edward,56,mechanical engineer


### by column condition

Select rows by column condition

Select employees older than 25

In [13]:
# Build a boolean mask (one True/False per row), then keep the True rows.
is_over_25 = df['age'] > 25
df_greater_than_25 = df[is_over_25]
df_greater_than_25

Unnamed: 0,name,age,job
1,James,33,data analyst
2,Raymond,35,accountant
3,Edward,56,mechanical engineer


In [14]:
# Alternative: DataFrame.query() takes the row condition as a string
# expression in which columns are referenced by name.
df_greater_than_25 = df.query(expr='age > 25')
df_greater_than_25

Unnamed: 0,name,age,job
1,James,33,data analyst
2,Raymond,35,accountant
3,Edward,56,mechanical engineer


Select employees who are older than 25 AND are data analysts

In [15]:
# Evaluate each condition into its own boolean mask, then combine the masks
# element-wise with `&` (a row is kept only when both are True).
is_over_25 = df['age'] > 25
is_data_analyst = df['job'] == 'data analyst'
df_filtered = df[is_over_25 & is_data_analyst]
df_filtered

Unnamed: 0,name,age,job
1,James,33,data analyst


## Select Column
### by index

In [16]:
# Display the full DataFrame again (every row and column).
df

Unnamed: 0,name,age,job
0,John,23,developer
1,James,33,data analyst
2,Raymond,35,accountant
3,Edward,56,mechanical engineer


Select the name and age columns

In [17]:
# DataFrame.iloc is an indexer (used with square brackets, not called)
# that selects purely by integer position.
# `:` keeps every row; `:2` keeps the columns at positions 0 and 1.
df.iloc[:, :2]

Unnamed: 0,name,age
0,John,23
1,James,33
2,Raymond,35
3,Edward,56


Select the name and job columns

In [18]:
# A list of integer positions picks columns that are not next to each other.
name_and_job_positions = [0, 2]
df.iloc[:, name_and_job_positions]

Unnamed: 0,name,job
0,John,developer
1,James,data analyst
2,Raymond,accountant
3,Edward,mechanical engineer


### by column name

Select the name and age columns

In [19]:
# Indexing with a list of column names returns just those columns.
name_and_age = ['name', 'age']
df_filtered = df[name_and_age]
df_filtered

Unnamed: 0,name,age
0,John,23
1,James,33
2,Raymond,35
3,Edward,56


In [20]:
# Alternative: DataFrame.filter() keeps only the labels listed in `items`
# along the chosen axis; axis='columns' makes them column names.
# This example picks the `name` and `job` columns.
df.filter(items=['name', 'job'], axis='columns')

Unnamed: 0,name,job
0,John,developer
1,James,data analyst
2,Raymond,accountant
3,Edward,mechanical engineer
