## Selecting a subset of data

In [1]:
import pandas as pd

In [2]:
# Load the survey data from the relative path 'cdc.csv' into a DataFrame.
data = pd.read_csv(filepath_or_buffer='cdc.csv')

In [3]:
# Preview the first rows of the loaded frame to check its columns and values.
data.head()

Unnamed: 0,genhlth,exerany,hlthplan,smoke100,height,weight,wtdesire,age,gender
0,good,0,1,0,70,175,175,77,m
1,good,0,1,1,64,125,115,33,f
2,good,1,1,1,60,105,105,49,f
3,good,1,1,0,66,132,124,42,f
4,very good,0,1,0,61,150,130,55,f


In [4]:
# Pick out one column by its label ('age'); a single label yields a Series.
age = data.loc[:, 'age']
age.head()

0    77
1    33
2    49
3    42
4    55
Name: age, dtype: int64

In [5]:
# Pick out several columns at once by passing a list of labels
# (genhlth, smoke100, age); a list of labels yields a DataFrame.
GSA = data.loc[:, ['genhlth', 'smoke100', 'age']]
GSA.head()

Unnamed: 0,genhlth,smoke100,age
0,good,0,77
1,good,1,33
2,good,1,49
3,good,0,42
4,very good,0,55


In [6]:
# Keep only the rows whose age is strictly greater than 80, using a boolean mask.
over_80 = data.loc[data['age'].gt(80)]
over_80.head()

Unnamed: 0,genhlth,exerany,hlthplan,smoke100,height,weight,wtdesire,age,gender
104,poor,1,1,0,71,175,175,87,m
210,excellent,1,1,1,62,116,110,84,f
226,good,0,1,0,64,145,145,82,f
286,very good,1,1,0,61,108,108,85,f
287,good,1,1,1,60,107,107,85,f


In [7]:
# (rows, columns) of the age > 80 subset.
over_80.shape

(486, 9)

In [8]:
# Keep only the rows whose genhlth is one of the listed values
# ('excellent' or 'very good'), using a membership test as the row mask.
health_great = data.loc[
    data['genhlth'].isin(['excellent', 'very good'])
]
health_great.head()

Unnamed: 0,genhlth,exerany,hlthplan,smoke100,height,weight,wtdesire,age,gender
4,very good,0,1,0,61,150,130,55,f
5,very good,1,1,0,64,114,114,55,f
6,very good,1,1,0,71,194,185,31,m
7,very good,0,1,0,67,170,160,45,m
10,excellent,1,1,1,69,186,175,46,m


In [9]:
# (rows, columns) of the excellent / very good subset.
health_great.shape

(11629, 9)

In [10]:
# Same filter written differently: two equality tests on genhlth
# combined with an element-wise OR.
health_great_1 = data.loc[
    data['genhlth'].eq('excellent') | data['genhlth'].eq('very good')
]
health_great_1.head()

Unnamed: 0,genhlth,exerany,hlthplan,smoke100,height,weight,wtdesire,age,gender
4,very good,0,1,0,61,150,130,55,f
5,very good,1,1,0,64,114,114,55,f
6,very good,1,1,0,71,194,185,31,m
7,very good,0,1,0,67,170,160,45,m
10,excellent,1,1,1,69,186,175,46,m


In [11]:
# (rows, columns) of the subset built from the two OR-ed equality tests.
health_great_1.shape

(11629, 9)

In [13]:
# Select rows and columns in one step with .loc[row_mask, column_labels]:
# rows where genhlth is 'excellent', columns genhlth, age and gender.
health_exc = data.loc[
    data['genhlth'].eq('excellent'),
    ['genhlth', 'age', 'gender'],
]
health_exc.head()

Unnamed: 0,genhlth,age,gender
10,excellent,46,m
12,excellent,21,m
13,excellent,69,m
26,excellent,44,f
27,excellent,42,f


In [14]:
# (rows, columns) of the excellent-health subset restricted to three columns.
health_exc.shape

(4657, 3)