# Sorting and subsetting

In [2]:
import pandas as pd

homelessness = pd.read_csv("../datasets/homelessness.csv")
display(homelessness.columns)
display(homelessness.head())

Index(['Unnamed: 0', 'region', 'state', 'individuals', 'family_members',
       'state_pop'],
      dtype='object')

Unnamed: 0.1,Unnamed: 0,region,state,individuals,family_members,state_pop
0,0,East South Central,Alabama,2570.0,864.0,4887681
1,1,Pacific,Alaska,1434.0,582.0,735139
2,2,Mountain,Arizona,7259.0,2606.0,7158024
3,3,West South Central,Arkansas,2280.0,432.0,3009733
4,4,Pacific,California,109008.0,20964.0,39461588


#### Sorting

In [3]:
homelessness.sort_values("individuals")

Unnamed: 0.1,Unnamed: 0,region,state,individuals,family_members,state_pop
50,50,Mountain,Wyoming,434.0,205.0,577601
34,34,West North Central,North Dakota,467.0,75.0,758080
7,7,South Atlantic,Delaware,708.0,374.0,965479
39,39,New England,Rhode Island,747.0,354.0,1058287
45,45,New England,Vermont,780.0,511.0,624358
29,29,New England,New Hampshire,835.0,615.0,1353465
41,41,West North Central,South Dakota,836.0,323.0,878698
26,26,Mountain,Montana,983.0,422.0,1060665
48,48,South Atlantic,West Virginia,1021.0,222.0,1804291
24,24,East South Central,Mississippi,1024.0,328.0,2981020


#### Sorting in descending order

In [4]:
homelessness.sort_values("individuals", ascending=False)

Unnamed: 0.1,Unnamed: 0,region,state,individuals,family_members,state_pop
4,4,Pacific,California,109008.0,20964.0,39461588
32,32,Mid-Atlantic,New York,39827.0,52070.0,19530351
9,9,South Atlantic,Florida,21443.0,9587.0,21244317
43,43,West South Central,Texas,19199.0,6111.0,28628666
47,47,Pacific,Washington,16424.0,5880.0,7523869
37,37,Pacific,Oregon,11139.0,3337.0,4181886
38,38,Mid-Atlantic,Pennsylvania,8163.0,5349.0,12800922
5,5,Mountain,Colorado,7607.0,3250.0,5691287
2,2,Mountain,Arizona,7259.0,2606.0,7158024
28,28,Mountain,Nevada,7058.0,486.0,3027341


In [5]:
homelessness.sort_values(["individuals", "family_members"])

Unnamed: 0.1,Unnamed: 0,region,state,individuals,family_members,state_pop
50,50,Mountain,Wyoming,434.0,205.0,577601
34,34,West North Central,North Dakota,467.0,75.0,758080
7,7,South Atlantic,Delaware,708.0,374.0,965479
39,39,New England,Rhode Island,747.0,354.0,1058287
45,45,New England,Vermont,780.0,511.0,624358
29,29,New England,New Hampshire,835.0,615.0,1353465
41,41,West North Central,South Dakota,836.0,323.0,878698
26,26,Mountain,Montana,983.0,422.0,1060665
48,48,South Atlantic,West Virginia,1021.0,222.0,1804291
24,24,East South Central,Mississippi,1024.0,328.0,2981020


#### Sorting by multiple variables

In [6]:
homelessness.sort_values(["individuals", "family_members"], ascending=[True, False])

Unnamed: 0.1,Unnamed: 0,region,state,individuals,family_members,state_pop
50,50,Mountain,Wyoming,434.0,205.0,577601
34,34,West North Central,North Dakota,467.0,75.0,758080
7,7,South Atlantic,Delaware,708.0,374.0,965479
39,39,New England,Rhode Island,747.0,354.0,1058287
45,45,New England,Vermont,780.0,511.0,624358
29,29,New England,New Hampshire,835.0,615.0,1353465
41,41,West North Central,South Dakota,836.0,323.0,878698
26,26,Mountain,Montana,983.0,422.0,1060665
48,48,South Atlantic,West Virginia,1021.0,222.0,1804291
24,24,East South Central,Mississippi,1024.0,328.0,2981020


#### Subsetting columns

In [7]:
homelessness["region"]

0     East South Central
1                Pacific
2               Mountain
3     West South Central
4                Pacific
5               Mountain
6            New England
7         South Atlantic
8         South Atlantic
9         South Atlantic
10        South Atlantic
11               Pacific
12              Mountain
13    East North Central
14    East North Central
15    West North Central
16    West North Central
17    East South Central
18    West South Central
19           New England
20        South Atlantic
21           New England
22    East North Central
23    West North Central
24    East South Central
25    West North Central
26              Mountain
27    West North Central
28              Mountain
29           New England
30          Mid-Atlantic
31              Mountain
32          Mid-Atlantic
33        South Atlantic
34    West North Central
35    East North Central
36    West South Central
37               Pacific
38          Mid-Atlantic
39           New England


#### Subsetting multiple columns

In [8]:
homelessness[["region", "state"]]

Unnamed: 0,region,state
0,East South Central,Alabama
1,Pacific,Alaska
2,Mountain,Arizona
3,West South Central,Arkansas
4,Pacific,California
5,Mountain,Colorado
6,New England,Connecticut
7,South Atlantic,Delaware
8,South Atlantic,District of Columbia
9,South Atlantic,Florida


In [9]:
cols_to_subset = ["region", "state"]
homelessness[cols_to_subset]

Unnamed: 0,region,state
0,East South Central,Alabama
1,Pacific,Alaska
2,Mountain,Arizona
3,West South Central,Arkansas
4,Pacific,California
5,Mountain,Colorado
6,New England,Connecticut
7,South Atlantic,Delaware
8,South Atlantic,District of Columbia
9,South Atlantic,Florida


#### Subsetting rows

In [11]:
homelessness["family_members"] > 400

0      True
1      True
2      True
3      True
4      True
5      True
6      True
7     False
8      True
9      True
10     True
11     True
12     True
13     True
14     True
15     True
16     True
17     True
18     True
19     True
20     True
21     True
22     True
23     True
24    False
25     True
26     True
27     True
28     True
29     True
30     True
31     True
32     True
33     True
34    False
35     True
36     True
37     True
38     True
39    False
40     True
41    False
42     True
43     True
44     True
45     True
46     True
47     True
48    False
49     True
50    False
Name: family_members, dtype: bool

In [12]:
homelessness[homelessness["family_members"] > 400]

Unnamed: 0.1,Unnamed: 0,region,state,individuals,family_members,state_pop
0,0,East South Central,Alabama,2570.0,864.0,4887681
1,1,Pacific,Alaska,1434.0,582.0,735139
2,2,Mountain,Arizona,7259.0,2606.0,7158024
3,3,West South Central,Arkansas,2280.0,432.0,3009733
4,4,Pacific,California,109008.0,20964.0,39461588
5,5,Mountain,Colorado,7607.0,3250.0,5691287
6,6,New England,Connecticut,2280.0,1696.0,3571520
8,8,South Atlantic,District of Columbia,3770.0,3134.0,701547
9,9,South Atlantic,Florida,21443.0,9587.0,21244317
10,10,South Atlantic,Georgia,6943.0,2556.0,10511131


#### Subsetting based on text data

In [15]:
homelessness[homelessness["region"] == "New England"]

Unnamed: 0.1,Unnamed: 0,region,state,individuals,family_members,state_pop
6,6,New England,Connecticut,2280.0,1696.0,3571520
19,19,New England,Maine,1450.0,1066.0,1339057
21,21,New England,Massachusetts,6811.0,13257.0,6882635
29,29,New England,New Hampshire,835.0,615.0,1353465
39,39,New England,Rhode Island,747.0,354.0,1058287
45,45,New England,Vermont,780.0,511.0,624358


#### Subsetting based on dates

In [None]:
homelessness[homelessness["date"] < "2015-01-01"]

#### Subsetting based on multiple conditions

In [21]:
is_england = homelessness["region"] == "New England"
is_vermont = homelessness["state"] == "Vermont"
homelessness[is_england & is_vermont]

Unnamed: 0.1,Unnamed: 0,region,state,individuals,family_members,state_pop
45,45,New England,Vermont,780.0,511.0,624358


#### Subsetting using .isin()

In [23]:
is_england_or_south_atlantic = homelessness["region"].isin(["New England", "South Atlantic"])
homelessness[is_england_or_south_atlantic]

Unnamed: 0.1,Unnamed: 0,region,state,individuals,family_members,state_pop
6,6,New England,Connecticut,2280.0,1696.0,3571520
7,7,South Atlantic,Delaware,708.0,374.0,965479
8,8,South Atlantic,District of Columbia,3770.0,3134.0,701547
9,9,South Atlantic,Florida,21443.0,9587.0,21244317
10,10,South Atlantic,Georgia,6943.0,2556.0,10511131
19,19,New England,Maine,1450.0,1066.0,1339057
20,20,South Atlantic,Maryland,4914.0,2230.0,6035802
21,21,New England,Massachusetts,6811.0,13257.0,6882635
29,29,New England,New Hampshire,835.0,615.0,1353465
33,33,South Atlantic,North Carolina,6451.0,2817.0,10381615
