### Un-pickle the data 

In [1]:
import pickle

# Load the pickled dataset from the working directory into `data`.
# NOTE(review): pickle.load can execute arbitrary code while deserializing,
# so only open pickle files that come from a trusted source.
with open('homeless_data.pkl', 'rb') as f:
    data = pickle.load(f)

### Data exploration 

In [2]:
# Preview the first rows of the freshly loaded data.
data.head()

Unnamed: 0,region,state,individuals,family_members,state_pop
0,East South Central,Alabama,2570.0,864.0,4887681
1,Pacific,Alaska,1434.0,582.0,735139
2,Mountain,Arizona,7259.0,2606.0,7158024
3,West South Central,Arkansas,2280.0,432.0,3009733
4,Pacific,California,109008.0,20964.0,39461588


In [3]:
# List the column labels available in `data`.
data.columns

Index(['region', 'state', 'individuals', 'family_members', 'state_pop'], dtype='object')

### Setting column as index 

In [7]:
# Promote the 'region' column to the row index (returns a new DataFrame).
data_reg = data.set_index(keys='region')
data_reg.head(n=5)

Unnamed: 0_level_0,state,individuals,family_members,state_pop
region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
East South Central,Alabama,2570.0,864.0,4887681
Pacific,Alaska,1434.0,582.0,735139
Mountain,Arizona,7259.0,2606.0,7158024
West South Central,Arkansas,2280.0,432.0,3009733
Pacific,California,109008.0,20964.0,39461588


### Resetting index 

In [9]:
# Move 'region' out of the index and back into an ordinary column.
data_reg1 = data_reg.reset_index(drop=False)
data_reg1.head(n=5)

Unnamed: 0,region,state,individuals,family_members,state_pop
0,East South Central,Alabama,2570.0,864.0,4887681
1,Pacific,Alaska,1434.0,582.0,735139
2,Mountain,Arizona,7259.0,2606.0,7158024
3,West South Central,Arkansas,2280.0,432.0,3009733
4,Pacific,California,109008.0,20964.0,39461588


### Index & Subsetting 

In [11]:
# Keep only the rows whose region is Mountain or Pacific (boolean mask on a column).
data.loc[data['region'].isin(['Mountain', 'Pacific'])]

Unnamed: 0,region,state,individuals,family_members,state_pop
1,Pacific,Alaska,1434.0,582.0,735139
2,Mountain,Arizona,7259.0,2606.0,7158024
4,Pacific,California,109008.0,20964.0,39461588
5,Mountain,Colorado,7607.0,3250.0,5691287
11,Pacific,Hawaii,4131.0,2399.0,1420593
12,Mountain,Idaho,1297.0,715.0,1750536
26,Mountain,Montana,983.0,422.0,1060665
28,Mountain,Nevada,7058.0,486.0,3027341
31,Mountain,New Mexico,1949.0,602.0,2092741
37,Pacific,Oregon,11139.0,3337.0,4181886


In [15]:
# 'region' is the index of data_reg, so a label lookup returns every Pacific row.
data_reg.loc[['Pacific'], :]

Unnamed: 0_level_0,state,individuals,family_members,state_pop
region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Pacific,Alaska,1434.0,582.0,735139
Pacific,California,109008.0,20964.0,39461588
Pacific,Hawaii,4131.0,2399.0,1420593
Pacific,Oregon,11139.0,3337.0,4181886
Pacific,Washington,16424.0,5880.0,7523869


### Multi-level indexes 

In [18]:
# Build a two-level index: 'region' is the outer level, 'state' the inner level.
data_reg_state = data.set_index(keys=['region', 'state'])
data_reg_state.head(n=5)

Unnamed: 0_level_0,Unnamed: 1_level_0,individuals,family_members,state_pop
region,state,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
East South Central,Alabama,2570.0,864.0,4887681
Pacific,Alaska,1434.0,582.0,735139
Mountain,Arizona,7259.0,2606.0,7158024
West South Central,Arkansas,2280.0,432.0,3009733
Pacific,California,109008.0,20964.0,39461588


The inner level 'state' is nested within 'region'.

In [22]:
# Select one row by its (region, state) pair; wrapping the tuple in a list
# keeps the result a DataFrame.
data_reg_state.loc[[('Pacific','Alaska')]]

Unnamed: 0_level_0,Unnamed: 1_level_0,individuals,family_members,state_pop
region,state,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Pacific,Alaska,1434.0,582.0,735139


To subset using both index levels, pass `.loc` a tuple of (outer, inner) labels; wrapping the tuple in a list, as above, keeps the result a DataFrame.

### Sorting by index values

In [26]:
# Sort by the index levels in ascending order and preview the result.
data_reg_state.sort_index(ascending=True).head(n=5)

Unnamed: 0_level_0,Unnamed: 1_level_0,individuals,family_members,state_pop
region,state,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
East North Central,Illinois,6752.0,3891.0,12723071
East North Central,Indiana,3776.0,1482.0,6695497
East North Central,Michigan,5209.0,3142.0,9984072
East North Central,Ohio,6929.0,3320.0,11676341
East North Central,Wisconsin,2740.0,2167.0,5807406


By default, sorting is in ascending order, applied level by level from the outer index to the inner index.

In [33]:
# One sort direction per index level: 'region' ascending, 'state' descending.
data_reg_state.sort_index(ascending=[True, False]).head(n=5)

Unnamed: 0_level_0,Unnamed: 1_level_0,individuals,family_members,state_pop
region,state,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
East North Central,Wisconsin,2740.0,2167.0,5807406
East North Central,Ohio,6929.0,3320.0,11676341
East North Central,Michigan,5209.0,3142.0,9984072
East North Central,Indiana,3776.0,1482.0,6695497
East North Central,Illinois,6752.0,3891.0,12723071


## Slicing based on .loc and .iloc 

In [34]:
# Recap: the original frame with its default integer index.
data.head()

Unnamed: 0,region,state,individuals,family_members,state_pop
0,East South Central,Alabama,2570.0,864.0,4887681
1,Pacific,Alaska,1434.0,582.0,735139
2,Mountain,Arizona,7259.0,2606.0,7158024
3,West South Central,Arkansas,2280.0,432.0,3009733
4,Pacific,California,109008.0,20964.0,39461588


In [36]:
# Recap: the same data indexed by (region, state), still in its original row order.
data_reg_state.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,individuals,family_members,state_pop
region,state,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
East South Central,Alabama,2570.0,864.0,4887681
Pacific,Alaska,1434.0,582.0,735139
Mountain,Arizona,7259.0,2606.0,7158024
West South Central,Arkansas,2280.0,432.0,3009733
Pacific,California,109008.0,20964.0,39461588


In [38]:
# Keep a sorted copy of the multi-level frame; the label slices in the
# following cells are taken from this sorted version.
data_reg_state_srt = data_reg_state.sort_index()

In [41]:
# Label slice on the outer level ('region'); rows for both endpoint labels are returned.
data_reg_state_srt.loc['East North Central':'Mid-Atlantic']

Unnamed: 0_level_0,Unnamed: 1_level_0,individuals,family_members,state_pop
region,state,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
East North Central,Illinois,6752.0,3891.0,12723071
East North Central,Indiana,3776.0,1482.0,6695497
East North Central,Michigan,5209.0,3142.0,9984072
East North Central,Ohio,6929.0,3320.0,11676341
East North Central,Wisconsin,2740.0,2167.0,5807406
East South Central,Alabama,2570.0,864.0,4887681
East South Central,Kentucky,2735.0,953.0,4461153
East South Central,Mississippi,1024.0,328.0,2981020
East South Central,Tennessee,6139.0,1744.0,6771631
Mid-Atlantic,New Jersey,6048.0,3350.0,8886025


The final value is included in the slice, which is the opposite of list slicing.

In [42]:
# Deliberate counter-example: plain labels in a slice are matched against the
# outer level ('region'), not 'state'. No region name falls between 'Illinois'
# and 'Michigan', so the result is empty.
data_reg_state_srt.loc['Illinois':'Michigan']

Unnamed: 0_level_0,Unnamed: 1_level_0,individuals,family_members,state_pop
region,state,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1


Slicing with only inner-level labels returns an empty DataFrame, because plain labels in a slice are matched against the outer index level ('region').

In [43]:
# Slice on the inner level by giving full (region, state) tuples for both endpoints.
data_reg_state_srt.loc[
    slice(('East North Central', 'Illinois'), ('East North Central', 'Ohio'))
]

Unnamed: 0_level_0,Unnamed: 1_level_0,individuals,family_members,state_pop
region,state,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
East North Central,Illinois,6752.0,3891.0,12723071
East North Central,Indiana,3776.0,1482.0,6695497
East North Central,Michigan,5209.0,3142.0,9984072
East North Central,Ohio,6929.0,3320.0,11676341


In order to slice the inner index correctly, the first and last positions have to be in the form of tuples containing outer and inner index info.

### Slicing both index and columns 

In [44]:
# Rows are sliced by (region, state) tuples and columns by label; both ends are inclusive.
data_reg_state_srt.loc[
    slice(('East North Central', 'Illinois'), ('East North Central', 'Ohio')),
    slice('individuals', 'family_members')
]

Unnamed: 0_level_0,Unnamed: 1_level_0,individuals,family_members
region,state,Unnamed: 2_level_1,Unnamed: 3_level_1
East North Central,Illinois,6752.0,3891.0
East North Central,Indiana,3776.0,1482.0
East North Central,Michigan,5209.0,3142.0
East North Central,Ohio,6929.0,3320.0


### Slicing by row/column number .iloc 

In [46]:
# Positional slice: row positions 2-4 and column positions 2-3 (end positions excluded).
data.iloc[2:5,2:4]

Unnamed: 0,individuals,family_members
2,7259.0,2606.0
3,2280.0,432.0
4,109008.0,20964.0


Like lists but unlike .loc, the final values are not included in the slice.