## 16. Filtering and Indexing

In [1]:
# load pandas
import pandas as pd
import numpy as np

url = 'https://github.com/mattharrison/datasets/raw/master/data/siena2018-pres.csv'
df = pd.read_csv(url, index_col=0)

def tweak_siena_pres(df):
    def int64_to_uint8(df_):
        cols = df_.select_dtypes('int64')
        return (df_
                .astype({col:'uint8' for col in cols}))
    
    return (df
            .rename(columns={'Seq.':'Seq'})
            .rename(columns={k:v.replace(' ', '_') for k,v in
                             {
                                 'BG':'Background',
                                 'PL': 'Party leadership', 
                                 'CAb': 'Communication Ability',
                                 'RC': 'Relations with Congress',
                                 'CAp': 'Court appointments',
                                 'HE': 'Handling of economy',
                                 'L': 'Luck',
                                 'AC': 'Ability to compromise',
                                 'WR': 'Willing to take risks',
                                 'EAp': 'Executive appointments',
                                 'OA': 'Overall ability',
                                 'Im': 'Imagination',
                                 'DA': 'Domestic accomplishments',
                                 'Int': 'Integrity',
                                 'EAb': 'Executive ability',
                                 'FPA': 'Foreign policy accomplishments',
                                 'LA': 'Leadership ability',
                                 'IQ': 'Intelligence',
                                 'AM': 'Avoid crucial mistakes',
                                 'EV': "Experts' view",
                                 'O': 'Overall'
                                }.items()})
                                .astype({'Party': 'category'})
                                .pipe(int64_to_uint8)
                                .assign(
                                    Average_rank = lambda df_ : (df_.select_dtypes('uint8')
                                                                 .sum(axis=1).rank(method='dense').astype('uint8')),
                                    Quartile = lambda df_: pd.qcut(df_.Average_rank, 4, labels='1st 2nd 3rd 4th'.split())
                                )
            )

pres = tweak_siena_pres(df)
pres.head()

Unnamed: 0,Seq,President,Party,Bg,Imagination,Integrity,Intelligence,Luck,Willing_to_take_risks,Ability_to_compromise,...,Court_appointments,Handling_of_economy,Executive_appointments,Domestic_accomplishments,Foreign_policy_accomplishments,Avoid_crucial_mistakes,Experts'_view,Overall,Average_rank,Quartile
1,1,George Washington,Independent,7,7,1,10,1,6,2,...,1,1,1,2,2,1,2,1,1,1st
2,2,John Adams,Federalist,3,13,4,4,24,14,31,...,4,13,15,19,13,16,10,14,13,2nd
3,3,Thomas Jefferson,Democratic-Republican,2,2,14,1,8,5,14,...,7,20,4,6,9,7,5,5,5,1st
4,4,James Madison,Democratic-Republican,4,6,7,3,16,15,6,...,6,14,7,11,19,11,8,7,7,1st
5,5,James Monroe,Democratic-Republican,9,14,11,18,6,16,7,...,11,9,9,10,5,6,9,8,8,1st


In [2]:
# renaming an index
def name_to_initial(val):
    names = val.split()
    return ' '.join([f'{names[0][0]}.', *names[1:]])

In [4]:
(pres
 .set_index('President')
 .rename(name_to_initial))[:10]

Unnamed: 0_level_0,Seq,Party,Bg,Imagination,Integrity,Intelligence,Luck,Willing_to_take_risks,Ability_to_compromise,Executive_ability,...,Court_appointments,Handling_of_economy,Executive_appointments,Domestic_accomplishments,Foreign_policy_accomplishments,Avoid_crucial_mistakes,Experts'_view,Overall,Average_rank,Quartile
President,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
G. Washington,1,Independent,7,7,1,10,1,6,2,2,...,1,1,1,2,2,1,2,1,1,1st
J. Adams,2,Federalist,3,13,4,4,24,14,31,21,...,4,13,15,19,13,16,10,14,13,2nd
T. Jefferson,3,Democratic-Republican,2,2,14,1,8,5,14,6,...,7,20,4,6,9,7,5,5,5,1st
J. Madison,4,Democratic-Republican,4,6,7,3,16,15,6,13,...,6,14,7,11,19,11,8,7,7,1st
J. Monroe,5,Democratic-Republican,9,14,11,18,6,16,7,10,...,11,9,9,10,5,6,9,8,8,1st
J. Quincy Adams,6,Democratic-Republican,1,9,6,5,29,19,24,22,...,15,17,18,21,15,14,18,18,18,2nd
A. Jackson,7,Democratic,37,15,29,28,4,4,38,11,...,30,25,25,17,23,20,19,19,19,2nd
M. Van Buren,8,Democratic,23,22,27,25,34,28,20,28,...,25,31,26,29,27,24,28,25,25,3rd
W. Henry Harrison,9,Whig,22,38,28,37,44,32,41,38,...,42,41,40,42,44,37,39,39,38,4th
J. Tyler,10,Independent,34,33,35,34,22,26,37,36,...,38,34,36,36,26,32,36,37,37,4th


In [5]:
# reset index
pres.reset_index()[:10]

Unnamed: 0,index,Seq,President,Party,Bg,Imagination,Integrity,Intelligence,Luck,Willing_to_take_risks,...,Court_appointments,Handling_of_economy,Executive_appointments,Domestic_accomplishments,Foreign_policy_accomplishments,Avoid_crucial_mistakes,Experts'_view,Overall,Average_rank,Quartile
0,1,1,George Washington,Independent,7,7,1,10,1,6,...,1,1,1,2,2,1,2,1,1,1st
1,2,2,John Adams,Federalist,3,13,4,4,24,14,...,4,13,15,19,13,16,10,14,13,2nd
2,3,3,Thomas Jefferson,Democratic-Republican,2,2,14,1,8,5,...,7,20,4,6,9,7,5,5,5,1st
3,4,4,James Madison,Democratic-Republican,4,6,7,3,16,15,...,6,14,7,11,19,11,8,7,7,1st
4,5,5,James Monroe,Democratic-Republican,9,14,11,18,6,16,...,11,9,9,10,5,6,9,8,8,1st
5,6,6,John Quincy Adams,Democratic-Republican,1,9,6,5,29,19,...,15,17,18,21,15,14,18,18,18,2nd
6,7,7,Andrew Jackson,Democratic,37,15,29,28,4,4,...,30,25,25,17,23,20,19,19,19,2nd
7,8,8,Martin Van Buren,Democratic,23,22,27,25,34,28,...,25,31,26,29,27,24,28,25,25,3rd
8,9,9,William Henry Harrison,Whig,22,38,28,37,44,32,...,42,41,40,42,44,37,39,39,38,4th
9,10,10,John Tyler,Independent,34,33,35,34,22,26,...,38,34,36,36,26,32,36,37,37,4th


In [6]:
# index by position
pres.iloc[:10]

Unnamed: 0,Seq,President,Party,Bg,Imagination,Integrity,Intelligence,Luck,Willing_to_take_risks,Ability_to_compromise,...,Court_appointments,Handling_of_economy,Executive_appointments,Domestic_accomplishments,Foreign_policy_accomplishments,Avoid_crucial_mistakes,Experts'_view,Overall,Average_rank,Quartile
1,1,George Washington,Independent,7,7,1,10,1,6,2,...,1,1,1,2,2,1,2,1,1,1st
2,2,John Adams,Federalist,3,13,4,4,24,14,31,...,4,13,15,19,13,16,10,14,13,2nd
3,3,Thomas Jefferson,Democratic-Republican,2,2,14,1,8,5,14,...,7,20,4,6,9,7,5,5,5,1st
4,4,James Madison,Democratic-Republican,4,6,7,3,16,15,6,...,6,14,7,11,19,11,8,7,7,1st
5,5,James Monroe,Democratic-Republican,9,14,11,18,6,16,7,...,11,9,9,10,5,6,9,8,8,1st
6,6,John Quincy Adams,Democratic-Republican,1,9,6,5,29,19,24,...,15,17,18,21,15,14,18,18,18,2nd
7,7,Andrew Jackson,Democratic,37,15,29,28,4,4,38,...,30,25,25,17,23,20,19,19,19,2nd
8,8,Martin Van Buren,Democratic,23,22,27,25,34,28,20,...,25,31,26,29,27,24,28,25,25,3rd
9,9,William Henry Harrison,Whig,22,38,28,37,44,32,41,...,42,41,40,42,44,37,39,39,38,4th
10,10,John Tyler,Independent,34,33,35,34,22,26,37,...,38,34,36,36,26,32,36,37,37,4th


In [7]:
pres.iloc[1]

Seq                                        2
President                         John Adams
Party                             Federalist
Bg                                         3
Imagination                               13
Integrity                                  4
Intelligence                               4
Luck                                      24
Willing_to_take_risks                     14
Ability_to_compromise                     31
Executive_ability                         21
Leadership_ability                        21
Communication_Ability                     13
Overall_ability                            8
Party_leadership                          28
Relations_with_Congress                   17
Court_appointments                         4
Handling_of_economy                       13
Executive_appointments                    15
Domestic_accomplishments                  19
Foreign_policy_accomplishments            13
Avoid_crucial_mistakes                    16
Experts'_v

In [9]:
pres.iloc[[1]] # double brackets return dataframe

Unnamed: 0,Seq,President,Party,Bg,Imagination,Integrity,Intelligence,Luck,Willing_to_take_risks,Ability_to_compromise,...,Court_appointments,Handling_of_economy,Executive_appointments,Domestic_accomplishments,Foreign_policy_accomplishments,Avoid_crucial_mistakes,Experts'_view,Overall,Average_rank,Quartile
2,2,John Adams,Federalist,3,13,4,4,24,14,31,...,4,13,15,19,13,16,10,14,13,2nd


In [13]:
pres.loc[1:5]

Unnamed: 0,Seq,President,Party,Bg,Imagination,Integrity,Intelligence,Luck,Willing_to_take_risks,Ability_to_compromise,...,Court_appointments,Handling_of_economy,Executive_appointments,Domestic_accomplishments,Foreign_policy_accomplishments,Avoid_crucial_mistakes,Experts'_view,Overall,Average_rank,Quartile
1,1,George Washington,Independent,7,7,1,10,1,6,2,...,1,1,1,2,2,1,2,1,1,1st
2,2,John Adams,Federalist,3,13,4,4,24,14,31,...,4,13,15,19,13,16,10,14,13,2nd
3,3,Thomas Jefferson,Democratic-Republican,2,2,14,1,8,5,14,...,7,20,4,6,9,7,5,5,5,1st
4,4,James Madison,Democratic-Republican,4,6,7,3,16,15,6,...,6,14,7,11,19,11,8,7,7,1st
5,5,James Monroe,Democratic-Republican,9,14,11,18,6,16,7,...,11,9,9,10,5,6,9,8,8,1st


In [16]:
# slice by using partial strings on sorted index, slice rows and columns
(pres
 .set_index('President')
 .sort_index()
 .loc['C': 'Thomas Jefferson', 'Party':'Integrity'])

Unnamed: 0_level_0,Party,Bg,Imagination,Integrity
President,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Calvin Coolidge,Republican,32,36,17
Chester A. Arthur,Republican,41,31,37
Donald Trump,Republican,43,40,44
Dwight D. Eisenhower,Republican,11,18,5
Franklin D. Roosevelt,Democratic,6,3,16
Franklin Pierce,Democratic,38,39,38
George H. W. Bush,Republican,10,27,18
George W. Bush,Republican,17,29,33
George Washington,Independent,7,7,1
Gerald Ford,Republican,18,32,10
