# DataFrame data structure (panda)

![title](dataframe_desc.png)


In [4]:
import pandas as pd

In [44]:
purchase_1 = pd.Series({'Name': 'Chris',
                        'Item Purchased': 'Dog Food',
                        'Cost': 22.50})
purchase_2 = pd.Series({'Name': 'Kevyn',
                        'Item Purchased': 'Kitty Litter',
                        'Cost': 2.50})
purchase_3 = pd.Series({'Name': 'Vinod',
                        'Item Purchased': 'Bird Seed',
                        'Cost': 5.00})

df = pd.DataFrame([purchase_1, purchase_2, purchase_3], 
                  index=['Store 1', 'Store 1', 'Store 2'])

print (df)
df

         Cost Item Purchased   Name
Store 1  22.5       Dog Food  Chris
Store 1   2.5   Kitty Litter  Kevyn
Store 2   5.0      Bird Seed  Vinod


Unnamed: 0,Cost,Item Purchased,Name
Store 1,22.5,Dog Food,Chris
Store 1,2.5,Kitty Litter,Kevyn
Store 2,5.0,Bird Seed,Vinod


In [45]:
df.head()

Unnamed: 0,Cost,Item Purchased,Name
Store 1,22.5,Dog Food,Chris
Store 1,2.5,Kitty Litter,Kevyn
Store 2,5.0,Bird Seed,Vinod


In [46]:
df.loc['Store 2']

Cost                      5
Item Purchased    Bird Seed
Name                  Vinod
Name: Store 2, dtype: object

In [47]:
# 1 row returned ( Series )
type(df.loc['Store 2'])

pandas.core.series.Series

In [48]:
# 2 rows returned ( DataFrame )
type(df.loc['Store 1'])

pandas.core.frame.DataFrame

In [50]:
df

Unnamed: 0,Cost,Item Purchased,Name
Store 1,22.5,Dog Food,Chris
Store 1,2.5,Kitty Litter,Kevyn
Store 2,5.0,Bird Seed,Vinod


In [52]:
df.loc['Store 1', 'Name']

Store 1    Chris
Store 1    Kevyn
Name: Name, dtype: object

In [53]:
df['Cost']

Store 1    22.5
Store 1     2.5
Store 2     5.0
Name: Cost, dtype: float64

In [54]:
# chaining is not good. (returns copy of data and not a view)
df.loc['Store 1']['Name']

Store 1    Chris
Store 1    Kevyn
Name: Name, dtype: object

In [57]:
df.loc['Store 1', ['Name', 'Cost']]

Unnamed: 0,Name,Cost
Store 1,Chris,22.5
Store 1,Kevyn,2.5


In [59]:
df.loc[:, ['Name', 'Cost']]

Unnamed: 0,Name,Cost
Store 1,Chris,22.5
Store 1,Kevyn,2.5
Store 2,Vinod,5.0


In [58]:
df.T

Unnamed: 0,Store 1,Store 1.1,Store 2
Cost,22.5,2.5,5
Item Purchased,Dog Food,Kitty Litter,Bird Seed
Name,Chris,Kevyn,Vinod


In [60]:
df.drop('Store 1') # df unchanged

Unnamed: 0,Cost,Item Purchased,Name
Store 2,5,Bird Seed,Vinod


In [61]:
df

Unnamed: 0,Cost,Item Purchased,Name
Store 1,22.5,Dog Food,Chris
Store 1,2.5,Kitty Litter,Kevyn
Store 2,5.0,Bird Seed,Vinod


In [62]:
df2 = df.copy()
df2 = df2.drop('Store 1') # df2 changed ( new ref )
df2

Unnamed: 0,Cost,Item Purchased,Name
Store 2,5,Bird Seed,Vinod


In [65]:
del df2['Name']
df2

Unnamed: 0,Cost,Item Purchased
Store 2,5,Bird Seed


In [68]:
# bcast def value
df['Location'] = None
df

Unnamed: 0,Cost,Item Purchased,Name,Location
Store 1,22.5,Dog Food,Chris,
Store 1,2.5,Kitty Litter,Kevyn,
Store 2,5.0,Bird Seed,Vinod,


In [69]:
purchase_1 = pd.Series({'Name': 'Chris',
                        'Item Purchased': 'Dog Food',
                        'Cost': 22.50})
purchase_2 = pd.Series({'Name': 'Kevyn',
                        'Item Purchased': 'Kitty Litter',
                        'Cost': 2.50})
purchase_3 = pd.Series({'Name': 'Vinod',
                        'Item Purchased': 'Bird Seed',
                        'Cost': 5.00})

df = pd.DataFrame([purchase_1, purchase_2, purchase_3], index=['Store 1', 'Store 1', 'Store 2'])

df['Cost'] *= 0.8

In [70]:
df

Unnamed: 0,Cost,Item Purchased,Name
Store 1,18,Dog Food,Chris
Store 1,2,Kitty Litter,Kevyn
Store 2,4,Bird Seed,Vinod


## Indexing and Loading


In [72]:
costs = df['Cost']
costs

Store 1    18
Store 1     2
Store 2     4
Name: Cost, dtype: float64

In [73]:
costs += 2
costs

Store 1    20
Store 1     4
Store 2     6
Name: Cost, dtype: float64

In [74]:
df

Unnamed: 0,Cost,Item Purchased,Name
Store 1,20,Dog Food,Chris
Store 1,4,Kitty Litter,Kevyn
Store 2,6,Bird Seed,Vinod


## Jupyter shell integration

In [8]:
!ls *csv
!whoami

olympics.csv
ds


## DataFrame from csv with formating

In [9]:
! cat olympics.csv | head -n 10 # 'o' to fold/collapse

0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
,№ Summer,01 !,02 !,03 !,Total,№ Winter,01 !,02 !,03 !,Total,№ Games,01 !,02 !,03 !,Combined total
Afghanistan (AFG),13,0,0,2,2,0,0,0,0,0,13,0,0,2,2
Algeria (ALG),12,5,2,8,15,3,0,0,0,0,15,5,2,8,15
Argentina (ARG),23,18,24,28,70,18,0,0,0,0,41,18,24,28,70
Armenia (ARM),5,1,2,9,12,6,0,0,0,0,11,1,2,9,12
Australasia (ANZ) [ANZ],2,3,4,5,12,0,0,0,0,0,2,3,4,5,12
Australia (AUS) [AUS] [Z],25,139,152,177,468,18,5,3,4,12,43,144,155,181,480
Austria (AUT),26,18,33,35,86,22,59,78,81,218,48,77,111,116,304
Azerbaijan (AZE),5,6,5,15,26,5,0,0,0,0,10,6,5,15,26


In [82]:
df = pd.read_csv('olympics.csv')
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
0,,№ Summer,01 !,02 !,03 !,Total,№ Winter,01 !,02 !,03 !,Total,№ Games,01 !,02 !,03 !,Combined total
1,Afghanistan (AFG),13,0,0,2,2,0,0,0,0,0,13,0,0,2,2
2,Algeria (ALG),12,5,2,8,15,3,0,0,0,0,15,5,2,8,15
3,Argentina (ARG),23,18,24,28,70,18,0,0,0,0,41,18,24,28,70
4,Armenia (ARM),5,1,2,9,12,6,0,0,0,0,11,1,2,9,12


In [87]:
df = pd.read_csv('olympics.csv', index_col=0, skiprows=1)
df.head()


Unnamed: 0,№ Summer,01 !,02 !,03 !,Total,№ Winter,01 !.1,02 !.1,03 !.1,Total.1,№ Games,01 !.2,02 !.2,03 !.2,Combined total
Afghanistan (AFG),13,0,0,2,2,0,0,0,0,0,13,0,0,2,2
Algeria (ALG),12,5,2,8,15,3,0,0,0,0,15,5,2,8,15
Argentina (ARG),23,18,24,28,70,18,0,0,0,0,41,18,24,28,70
Armenia (ARM),5,1,2,9,12,6,0,0,0,0,11,1,2,9,12
Australasia (ANZ) [ANZ],2,3,4,5,12,0,0,0,0,0,2,3,4,5,12


In [84]:
df.columns


Index(['№ Summer', '01 !', '02 !', '03 !', 'Total', '№ Winter', '01 !.1',
       '02 !.1', '03 !.1', 'Total.1', '№ Games', '01 !.2', '02 !.2', '03 !.2',
       'Combined total'],
      dtype='object')

In [77]:
! cat olympics.csv | head -n 5


0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
,№ Summer,01 !,02 !,03 !,Total,№ Winter,01 !,02 !,03 !,Total,№ Games,01 !,02 !,03 !,Combined total
Afghanistan (AFG),13,0,0,2,2,0,0,0,0,0,13,0,0,2,2
Algeria (ALG),12,5,2,8,15,3,0,0,0,0,15,5,2,8,15
Argentina (ARG),23,18,24,28,70,18,0,0,0,0,41,18,24,28,70


In [88]:
# reformat col names
for col in df.columns:
    if col[:2] == '01':
        df.rename(columns={col:'Gold'+col[4:]}, inplace=True)
    if col[:2] == '02':
        df.rename(columns={col:'Silver'+col[4:]}, inplace=True)
    if col[:2] == '03':
        df.rename(columns={col:'Bronze'+col[4:]}, inplace=True)
    if col[:1] == '№':
        df.rename(columns={col:'#'+col[1:]}, inplace=True)
df.head()

Unnamed: 0,# Summer,Gold,Silver,Bronze,Total,# Winter,Gold.1,Silver.1,Bronze.1,Total.1,# Games,Gold.2,Silver.2,Bronze.2,Combined total
Afghanistan (AFG),13,0,0,2,2,0,0,0,0,0,13,0,0,2,2
Algeria (ALG),12,5,2,8,15,3,0,0,0,0,15,5,2,8,15
Argentina (ARG),23,18,24,28,70,18,0,0,0,0,41,18,24,28,70
Armenia (ARM),5,1,2,9,12,6,0,0,0,0,11,1,2,9,12
Australasia (ANZ) [ANZ],2,3,4,5,12,0,0,0,0,0,2,3,4,5,12


## Boolean masking


![title](bool_mask.png)

## Querying a DataFrame

In [89]:
# bcast a comparison oper (gt) to all cells
# returns a bool Series
df['Gold'] > 0


Afghanistan (AFG)                               False
Algeria (ALG)                                    True
Argentina (ARG)                                  True
Armenia (ARM)                                    True
Australasia (ANZ) [ANZ]                          True
Australia (AUS) [AUS] [Z]                        True
Austria (AUT)                                    True
Azerbaijan (AZE)                                 True
Bahamas (BAH)                                    True
Bahrain (BRN)                                   False
Barbados (BAR) [BAR]                            False
Belarus (BLR)                                    True
Belgium (BEL)                                    True
Bermuda (BER)                                   False
Bohemia (BOH) [BOH] [Z]                         False
Botswana (BOT)                                  False
Brazil (BRA)                                     True
British West Indies (BWI) [BWI]                 False
Bulgaria (BUL) [H]          

In [90]:
# where returns a new DataFrame
only_gold = df.where(df['Gold'] > 0)
only_gold.head()

Unnamed: 0,# Summer,Gold,Silver,Bronze,Total,# Winter,Gold.1,Silver.1,Bronze.1,Total.1,# Games,Gold.2,Silver.2,Bronze.2,Combined total
Afghanistan (AFG),,,,,,,,,,,,,,,
Algeria (ALG),12.0,5.0,2.0,8.0,15.0,3.0,0.0,0.0,0.0,0.0,15.0,5.0,2.0,8.0,15.0
Argentina (ARG),23.0,18.0,24.0,28.0,70.0,18.0,0.0,0.0,0.0,0.0,41.0,18.0,24.0,28.0,70.0
Armenia (ARM),5.0,1.0,2.0,9.0,12.0,6.0,0.0,0.0,0.0,0.0,11.0,1.0,2.0,9.0,12.0
Australasia (ANZ) [ANZ],2.0,3.0,4.0,5.0,12.0,0.0,0.0,0.0,0.0,0.0,2.0,3.0,4.0,5.0,12.0


In [33]:
only_gold['Gold'].count()

100

In [34]:
df['Gold'].count()

147

In [74]:
# remove NaN values
only_gold = only_gold.dropna()
only_gold.head()

Unnamed: 0,№ Summer,Gold,Silver,Bronze,Total,№ Winter,Gold.1,Silver.1,Bronze.1,Total.1,№ Games,Gold.2,Silver.2,Bronze.2,Combined total
Algeria (ALG),12,5,2,8,15,3,0,0,0,0,15,5,2,8,15
Argentina (ARG),23,18,24,28,70,18,0,0,0,0,41,18,24,28,70
Armenia (ARM),5,1,2,9,12,6,0,0,0,0,11,1,2,9,12
Australasia (ANZ) [ANZ],2,3,4,5,12,0,0,0,0,0,2,3,4,5,12
Australia (AUS) [AUS] [Z],25,139,152,177,468,18,5,3,4,12,43,144,155,181,480


## Filtering Series or DataFrame

In [45]:
# returns a Series
type(df['Gold'] > 0)

pandas.core.series.Series

In [47]:
# returns a DataFrame
type(df[df['Gold'] > 0])

pandas.core.frame.DataFrame

In [52]:
len(df[(df['Gold'] > 0) | df['Gold.1'] > 0])

101

In [61]:
# DataFrame of countries that have Gold only in winter-games
# but no gold in summar games :

df[(df['Gold.1'] > 0) & (df['Gold'] == 0)]

Unnamed: 0,№ Summer,Gold,Silver,Bronze,Total,№ Winter,Gold.1,Silver.1,Bronze.1,Total.1,№ Games,Gold.2,Silver.2,Bronze.2,Combined total
Liechtenstein (LIE),16,0,0,0,0,18,2,2,5,9,34,2,2,5,9


In [68]:
# Write a query to return all of the names 
# of people who bought products worth more than $3.00.

purchase_1 = pd.Series({'Name': 'Chris',
                        'Item Purchased': 'Dog Food',
                        'Cost': 22.50})
purchase_2 = pd.Series({'Name': 'Kevyn',
                        'Item Purchased': 'Kitty Litter',
                        'Cost': 2.50})
purchase_3 = pd.Series({'Name': 'Vinod',
                        'Item Purchased': 'Bird Seed',
                        'Cost': 5.00})

df = pd.DataFrame([purchase_1, purchase_2, purchase_3], index=['Store 1', 'Store 1', 'Store 2'])


# chen
df[df['Cost'] > 3]['Name']

# coursera ( *** is Transpose ??? *** )
df['Name'][df['Cost']>3]

Store 1    Chris
Store 2    Vinod
Name: Name, dtype: object

## Indexing DataFrames

In [99]:
df.head()

Unnamed: 0_level_0,# Summer,Silver,Bronze,Total,# Winter,Gold.1,Silver.1,Bronze.1,Total.1,# Games,Gold.2,Silver.2,Bronze.2,Combined total,Countries
Gold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
0,13,0,2,2,0,0,0,0,0,13,0,0,2,2,0
5,12,2,8,15,3,0,0,0,0,15,5,2,8,15,5
18,23,24,28,70,18,0,0,0,0,41,18,24,28,70,18
1,5,2,9,12,6,0,0,0,0,11,1,2,9,12,1
3,2,4,5,12,0,0,0,0,0,2,3,4,5,12,3


In [100]:
# indexing is destructive, first save current index as col
df['Countries'] = df.index
#df = df.set_index('Gold')
df.head()

Unnamed: 0_level_0,# Summer,Silver,Bronze,Total,# Winter,Gold.1,Silver.1,Bronze.1,Total.1,# Games,Gold.2,Silver.2,Bronze.2,Combined total,Countries
Gold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
0,13,0,2,2,0,0,0,0,0,13,0,0,2,2,0
5,12,2,8,15,3,0,0,0,0,15,5,2,8,15,5
18,23,24,28,70,18,0,0,0,0,41,18,24,28,70,18
1,5,2,9,12,6,0,0,0,0,11,1,2,9,12,1
3,2,4,5,12,0,0,0,0,0,2,3,4,5,12,3


In [101]:
# reset autosave the index as col and reindex
df = df.reset_index()
df.head()

Unnamed: 0,Gold,# Summer,Silver,Bronze,Total,# Winter,Gold.1,Silver.1,Bronze.1,Total.1,# Games,Gold.2,Silver.2,Bronze.2,Combined total,Countries
0,0,13,0,2,2,0,0,0,0,0,13,0,0,2,2,0
1,5,12,2,8,15,3,0,0,0,0,15,5,2,8,15,5
2,18,23,24,28,70,18,0,0,0,0,41,18,24,28,70,18
3,1,5,2,9,12,6,0,0,0,0,11,1,2,9,12,1
4,3,2,4,5,12,0,0,0,0,0,2,3,4,5,12,3


In [18]:
df = pd.read_csv('census.csv')
df.head()

Unnamed: 0,SUMLEV,REGION,DIVISION,STATE,COUNTY,STNAME,CTYNAME,CENSUS2010POP,ESTIMATESBASE2010,POPESTIMATE2010,...,RDOMESTICMIG2011,RDOMESTICMIG2012,RDOMESTICMIG2013,RDOMESTICMIG2014,RDOMESTICMIG2015,RNETMIG2011,RNETMIG2012,RNETMIG2013,RNETMIG2014,RNETMIG2015
0,40,3,6,1,0,Alabama,Alabama,4779736,4780127,4785161,...,0.002295,-0.193196,0.381066,0.582002,-0.467369,1.030015,0.826644,1.383282,1.724718,0.712594
1,50,3,6,1,1,Alabama,Autauga County,54571,54571,54660,...,7.242091,-2.915927,-3.012349,2.265971,-2.530799,7.606016,-2.626146,-2.722002,2.59227,-2.187333
2,50,3,6,1,3,Alabama,Baldwin County,182265,182265,183193,...,14.83296,17.647293,21.845705,19.243287,17.197872,15.844176,18.559627,22.727626,20.317142,18.293499
3,50,3,6,1,5,Alabama,Barbour County,27457,27457,27341,...,-4.728132,-2.50069,-7.056824,-3.904217,-10.543299,-4.874741,-2.758113,-7.167664,-3.978583,-10.543299
4,50,3,6,1,7,Alabama,Bibb County,22915,22919,22861,...,-5.527043,-5.068871,-6.201001,-0.177537,0.177258,-5.088389,-4.363636,-5.403729,0.754533,1.107861


In [19]:
df['SUMLEV'].unique()

array([40, 50])

In [12]:
# filter inner df from a df, then extracting a Series
type(df[df['SUMLEV'] == 50]['STATE'])

pandas.core.series.Series

In [48]:
df = pd.read_csv('census.csv')
df = df[df['SUMLEV'] == 50]
df.head()

Unnamed: 0,SUMLEV,REGION,DIVISION,STATE,COUNTY,STNAME,CTYNAME,CENSUS2010POP,ESTIMATESBASE2010,POPESTIMATE2010,...,RDOMESTICMIG2011,RDOMESTICMIG2012,RDOMESTICMIG2013,RDOMESTICMIG2014,RDOMESTICMIG2015,RNETMIG2011,RNETMIG2012,RNETMIG2013,RNETMIG2014,RNETMIG2015
1,50,3,6,1,1,Alabama,Autauga County,54571,54571,54660,...,7.242091,-2.915927,-3.012349,2.265971,-2.530799,7.606016,-2.626146,-2.722002,2.59227,-2.187333
2,50,3,6,1,3,Alabama,Baldwin County,182265,182265,183193,...,14.83296,17.647293,21.845705,19.243287,17.197872,15.844176,18.559627,22.727626,20.317142,18.293499
3,50,3,6,1,5,Alabama,Barbour County,27457,27457,27341,...,-4.728132,-2.50069,-7.056824,-3.904217,-10.543299,-4.874741,-2.758113,-7.167664,-3.978583,-10.543299
4,50,3,6,1,7,Alabama,Bibb County,22915,22919,22861,...,-5.527043,-5.068871,-6.201001,-0.177537,0.177258,-5.088389,-4.363636,-5.403729,0.754533,1.107861
5,50,3,6,1,9,Alabama,Blount County,57322,57322,57373,...,1.807375,-1.177622,-1.748766,-2.062535,-1.36997,1.859511,-0.84858,-1.402476,-1.577232,-0.884411


In [42]:
# filter onluy list of cols
# list(df.columns)
columns_to_keep = [
 'STNAME', 
 'CTYNAME',
 'POPESTIMATE2010',
 'POPESTIMATE2011',
 'POPESTIMATE2012',
 'POPESTIMATE2013',
 'POPESTIMATE2014',
 'POPESTIMATE2015',
 'BIRTHS2010',
 'BIRTHS2011',
 'BIRTHS2012',
 'BIRTHS2013',
 'BIRTHS2014',
 'BIRTHS2015']

In [49]:
df = df[columns_to_keep]
df.head()


Unnamed: 0,STNAME,CTYNAME,POPESTIMATE2010,POPESTIMATE2011,POPESTIMATE2012,POPESTIMATE2013,POPESTIMATE2014,POPESTIMATE2015,BIRTHS2010,BIRTHS2011,BIRTHS2012,BIRTHS2013,BIRTHS2014,BIRTHS2015
1,Alabama,Autauga County,54660,55253,55175,55038,55290,55347,151,636,615,574,623,600
2,Alabama,Baldwin County,183193,186659,190396,195126,199713,203709,517,2187,2092,2160,2186,2240
3,Alabama,Barbour County,27341,27226,27159,26973,26815,26489,70,335,300,283,260,269
4,Alabama,Bibb County,22861,22733,22642,22512,22549,22583,44,266,245,259,247,253
5,Alabama,Blount County,57373,57711,57776,57734,57658,57673,183,744,710,646,618,603


## Dual index


In [50]:
df = df.set_index(['STNAME', 'CTYNAME'])
df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,POPESTIMATE2010,POPESTIMATE2011,POPESTIMATE2012,POPESTIMATE2013,POPESTIMATE2014,POPESTIMATE2015,BIRTHS2010,BIRTHS2011,BIRTHS2012,BIRTHS2013,BIRTHS2014,BIRTHS2015
STNAME,CTYNAME,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
Alabama,Autauga County,54660,55253,55175,55038,55290,55347,151,636,615,574,623,600
Alabama,Baldwin County,183193,186659,190396,195126,199713,203709,517,2187,2092,2160,2186,2240
Alabama,Barbour County,27341,27226,27159,26973,26815,26489,70,335,300,283,260,269
Alabama,Bibb County,22861,22733,22642,22512,22549,22583,44,266,245,259,247,253
Alabama,Blount County,57373,57711,57776,57734,57658,57673,183,744,710,646,618,603


## Missing values


In [68]:
cdf = df.copy()
cdf.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,POPESTIMATE2010,POPESTIMATE2011,POPESTIMATE2012,POPESTIMATE2013,POPESTIMATE2014,POPESTIMATE2015,BIRTHS2010,BIRTHS2011,BIRTHS2012,BIRTHS2013,BIRTHS2014,BIRTHS2015
STNAME,CTYNAME,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
Alabama,Autauga County,54660,55253,55175,55038,55290,55347,151,636,615,574,623,600
Alabama,Baldwin County,183193,186659,190396,195126,199713,203709,517,2187,2092,2160,2186,2240
Alabama,Barbour County,27341,27226,27159,26973,26815,26489,70,335,300,283,260,269
Alabama,Bibb County,22861,22733,22642,22512,22549,22583,44,266,245,259,247,253
Alabama,Blount County,57373,57711,57776,57734,57658,57673,183,744,710,646,618,603


In [69]:
# extracting a row (pd.Series) using dual index 
cdf.loc['Alabama', 'Barbour County']

POPESTIMATE2010    27341
POPESTIMATE2011    27226
POPESTIMATE2012    27159
POPESTIMATE2013    26973
POPESTIMATE2014    26815
POPESTIMATE2015    26489
BIRTHS2010            70
BIRTHS2011           335
BIRTHS2012           300
BIRTHS2013           283
BIRTHS2014           260
BIRTHS2015           269
Name: (Alabama, Barbour County), dtype: int64

In [70]:
# extracting a DataFrame using dual index (filtering subset)
# filtering using list of tuples (because indexing is 2d)

cdf.head([('Alabama', 'Barbour County'), ('Alabama', 'Bibb County')])

TypeError: int() argument must be a string, a bytes-like object or a number, not 'list'