# Pandas DataFrames: Reading, Indexing, and Querying

In [1]:
import pandas as pd
import numpy as np
# The dataframe data structure is the HEART OF THE PANDAS LIBRARY
# Dataframes are simply 2-axis, lebeled arrays with a special class-type that allows us to use pandas methods on them.

#                   Animals   Owners    <----- .columns
# .iloc, .loc -> 0   Dog      Chris
#                1   Cat      Kevyn <--- Values
#                2   Bear     Bob

# NOTE: we know .iloc and .loc are attributes. So is .columns

# QUESTION 1: HOW WOULD YOU PULL ROW 2?
# pet_df.iloc[1]

# QUESTION 2: HOW YOU YOU PULL THE OWNER'S COLUMN?
# pet_df["Owners"]

# QUESTION 3: FIND OUT WHAT ANIMAL IS ON THE THIRD RECORD
# pet_df.iloc[2]["Animals"]

In [11]:
# Transation 1: SERIES MADE FROM A DICTIONARY 
# Remember, dictionaries are Key:value pairs. THe Keys in the dicitonary are assigned as label indexes when converted to a Series

transaction_1 = pd.Series({'Name': 'Chris',
                          'Item Purchased': 'Dog Food',
                          'Cost': 22.50})

#Transaction 2: A series made from a dictionary
transaction_2 = pd.Series({'Name': 'Jeremiah',
                'Item Purchased': 'Cucumber',
                'Cost': 0.75})

# Transaction 3: A series made from a dictionary
transaction_3 = pd.Series({'Name': 'Isaiah',
                          'Item Purchased': 'Coffee',
                          'Cost': 2.30})

In [7]:
print(transaction_1)

Name                 Chris
Item Purchased    Dog Food
Cost                  22.5
dtype: object


In [8]:
print(transaction_2)

{'Name': 'Jeremiah', 'Item Purchased': 'Cucumber', 'Cost': 0.75}


In [9]:
print(transaction_3)

Name              Isaiah
Item Purchased    Coffee
Cost                 2.3
dtype: object


In [12]:
# First let's create the dataframe without row index labels
dfv1 = pd.DataFrame([transaction_1,transaction_2,transaction_3])
dfv1

Unnamed: 0,Name,Item Purchased,Cost
0,Chris,Dog Food,22.5
1,Jeremiah,Cucumber,0.75
2,Isaiah,Coffee,2.3


In [13]:
# We could do the same if the we combined 3 dictionaries instead of 3 series
# First let's create the dataframe without row index labels

# Transation 1: A dictionary
transaction_1 = {'Name': 'Chris',
                          'Item Purchased': 'Dog Food',
                          'Cost': 22.50}

#Transaction 2: A dictionary
transaction_2 = {'Name': 'Jeremiah',
                'Item Purchased': 'Cucumber',
                'Cost': 0.75}

# Transaction 3: v
transaction_3 = {'Name': 'Isaiah',
                          'Item Purchased': 'Coffee',
                          'Cost': 2.30}
dfv1 = pd.DataFrame([transaction_1,transaction_2,transaction_3])
dfv1

Unnamed: 0,Cost,Item Purchased,Name
0,22.5,Dog Food,Chris
1,0.75,Cucumber,Jeremiah
2,2.3,Coffee,Isaiah


In [14]:
# Now let's try add indexes to our rows
dfv = pd.DataFrame([transaction_1,transaction_2,transaction_3], index = ['Store 1', 'Store 2', 'Store 3'])
dfv

Unnamed: 0,Cost,Item Purchased,Name
Store 1,22.5,Dog Food,Chris
Store 2,0.75,Cucumber,Jeremiah
Store 3,2.3,Coffee,Isaiah


In [19]:
# What if we have a data value of a different type from the rest of the column (e.g. Cost: 'Hello!') 
# What if we were missing a value for one column in one of our dicitonaries and had a value no one else had?
# Transation 1: A dictionary
transaction_1 = {'Name': 'Chris',
                          'Item Purchased': 'Dog Food',
                          'Cost': 22.50}

#Transaction 2: A dictionary
transaction_2 = {'Name': 'Jeremiah',
                'Item Purchased': 'Cucumber',
                'Cost': 0.75}

# Transaction 3: A dictionary
transaction_3 = {'Name': 'Isaiah',
                          'Item Purchased': 'Coffee',
                          'Cost': 2.30}
# Transaction 4: A dictionary
transaction_4 = pd.Series({'Name': 'Hermione', 
                           'Hair Color':'Brown', 
                           'Cost': 'Hello!'})

dfv2 = pd.DataFrame([transaction_1,transaction_2,transaction_3,transaction_4], index = ['Store 1', 'Store 2', 'Store 3','Store 4'])
dfv2

Unnamed: 0,Cost,Hair Color,Item Purchased,Name
Store 1,22.5,,Dog Food,Chris
Store 2,0.75,,Cucumber,Jeremiah
Store 3,2.3,,Coffee,Isaiah
Store 4,Hello!,Brown,,Hermione


## Querying DataFrames

In [21]:
# The square brackets only reference COLUMN NAMES
dfv2['Cost']

Store 1      22.5
Store 2      0.75
Store 3       2.3
Store 4    Hello!
Name: Cost, dtype: object

In [22]:
#Use loc and iloc to get rows
dfv2.iloc[1]

Cost                  0.75
Hair Color             NaN
Item Purchased    Cucumber
Name              Jeremiah
Name: Store 2, dtype: object

In [23]:
#Use loc and iloc to get rows
dfv2.loc['Store 1']

Cost                  22.5
Hair Color             NaN
Item Purchased    Dog Food
Name                 Chris
Name: Store 1, dtype: object

In [25]:
#The result of [], .loc[] and .iloc[] is a SERIES
print(type(dfv2['Cost']))
print(type(dfv2.iloc[1]))
print(type(dfv2.loc['Store 1']))

<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>


In [26]:
b = dfv2.loc['Store 3']['Name'] # Problematic
b

'Isaiah'

In [27]:
#  Chaining such as dfv.loc['Store 3']['Name'] is best AVOIDED
# CHaining causes Pandas to return a COPY of the dataframe instead of a VIEW of it.
# For selecting data, this may just be slow
# HOWEVER for CHANGING data chaning a column may cause issues

# Here is a better way
dfv2.loc[:,['Name','Cost']] # Returns ALL ROWS and the columns in the list (['Name','Cost'])

Unnamed: 0,Name,Cost
Store 1,Chris,22.5
Store 2,Jeremiah,0.75
Store 3,Isaiah,2.3
Store 4,Hermione,Hello!


In [28]:
dfv2.loc['Store 2', ['Name','Item Purchased']]

Name              Jeremiah
Item Purchased    Cucumber
Name: Store 2, dtype: object

## Dropping DataFrame Rows and Columns

In [29]:
# THe line below DOES NOT remove Store 1. It returns a COPY of the datafram without Store 1.
# In other words this is NOT an implace operation
dfv.drop('Store 1')

Unnamed: 0,Cost,Hair Color,Item Purchased,Name
Store 2,0.75,,Cucumber,Jeremiah
Store 3,2.3,,Coffee,Isaiah
Store 4,Hello!,Brown,,Hermione


In [31]:
dfv # SEE? # We have to assign this copy to a variable for it to save

Unnamed: 0,Cost,Hair Color,Item Purchased,Name
Store 1,22.5,,Dog Food,Chris
Store 2,0.75,,Cucumber,Jeremiah
Store 3,2.3,,Coffee,Isaiah
Store 4,Hello!,Brown,,Hermione


In [32]:
# Deleting a column, axis = 1 means we are referring to a column
dfv.drop('Cost', axis=1)

Unnamed: 0,Hair Color,Item Purchased,Name
Store 1,,Dog Food,Chris
Store 2,,Cucumber,Jeremiah
Store 3,,Coffee,Isaiah
Store 4,Brown,,Hermione


In [35]:
test = pd.DataFrame([{'Item':'Pack','Category':'Pack','Quantity':1,'Weight':33},
                    {'Item':'Tent','Category':'Shelter','Quantity':1,'Weight':80},
                    {'Item':'Sleeping Pad','Category':'Sleep','Quantity':0,'Weight':27},
                    {'Item':'Sleeping Pad','Category':'Sleep','Quantity':0,'Weight':11}])
test

Unnamed: 0,Category,Item,Quantity,Weight
0,Pack,Pack,1,33
1,Shelter,Tent,1,80
2,Sleep,Sleeping Pad,0,27
3,Sleep,Sleeping Pad,0,11


In [36]:
#Set the item column as the index
test.set_index('Item', inplace = True)
test

Unnamed: 0_level_0,Category,Quantity,Weight
Item,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Pack,Pack,1,33
Tent,Shelter,1,80
Sleeping Pad,Sleep,0,27
Sleeping Pad,Sleep,0,11


In [38]:
# Let's say we want to drop all rows with a 0 quantity
a = test[test['Quantity']==0]
a

Unnamed: 0_level_0,Category,Quantity,Weight
Item,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Sleeping Pad,Sleep,0,27
Sleeping Pad,Sleep,0,11


In [39]:
a.index

Index(['Sleeping Pad', 'Sleeping Pad'], dtype='object', name='Item')

In [40]:
# Delete a column
# To permanently delete a row or column INPLACE we can use the del operator
dfvcopy = dfv.copy() # Remember, we want to use .copy(), otherwise dfvcopy will be a reference to dfv. Changes we 
del dfvcopy['Name']
dfvcopy

Unnamed: 0,Cost,Hair Color,Item Purchased
Store 1,22.5,,Dog Food
Store 2,0.75,,Cucumber
Store 3,2.3,,Coffee
Store 4,Hello!,Brown,


In [None]:
# Delete entire DataFrame
del dfvcopy

## Add a Column to a DataFrame

In [42]:
dfv10 = dfv.copy()
dfv10

Unnamed: 0,Cost,Hair Color,Item Purchased,Name
Store 1,22.5,,Dog Food,Chris
Store 2,0.75,,Cucumber,Jeremiah
Store 3,2.3,,Coffee,Isaiah
Store 4,Hello!,Brown,,Hermione


In [45]:
dfv10['Last Name'] = ['Chu','Huang','Ali','Leols']
dfv10

Unnamed: 0,Cost,Hair Color,Item Purchased,Name,Last Name
Store 1,22.5,,Dog Food,Chris,Chu
Store 2,0.75,,Cucumber,Jeremiah,Huang
Store 3,2.3,,Coffee,Isaiah,Ali
Store 4,Hello!,Brown,,Hermione,Leols


## Reading in CSV Files to DataFrames

In [46]:
olympics = pd.read_excel('Data/olympics.xlsx', skiprows = 1, index_col=1)
olympics.head()

Unnamed: 0.1,Unnamed: 0,№ Summer,01 !,02 !,03 !,Total,№ Winter,01 !.1,02 !.1,03 !.1,Total.1,№ Games,01 !.2,02 !.2,03 !.2,Combined total
Afghanistan (AFG),,13,0,0,2,2,0,0,0,0,0,13,0,0,2,2
Algeria (ALG),,12,5,2,8,15,3,0,0,0,0,15,5,2,8,15
Argentina (ARG),,23,18,24,28,70,18,0,0,0,0,41,18,24,28,70
Armenia (ARM),,5,1,2,9,12,6,0,0,0,0,11,1,2,9,12
Australasia (ANZ) [ANZ],,2,3,4,5,12,0,0,0,0,0,2,3,4,5,12


In [49]:
colnames = olympics.columns
print(colnames)

Index(['Unnamed: 0', '№ Summer', '01 !', '02 !', '03 !', 'Total', '№ Winter',
       '01 !.1', '02 !.1', '03 !.1', 'Total.1', '№ Games', '01 !.2', '02 !.2',
       '03 !.2', 'Combined total'],
      dtype='object')


In [50]:
# Remove first column 'Unnamed: 0'
olympics.drop(colnames[0],axis=1, inplace = True) # Inplace changes the actual DataFrame permanently
olympics.head()

Unnamed: 0,№ Summer,01 !,02 !,03 !,Total,№ Winter,01 !.1,02 !.1,03 !.1,Total.1,№ Games,01 !.2,02 !.2,03 !.2,Combined total
Afghanistan (AFG),13,0,0,2,2,0,0,0,0,0,13,0,0,2,2
Algeria (ALG),12,5,2,8,15,3,0,0,0,0,15,5,2,8,15
Argentina (ARG),23,18,24,28,70,18,0,0,0,0,41,18,24,28,70
Armenia (ARM),5,1,2,9,12,6,0,0,0,0,11,1,2,9,12
Australasia (ANZ) [ANZ],2,3,4,5,12,0,0,0,0,0,2,3,4,5,12


## Rename a Column

In [51]:
olympics.rename(columns = {'01 !': 'Num 1','02 !': 'Num 2','03 !': 'Num 3'}, inplace = True)
olympics.head()

Unnamed: 0,№ Summer,Num 1,Num 2,Num 3,Total,№ Winter,01 !.1,02 !.1,03 !.1,Total.1,№ Games,01 !.2,02 !.2,03 !.2,Combined total
Afghanistan (AFG),13,0,0,2,2,0,0,0,0,0,13,0,0,2,2
Algeria (ALG),12,5,2,8,15,3,0,0,0,0,15,5,2,8,15
Argentina (ARG),23,18,24,28,70,18,0,0,0,0,41,18,24,28,70
Armenia (ARM),5,1,2,9,12,6,0,0,0,0,11,1,2,9,12
Australasia (ANZ) [ANZ],2,3,4,5,12,0,0,0,0,0,2,3,4,5,12


In [53]:
olympics.rename(columns={'02 !.1':'Gold', '03 !.1':'BLEH'}, inplace = True)
olympics.head()

Unnamed: 0,№ Summer,Num 1,Num 2,Num 3,Total,№ Winter,01 !.1,Gold,BLEH,Total.1,№ Games,01 !.2,02 !.2,03 !.2,Combined total
Afghanistan (AFG),13,0,0,2,2,0,0,0,0,0,13,0,0,2,2
Algeria (ALG),12,5,2,8,15,3,0,0,0,0,15,5,2,8,15
Argentina (ARG),23,18,24,28,70,18,0,0,0,0,41,18,24,28,70
Armenia (ARM),5,1,2,9,12,6,0,0,0,0,11,1,2,9,12
Australasia (ANZ) [ANZ],2,3,4,5,12,0,0,0,0,0,2,3,4,5,12


## Querying DataFrames

In [55]:
# Let's find out which countries have at least 10 gold medals
tengold = olympics['Gold'] > 1
tengold #This has all the countries still, just with those countries with <= 10 golds marked as FALSE

Afghanistan (AFG)                               False
Algeria (ALG)                                   False
Argentina (ARG)                                 False
Armenia (ARM)                                   False
Australasia (ANZ) [ANZ]                         False
Australia (AUS) [AUS] [Z]                        True
Austria (AUT)                                    True
Azerbaijan (AZE)                                False
Bahamas (BAH)                                   False
Bahrain (BRN)                                   False
Barbados (BAR) [BAR]                            False
Belarus (BLR)                                    True
Belgium (BEL)                                   False
Bermuda (BER)                                   False
Bohemia (BOH) [BOH] [Z]                         False
Botswana (BOT)                                  False
Brazil (BRA)                                    False
British West Indies (BWI) [BWI]                 False
Bulgaria (BUL) [H]          

In [56]:
# The WHERE() funciton takes a BOOLEAN function, applies it to a dataframe and returns a new dataframe
only_gold = olympics.where(olympics['Gold'] > 0)
only_gold # Note that rows that do not meet the requirements specified by the Boolean funciton return NaN 

Unnamed: 0,№ Summer,Num 1,Num 2,Num 3,Total,№ Winter,01 !.1,Gold,BLEH,Total.1,№ Games,01 !.2,02 !.2,03 !.2,Combined total
Afghanistan (AFG),,,,,,,,,,,,,,,
Algeria (ALG),,,,,,,,,,,,,,,
Argentina (ARG),,,,,,,,,,,,,,,
Armenia (ARM),,,,,,,,,,,,,,,
Australasia (ANZ) [ANZ],,,,,,,,,,,,,,,
Australia (AUS) [AUS] [Z],25.0,139.0,152.0,177.0,468.0,18.0,5.0,3.0,4.0,12.0,43.0,144.0,155.0,181.0,480.0
Austria (AUT),26.0,18.0,33.0,35.0,86.0,22.0,59.0,78.0,81.0,218.0,48.0,77.0,111.0,116.0,304.0
Azerbaijan (AZE),,,,,,,,,,,,,,,
Bahamas (BAH),,,,,,,,,,,,,,,
Bahrain (BRN),,,,,,,,,,,,,,,


In [57]:
# How tp remove rows that are ALL NaN
# The dropna() method from the dataframe class can remove rows/cols where any or all of the data are missing
# DataFrame.dropna(axis=0, how='any' or 'all' , thresh=None, subset=None, inplace=False)
# https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.dropna.html

only_gold.dropna(axis=0, how='all', inplace = True)
only_gold

Unnamed: 0,№ Summer,Num 1,Num 2,Num 3,Total,№ Winter,01 !.1,Gold,BLEH,Total.1,№ Games,01 !.2,02 !.2,03 !.2,Combined total
Australia (AUS) [AUS] [Z],25.0,139.0,152.0,177.0,468.0,18.0,5.0,3.0,4.0,12.0,43.0,144.0,155.0,181.0,480.0
Austria (AUT),26.0,18.0,33.0,35.0,86.0,22.0,59.0,78.0,81.0,218.0,48.0,77.0,111.0,116.0,304.0
Belarus (BLR),5.0,12.0,24.0,39.0,75.0,6.0,6.0,4.0,5.0,15.0,11.0,18.0,28.0,44.0,90.0
Belgium (BEL),25.0,37.0,52.0,53.0,142.0,20.0,1.0,1.0,3.0,5.0,45.0,38.0,53.0,56.0,147.0
Bulgaria (BUL) [H],19.0,51.0,85.0,78.0,214.0,19.0,1.0,2.0,3.0,6.0,38.0,52.0,87.0,81.0,220.0
Canada (CAN),25.0,59.0,99.0,121.0,279.0,22.0,62.0,56.0,52.0,170.0,47.0,121.0,155.0,173.0,449.0
China (CHN) [CHN],9.0,201.0,146.0,126.0,473.0,10.0,12.0,22.0,19.0,53.0,19.0,213.0,168.0,145.0,526.0
Croatia (CRO),6.0,6.0,7.0,10.0,23.0,7.0,4.0,6.0,1.0,11.0,13.0,10.0,13.0,11.0,34.0
Czech Republic (CZE) [CZE],5.0,14.0,15.0,15.0,44.0,6.0,7.0,9.0,8.0,24.0,11.0,21.0,24.0,23.0,68.0
Czechoslovakia (TCH) [TCH],16.0,49.0,49.0,45.0,143.0,16.0,2.0,8.0,15.0,25.0,32.0,51.0,57.0,60.0,168.0


In [58]:
# When you input the boolean statement into the square brackets instead of using the where() dataframe method,
# you do not get the NaN values, just a dataframe of rows that return true for the specified boolean statement.
olympics[(olympics['Gold'] > 1) & (olympics['BLEH'] > 10)] # EACH BOOLEAN MASK MUST BE WRAPPED IN PARENTHESES

Unnamed: 0,№ Summer,Num 1,Num 2,Num 3,Total,№ Winter,01 !.1,Gold,BLEH,Total.1,№ Games,01 !.2,02 !.2,03 !.2,Combined total
Austria (AUT),26,18,33,35,86,22,59,78,81,218,48,77,111,116,304
Canada (CAN),25,59,99,121,279,22,62,56,52,170,47,121,155,173,449
China (CHN) [CHN],9,201,146,126,473,10,12,22,19,53,19,213,168,145,526
Czechoslovakia (TCH) [TCH],16,49,49,45,143,16,2,8,15,25,32,51,57,60,168
Finland (FIN),24,101,84,117,302,22,42,62,57,161,46,143,146,174,463
France (FRA) [O] [P] [Z],27,202,223,246,671,22,31,31,47,109,49,233,254,293,780
Germany (GER) [GER] [Z],15,174,182,217,573,11,78,78,53,209,26,252,260,270,782
East Germany (GDR) [GDR],5,153,129,127,409,6,39,36,35,110,11,192,165,162,519
West Germany (FRG) [FRG],5,56,67,81,204,6,11,15,13,39,11,67,82,94,243
Great Britain (GBR) [GBR] [Z],27,236,272,272,780,22,10,4,12,26,49,246,276,284,806


## Indexing DataFrames

In [85]:
# We previously indexed our dataframe with the Country column while reading it in
test = pd.read_excel('Data\olympics.xlsx', skiprows=1,index_col=1)
test.head()

Unnamed: 0.1,Unnamed: 0,№ Summer,01 !,02 !,03 !,Total,№ Winter,01 !.1,02 !.1,03 !.1,Total.1,№ Games,01 !.2,02 !.2,03 !.2,Combined total
Afghanistan (AFG),,13,0,0,2,2,0,0,0,0,0,13,0,0,2,2
Algeria (ALG),,12,5,2,8,15,3,0,0,0,0,15,5,2,8,15
Argentina (ARG),,23,18,24,28,70,18,0,0,0,0,41,18,24,28,70
Armenia (ARM),,5,1,2,9,12,6,0,0,0,0,11,1,2,9,12
Australasia (ANZ) [ANZ],,2,3,4,5,12,0,0,0,0,0,2,3,4,5,12


In [86]:
# Say we want to change the index from countries to '01 !'
# First we need to preserver the country data
test['Country'] = test.index # Remember index is an attribute of the datafram class not a method so no '()' is required
test.set_index('01 !', inplace=True)

In [87]:
test.head()

Unnamed: 0_level_0,Unnamed: 0,№ Summer,02 !,03 !,Total,№ Winter,01 !.1,02 !.1,03 !.1,Total.1,№ Games,01 !.2,02 !.2,03 !.2,Combined total,Country
01 !,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
0,,13,0,2,2,0,0,0,0,0,13,0,0,2,2,Afghanistan (AFG)
5,,12,2,8,15,3,0,0,0,0,15,5,2,8,15,Algeria (ALG)
18,,23,24,28,70,18,0,0,0,0,41,18,24,28,70,Argentina (ARG)
1,,5,2,9,12,6,0,0,0,0,11,1,2,9,12,Armenia (ARM)
3,,2,4,5,12,0,0,0,0,0,2,3,4,5,12,Australasia (ANZ) [ANZ]


In [88]:
# How can we remove the index?
test.reset_index(inplace=True)
test.head()

Unnamed: 0.1,01 !,Unnamed: 0,№ Summer,02 !,03 !,Total,№ Winter,01 !.1,02 !.1,03 !.1,Total.1,№ Games,01 !.2,02 !.2,03 !.2,Combined total,Country
0,0,,13,0,2,2,0,0,0,0,0,13,0,0,2,2,Afghanistan (AFG)
1,5,,12,2,8,15,3,0,0,0,0,15,5,2,8,15,Algeria (ALG)
2,18,,23,24,28,70,18,0,0,0,0,41,18,24,28,70,Argentina (ARG)
3,1,,5,2,9,12,6,0,0,0,0,11,1,2,9,12,Armenia (ARM)
4,3,,2,4,5,12,0,0,0,0,0,2,3,4,5,12,Australasia (ANZ) [ANZ]


In [89]:
# Multilevel indexing
test.set_index((['Country', '02 !']), inplace= True)
test.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,01 !,Unnamed: 0,№ Summer,03 !,Total,№ Winter,01 !.1,02 !.1,03 !.1,Total.1,№ Games,01 !.2,02 !.2,03 !.2,Combined total
Country,02 !,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
Afghanistan (AFG),0,0,,13,2,2,0,0,0,0,0,13,0,0,2,2
Algeria (ALG),2,5,,12,8,15,3,0,0,0,0,15,5,2,8,15
Argentina (ARG),24,18,,23,28,70,18,0,0,0,0,41,18,24,28,70
Armenia (ARM),2,1,,5,9,12,6,0,0,0,0,11,1,2,9,12
Australasia (ANZ) [ANZ],4,3,,2,5,12,0,0,0,0,0,2,3,4,5,12


In [91]:
# to query we can select by index using the follwoign command
test.loc[('Afghanistan (AFG)','0'), ('Virgin Islands (ISV)','1')]

KeyError: ('Afghanistan (AFG)', 0)